/* findstring.c - by Daniel Rice, 11/91.
   Permission is granted for any use of this code. */

#include <stdio.h>

#define MAX_TEXTLEN 305445 /* Size of the Torah. */
#define MAX_STRLEN 25 /* String being searched for. */
#define HUGE (MAX_TEXTLEN+1) /* Bigger than any possible skip. */

char text[MAX_TEXTLEN];
char string[MAX_STRLEN];
int textlen = 0;
int stringlen = 0;

int same (a, b)
char a, b;

/* Handle equivalence of regular and 'sofiot' letters. */

{
	static int urtype[27] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11,
					 12, 12, 13, 13, 14, 15, 16, 16, 17, 17, 18, 19, 20, 21};

	return (urtype[a - 0200] == urtype[b - 0200]);
}


string_here_p (offset, skip)
int offset, skip;

/* Is the string s present in text at the given offset and skip? 
   We assume that text[offset] == string[0]. */

{
	int i, end;

	if ((end = offset + (stringlen - 1)*skip) >= textlen || end < 0)
		return 0; /* String would run off the end. */

	for (i = 1; i < stringlen; ++i)
		if (!same(text[offset + i*skip], string[i]))
			return 0;

	return 1;
}


main (argc, argv)
char **argv;

{
	int c, i;
	int offset;
	int skip;
	int min_skip = -50;
	int max_skip = 50;
	int include_skip_one = 0;
	int pos = 1;
	int matches = 0;
	int minimum_pos = HUGE;
	int minimum_neg = -HUGE;

	if (argc < 2) {
		fprintf(stderr, "usage: %s [-o] string [min max]\n", argv[0]);
		fprintf(stderr, "'string' is the string to be searched for, using the encoding shown\n");
		fprintf(stderr, "In the file 'ENCODING'.  Regular letters and sofiot are\n");
		fprintf(stderr, "interchangeable.  'max' and 'min' are the smallest and largest skips to\n");
		fprintf(stderr, "search for.  A range of -50 to 50 is the default.  Negative values are\n");
		fprintf(stderr, "permitted, and cause the search to look backwards through the text.\n");
		fprintf(stderr, "The -o flag includes a skip of 1 (i.e., straight text) in the search range.n");
		fprintf(stderr, "The input text (stdin) must be raw; that is, it should contain only\n");
		fprintf(stderr, "the actual letters of the Tanach.  Use 'makeraw' to create such a\n");
		fprintf(stderr, "file.\n");
		exit(0);
	}

	if (argv[pos][0] == '-' && argv[pos][1] == 'o') {
		include_skip_one = 1;
		++pos;
	}

	strcpy (string, argv[pos++]);
	stringlen = strlen (string);
	for (i = 0; i < stringlen; ++i)
		string[i] += 0200 - '`';

	if (argc - pos >= 2) {
		min_skip = atoi(argv[pos++]);
		max_skip = atoi(argv[pos++]);
	}

	while ((c = getchar()) != EOF) { /* Read in the entire text. */
		text[textlen++] = c;
	}

	for (offset = 0; offset < textlen; ++offset) {
		if (same(text[offset], string[0])) {
			for (skip = min_skip; skip <= max_skip; ++skip) {
				if (skip == 0 || ((skip == 1) && !include_skip_one))
					continue;
				if (string_here_p (offset, skip)) {
					++matches;
					if (skip > 0 && skip < minimum_pos)
						minimum_pos = skip;
					else if (skip < 0 && skip > minimum_neg)
						minimum_neg = skip;
					printf("offset = %d, skip = %d.\n", offset, skip);
					fflush(stdout);
				}
			}
		}
	}

	printf("--------------------------\n");
	printf("Total matches: %d.\n", matches);
	if (minimum_pos < HUGE)
		printf ("The minimal positive skip was %d.\n", minimum_pos);
	if (minimum_neg > -HUGE)
		printf ("The minimal negative skip was %d.\n", minimum_neg);
}
