/* ================================================================= *\

   biblook -- look up references in a bibindexed BibTeX file

   This program was specifically developed for use with the
   computational geometry bibliographic database.  The database
   can be obtained by anonymous ftp from cs.usask.ca in the file
   `pub/geometry/geombib.tar.Z'.

   Version 1.0 written by Jeff Erickson <jeff@ics.uci.edu>, 27 Mar 92
   Version 2.0 written by Jeff Erickson <jeff@ics.uci.edu>, 17 Jun 92

   This program is in the public domain.  You may use it or modify
   it to your heart's content, at your own risk.  Bouquets, brickbats,
   and bug fixes may be sent to Jeff Erickson, jeffe@cs.berkeley.edu.

   %Make% gcc -O -o biblook biblook.c

   Usage: biblook bibfile [savefile]

   -----------------------------------------------------------------

   HOW IT WORKS:

   The user can enter any of the following commands:

   f[ind] [not] <field> <words>
	Find the entries containing the given words in any field
	with a prefix matching the <field> argument.  For example,
	`a' matches both `author' and `address', and `au' matches
	`author' only.  If the <field> argument is `-' (or any
	string with no letters or numbers), match any field.

	If `not' appears before the <field>, the sense of the search
	is reversed.  The symbols `~' and `!' can be used in place
	of `not'.

	Each word is a contiguous sequence of letters and digits.
	Case is ignored; accents should be omitted; apostrophes are
	not required.  Single characters and a few common words are
	also ignored.  Any word ending with an asterisk is treated
	as a prefix.  Thus, `point*' matches `point', `points',
	`pointer', etc.

   and [not] <field> <words>
   or [not] <field> <words>
	Intersect (resp. union) the results of the given search
	with the previous search.  Several of these commands may be
	combined on a single line.  Commands are handled in the order
	in which they appear; there is no precedence.  Unlike other
	commands, and like `not', these must be spelled out
	completely.  `&' can be used in place of `and', and `|' can
	be used in place of `or'.

   d[isplay]
	Display the results of the previous search.

   s[ave] [<filename>]
	Save the results of the previous results into the specified
	file.  If <filename> is omitted, the previous save file is
	used.  If no save file has ever been specified, results are
	saved in the file specified on the command line.  If no such
	file is specified, `save.bib' is used.  If the save file
	exists, results are appended to it.

   q[uit]/EOF
	Quit.

   Several commands can be combined on a single line by separating
   them with semicolons.  For example, the following command displays
   all STOC papers cowritten by Erdo"s without `Voronoi diagrams' in
   the title:

   f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d

   -----------------------------------------------------------------
   Version history

   1.0 <jge> 3/29/92	Initial version complete
   1.1 <jge> 4/3/92	Fixed GetToken bug.
			Prompts and feedback messages sent to stderr
			instead of stdout, so results can be
			redirected to a file.

   2.0 <jge> 6/17/92	Major change in file format and commands.
	1. Allow searching on any field or all fields.
	2. More extensive boolean queries (and, or, not)
	3. New command to save results to a file
	4. New command to display results, rather than displaying
	   them automatically.
	5. Allow searching for prefixes
	6. Pipe display results through $PAGER or /usr/ucb/more
   2.1 <jge> 7/8/92	Minor bug fixes.
   2.3 Bill Jones <jones@cs.usask.ca> 93/01/29
	1. Declarations common to bibindex.c and biblook.c factored out
	   to new file biblook.h.
	2. Index type of (signed) short overflows early; created typedef
	   Index_t, defined as unsigned short.
   2.4 Nelson H. F. Beebe <beebe@math.utah.edu> [01-Jun-1993]
        1. Remove some mixed-mode arithmetic.
	2. Add cast to return value of fork().
	3. Correct use and type of numoffsets so that code works
	   if Index_t is "unsigned int" or "unsigned long".

\* ================================================================= */


#include "biblook.h"

/* ======================= UTILITY FUNCTIONS ======================= */

/* ----------------------------------------------------------------- *\
|  void die(const char *msg1, const char *msg2) -- print an error message and die
\* ----------------------------------------------------------------- */
void die(const char *msg1, const char *msg2)
{
    fprintf(stderr, "Error: %s %s\n", msg1, msg2);
    exit(1);
}

/* ----------------------------------------------------------------- *\
|  void safefread(void *ptr, size_t size, size_t num, FILE *fp)
|
|  Read from the file, but die if there's an error.
\* ----------------------------------------------------------------- */
void safefread(void *ptr, size_t size, size_t num, FILE *fp)
{
    if (fread(ptr, size, num, fp) < num)
	die("Unexpected EOF in bix file.", "");
}

/* ----------------------------------------------------------------- *\
|  char safegetc(FILE *fp)
|
|  Get the next character safely.  Used by routines that assume that
|  they won't run into the end of file.
\* ----------------------------------------------------------------- */
char safegetc(FILE *fp)
{
    if (feof(fp))
	die("Unexpected EOF in bib file.", "");
    return getc(fp);
}


/* ========================== INDEX TABLES ========================= */

typedef struct
{
    Word  theword;
    Index_t numindex;
    Index_t *index;
} Index, *IndexPtr;

typedef struct
{
    Word     thefield;
    Index_t  numwords;
    IndexPtr words;
} IndexTable;

char numfields;
IndexTable *fieldtable;

Index_t numoffsets;
long *offsets;

/* ----------------------------------------------------------------- *\
|  void ReadWord(FILE *ifp, Word word)
|
|  Read a "pascal" string into the given buffer
\* ----------------------------------------------------------------- */
void ReadWord(FILE *ifp, Word word)
{
    char length;

    safefread((void *) &length, sizeof(char), 1, ifp);
    if (length > MAXWORD)
	die("Index file is corrupt", "(word too long).");

    safefread((void *) word, sizeof(char), length, ifp);
    word[length] = 0;
}

/* ----------------------------------------------------------------- *\
|  void GetOneTable(FILE *ifp, IndexTable *table)
|
|  Get one index table from the file
\* ----------------------------------------------------------------- */
void GetOneTable(FILE *ifp, register IndexTable *table)
{
    Index_t i, num;

    safefread((void *) &table->numwords, sizeof(Index_t), 1, ifp);
    table->words = (IndexPtr) malloc(table->numwords * sizeof(Index));

    for (i=0; i<table->numwords; i++)
    {
	ReadWord(ifp, table->words[i].theword);
	safefread((void *) &num, sizeof(Index_t), 1, ifp);
	table->words[i].numindex = num;
	table->words[i].index = (Index_t *) malloc(num * sizeof(Index_t));
	safefread((void *) table->words[i].index, sizeof(Index_t), num,
		  ifp);
    }
}

/* ----------------------------------------------------------------- *\
|  void GetTables(char *filename)
|
|  Get the tables from the index file.
\* ----------------------------------------------------------------- */
void GetTables(char *filename)
{
    int version, i;
    FILE *ifp;

    ifp = fopen(filename, "r");
    if (!ifp)
	die("Can't read", filename);
    if (fscanf(ifp, "bibindex %d %*[^\n]%*c", &version) < 1)
	die(filename, "is not a bibindex file!");
    if (version != FILE_VERSION)
	die(filename, "is the wrong version.  Please rerun bibindex.");

    safefread((void *) &numoffsets, sizeof(Index_t), 1, ifp);
    offsets = (long *) malloc(numoffsets * sizeof(long));
    safefread((void *) offsets, sizeof(long), numoffsets, ifp);

    safefread((void *) &numfields, sizeof(char), 1, ifp);
    fieldtable = (IndexTable *) malloc(numfields * sizeof(IndexTable));

    for (i=0; i<numfields; i++)
	ReadWord(ifp, fieldtable[i].thefield);

    for (i=0; i<numfields; i++)
	GetOneTable(ifp, fieldtable+i);

    fclose(ifp);
}

/* ----------------------------------------------------------------- *\
|  void FreeTables(void)
|
|  Free the index tables.
\* ----------------------------------------------------------------- */
void FreeTables(VOID)
{
    register int i;
    register Index_t j;

    for (i=0; i<numfields; i++)
    {
	for (j=0; j<fieldtable[i].numwords; j++)
	    free(fieldtable[i].words[j].index);
	free(fieldtable[i].words);
    }
    free(fieldtable);
    free(offsets);
}


/* ----------------------------------------------------------------- *\
|  Index_t FindIndex(IndexTable table, char *word, char prefix)
|
|  Find the index of a word in a table.  Return INDEX_NAN if the word isn't
|  there.  If prefix is true, return the index of the first matching
|  word.
\* ----------------------------------------------------------------- */
Index_t FindIndex(IndexTable table, char *word, char prefix)
{
    register IndexPtr words = table.words;
    register int hi, lo, mid;	/* must be signed */
    register int cmp;

    hi = table.numwords-1;
    lo = 0;

    while (hi>=lo)
    {
	mid = (hi+lo)/2;
	cmp = strcmp(word, words[mid].theword);

	if (cmp == 0)
	    return mid;
	else if (cmp < 0)
	    hi = mid-1;
	else if (cmp > 0)
	    lo = mid+1;
    }

    if (prefix && !strncmp(word, words[lo].theword, strlen(word)))
	return lo;
    else
	return INDEX_NAN;
}


/* =================== SET MANIPULATION ROUTINES =================== */

#define SETSCALE (sizeof(unsigned long)*8)

static Index_t setsize;
static unsigned long setmask;	   /* used to erase extra bits */
typedef unsigned long *Set;

/* ----------------------------------------------------------------- *\
|  Set NewSet(void)
|
|  Get a new variable to hold sets of integers in the range
|  [0, numoffsets].  Set setsize and setmask.
\* ----------------------------------------------------------------- */
Set NewSet(VOID)
{
    setsize = (numoffsets + SETSCALE - 1)/SETSCALE;	/* HACK */
    setmask = (1<<(numoffsets%SETSCALE)) - 1;		/* KLUDGE */

    return (Set) malloc(setsize * SETSCALE);
}

/* ----------------------------------------------------------------- *\
|  void EmptySet(Set theset)
|
|  Empty the set.
\* ----------------------------------------------------------------- */
void EmptySet(Set theset)
{
    register Index_t i;
    for (i=0; i<setsize; i++)
	theset[i] = 0L;
}

/* ----------------------------------------------------------------- *\
|  void SetUnion(Set src1, Set src2, Set result)
|
|  Get the union of two sets
\* ----------------------------------------------------------------- */
void SetUnion(Set src1, Set src2, Set result)
{
    register Index_t i;
    for (i=0; i<setsize; i++)
	result[i] = src1[i] | src2[i];
}

/* ----------------------------------------------------------------- *\
|  void SetInsersection(Set src1, Set src2, Set result)
|
|  Get the intersection of two sets
\* ----------------------------------------------------------------- */
void SetIntersection(Set src1, Set src2, Set result)
{
    register Index_t i;
    for (i=0; i<setsize; i++)
	result[i] = src1[i] & src2[i];
}

/* ----------------------------------------------------------------- *\
|  void SetComplement(Set src, Set result)
|
|  Get the complement of a set
\* ----------------------------------------------------------------- */
void SetComplement(Set src, Set result)
{
    register Index_t i;
    for (i=0; i<setsize; i++)
	result[i] = ~src[i];
    result[setsize-1] &= setmask;	/* clear those last few bits */
}

/* ----------------------------------------------------------------- *\
|  void CopySet(Set src, Set result)
|
|  Copy one set into another
\* ----------------------------------------------------------------- */
void CopySet(Set src, Set result)
{
    register Index_t i;
    for (i=0; i<setsize; i++)
	result[i] = src[i];
}


/* ----------------------------------------------------------------- *\
|  int CountSet(Set theset)
|
|  Get the cardinality of the set
\* ----------------------------------------------------------------- */
int CountSet(Set theset)
{
    register unsigned i, j, count;

    count = 0;
    for (i=0; i<(unsigned)setsize; i++)
	for (j=0; j<(unsigned)SETSCALE; j++)
	    if (theset[i] & (1<<j))
		count++;

    return count;
}

/* ----------------------------------------------------------------- *\
|  void BuildSet(Set theset, Index_t *thelist, Index_t length)
|
|  Build a set out of a list of integers
\* ----------------------------------------------------------------- */
void BuildSet(Set theset, Index_t *thelist, Index_t length)
{
    register unsigned i;

    EmptySet(theset);
    for (i=0; i<(unsigned)length; i++)
	theset[thelist[i]/SETSCALE] |= 1 << (thelist[i] % SETSCALE);
}

/* ----------------------------------------------------------------- *\
|  void DoForSet(Set theset, void (*action)(int, void *), void *arg)
|
|  Do something to every element in a set
\* ----------------------------------------------------------------- */
void DoForSet(Set theset, void (*action)(int, void *), void *arg)
{
    register unsigned i, j;

    for (i=0; i<(unsigned)setsize; i++)
	for (j=0; j<(unsigned)SETSCALE; j++)
	    if (theset[i] & (1<<j))
		(*action)(SETSCALE*i + j, arg);
}


/* ======================== SEARCH ROUTINES ======================== */

Set results, oldresults, oneword, onefield;
short firstfield, lastfield;		/* indices into fieldtable */

/* ----------------------------------------------------------------- *\
|  void InitSearch(void)
|
|  Initialize the search lists
\* ----------------------------------------------------------------- */
void InitSearch(VOID)
{
    results = NewSet();
    oldresults = NewSet();
    oneword = NewSet();
    onefield = NewSet();
    firstfield = lastfield = -1;
}

/* ----------------------------------------------------------------- *\
|  void FreeSearch(void)
|
|  Free the search list
\* ----------------------------------------------------------------- */
void FreeSearch(VOID)
{
    free(results);
    free(oldresults);
    free(oneword);
    free(onefield);
}

/* ----------------------------------------------------------------- *\
|  void ClearResults(void)
|
|  Clear the current and old results
\* ----------------------------------------------------------------- */
void ClearResults(VOID)
{
    EmptySet(results);
    SetComplement(results, results);
    CopySet(results, oldresults);
}

/* ----------------------------------------------------------------- *\
|  void SaveResults(void)
|
|  Save and clear the current results
\* ----------------------------------------------------------------- */
void SaveResults(VOID)
{
    CopySet(results, oldresults);
    EmptySet(results);
    SetComplement(results, results);
}

/* ----------------------------------------------------------------- *\
|  void CombineResults(char invert, char intersect)
|
|  Combine current results with old results
\* ----------------------------------------------------------------- */
void CombineResults(char invert, char intersect)
{
    if (invert)
	SetComplement(results, results);
    if (intersect)
	SetIntersection(results, oldresults, results);
    else
	SetUnion(results, oldresults, results);
}


/* ----------------------------------------------------------------- *\
|  char SetUpField(char *field)
|
|  Set up the search fields.  Return the number of searchable fields.
\* ----------------------------------------------------------------- */
char SetUpField(char *field)
{
    int i, len;

    firstfield = -1;
    len = strlen(field);

    for (i=0; i<numfields; i++)
    {
	if (!strncmp(field, fieldtable[i].thefield, len))
	{
	    if (firstfield == -1)
		firstfield = i;
	    lastfield = i;
	}
    }

    if (firstfield == -1)
    {
	printf("\tNo searchable fields matching \"%s\".\n", field);
	return 0;
    }
    else
	return lastfield - firstfield + 1;

}

const char *badwords[] = BADWORDS;

/* ----------------------------------------------------------------- *\
|  void FindWord(char *word, char prefix)
|
|  Find a word in the currently active field and update `results'.
|  If the prefix flag is set, find all words having the given prefix.
\* ----------------------------------------------------------------- */
void FindWord(register char *word, char prefix)
{
    register IndexPtr words;
    Index_t win, len;
    int i;

    if (!prefix)
    {
	if (!word[0])
	{
	    printf("\t[ignoring empty string]\n");
	    return;
	}
	if (!word[1])
	{
	    printf("\t[ignoring \"%s\"]\n", word);
	    return;
	}
	for (i=0; badwords[i]; i++)
	{
	    if (!strcmp(badwords[i], word))
	    {
		printf("\t[ignoring \"%s\"]\n", word);
		return;
	    }
	}
    }

    EmptySet(oneword);
    len = strlen(word);

    for (i=firstfield; i<=lastfield; i++)
    {
	words = fieldtable[i].words;
	win = FindIndex(fieldtable[i], word, prefix);

	if (win != INDEX_NAN)
	{
	    if (prefix)
	    {
		while ((win < fieldtable[i].numwords) &&
		       !strncmp(words[win].theword, word, len))
		{
		    BuildSet(onefield, words[win].index,
			     words[win].numindex);
		    SetUnion(oneword, onefield, oneword);
		    win++;
		}
	    }
	    else
	    {
		BuildSet(onefield, words[win].index,
			 words[win].numindex);
		SetUnion(oneword, onefield, oneword);
	    }
	}
    }

    SetIntersection(oneword, results, results);
}


/* ============================= OUTPUT ============================ */
FILE *bibfp;

/* ----------------------------------------------------------------- *\
|  void ReportResults(void)
|
|  Report the results of the previous search.
\* ----------------------------------------------------------------- */
void ReportResults(VOID)
{
    int numresults;

    numresults = CountSet(results);

    if (numresults == 0)
	printf("\tNo matches found.\n");
    else if (numresults == 1)
	printf("\t1 match found.\n");
    else
	printf("\t%d matches found.\n", numresults);
}

/* ----------------------------------------------------------------- *\
|  void PrintEntry(int entry, FILE *ofp)
|
|  Print the entry.
\* ----------------------------------------------------------------- */
void PrintEntry(int entry, FILE *ofp)
{
    char ch;
    char braces;
    char quotes;

    if (entry >= (int)numoffsets)	/* extra bits might be set */
	return;

    putc('\n', ofp);
    if (fseek(bibfp, offsets[entry], 0))
	die("Index file is corrupt.", "");

    ch = safegetc(bibfp);

    while (ch != '@')
    {
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }
	while ((ch != '{') && (ch != '('))
    {
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }

    braces = quotes = 0;

    putc(ch, ofp);
    ch = safegetc(bibfp);
    while (braces || quotes || ((ch != '}') && (ch != ')')))
    {
	if (ch == '{')
	    braces++;
	else if (ch == '}')
	    braces--;
	else if ((ch == '"') && !braces)
	    quotes = !quotes;
	putc(ch, ofp);
	ch = safegetc(bibfp);
    }

    putc(ch, ofp);
    putc('\n', ofp);
}

/* ----------------------------------------------------------------- *\
|  void PrintResults(char *filename)
|
|  Print the current search results into the given file.  If the
|  filename is NULL, pipe the output through $PAGER.
\* ----------------------------------------------------------------- */
void PrintResults(char *filename)
{
    int numresults;
    FILE *ofp;
    char *pager;
    char *the_tmpfile = (char*)NULL;
    int childpid;

    numresults = CountSet(results);
    if (numresults == 0)
	printf("\tNothing to display!\n");
    else if (numresults == numoffsets)
	printf("\tI can't display the entire bibliography!\n");
    else
    {
	if (filename)
	{
	    ofp = fopen(filename, "a");
	    if (!ofp)
	    {
		printf("\tCan't write to \"%s\"!\n", filename);
		return;
	    }
	}
	else
	{
	    the_tmpfile = (char*)tempnam(NULL, "bibl.");
	    ofp = fopen(the_tmpfile, "w");
	    if (!ofp)
	    {
		printf("\tCan't write to \"%s\"!\n", the_tmpfile);
		return;
	    }
	}

	if (filename)
	{
	    time_t now = time(0);
	    fprintf(ofp, "%% Retrieved by biblook %d.%d at %s",
		    MAJOR_VERSION, MINOR_VERSION, ctime(&now));
	}

	DoForSet(results, (void (*)(int, void *)) PrintEntry,
		 (void *) ofp);

	fclose(ofp);
	if (filename)
	    printf("\tResults saved in \"%s\"\n", filename);
	else
	{
	    pager = (char*)getenv("PAGER");

	    if ((childpid = (int)fork()) != 0)
		waitpid(childpid, (int *) 0, 0);
	    else if (pager)
	    {
		execlp(pager, pager, the_tmpfile, (char *) 0);
		perror(pager);		/* should never get here! */
		exit(0);
	    }
	    else
	    {
		execl("/usr/ucb/more", "more", the_tmpfile, (char *) 0);
		perror("/usr/ucb/more");
		exit(0);
	    }

	    unlink(the_tmpfile);
	    free(the_tmpfile);	    	/* malloc'ed by tempnam() */
	    putchar('\n');
	}
    }
}


/* ======================== USER INTERFACE ========================= */

typedef enum {
    T_Find, T_Display, T_Save, T_Quit, T_Word,
    T_And, T_Or, T_Not, T_Semi, T_Return, T_Help
} Token;

/* ----------------------------------------------------------------- *\
|  Token GetToken(char *tokenstr)
|
|  Get the next input token.
\* ----------------------------------------------------------------- */
Token GetToken(char *tokenstr)
{
    static char line[256];
    static short pos;
    static char neednew = 1;
    short tlen = 0;

    *tokenstr = 0;

    if (neednew)
    {
	printf("biblook: ");
	if (!fgets(line, 254, stdin))
	    return T_Quit;

	pos = 0;
	neednew = 0;
    }

    while ((line[pos] == ' ') || (line[pos] == '\t'))
	pos++;

    switch (line[pos])
    {
	case '\n':
	    pos++;
	    neednew = 1;
	    return T_Return;

	case '&':
	    pos++;
	    return T_And;

	case '|':
	    pos++;
	    return T_Or;

	case '~':
	case '!':
	    pos++;
	    return T_Not;

	case ';':
	    pos++;
	    return T_Semi;

	default:
	    tokenstr[tlen++] = tolower(line[pos++]);
	    while (!isspace(line[pos]) && (line[pos] != ';') &&
		   (line[pos] != '&') && (line[pos] != '|'))
	    {
		tokenstr[tlen++] = tolower(line[pos++]);
	    }
	    tokenstr[tlen] = 0;

	    /* I really ought to use a hash table here. */

	    if (!strncmp(tokenstr, "find", tlen))
		return T_Find;
	    else if (!strncmp(tokenstr, "display", tlen))
		return T_Display;
	    else if (!strncmp(tokenstr, "help", tlen))
		return T_Help;
	    else if (!strncmp(tokenstr, "?", tlen))
		return T_Help;
	    else if (!strncmp(tokenstr, "save", tlen))
		return T_Save;
	    else if (!strncmp(tokenstr, "quit", tlen))
		return T_Quit;
	    else if (!strcmp(tokenstr, "and"))
		return T_And;
	    else if (!strcmp(tokenstr, "or"))
		return T_Or;
	    else if (!strcmp(tokenstr, "not"))
		return T_Not;
	    else
		return T_Word;
    }
}


/* ----------------------------------------------------------------- *\
|  char Strip(char *string)
|
|  Strip all but alphanumeric characters out of the string.  Return
|  true if the original string ended with the prefix character '*'.
\* ----------------------------------------------------------------- */
char Strip(char *string)
{
    char prefix = 0;
    char *src = string;

    while (*src)
    {
	prefix = (*src == '*');
	if (isalnum(*src))
	    *string++ = *src;
	src++;
    }
    *string = 0;
    return prefix;
}

/* ----------------------------------------------------------------- *\
|  void CmdError(void)
|
|  Print syntax error message
\* ----------------------------------------------------------------- */
void CmdError(VOID)
{
    printf("?? Syntax error ??\n");
}

static const char* helplines[] =
{
    "biblook permits rapid lookup in a BibTeX bibliography data",
    "base, using a compact binary index file prepared by bibindex(1).",
    "",
    "Available commands:",
    "? or h[elp]",
    "     Display this help message.",
    "",
    "f[ind] [not] <field> <words>",
    "     Find the entries containing the given words in any",
    "     field with a prefix matching the <field> argument.  For",
    "     example, `a' matches both `author' and `address', and",
    "     `au' matches `author' only.  If the <field> argument is",
    "     `-' (or any string with no letters or numbers), match",
    "     any field.",
    "",
    "     If `not' appears before the <field>, the sense of the",
    "     search is reversed.  The symbols `~' and `!' can be",
    "     used in place of `not'.",
    "",
    "     Each word is a contiguous sequence of letters and",
    "     digits.  Case is ignored; accents should be omitted;",
    "     apostrophes are not required.  Single characters and a",
    "     few common words are also ignored.  Any word ending",
    "     with an asterisk is treated as a prefix.  Thus,",
    "     `point*' matches `point', `points', `pointer', etc.",
    "",
    "and [not] <field> <words>",
    "or [not] <field> <words>",
    "     Intersect (resp. union) the results of the given search",
    "     with the previous search.  Several of these commands",
    "     may be combined on a single line.  Commands are handled",
    "     in the order in which they appear; there is no pre-",
    "     cedence.  Unlike other commands, and like `not', these",
    "     must be spelled out completely.  `&' can be used in",
    "     place of `and', and `|' can be used in place of `or'.",
    "",
    "d[isplay]",
    "     Display the results of the previous search.",
    "",
    "s[ave] [<filename>]",
    "     Save the results of the previous results into the",
    "     specified file.  If <filename> is omitted, the previous",
    "     save file is used.  If no save file has ever been",
    "     specified, results are saved in the file specified on",
    "     the command line.  If no such file is specified,",
    "     `save.bib' is used.  If the save file exists, results",
    "     are appended to it.",
    "",
    "q[uit]/EOF",
    "     Quit.",
    "",
    "Several commands can be combined on a single line by",
    "separating them with semicolons.  For example, the following",
    "command displays all STOC papers cowritten by Erdo\"s",
    "without `Voronoi diagrams' in the title:",
    "",
    "f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d",
    "",
    (const char*)NULL,
};

/* ----------------------------------------------------------------- *\
|  void GiveHelp(void)
|
|  Print a help message.  Lines are stored as separate strings to
|  avoid hitting compiler limits.
\* ----------------------------------------------------------------- */

void GiveHelp(VOID)
{
    int k;

    for (k = 0; helplines[k]; ++k)
	printf("\t%s\n",helplines[k]);
}

/* ----------------------------------------------------------------- *\
|  States for Lookup()
\* ----------------------------------------------------------------- */
typedef enum {
    Wait, Find, FindN, FindF, FindW,
    Display, Save, SaveF, Error
} CmdState;

/* ----------------------------------------------------------------- *\
|  void Lookup(const char *defsave)
|
|  Execute commands until the user quits.  Defsave is the default
|  save file name.  This is one big finite state machine.  It's long
|  and boring, but that's interface code for ya!
\* ----------------------------------------------------------------- */
void Lookup(const char *defsave)
{
    char tokenstr[256];
    char savefile[256];
    CmdState state = Wait;
    Token thetoken;
    char intersect = 1;		/* 1 = intersect, 0 = union */
    char invert = 0;		/* 1 = invert */
    char prefix;		/* 1 = word is really a prefix */

    ClearResults();
    strcpy(savefile, defsave);

    while (1)
    {
	thetoken = GetToken(tokenstr);

	if ((thetoken == T_Quit) && !tokenstr[0])
	    return;
	else if (thetoken == T_Help)
	{
	    GiveHelp();
	    continue;
	}

	switch (state)
	{
	    case Wait:
		switch (thetoken)
		{
		    case T_Quit:
			return;
		    case T_Find:
			state = Find;
			ClearResults();
			break;
		    case T_And:
			state = Find;
			SaveResults();
			break;
		    case T_Or:
			state = Find;
			intersect = 0;
			SaveResults();
			break;
		    case T_Display:
			state = Display;
			break;
		    case T_Save:
			state = Save;
			break;
		    case T_Return:
		    case T_Semi:
			break;
		    default:
			state = Error; CmdError(); break;
		}
		break;

	    case Find:
		if (thetoken == T_Not)
		{
		    state = FindN;
		    invert = 1;
		}
		else
		{
		    if (tokenstr[0])
		    {
			state = FindF;
			Strip(tokenstr);
			if (!SetUpField(tokenstr))
			    state = Error;
		    }
		    else
		    {
			state = (thetoken == T_Return) ? Wait : Error;
			CmdError();
		    }
		}
		break;

	    case FindN:
		if (tokenstr[0])
		{
		    state = FindF;
		    Strip(tokenstr);
		    if (!SetUpField(tokenstr))
			state = Error;
		}
		else
		{
		    state = (thetoken == T_Return) ? Wait : Error;
		    CmdError();
		}
		break;

	    case FindF:
		if (tokenstr[0])
		{
		    state = FindW;
		    prefix = Strip(tokenstr);
		    FindWord(tokenstr, prefix);
		}
		else
		{
		    state = (thetoken == T_Return) ? Wait : Error;
		    CmdError();
		}
		break;

	    case FindW:
		switch (thetoken)
		{
		    case T_And:
			state = Find;
			CombineResults(invert, intersect);
			SaveResults();
			invert = 0;
			intersect = 1;
			break;
		    case T_Or:
			state = Find;
			CombineResults(invert, intersect);
			SaveResults();
			invert = 0;
			intersect = 0;
			break;
		    case T_Semi:
			state = Wait;
			CombineResults(invert, intersect);
			invert = 0;
			intersect = 1;
			break;
		    case T_Return:
			state = Wait;
			CombineResults(invert, intersect);
			ReportResults();
			invert = 0;
			intersect = 1;
			break;
		    default:
			if (tokenstr[0])
			{
			    state = FindW;
			    prefix = Strip(tokenstr);
			    FindWord(tokenstr, prefix);
			}
			else
			{
			    state = Error;
			    CmdError();
			}
			break;
		}
		break;

	    case Display:
		if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(NULL);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case Save:
		if (tokenstr[0])
		{
		    state = SaveF;
		    strcpy(savefile, tokenstr);
		}
		else if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(savefile);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case SaveF:
		if ((thetoken == T_Semi) || (thetoken == T_Return))
		{
		    state = Wait;
		    PrintResults(savefile);
		}
		else
		{
		    state = Error;
		    CmdError();
		}
		break;

	    case Error:
		switch (thetoken)
		{
		    case T_Quit:
			return;
		    case T_Return:
			state = Wait;
			break;
		    default:
			break;
		}
		break;
	}
    }
}


/* ================================================================= *\
|  The main program
\* ================================================================= */
int main(int argc, char **argv)
{
    char bibfile[256];
    char bixfile[256];
    struct stat bibstat, bixstat;
    char *p;

    printf("biblook version %d.%d  file version %d\n",
	   (int)MAJOR_VERSION, (int)MINOR_VERSION, (int)FILE_VERSION);
    printf("Type ? or h for help\n");

    if ((argc != 2) && (argc != 3))
    {
	fprintf(stderr, "Usage: biblook bib [savefile]\n");
	exit(1);
    }

    if (((p = strrchr(argv[1],'.')) != (char*)NULL) &&
	(strcmp(p, ".bib") == 0))
	*p = '\0';			/* remove any .bib extension */

    sprintf(bibfile, "%s.bib", argv[1]);
    sprintf(bixfile, "%s.bix", argv[1]);

    stat(bibfile, &bibstat);
    stat(bixfile, &bixstat);
    if (bibstat.st_mtime > bixstat.st_mtime)
	die(bixfile, "is out of date.  Please rerun bibindex.");

    bibfp = fopen(bibfile, "r");
    if (!bibfp)
	die("Can't read", bibfile);

    GetTables(bixfile);
    InitSearch();

    if (argc == 3)
	Lookup(argv[2]);
    else
	Lookup("save.bib");

    FreeSearch();
    FreeTables();

    fclose(bibfp);
    exit(0);
    return(0);
}
