#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ptrie.h"
#include "stack.h"

/*
 * Name this program was invoked with (argv[0]); set at the top of main()
 * and used as the prefix of every diagnostic message.
 */
static char *program;

/*
 * Scratch table that pinyin_match() fills (through gather_stack()) with the
 * Pinyin strings held on one stack, and the number of slots currently in
 * use.  The table size is the same number that add_line() passes to
 * stack_create().
 */
static char *pin_table[20];
static int num_pin;

/*
 * Element destructor handed to stack_free(): releases one Pinyin string
 * that was allocated with malloc().  A NULL element is accepted and
 * ignored.
 */
static void
#ifdef __STDC__
stack_free_func(char *data)
#else /* !__STDC__ */
stack_free_func(data)
char *data;
#endif /* !__STDC__ */
{
    /* free(NULL) is defined to do nothing, so no separate guard is needed. */
    free(data);
}

/*
 * Value destructor handed to ptrie_free(): every value stored in the trie
 * is a stack of Pinyin strings, so release the stack and let
 * stack_free_func() dispose of its elements.  A NULL value is ignored.
 */
static void
#ifdef __STDC__
ptrie_free_func(stack data)
#else /* !__STDC__ */
ptrie_free_func(data)
stack data;
#endif /* !__STDC__ */
{
    if (data == NULL)
      return;

    stack_free(data, stack_free_func);
}

/*
 * Parse one line of the danci file and record its Pinyin in the trie.
 *
 * A line holds a Pinyin reading (everything up to the first space)
 * followed by any number of two-byte danci, each starting with a byte
 * whose high bit is set.  For every danci found, a NUL-terminated copy of
 * the Pinyin is pushed onto the stack stored under that danci in ``tree'';
 * the stack is created and inserted the first time a danci is seen.
 *
 * Every push gets its own allocation.  The stacks are released through
 * stack_free_func(), which calls free() on what it is given, so one
 * allocation shared by several stacks would presumably be freed more than
 * once.
 *
 * Returns 0 on success.  Returns -1 after printing a message to stderr if
 * memory runs out or a stack refuses the push.
 */
static int
#ifdef __STDC__
add_line(char *line, ptrie tree)
#else /* !__STDC__ */
add_line(line, tree)
char *line;
ptrie tree;
#endif /* !__STDC__ */
{
    char *pinyin, *ptr;
    unsigned char danci[3];
    stack nstack;
    int dcount, is_new;
    size_t plen;

    /*
     * Measure the Pinyin: it runs from the start of the line to the
     * first space (or to the end of the line).
     */
    ptr = line;
    while(*ptr && *ptr != ' ')
      ptr++;
    plen = (size_t)(ptr - line);

    /*
     * Move to the first danci.
     */
    while(*ptr && !(*ptr & 0x80))
      ptr++;

    /*
     * Do this while more danci remain in the line.
     */
    dcount = 0;
    while(*ptr != '\0') {
        /*
         * A lead byte directly followed by the terminator is a truncated
         * danci.  Stop here instead of stepping past the end of the line.
         */
        if (ptr[1] == '\0')
          break;

        danci[0] = *ptr++;
        danci[1] = *ptr++;
        danci[2] = '\0';
        dcount++;

        /*
         * Make this stack's private, NUL-terminated copy of the Pinyin.
         */
        pinyin = (char *)malloc(plen + 1);
        if (pinyin == NULL) {
            fprintf(stderr, "%s: out of memory (%d)\n", program, dcount);
            return(-1);
        }
        memcpy(pinyin, line, plen);
        pinyin[plen] = '\0';

        nstack = (stack)ptrie_find(tree, (unsigned char *)danci);
        is_new = (nstack == NULL);
        if (is_new) {
            nstack = stack_create(20);
            if (nstack == NULL) {
                fprintf(stderr, "%s: out of memory (%d)\n", program, dcount);
                free(pinyin);
                return(-1);
            }
        }

        /*
         * Make sure we don't overflow the stack when pushing the new
         * Pinyin on it.  The leading 0/1 in the message tells whether
         * the stack was newly created (0) or already in the trie (1).
         */
        if (stack_push(nstack, (void *)pinyin) < 0) {
            fprintf(stderr, "%s: %d stack overflow (%s %d)\n", program,
                    is_new ? 0 : 1, pinyin, dcount);
            /* The copy never made it onto a stack, so release it here. */
            free(pinyin);
            if (is_new)
              stack_free(nstack, stack_free_func);
            return(-1);
        }

        /*
         * Insert a newly created danci/pinyin stack pair into the
         * search trie.
         */
        if (is_new)
          ptrie_insert(tree, (unsigned char *)danci, (void *)nstack);

        /*
         * Move to next danci.
         */
        while(*ptr && !(*ptr & 0x80))
          ptr++;
    }
    return(0);
}

/*
 * Build the danci lookup trie from an open danci file.
 *
 * Every line read from ``dfile'' is handed to add_line().  The loop is
 * driven by the return value of fgets() rather than by feof(), so that a
 * final line without a trailing newline is still processed and a read
 * error ends the loop instead of reprocessing a stale buffer forever.
 *
 * Returns the populated trie, or NULL if the trie could not be created,
 * a line could not be added, or reading the file failed.  On failure any
 * partially built trie is released.  The caller keeps ownership of
 * ``dfile'' and must close it.
 */
static ptrie
#ifdef __STDC__
load_danci(FILE *dfile)
#else /* !__STDC__ */
load_danci(dfile)
FILE *dfile;
#endif /* !__STDC__ */
{
    char buf[BUFSIZ];
    ptrie tree;

    /*
     * Create the search trie for the danci.
     */
    tree = ptrie_create();
    if (tree == NULL)
      return(NULL);

    while(fgets(buf, BUFSIZ, dfile) != NULL) {
        if (add_line((char *)buf, tree) < 0) {
            ptrie_free(tree, ptrie_free_func);
            return(NULL);
        }
    }

    /*
     * fgets() returns NULL for both end-of-file and read errors; only
     * the former is a successful load.
     */
    if (ferror(dfile)) {
        ptrie_free(tree, ptrie_free_func);
        return(NULL);
    }
    return(tree);
}

/*
 * Callback given to stack_dump() by pinyin_match(): appends one Pinyin
 * string to the file-level ``pin_table'' and bumps ``num_pin''.
 *
 * NULL entries are skipped.  Entries that would not fit in the table are
 * dropped rather than written past its end; the table has as many slots
 * as add_line() requests from stack_create(), so this is only expected to
 * matter if a stack can hold more elements than it was created for.
 */
static void
#ifdef __STDC__
gather_stack(char *pinyin)
#else /* !__STDC__ */
gather_stack(pinyin)
char *pinyin;
#endif /* !__STDC__ */
{
    if (pinyin == NULL)
      return;

    if (num_pin >= (int)(sizeof(pin_table) / sizeof(pin_table[0])))
      return;

    pin_table[num_pin++] = pinyin;
}

/*
 * qsort() comparator for an array of ``char *'' strings that yields
 * descending strcmp() order, so that a string sorts ahead of any of its
 * own prefixes (e.g. "suo3" before "su4").
 *
 * Under ANSI C the parameters are declared ``const void *'' because that
 * is the comparator type qsort() requires; each argument points at one
 * ``char *'' element of the array being sorted.  Callers that pass
 * ``char **'' arguments directly keep working, since any object pointer
 * converts to ``const void *''.
 *
 * Returns a value less than, equal to, or greater than zero when the
 * first string sorts before, equal to, or after the second.
 */
static int
#ifdef __STDC__
sort_cmp(const void *s1, const void *s2)
#else /* !__STDC__ */
sort_cmp(s1, s2)
char **s1, **s2;
#endif /* !__STDC__ */
{
    /*
     * Swapping the operands reverses the order without negating the
     * result of strcmp().  The casts are spelled without ``const'' so the
     * same body also compiles in the pre-ANSI branch.
     */
    return(strcmp(*(char **)s2, *(char **)s1));
}

/*
 * Find which toned Pinyin on ``pstack'' the toneless text at ``pstr''
 * begins with.
 *
 * The stack is dumped into the file-level ``pin_table'' (so this routine
 * is not reentrant) and sorted with sort_cmp().  Each candidate is then
 * compared against the front of ``pstr'' using only its leading part, up
 * to and including its last alphabetic character, i.e. with the trailing
 * tone number and colon (:) accent removed.
 *
 * On a match the candidate (with tone) is returned and ``*advance'' is set
 * to the number of characters of ``pstr'' it accounts for.  If nothing
 * matches, NULL is returned and ``*advance'' is set to 0.  The returned
 * pointer is an element of the stack and still belongs to it.
 */
static char *
#ifdef __STDC__
pinyin_match(char *pstr, stack pstack, int *advance)
#else /* !__STDC__ */
pinyin_match(pstr, pstack, advance)
char *pstr;
stack pstack;
int *advance;
#endif /* !__STDC__ */
{
    char *entry;
    int i, len, pstr_len;

    pstr_len = (int)strlen(pstr);

    /*
     * Dump the stack into an array of Pinyin
     * to match against.
     */
    num_pin = 0;
    stack_dump(pstack, gather_stack);

    /*
     * Sort so that a reading is tried before any of its prefixes, to
     * avoid matching things like SU before SUO.
     */
    qsort((char *)pin_table, num_pin, sizeof(char *), sort_cmp);

    for (i = 0; i < num_pin; i++) {
        entry = pin_table[i];

        /*
         * Back up over the tone number and colon (:) accent so
         * comparisons will only be on letters.  The first character is
         * always kept.  The cast keeps isalpha() defined for bytes with
         * the high bit set.
         */
        len = (int)strlen(entry);
        while(len > 1 && !isalpha((unsigned char)entry[len - 1]))
          len--;

        /*
         * An empty entry has nothing to compare; treating it as a match
         * would consume no input and add no Pinyin.
         */
        if (len == 0)
          continue;

        if (pstr_len >= len && strncmp(pstr, entry, len) == 0) {
            *advance = len;
            return(entry);
        }
    }
    *advance = 0;
    return(NULL);
}

/*
 * Add tone numbers to the Pinyin of every line read from ``in''.
 *
 * Each input line is a toneless Pinyin string, a space, and a list of
 * cihui made of two-byte danci.  For each danci up to the end of the line
 * or the first comma, the Pinyin recorded for that danci in ``tree'' is
 * matched against the front of the remaining toneless Pinyin, and the
 * toned form is appended to the replacement Pinyin.
 *
 * Lines that convert cleanly are written to stdout as the toned Pinyin,
 * a space, and the original cihui list.  Lines containing an unknown
 * danci, a danci whose Pinyin does not match, a truncated danci, or
 * Pinyin too long for the working buffers are written to ``fix'' instead,
 * prefixed with the line number and the Pinyin still unmatched.
 *
 * Reading stops at end of file or on the first read error.
 */
static void
#ifdef __STDC__
addtones(FILE *in, FILE *fix, ptrie tree)
#else /* !__STDC__ */
addtones(in, fix, tree)
FILE *in, *fix;
ptrie tree;
#endif /* !__STDC__ */
{
    char buf[BUFSIZ], *ptr;
    char work_pinyin[128], *wptr;
    char *pinyin, new_pinyin[BUFSIZ], *nptr;
    int line_no = 0, i, doskip, num;
    size_t plen;
    unsigned char danci[3];
    stack nstack;

    /*
     * Drive the loop with fgets() itself: testing feof() before the
     * buffer is used would drop a last line that has no newline and
     * would never terminate after a read error.
     */
    while(fgets(buf, BUFSIZ, in) != NULL) {
        line_no++;

        /*
         * Remove trailing newlines, space, and tabs.
         */
        i = (int)strlen(buf) - 1;
        while(i > 0 && !(buf[i] & 0x80) && isspace((unsigned char)buf[i]))
          buf[i--] = '\0';

        /*
         * Get the pinyin without tones, never writing past the end of
         * the work buffer.
         */
        i = 0;
        ptr = (char *)buf;
        while(*ptr && *ptr != ' ' && i < (int)sizeof(work_pinyin) - 1)
          work_pinyin[i++] = *ptr++;
        work_pinyin[i] = '\0';
        wptr = (char *)work_pinyin;
        nptr = (char *)new_pinyin;
        /* Start empty so a line with no danci prints defined contents. */
        *nptr = '\0';
        doskip = 0;

        if (*ptr != '\0' && *ptr != ' ') {
            /*
             * The Pinyin did not fit in the work buffer, so the line
             * cannot be matched reliably.
             */
            fprintf(fix, "line (%d):%s: %s\n", line_no, wptr, buf);
            doskip = 1;
        } else if (*ptr == ' ') {
            /*
             * Move to the beginning of the first danci.  When the line
             * has no space there is nothing after the Pinyin to move to.
             */
            ptr++;
        }

        /*
         * Do this loop until the end of the line, or until
         * a comma (,) is encountered.  Since a comma indicates
         * another possible cihui item with the same pronunciation,
         * it shouldn't need to be looked at.
         */
        while(doskip == 0 && *ptr != '\0' && *ptr != ',') {
            if (ptr[1] == '\0') {
                /*
                 * Only half a danci is left on the line; report it
                 * instead of reading past the terminator.
                 */
                fprintf(fix, "line (%d):%s: %s\n", line_no, wptr, buf);
                doskip = 1;
                break;
            }
            danci[0] = *ptr++;
            danci[1] = *ptr++;
            danci[2] = '\0';
            nstack = (stack)ptrie_find(tree, (unsigned char *)danci);
            if (nstack == NULL || stack_size(nstack) == 0) {
                /*
                 * This danci doesn't exist in lookup tree.  So just
                 * emit the whole line to the fix file.
                 */
                fprintf(fix, "line (%d):%s: %s\n", line_no, wptr, buf);
                doskip = 1;
                continue;
            }

            pinyin = pinyin_match(wptr, nstack, &num);
            if (pinyin == NULL) {
                /*
                 * None of the Pinyin matched, so just emit the
                 * remaining Pinyin with no tones to the fix file.
                 */
                fprintf(fix, "line (%d):%s: %s\n", line_no, wptr, buf);
                doskip = 1;
                continue;
            }

            plen = strlen(pinyin);
            if (plen >= sizeof(new_pinyin) - (size_t)(nptr - new_pinyin)) {
                /*
                 * The toned Pinyin would overflow the output buffer.
                 */
                fprintf(fix, "line (%d):%s: %s\n", line_no, wptr, buf);
                doskip = 1;
                continue;
            }

            /*
             * Add the replacement Pinyin with tone number
             * and skip to the next Pinyin with no tone
             * in the ``work_pinyin'' buffer.  The amount that
             * ``wptr'' needs to be incremented by is returned
             * from the ``pinyin_match'' routine.
             */
            memcpy(nptr, pinyin, plen + 1);
            nptr += plen;
            wptr += num;
        }

        if (doskip == 0) {
            ptr = (char *)buf;
            while(*ptr && !(*ptr & 0x80))
              ptr++;
            /*
             * Emit the new pinyin with tones followed by its cihui list.
             */
            printf("%s %s\n", new_pinyin, ptr);
        }
    }
}

/*
 * Write the one-line command synopsis to stderr and terminate the
 * program with exit status -1.  Does not return.
 */
static void
#ifdef __STDC__
usage(void)
#else /* !__STDC__ */
usage()
#endif /* !__STDC__ */
{
    (void)fprintf(stderr, "usage  %s -d danci-file [input-file]\n", program);

    exit(-1);
}

/*
 * Run addtones() over a single input.
 *
 * ``name'' is the path of the input file, or NULL to read standard input.
 * Lines that cannot be converted go to a fix file called "<name>.fix"
 * (or "stdin.fix" for standard input), which is created or truncated.
 * Both the fix file and any input file opened here are closed before
 * returning.
 *
 * Returns 0 on success, or -1 after printing a message to stderr when a
 * file cannot be opened or the fix file name would not fit its buffer.
 */
static int
#ifdef __STDC__
process_input(char *name, ptrie tree)
#else /* !__STDC__ */
process_input(name, tree)
char *name;
ptrie tree;
#endif /* !__STDC__ */
{
    FILE *infile = stdin, *fixfile;
    char fixbuf[BUFSIZ];

    if (name != NULL) {
        /* sizeof(".fix") counts the suffix plus the terminating NUL. */
        if (strlen(name) + sizeof(".fix") > sizeof(fixbuf)) {
            fprintf(stderr, "%s: input file name \"%s\" is too long\n",
                    program, name);
            return(-1);
        }
        infile = fopen(name, "r");
        if (infile == NULL) {
            fprintf(stderr, "%s: problem opening input file \"%s\"\n",
                    program, name);
            return(-1);
        }
        sprintf(fixbuf, "%s.fix", name);
    } else
      sprintf(fixbuf, "stdin.fix");

    fixfile = fopen(fixbuf, "w");
    if (fixfile == NULL) {
        fprintf(stderr, "%s: couldn't open the fix file \"%s\"\n",
                program, fixbuf);
        if (infile != stdin)
          fclose(infile);
        return(-1);
    }

    addtones(infile, fixfile, tree);

    fclose(fixfile);
    if (infile != stdin)
      fclose(infile);
    return(0);
}

/*
 * Program entry point.
 *
 * Arguments are handled left to right.  ``-d file'' (or ``-D file'')
 * loads a danci file, replacing any trie loaded earlier; every other
 * argument is an input file that is converted with the trie loaded so
 * far.  If no input file is named, standard input is converted once all
 * arguments have been handled.
 *
 * Exits with status 0 on success and -1 on any error.
 */
int
#ifdef __STDC__
main(int argc, char **argv)
#else /* !__STDC__ */
main(argc, argv)
int argc;
char **argv;
#endif /* !__STDC__ */
{
    FILE *dfile;
    char *dfile_name;
    ptrie tree = NULL;
    int processed = 0;

    program = argv[0];

    if (argc < 3)
      usage();

    argc--;
    argv++;
    while(argc > 0) {
        if (argv[0][0] == '-') {
            switch(argv[0][1]) {
              case 'd': case 'D':
                argc--;
                argv++;
                if (argc == 0) {
                    /*
                     * ``-d'' was the last argument, so there is no
                     * file name to read.
                     */
                    if (tree != NULL)
                      ptrie_free(tree, ptrie_free_func);
                    fprintf(stderr, "%s: missing danci file name\n",
                            program);
                    usage();
                }
                dfile_name = argv[0];

                /*
                 * A new danci file needs to be loaded.
                 */
                dfile = fopen(dfile_name, "r");
                if (dfile == NULL) {
                    fprintf(stderr, "%s: problem opening danci file \"%s\"\n",
                            program, dfile_name);
                    if (tree != NULL)
                      ptrie_free(tree, ptrie_free_func);
                    exit(-1);
                }
                if (tree != NULL)
                  ptrie_free(tree, ptrie_free_func);
                tree = load_danci(dfile);
                fclose(dfile);
                if (tree == NULL) {
                    fprintf(stderr, "%s: problem loading danci file \"%s\"\n",
                            program, dfile_name);
                    exit(-1);
                }
                break;
              default:
                if (tree != NULL)
                  ptrie_free(tree, ptrie_free_func);
                fprintf(stderr, "%s: unknown command line switch\n", program);
                usage();
                break;
            }
        } else {
            /*
             * Input files are converted with whatever trie has been
             * loaded so far, so a danci file has to come first.
             */
            if (tree == NULL) {
                fprintf(stderr, "%s: no danci file loaded before \"%s\"\n",
                        program, argv[0]);
                usage();
            }
            if (process_input(argv[0], tree) < 0) {
                ptrie_free(tree, ptrie_free_func);
                exit(-1);
            }
            processed = 1;
        }
        argc--;
        argv++;
    }

    /*
     * No input file was named: convert standard input.
     */
    if (processed == 0) {
        if (tree == NULL)
          usage();
        if (process_input((char *)NULL, tree) < 0) {
            ptrie_free(tree, ptrie_free_func);
            exit(-1);
        }
    }
    exit(0);
}
