Home | History | Annotate | Download | only in hyphenation
      1 #include <string.h>
      2 #include <stdlib.h>
      3 #include <stdio.h>
      4 
      5 #include "hyphen.h"
      6 #include "csutil.h"
      7 
      8 #define BUFSIZE 1000
      9 
     10 void help() {
     11     fprintf(stderr,"correct syntax is:\n");
     12     fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n");
     13     fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
     14     fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
     15 }
     16 
     17 /* get a pointer to the nth 8-bit or UTF-8 character of the word */
     18 char * hindex(char * word, int n, int utf8) {
     19     int j = 0;
     20     while (j < n) {
     21         j++;
     22         word++;
     23         while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
     24     }
     25     return word;
     26 }
     27 
     28 /* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
     29 void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
     30     int i, k, j = 0;
     31     char r;
     32     for (i = 0; (i + 1) < strlen(word); i++) {
     33         if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
     34         if ((hyphen[j] & 1)) {
     35             if (rep && rep[j]) {
     36               k = hindex(word, j - pos[j] + 1, utf8) - word;
     37               r = word[k];
     38               word[k] = 0;
     39               printf(" - %s%s", word, rep[j]);
     40               word[k] = r;
     41               printf("%s\n", hindex(word + k, cut[j], utf8));
     42             } else {
     43               k = hindex(word, j + 1, utf8) - word;
     44               r = word[k];
     45               word[k] = 0;
     46               printf(" - %s=", word);
     47               word[k] = r;
     48               printf("%s\n", word + k);
     49             }
     50         }
     51         j++;
     52     }
     53 }
     54 
     55 int
     56 main(int argc, char** argv)
     57 {
     58 
     59     HyphenDict *dict;
     60     int df;
     61     int wtc;
     62     FILE* wtclst;
     63     int k, n, i, j, c;
     64     char buf[BUFSIZE + 1];
     65     int  nHyphCount;
     66     char *hyphens;
     67     char *lcword;
     68     char *hyphword;
     69     char hword[BUFSIZE * 2];
     70     int arg = 1;
     71     int optd = 1;
     72     int optdd = 0;
     73     char ** rep;
     74     int * pos;
     75     int * cut;
     76 
     77   /* first parse the command line options */
     78   /* arg1 - hyphen dictionary file, arg2 - file of words to check */
     79 
     80   if (argv[arg]) {
     81        if (strcmp(argv[arg], "-o") == 0) {
     82             optd = 0;
     83             arg++;
     84        }
     85        if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
     86             optd = 1;
     87             optdd = 1;
     88             arg++;
     89        }
     90   }
     91 
     92   if (argv[arg]) {
     93        df = arg++;
     94   } else {
     95     help();
     96     exit(1);
     97   }
     98 
     99   if (argv[arg]) {
    100        wtc = arg++;
    101   } else {
    102     help();
    103     exit(1);
    104   }
    105 
    106   /* load the hyphenation dictionary */
    107   if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
    108        fprintf(stderr, "Couldn't find file %s\n", argv[df]);
    109        fflush(stderr);
    110        exit(1);
    111   }
    112 
    113   /* open the words to check list */
    114   wtclst = fopen(argv[wtc],"r");
    115   if (!wtclst) {
    116     fprintf(stderr,"Error - could not open file of words to check\n");
    117     exit(1);
    118   }
    119 
    120 
    121   /* now read each word from the wtc file */
    122     while(fgets(buf,BUFSIZE,wtclst)) {
    123        k = strlen(buf);
    124        if (buf[k - 1] == '\n') buf[k - 1] = '\0';
    125        if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
    126 
    127        /* set aside some buffers to hold lower cased */
    128        /* and hyphen information */
    129        lcword = (char *) malloc(k+1);
    130        hyphens = (char *)malloc(k+5);
    131        if (dict->utf8) {
    132          strcpy(lcword, buf);
    133        } else {
    134          enmkallsmall(lcword,buf,dict->cset);
    135        }
    136 
    137        /* first remove any trailing periods */
    138        n = k-1;
    139        while((n >=0) && (lcword[n] == '.')) n--;
    140        n++;
    141 
    142        /* now actually try to hyphenate the word */
    143 
    144        rep = NULL;
    145        pos = NULL;
    146        cut = NULL;
    147        hword[0] = '\0';
    148 
    149        if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
    150 	    (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
    151              free(hyphens);
    152              free(lcword);
    153              fprintf(stderr, "hyphenation error\n");
    154              exit(1);
    155        }
    156 
    157        if (!optd) {
    158          /* now backfill hyphens[] for any removed periods */
    159          for (c = n; c < k; c++) hyphens[c] = '0';
    160          hyphens[k] = '\0';
    161 
    162          /* now create a new char string showing hyphenation positions */
    163          /* count the hyphens and allocate space for the new hypehanted string */
    164          nHyphCount = 0;
    165          for (i = 0; i < n; i++)
    166            if (hyphens[i]&1)
    167              nHyphCount++;
    168          hyphword = (char *) malloc(k+1+nHyphCount);
    169          j = 0;
    170          for (i = 0; i < n; i++) {
    171 	   hyphword[j++] = buf[i];
    172            if (hyphens[i]&1) {
    173 	      hyphword[j++] = '-';
    174 	   }
    175          }
    176          hyphword[j] = '\0';
    177          fprintf(stdout,"%s\n",hyphword);
    178          fflush(stdout);
    179          free(hyphword);
    180       } else {
    181          fprintf(stdout,"%s\n", hword);
    182          if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
    183          if (rep) {
    184             for (i = 0; i < n - 1; i++) {
    185                 if (rep[i]) free(rep[i]);
    186             }
    187             free(rep);
    188             free(pos);
    189             free(cut);
    190          }
    191       }
    192       free(hyphens);
    193       free(lcword);
    194     }
    195 
    196     fclose(wtclst);
    197     hnj_hyphen_free(dict);
    198     return 0;
    199 }
    200