1 #include <string.h> 2 #include <stdlib.h> 3 #include <stdio.h> 4 5 #include "hyphen.h" 6 #include "csutil.h" 7 8 #define BUFSIZE 1000 9 10 void help() { 11 fprintf(stderr,"correct syntax is:\n"); 12 fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n"); 13 fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n"); 14 fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n"); 15 } 16 17 /* get a pointer to the nth 8-bit or UTF-8 character of the word */ 18 char * hindex(char * word, int n, int utf8) { 19 int j = 0; 20 while (j < n) { 21 j++; 22 word++; 23 while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++; 24 } 25 return word; 26 } 27 28 /* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */ 29 void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) { 30 int i, k, j = 0; 31 char r; 32 for (i = 0; (i + 1) < strlen(word); i++) { 33 if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue; 34 if ((hyphen[j] & 1)) { 35 if (rep && rep[j]) { 36 k = hindex(word, j - pos[j] + 1, utf8) - word; 37 r = word[k]; 38 word[k] = 0; 39 printf(" - %s%s", word, rep[j]); 40 word[k] = r; 41 printf("%s\n", hindex(word + k, cut[j], utf8)); 42 } else { 43 k = hindex(word, j + 1, utf8) - word; 44 r = word[k]; 45 word[k] = 0; 46 printf(" - %s=", word); 47 word[k] = r; 48 printf("%s\n", word + k); 49 } 50 } 51 j++; 52 } 53 } 54 55 int 56 main(int argc, char** argv) 57 { 58 59 HyphenDict *dict; 60 int df; 61 int wtc; 62 FILE* wtclst; 63 int k, n, i, j, c; 64 char buf[BUFSIZE + 1]; 65 int nHyphCount; 66 char *hyphens; 67 char *lcword; 68 char *hyphword; 69 char hword[BUFSIZE * 2]; 70 int arg = 1; 71 int optd = 1; 72 int optdd = 0; 73 char ** rep; 74 int * pos; 75 int * cut; 76 77 /* first parse the command line options */ 78 /* arg1 - hyphen dictionary file, arg2 - file of words to check */ 79 80 if (argv[arg]) { 81 if (strcmp(argv[arg], "-o") == 0) { 82 optd = 0; 83 arg++; 84 } 85 if (argv[arg] && strcmp(argv[arg], "-d") == 0) { 86 optd = 1; 87 optdd = 1; 88 arg++; 89 } 90 } 91 92 if (argv[arg]) { 93 df = arg++; 94 } else { 95 help(); 96 exit(1); 97 } 98 99 if (argv[arg]) { 100 wtc = arg++; 101 } else { 102 help(); 103 exit(1); 104 } 105 106 /* load the hyphenation dictionary */ 107 if ((dict = hnj_hyphen_load(argv[df])) == NULL) { 108 fprintf(stderr, "Couldn't find file %s\n", argv[df]); 109 fflush(stderr); 110 exit(1); 111 } 112 113 /* open the words to check list */ 114 wtclst = fopen(argv[wtc],"r"); 115 if (!wtclst) { 116 fprintf(stderr,"Error - could not open file of words to check\n"); 117 exit(1); 118 } 119 120 121 /* now read each word from the wtc file */ 122 while(fgets(buf,BUFSIZE,wtclst)) { 123 k = strlen(buf); 124 if (buf[k - 1] == '\n') buf[k - 1] = '\0'; 125 if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0'; 126 127 /* set aside some buffers to hold lower cased */ 128 /* and hyphen information */ 129 lcword = (char *) malloc(k+1); 130 hyphens = (char *)malloc(k+5); 131 if (dict->utf8) { 132 strcpy(lcword, buf); 133 } else { 134 enmkallsmall(lcword,buf,dict->cset); 135 } 136 137 /* first remove any trailing periods */ 138 n = k-1; 139 while((n >=0) && (lcword[n] == '.')) n--; 140 n++; 141 142 /* now actually try to hyphenate the word */ 143 144 rep = NULL; 145 pos = NULL; 146 cut = NULL; 147 hword[0] = '\0'; 148 149 if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) || 150 (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) { 151 free(hyphens); 152 free(lcword); 153 fprintf(stderr, "hyphenation error\n"); 154 exit(1); 155 } 156 157 if (!optd) { 158 /* now backfill hyphens[] for any removed periods */ 159 for (c = n; c < k; c++) hyphens[c] = '0'; 160 hyphens[k] = '\0'; 161 162 /* now create a new char string showing hyphenation positions */ 163 /* count the hyphens and allocate space for the new hypehanted string */ 164 nHyphCount = 0; 165 for (i = 0; i < n; i++) 166 if (hyphens[i]&1) 167 nHyphCount++; 168 hyphword = (char *) malloc(k+1+nHyphCount); 169 j = 0; 170 for (i = 0; i < n; i++) { 171 hyphword[j++] = buf[i]; 172 if (hyphens[i]&1) { 173 hyphword[j++] = '-'; 174 } 175 } 176 hyphword[j] = '\0'; 177 fprintf(stdout,"%s\n",hyphword); 178 fflush(stdout); 179 free(hyphword); 180 } else { 181 fprintf(stdout,"%s\n", hword); 182 if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8); 183 if (rep) { 184 for (i = 0; i < n - 1; i++) { 185 if (rep[i]) free(rep[i]); 186 } 187 free(rep); 188 free(pos); 189 free(cut); 190 } 191 } 192 free(hyphens); 193 free(lcword); 194 } 195 196 fclose(wtclst); 197 hnj_hyphen_free(dict); 198 return 0; 199 } 200