1 From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001 2 Date: Tue, 1 Jun 2010 17:27:23 -0700 3 Subject: [PATCH] Enhance hyphenation dictionary reading from character buffer. 4 5 Previous file reading is kept and enhanced with mmap. 6 7 This is the preparation for reading the dictionary from an asset. 8 9 issue: 2672163 10 Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857 11 --- 12 hyphen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- 13 hyphen.h | 2 + 14 2 files changed, 60 insertions(+), 12 deletions(-) 15 16 diff --git a/hyphen.c b/hyphen.c 17 index 974d87f..446d5bd 100644 18 --- a/hyphen.c 19 +++ b/hyphen.c 20 @@ -36,13 +36,13 @@ 21 * MPL. 22 * 23 */ 24 +#include <fcntl.h> 25 +#include <sys/mman.h> 26 +#include <sys/stat.h> 27 #include <stdlib.h> /* for NULL, malloc */ 28 #include <stdio.h> /* for fprintf */ 29 #include <string.h> /* for strdup */ 30 - 31 -#ifdef UNX 32 -#include <unistd.h> /* for exit */ 33 -#endif 34 +#include <unistd.h> /* for close */ 35 36 #define noVERBOSE 37 38 @@ -230,12 +230,57 @@ get_state_str (int state) 39 } 40 #endif 41 42 +// Get a line from the dictionary contents. 
43 +static char * 44 +get_line (char *s, int size, const char *dict_contents, int dict_length, 45 + int *dict_ptr) 46 +{ 47 + int len = 0; 48 + while (len < (size - 1) && *dict_ptr < dict_length) { 49 + s[len++] = *(dict_contents + *dict_ptr); 50 + (*dict_ptr)++; 51 + if (s[len - 1] == '\n') 52 + break; 53 + } 54 + s[len] = '\0'; 55 + if (len > 0) { 56 + return s; 57 + } else { 58 + return NULL; 59 + } 60 +} 61 + 62 HyphenDict * 63 hnj_hyphen_load (const char *fn) 64 { 65 + if (fn == NULL) 66 + return NULL; 67 + const int fd = open(fn, O_RDONLY); 68 + if (fd == -1) 69 + return NULL; 70 + struct stat sb; 71 + if (fstat(fd, &sb) == -1) { /* To obtain file size */ 72 + close(fd); 73 + return NULL; 74 + } 75 + 76 + const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 77 + if (addr == MAP_FAILED) { 78 + close(fd); 79 + return NULL; 80 + } 81 + HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size); 82 + munmap((void *)addr, sb.st_size); 83 + close(fd); 84 + 85 + return dict; 86 +} 87 + 88 +HyphenDict * 89 +hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length) 90 +{ 91 HyphenDict *dict[2]; 92 HashTab *hashtab; 93 - FILE *f; 94 char buf[MAX_CHARS]; 95 char word[MAX_CHARS]; 96 char pattern[MAX_CHARS]; 97 @@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn) 98 HashEntry *e; 99 int nextlevel = 0; 100 101 - f = fopen (fn, "r"); 102 - if (f == NULL) 103 + if (dict_contents == NULL) 104 return NULL; 105 106 + int dict_ptr = 0; 107 // loading one or two dictionaries (separated by NEXTLEVEL keyword) 108 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 109 hashtab = hnj_hash_new (); 110 @@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn) 111 /* read in character set info */ 112 if (k == 0) { 113 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; 114 - fgets(dict[k]->cset, sizeof(dict[k]->cset),f); 115 + get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents, 116 + dict_length, &dict_ptr); 117 for (i=0;i<MAX_NAME;i++) 118 if 
((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) 119 dict[k]->cset[i] = 0; 120 @@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn) 121 dict[k]->utf8 = dict[0]->utf8; 122 } 123 124 - while (fgets (buf, sizeof(buf), f) != NULL) 125 + while (get_line(buf, sizeof(buf), dict_contents, dict_length, 126 + &dict_ptr) != NULL) 127 { 128 if (buf[0] != '%') 129 { 130 @@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn) 131 #endif 132 state_num = 0; 133 } 134 - fclose(f); 135 if (k == 2) dict[0]->nextlevel = dict[1]; 136 return dict[0]; 137 } 138 @@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, 139 hyphens2 = hnj_malloc (word_size); 140 } 141 for (i = 0; i < word_size; i++) rep2[i] = NULL; 142 - for (i = 0; i < word_size; i++) if 143 - (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 144 + for (i = 0; i < word_size; i++) 145 + if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 146 if (i - begin > 1) { 147 int hyph = 0; 148 prep_word[i + 2] = '\0'; 149 diff --git a/hyphen.h b/hyphen.h 150 index 5d79308..29a0701 100644 151 --- a/hyphen.h 152 +++ b/hyphen.h 153 @@ -91,6 +91,8 @@ struct _HyphenTrans { 154 }; 155 156 HyphenDict *hnj_hyphen_load (const char *fn); 157 +HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents, 158 + int dict_length); 159 void hnj_hyphen_free (HyphenDict *dict); 160 161 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ 162 -- 163 1.7.0.1 164 165