Home | History | Annotate | Download | only in patches
      1 From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001
      2 Date: Tue, 1 Jun 2010 17:27:23 -0700
      3 Subject: [PATCH] Enhance hyphenation dictionary reading from character buffer.
      4 
      5 Previous file reading is kept and enhanced with mmap.
      6 
      7 This is the preparation for reading the dictionary from an asset.
      8 
      9 issue: 2672163
     10 Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857
     11 ---
     12  hyphen.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
     13  hyphen.h |    2 +
     14  2 files changed, 60 insertions(+), 12 deletions(-)
     15 
     16 diff --git a/hyphen.c b/hyphen.c
     17 index 974d87f..446d5bd 100644
     18 --- a/hyphen.c
     19 +++ b/hyphen.c
     20 @@ -36,13 +36,13 @@
     21   * MPL.
     22   *
     23   */
     24 +#include <fcntl.h>
     25 +#include <sys/mman.h>
     26 +#include <sys/stat.h>
     27  #include <stdlib.h> /* for NULL, malloc */
     28  #include <stdio.h>  /* for fprintf */
     29  #include <string.h> /* for strdup */
     30 -
     31 -#ifdef UNX
     32 -#include <unistd.h> /* for exit */
     33 -#endif
     34 +#include <unistd.h> /* for close */
     35  
     36  #define noVERBOSE
     37  
     38 @@ -230,12 +230,57 @@ get_state_str (int state)
     39  }
     40  #endif
     41  
     42 +// Copy the next line of dict_contents (starting at offset *dict_ptr) into s, NUL-terminated, and advance *dict_ptr past the bytes consumed.
     43 +static char *
     44 +get_line (char *s, int size, const char *dict_contents, int dict_length,
     45 +    int *dict_ptr)
     46 +{
     47 +    int len = 0;  /* number of bytes copied into s so far */
     48 +    while (len < (size - 1) && *dict_ptr < dict_length) {  /* keep one byte for the terminator; stop at end of contents */
     49 +        s[len++] = *(dict_contents + *dict_ptr);
     50 +        (*dict_ptr)++;
     51 +        if (s[len - 1] == '\n')  /* the newline itself is kept in s */
     52 +            break;
     53 +    }
     54 +    s[len] = '\0';  /* NOTE(review): writes s[0] even when size <= 0; visible callers pass sizeof(array) */
     55 +    if (len > 0) {
     56 +        return s;
     57 +    } else {
     58 +        return NULL;  /* nothing copied: *dict_ptr was already at dict_length, or size <= 1 */
     59 +    }
     60 +}
     61 +
     62  HyphenDict *
     63  hnj_hyphen_load (const char *fn)
     64  {
     65 +    if (fn == NULL)  /* Loads the dictionary file fn by mapping it and parsing the mapped bytes; returns NULL if any step below fails. */
     66 +        return NULL;
     67 +    const int fd = open(fn, O_RDONLY);
     68 +    if (fd == -1)
     69 +        return NULL;
     70 +    struct stat sb;
     71 +    if (fstat(fd, &sb) == -1)  {  /* To obtain file size */
     72 +        close(fd);
     73 +        return NULL;
     74 +    }
     75 +
     76 +    const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);  /* read-only private mapping of sb.st_size bytes from offset 0 */
     77 +    if (addr == MAP_FAILED) {
     78 +        close(fd);
     79 +        return NULL;
     80 +    }
     81 +    HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);  /* NOTE(review): st_size (off_t) is narrowed to the int dict_length parameter */
     82 +    munmap((void *)addr, sb.st_size);  /* NOTE(review): assumes the parsed dict keeps no pointers into the mapping - confirm */
     83 +    close(fd);
     84 +
     85 +    return dict;
     86 +}
     87 +
     88 +HyphenDict *
     89 +hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
     90 +{
     91      HyphenDict *dict[2];
     92      HashTab *hashtab;
     93 -    FILE *f;
     94      char buf[MAX_CHARS];
     95      char word[MAX_CHARS];
     96      char pattern[MAX_CHARS];
     97 @@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn)
     98      HashEntry *e;
     99      int nextlevel = 0;
    100  
    101 -    f = fopen (fn, "r");
    102 -    if (f == NULL)
    103 +    if (dict_contents == NULL)
    104          return NULL;
    105  
    106 +    int dict_ptr = 0;
    107  // loading one or two dictionaries (separated by NEXTLEVEL keyword)
    108      for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
    109          hashtab = hnj_hash_new ();
    110 @@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn)
    111          /* read in character set info */
    112          if (k == 0) {
    113              for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
    114 -            fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
    115 +            get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
    116 +                dict_length, &dict_ptr);
    117              for (i=0;i<MAX_NAME;i++)
    118                  if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
    119                      dict[k]->cset[i] = 0;
    120 @@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn)
    121              dict[k]->utf8 = dict[0]->utf8;
    122          }
    123  
    124 -        while (fgets (buf, sizeof(buf), f) != NULL)
    125 +        while (get_line(buf, sizeof(buf), dict_contents, dict_length,
    126 +                &dict_ptr) != NULL)
    127          {
    128              if (buf[0] != '%')
    129              {
    130 @@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn)
    131  #endif
    132          state_num = 0;
    133      }
    134 -    fclose(f);
    135      if (k == 2) dict[0]->nextlevel = dict[1];
    136      return dict[0];
    137  }
    138 @@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
    139              hyphens2 = hnj_malloc (word_size);
    140          }
    141          for (i = 0; i < word_size; i++) rep2[i] = NULL;
    142 -        for (i = 0; i < word_size; i++) if 
    143 -                                            (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
    144 +        for (i = 0; i < word_size; i++)
    145 +            if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
    146                  if (i - begin > 1) {
    147                      int hyph = 0;
    148                      prep_word[i + 2] = '\0';
    149 diff --git a/hyphen.h b/hyphen.h
    150 index 5d79308..29a0701 100644
    151 --- a/hyphen.h
    152 +++ b/hyphen.h
    153 @@ -91,6 +91,8 @@ struct _HyphenTrans {
    154  };
    155  
    156  HyphenDict *hnj_hyphen_load (const char *fn);
    157 +HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
    158 +    int dict_length);  /* builds a dictionary from in-memory contents; dict_length is the number of bytes readable at dict_contents */
    159  void hnj_hyphen_free (HyphenDict *dict);
    160  
    161  /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
    162 -- 
    163 1.7.0.1
    164 
    165