Home | History | Annotate | Download | only in android
      1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk
      2 index 91754f1..ccac4d1 100644
      3 --- a/source/data/brkitr/brklocal.mk
      4 +++ b/source/data/brkitr/brklocal.mk
      5 @@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS)
      6  
      7  
      8  # List of compact trie dictionary files (ctd).
      9 -BRK_CTD_SOURCE = thaidict.txt cjdict.txt
     10 +BRK_CTD_SOURCE = thaidict.txt
     11  
     12  
     13  # List of break iterator files (brk).
     14 -# Chrome change: remove word_ja.txt and line_he.txt
     15 -BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
     16 +# Chrome change: remove line_he.txt
     17 +BRK_SOURCE =  sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
     18  
     19  
     20  # Ordinary resources
     21 -# Chrome change: remove ja.txt and he.txt
     22 +# Chrome change: remove he.txt
     23  BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\
     24 - fi.txt   th.txt
     25 + fi.txt ja.txt th.txt
     26 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt
     27 index fb83ac3..5d839bd 100644
     28 --- a/source/data/brkitr/root.txt
     29 +++ b/source/data/brkitr/root.txt
     30 @@ -17,8 +17,5 @@ root{
     31      }
     32      dictionaries{
     33          Thai:process(dependency){"thaidict.ctd"}
     34 -        Hani:process(dependency){"cjdict.ctd"}
     35 -        Hira:process(dependency){"cjdict.ctd"}
     36 -        Kata:process(dependency){"cjdict.ctd"}
     37      }
     38  }
     39 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt
     40 index 0b49377..a0e1ceb 100644
     41 --- a/source/data/brkitr/word.txt
     42 +++ b/source/data/brkitr/word.txt
     43 @@ -60,11 +60,10 @@ $Control        = [\p{Grapheme_Cluster_Break = Control}];
     44  $HangulSyllable = [\uac00-\ud7a3];
     45  $ComplexContext = [:LineBreak = Complex_Context:];
     46  $KanaKanji      = [$Han $Hiragana $Katakana];
     47 -$dictionaryCJK  = [$KanaKanji $HangulSyllable];
     48 -$dictionary     = [$ComplexContext $dictionaryCJK];
     49 +$dictionary   = [:LineBreak = Complex_Context:];
     50  
     51 -# leave CJK scripts out of ALetterPlus
     52 -$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
     53 +$ALetterPlus  = [$ALetter [$dictionary-$Extend-$Control]];   # Note:  default ALetter does not
     54 +                                                             #  include the dictionary characters.
     55  
     56  
     57  #
     58 @@ -99,8 +98,7 @@ $CR $LF;
     59  #          begins with a group of Format chars, or with a "word" consisting of a single
     60  #          char that is not in any of the listed word break categories followed by
     61  #          format char(s).
     62 - #          format char(s), or is not a CJK dictionary character.
     63 -[^$CR $LF $Newline $dictionaryCJK]? ($Extend |  $Format)+;
     64 +[^$CR $LF $Newline]? ($Extend |  $Format)+;
     65  
     66  $NumericEx {100};
     67  $ALetterEx {200};
     68 @@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx  {200};    #  (13b)
     69  $ExtendNumLetEx $NumericEx  {100};    #  (13b)
     70  $ExtendNumLetEx $KatakanaEx {400};    #  (13b)
     71  
     72 -# special handling for CJK characters: chain for later dictionary segmentation
     73 -$HangulSyllable $HangulSyllable {200};
     74 -$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found
     75  
     76  
     77  ## -------------------------------------------------
     78 @@ -179,7 +174,7 @@ $BackHebrewLetEx   = ($Format | $Extend)* $HebrewLet;
     79  $LF $CR;
     80  
     81  # rule 4
     82 -($Format | $Extend)*  [^$CR $LF $Newline $dictionaryCJK]?;
     83 +($Format | $Extend)*  [^$CR $LF $Newline]?;
     84  
     85  # rule 5
     86  
     87 @@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx;
     88  $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
     89  ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 
     90  
     91 -# special handling for CJK characters: chain for later dictionary segmentation
     92 -$HangulSyllable $HangulSyllable;
     93 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
     94 -
     95  ## -------------------------------------------------
     96  
     97  !!safe_reverse;
     98