1 Index: source/data/brkitr/word.txt 2 =================================================================== 3 --- source/data/brkitr/word.txt (revision 264859) 4 +++ source/data/brkitr/word.txt (working copy) 5 @@ -56,15 +56,13 @@ 6 # 5.0 or later as the definition of Complex_Context was corrected to include all 7 # characters requiring dictionary break. 8 9 -$Control = [\p{Grapheme_Cluster_Break = Control}]; 10 +$Control = [\p{Grapheme_Cluster_Break = Control}]; 11 $HangulSyllable = [\uac00-\ud7a3]; 12 $ComplexContext = [:LineBreak = Complex_Context:]; 13 $KanaKanji = [$Han $Hiragana $Katakana]; 14 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; 15 -$dictionary = [$ComplexContext $dictionaryCJK]; 16 +$dictionary = [$ComplexContext]; 17 18 -# leave CJK scripts out of ALetterPlus 19 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; 20 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; 21 22 23 # 24 @@ -166,11 +164,6 @@ 25 26 $Regional_IndicatorEx $Regional_IndicatorEx; 27 28 -# special handling for CJK characters: chain for later dictionary segmentation 29 -$HangulSyllable $HangulSyllable {200}; 30 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found 31 - 32 - 33 ## ------------------------------------------------- 34 35 !!reverse; 36 @@ -237,10 +230,6 @@ 37 38 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; 39 40 -# special handling for CJK characters: chain for later dictionary segmentation 41 -$HangulSyllable $HangulSyllable; 42 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found 43 - 44 ## ------------------------------------------------- 45 46 !!safe_reverse; 47 Index: source/data/brkitr/brklocal.mk 48 =================================================================== 49 --- source/data/brkitr/brklocal.mk (revision 264859) 50 +++ source/data/brkitr/brklocal.mk (working copy) 51 @@ -34,13 +34,13 @@ 52 53 54 # List of dictionary files (dict). 55 -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt 56 +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt 57 58 59 # List of break iterator files (brk). 60 -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt 61 +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt 62 63 64 # Ordinary resources 65 -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt 66 +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt 67 68 Index: source/data/brkitr/root.txt 69 =================================================================== 70 --- source/data/brkitr/root.txt (revision 264859) 71 +++ source/data/brkitr/root.txt (working copy) 72 @@ -16,9 +16,6 @@ 73 word:process(dependency){"word.brk"} 74 } 75 dictionaries{ 76 - Hani:process(dependency){"cjdict.dict"} 77 - Hira:process(dependency){"cjdict.dict"} 78 - Kata:process(dependency){"cjdict.dict"} 79 Khmr:process(dependency){"khmerdict.dict"} 80 Laoo:process(dependency){"laodict.dict"} 81 Thai:process(dependency){"thaidict.dict"} 82 Index: source/data/brkitr/ja.txt 83 =================================================================== 84 --- source/data/brkitr/ja.txt (revision 264859) 85 +++ source/data/brkitr/ja.txt (working copy) 86 @@ -9,6 +9,6 @@ 87 ja{ 88 Version{"1.1"} 89 boundaries{ 90 - line:process(dependency){"line_ja.brk"} 91 + word:process(dependency){"word_ja.brk"} 92 } 93 } 94