#
#   Copyright (C) 2002-2013, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on UAX #29 Revision 20 for Unicode Version 6.2
#

#
#  Character classes: one set per Grapheme_Cluster_Break property value
#  that the rules below refer to.
#
$CR                 = [\p{Grapheme_Cluster_Break = CR}];
$LF                 = [\p{Grapheme_Cluster_Break = LF}];
$Control            = [\p{Grapheme_Cluster_Break = Control}];
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend             = [\p{Grapheme_Cluster_Break = Extend}];
$SpacingMark        = [\p{Grapheme_Cluster_Break = SpacingMark}];
$Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];

#
#  Hangul (Korean) syllable components: leading consonant, vowel,
#  trailing consonant, and the precomposed LV / LVT syllable types.
#
$L   = [\p{Grapheme_Cluster_Break = L}];
$V   = [\p{Grapheme_Cluster_Break = V}];
$T   = [\p{Grapheme_Cluster_Break = T}];
$LV  = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];

#
#  Every character that is not a Control, CR or LF.  Only these may have
#  an Extend or SpacingMark attached to them.
#
$NotControl = [^$Control $CR $LF];


## -------------------------------------------------
#  Turn on rule chaining for all of the rule sections that follow.
!!chain;

## -------------------------------------------------
#  Forward rules.  Each rule lists a sequence that is kept together,
#  i.e. no boundary is placed inside it.

!!forward;

#  CR LF is a single unit.
$CR $LF;

#  Hangul syllable sequences.
$L ($L | $V | $LV | $LVT);
($LV | $V) ($V | $T);
($LVT | $T) $T;

#  Adjacent regional indicator symbols.
$Regional_Indicator $Regional_Indicator;

#  Extend and SpacingMark characters attach to the preceding character,
#  unless that character is a Control, CR or LF.
$NotControl ($Extend | $SpacingMark);
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend $NotControl;


## -------------------------------------------------
#  Reverse rules: each forward rule with its sequence written back to front.

!!reverse;

$LF $CR;

($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
$T ($LVT | $T);

$Regional_Indicator $Regional_Indicator;

($Extend | $SpacingMark) $NotControl;
# TODO: Restore if the Prepend set becomes non-empty again: $NotControl $Prepend;


## -------------------------------------------------
#  Safe rules are not logically required for character breaks.  They are
#  supplied anyway because, with none at all, the engine for preceding()
#  and following() will fall back to the old style inefficient algorithm.

!!safe_reverse;
$LF $CR;

## -------------------------------------------------

!!safe_forward;
$CR $LF;