#
#   Copyright (C) 2002-2009, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char_th.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on TR29 Revision 13, for Unicode Version 5.1
#
#   Syntax reminder: '#' starts a comment that runs to the end of the line,
#   so every definition and rule below must sit on its own line.
#

#
#  Character Class Definitions.
#
#  Each class is taken directly from the Grapheme_Cluster_Break property.
#
$CR          = [\p{Grapheme_Cluster_Break = CR}];
$LF          = [\p{Grapheme_Cluster_Break = LF}];
$Control     = [\p{Grapheme_Cluster_Break = Control}];

#  $Extend is the property-defined Extend set minus five Thai/Lao code points:
#      U+0E30  THAI CHARACTER SARA A
#      U+0E32  THAI CHARACTER SARA AA
#      U+0E45  THAI CHARACTER LAKKHANGYAO
#      U+0EB0  LAO VOWEL SIGN A
#      U+0EB2  LAO VOWEL SIGN AA
#  With them removed, the "<base> $Extend" rules below can no longer attach
#  these characters to the preceding character.
#  NOTE(review): $SpacingMark is not adjusted the same way -- confirm whether
#  any of these code points can still attach through the $SpacingMark rule
#  under the Unicode data this file is built against.
$Extend      = [\p{Grapheme_Cluster_Break = Extend} - [\u0E30 \u0E32 \u0E45 \u0EB0 \u0EB2]];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
#  L / V / T   : leading consonant, vowel, and trailing consonant jamo.
#  LV / LVT    : precomposed Hangul syllables.
#
$L   = [\p{Grapheme_Cluster_Break = L}];
$V   = [\p{Grapheme_Cluster_Break = V}];
$T   = [\p{Grapheme_Cluster_Break = T}];

$LV  = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];


## -------------------------------------------------
#
#  !!chain lets a match continue from one rule into another rule that starts
#  on the character the first one ended on (see the ICU RBBI documentation),
#  so the two-character rules below combine to cover longer clusters.
#
!!chain;

#
#  Forward rules.  Each rule names a pair of adjacent characters that must
#  NOT be separated by a boundary.
#
!!forward;

#  Keep CR LF together.
$CR $LF;

#  Keep Hangul syllable sequences together.
$L ($L | $V | $LV | $LVT);
($LV | $V) ($V | $T);
($LVT | $T) $T;

#  Attach $Extend / $SpacingMark to any preceding character that is not
#  Control, CR or LF.
[^$Control $CR $LF] $Extend;

[^$Control $CR $LF] $SpacingMark;


## -------------------------------------------------
#
#  Reverse rules.  These are the forward rules with the two operands of each
#  pair swapped, for matching while moving backwards through the text.
#

!!reverse;
$LF $CR;
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
$T ($LVT | $T);

$Extend      [^$Control $CR $LF];
$SpacingMark [^$Control $CR $LF];


## -------------------------------------------------
#
#  No safe-reverse rules are given in this file.
#  NOTE(review): presumably the rule builder supplies a default when this
#  section is empty -- confirm against the ICU version in use.
#

!!safe_reverse;


## -------------------------------------------------
#
#  No safe-forward rules are given in this file (same caveat as above).
#

!!safe_forward;
