Home | History | Annotate | Download | only in brkitr
      1 #
      2 #   Copyright (C) 2002-2009, International Business Machines Corporation and others.
      3 #       All Rights Reserved.
      4 #
      5 #   file:  char_th.txt 
      6 #
      7 #   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
      8 #      See Unicode Standard Annex #29.
      9 #      These rules are based on TR29 Revision 13, for Unicode Version 5.1
     10 #
     11 
     12 #
     13 #  Character Class Definitions.
     14 #
     15 $CR          = [\p{Grapheme_Cluster_Break = CR}];   # characters whose Grapheme_Cluster_Break property value is CR
     16 $LF          = [\p{Grapheme_Cluster_Break = LF}];   # property value LF
     17 $Control     = [\p{Grapheme_Cluster_Break = Control}];   # property value Control; the Extend/SpacingMark rules exclude these (with CR and LF) as the first character
     18 $Extend      = [\p{Grapheme_Cluster_Break = Extend} - [\u0E30 \u0E32 \u0E45 \u0EB0 \u0EB2]];   # property value Extend, minus U+0E30 U+0E32 U+0E45 (Thai block) and U+0EB0 U+0EB2 (Lao block), so the $Extend rules do not match those five. NOTE(review): presumably the Thai-specific tailoring of this file - confirm against the Unicode data version in use
     19 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];   # property value SpacingMark
     20 
     21 #
     22 # Korean Syllable Definitions
     23 #
     24 $L       = [\p{Grapheme_Cluster_Break = L}];   # property value L (Hangul leading jamo in UAX #29)
     25 $V       = [\p{Grapheme_Cluster_Break = V}];   # property value V (Hangul vowel jamo in UAX #29)
     26 $T       = [\p{Grapheme_Cluster_Break = T}];   # property value T (Hangul trailing jamo in UAX #29)
     27 
     28 $LV      = [\p{Grapheme_Cluster_Break = LV}];   # property value LV (precomposed Hangul syllable, UAX #29 type LV)
     29 $LVT     = [\p{Grapheme_Cluster_Break = LVT}];   # property value LVT (precomposed Hangul syllable, UAX #29 type LVT)
     30 
     31 
     32 ## -------------------------------------------------
     33 !!chain;   # option: turns on rule chaining. NOTE(review): per the ICU RBBI docs the last character of one rule match may begin another match, which is how the two-character rules below cover longer runs - confirm for the ICU version in use
     34 
     35 !!forward;   # start of the forward-iteration rule section; each rule below lists character pairs that are kept together
     36 
     37 $CR $LF;   # CR immediately followed by LF
     38 
     39 $L ($L | $V | $LV | $LVT);   # Hangul: L followed by L, V, LV or LVT
     40 ($LV | $V) ($V | $T);   # Hangul: LV or V followed by V or T
     41 ($LVT | $T) $T;   # Hangul: LVT or T followed by T
     42 
     43 [^$Control $CR $LF] $Extend;   # any character that is not Control, CR or LF, followed by an $Extend character ($Extend as defined in this file, i.e. without the five excluded code points)
     44 
     45 [^$Control $CR $LF] $SpacingMark;   # any character that is not Control, CR or LF, followed by a $SpacingMark character
     46 
     47 
     48 ## -------------------------------------------------
     49 
     50 !!reverse;   # start of the reverse-iteration rule section; each rule is the matching forward rule with its two operands swapped, so keep both sections in sync when editing
     51 $LF $CR;   # mirror of the forward rule: $CR $LF
     52 ($L | $V | $LV | $LVT) $L;   # mirror of: $L ($L | $V | $LV | $LVT)
     53 ($V | $T) ($LV | $V);   # mirror of: ($LV | $V) ($V | $T)
     54 $T ($LVT | $T);   # mirror of: ($LVT | $T) $T
     55 
     56 $Extend      [^$Control $CR $LF];   # mirror of: [^$Control $CR $LF] $Extend
     57 $SpacingMark [^$Control $CR $LF];   # mirror of: [^$Control $CR $LF] $SpacingMark
     58 
     59 
     60 ## -------------------------------------------------
     61 
     62 !!safe_reverse;   # section header only: no safe-reverse rules are listed under it in this file. NOTE(review): presumably intentional - confirm how the targeted ICU version treats an empty section
     63 
     64 
     65 ## -------------------------------------------------
     66 
     67 !!safe_forward;   # section header only: no safe-forward rules are listed under it in this file. NOTE(review): presumably intentional - confirm how the targeted ICU version treats an empty section
     68 
     69