Home | History | Annotate | Download | only in segments
      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
      3 <!--
      4 Copyright © 1991-2015 Unicode, Inc.
      5 CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
      6 For terms of use, see http://www.unicode.org/copyright.html
      7 -->
      8 <ldml>
      9 	<identity>
     10 		<version number="$Revision: 14457 $"/>
     11 		<language type="root"/>
     12 	</identity>
     13 	<segmentations>
     14 		<segmentation type="GraphemeClusterBreak">
        			<!-- Extended grapheme cluster boundaries (UAX #29). -->
        			<!-- Rule notation: the text left of the operator must match before the candidate position and the text right of it after; -->
        			<!-- "×" means do not break at that position, "÷" means a break is allowed. Rules are evaluated in ascending id order. -->
     15 			<variables>
     16 				<variable id="$CR">\p{Grapheme_Cluster_Break=CR}</variable>
     17 				<variable id="$LF">\p{Grapheme_Cluster_Break=LF}</variable>
     18 				<variable id="$Control">\p{Grapheme_Cluster_Break=Control}</variable>
     19 				<variable id="$Extend">\p{Grapheme_Cluster_Break=Extend}</variable>
     20 				<variable id="$ZWJ">\p{Grapheme_Cluster_Break=ZWJ}</variable>
     21 				<variable id="$RI">\p{Grapheme_Cluster_Break=Regional_Indicator}</variable>
     22 				<variable id="$Prepend">\p{Grapheme_Cluster_Break=Prepend}</variable>
     23 				<variable id="$SpacingMark">\p{Grapheme_Cluster_Break=SpacingMark}</variable>
     24 				<variable id="$L">\p{Grapheme_Cluster_Break=L}</variable>
     25 				<variable id="$V">\p{Grapheme_Cluster_Break=V}</variable>
     26 				<variable id="$T">\p{Grapheme_Cluster_Break=T}</variable>
     27 				<variable id="$LV">\p{Grapheme_Cluster_Break=LV}</variable>
     28 				<variable id="$LVT">\p{Grapheme_Cluster_Break=LVT}</variable>
     29 				<variable id="$ExtPict">\p{Extended_Pictographic}</variable>
        				<!-- NOTE: $ExtCccZwj is defined here but not referenced by any rule in this segmentation. -->
     30 				<variable id="$ExtCccZwj">[[$Extend-\p{ccc=0}] $ZWJ]</variable>
     31 			</variables>
     32 			<segmentRules>
     33 				<!-- Rules -->
     34 				<!-- Break at the start and end of text, unless the text is empty. -->
     35 				<!-- Do not break between a CR and LF. Otherwise, break before and after controls. -->
     36 				<rule id="3"> $CR × $LF </rule>
     37 				<rule id="4"> ( $Control | $CR | $LF ) ÷ </rule>
     38 				<rule id="5"> ÷ ( $Control | $CR | $LF ) </rule>
     39 				<!-- Do not break Hangul syllable sequences. -->
     40 				<rule id="6"> $L × ( $L | $V | $LV | $LVT ) </rule>
     41 				<rule id="7"> ( $LV | $V ) × ( $V | $T ) </rule>
     42 				<rule id="8"> ( $LVT | $T) × $T </rule>
     43 				<!-- Do not break before extending characters or ZWJ. -->
     44 				<rule id="9"> × ($Extend | $ZWJ) </rule>
     45 				<!-- Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters. -->
     46 				<rule id="9.1"> × $SpacingMark </rule>
     47 				<rule id="9.2"> $Prepend × </rule>
     48 				<!-- Do not break within emoji modifier sequences or emoji zwj sequences. -->
     49 				<rule id="11"> $ExtPict $Extend* $ZWJ × $ExtPict </rule>
     50 				<!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
     51 				<rule id="12"> ^ ($RI $RI)* $RI × $RI </rule>
     52 				<rule id="13"> [^$RI] ($RI $RI)* $RI × $RI </rule>
     53 			</segmentRules>
     54 			<!-- Otherwise, break everywhere. -->
     55 		</segmentation>
     56 		<segmentation type="LineBreak">
        			<!-- Line break opportunities (UAX #14). -->
        			<!-- Rule notation: the text left of the operator must match before the candidate position and the text right of it after; -->
        			<!-- "×" means do not break at that position, "÷" means a break is allowed. Rules are evaluated in ascending id order. -->
     57 			<variables>
     58 				<!-- Variables -->
     59 				<variable id="$AI">\p{Line_Break=Ambiguous}</variable>
     60 				<variable id="$AL">\p{Line_Break=Alphabetic}</variable>
     61 				<variable id="$B2">\p{Line_Break=Break_Both}</variable>
     62 				<variable id="$BA">\p{Line_Break=Break_After}</variable>
     63 				<variable id="$BB">\p{Line_Break=Break_Before}</variable>
     64 				<variable id="$BK">\p{Line_Break=Mandatory_Break}</variable>
     65 				<variable id="$CB">\p{Line_Break=Contingent_Break}</variable>
     66 				<variable id="$CL">\p{Line_Break=Close_Punctuation}</variable>
     67 				<variable id="$CP">\p{Line_Break=CP}</variable>
     68 				<variable id="$CM1">\p{Line_Break=Combining_Mark}</variable>
     69 				<variable id="$CR">\p{Line_Break=Carriage_Return}</variable>
     70 				<variable id="$EX">\p{Line_Break=Exclamation}</variable>
     71 				<variable id="$GL">\p{Line_Break=Glue}</variable>
     72 				<variable id="$H2">\p{Line_Break=H2}</variable>
     73 				<variable id="$H3">\p{Line_Break=H3}</variable>
     74 				<variable id="$HL">\p{Line_Break=HL}</variable>
     75 				<variable id="$HY">\p{Line_Break=Hyphen}</variable>
     76 				<variable id="$ID">\p{Line_Break=Ideographic}</variable>
     77 				<variable id="$IN">\p{Line_Break=Inseparable}</variable>
     78 				<variable id="$IS">\p{Line_Break=Infix_Numeric}</variable>
     79 				<variable id="$JL">\p{Line_Break=JL}</variable>
     80 				<variable id="$JT">\p{Line_Break=JT}</variable>
     81 				<variable id="$JV">\p{Line_Break=JV}</variable>
     82 				<variable id="$LF">\p{Line_Break=Line_Feed}</variable>
     83 				<variable id="$NL">\p{Line_Break=Next_Line}</variable>
     84 				<variable id="$NS">\p{Line_Break=Nonstarter}</variable>
     85 				<variable id="$NU">\p{Line_Break=Numeric}</variable>
     86 				<variable id="$OP">\p{Line_Break=Open_Punctuation}</variable>
     87 				<variable id="$PO">\p{Line_Break=Postfix_Numeric}</variable>
     88 				<variable id="$PR">\p{Line_Break=Prefix_Numeric}</variable>
     89 				<variable id="$QU">\p{Line_Break=Quotation}</variable>
     90 				<variable id="$SA">\p{Line_Break=Complex_Context}</variable>
     91 				<variable id="$SG">\p{Line_Break=Surrogate}</variable>
     92 				<variable id="$SP">\p{Line_Break=Space}</variable>
     93 				<variable id="$SY">\p{Line_Break=Break_Symbols}</variable>
     94 				<variable id="$WJ">\p{Line_Break=Word_Joiner}</variable>
     95 				<variable id="$XX">\p{Line_Break=Unknown}</variable>
     96 				<variable id="$ZW">\p{Line_Break=ZWSpace}</variable>
     97 				<variable id="$CJ">\p{Line_Break=Conditional_Japanese_Starter}</variable>
     98 				<variable id="$RI">\p{Line_Break=Regional_Indicator}</variable>
     99 				<variable id="$EB">\p{Line_Break=E_Base}</variable>
    100 				<variable id="$EM">\p{Line_Break=E_Modifier}</variable>
        				<!-- $ZWJ_O keeps the bare ZWJ set for rule 8.1; $ZWJ is later redefined to absorb trailing combining marks. -->
    101 				<variable id="$ZWJ_O">\p{Line_Break=ZWJ}</variable>
    102 				<variable id="$ZWJ">\p{Line_Break=ZWJ}</variable>
        				<!-- U+2010 HYPHEN (Line_Break=BA). Rule 20.09 references $HH next to $HY; it was previously undefined. -->
        				<!-- Definition follows UAX #14 LB20a and ICU line.txt. -->
        				<variable id="$HH">[\u2010]</variable>
    103 				<!-- Macros -->
    104 				<variable id="$CM">[$CM1 $ZWJ]</variable>
    105 				<!-- LB 1  Assign a line breaking class to each code point of the input. -->
    106 				<!-- Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm. -->
    107 				<!-- NOTE: CB is ok to fall through, but must handle others here. -->
    108 				<variable id="$AL">[$AI $AL $SG $XX $SA]</variable>
    109 				<variable id="$NS">[$NS $CJ]</variable>
    110 				<!-- WARNING: Fixes for Rule 9 -->
    111 				<!-- Treat X (CM | ZWJ)* as if it were X. -->
    112 				<!-- Where X is any line break class except SP, BK, CR, LF, NL or ZW. -->
    113 				<variable id="$X">$CM*</variable>
    114 				<!-- Macros -->
    115 				<variable id="$Spec1_">[$SP $BK $CR $LF $NL $ZW]</variable>
    116 				<variable id="$Spec2_">[^ $SP $BK $CR $LF $NL $ZW]</variable>
    117 				<variable id="$Spec3a_">[^ $SP $BA $HY $CM]</variable>
    118 				<variable id="$Spec3b_">[^ $BA $HY $CM]</variable>
    119 				<variable id="$Spec4_">[^ $NU $CM]</variable>
    120 				<variable id="$Spec5_">[$BK $CB $CR $LF $NL $SP $ZW]</variable>
    121 				<variable id="$AI">($AI $X)</variable>
    122 				<variable id="$AL">($AL $X)</variable>
    123 				<variable id="$B2">($B2 $X)</variable>
    124 				<variable id="$BA">($BA $X)</variable>
    125 				<variable id="$BB">($BB $X)</variable>
    126 				<variable id="$CB">($CB $X)</variable>
    127 				<variable id="$CL">($CL $X)</variable>
    128 				<variable id="$CP">($CP $X)</variable>
    129 				<variable id="$CM">($CM $X)</variable>
    130 				<variable id="$EX">($EX $X)</variable>
    131 				<variable id="$GL">($GL $X)</variable>
    132 				<variable id="$H2">($H2 $X)</variable>
    133 				<variable id="$H3">($H3 $X)</variable>
    134 				<variable id="$HL">($HL $X)</variable>
    135 				<variable id="$HY">($HY $X)</variable>
    136 				<variable id="$ID">($ID $X)</variable>
    137 				<variable id="$IN">($IN $X)</variable>
    138 				<variable id="$IS">($IS $X)</variable>
    139 				<variable id="$JL">($JL $X)</variable>
    140 				<variable id="$JT">($JT $X)</variable>
    141 				<variable id="$JV">($JV $X)</variable>
    142 				<variable id="$NS">($NS $X)</variable>
    143 				<variable id="$NU">($NU $X)</variable>
    144 				<variable id="$OP">($OP $X)</variable>
    145 				<variable id="$PO">($PO $X)</variable>
    146 				<variable id="$PR">($PR $X)</variable>
    147 				<variable id="$QU">($QU $X)</variable>
    148 				<variable id="$SA">($SA $X)</variable>
    149 				<variable id="$SG">($SG $X)</variable>
    150 				<variable id="$SY">($SY $X)</variable>
    151 				<variable id="$WJ">($WJ $X)</variable>
    152 				<variable id="$XX">($XX $X)</variable>
    153 				<variable id="$RI">($RI $X)</variable>
    154 				<variable id="$EB">($EB $X)</variable>
    155 				<variable id="$EM">($EM $X)</variable>
    156 				<variable id="$ZWJ">($ZWJ $X)</variable>
        				<variable id="$HH">($HH $X)</variable>
    157 				<!-- OUT OF ORDER ON PURPOSE -->
    158 				<!-- LB 10  Treat any remaining combining mark as AL. -->
    159 				<variable id="$AL">($AL | ^ $CM | (?&lt;=$Spec1_) $CM)</variable>
    160 			</variables>
    161 			<segmentRules>
    162 				<!-- Rules -->
    163 				<!-- LB 4  Always break after hard line breaks (but never between CR and LF). -->
    164 				<rule id="4"> $BK ÷ </rule>
    165 				<!-- LB 5  Treat CR followed by LF, as well as CR, LF and NL as hard line breaks. -->
    166 				<rule id="5.01"> $CR × $LF </rule>
    167 				<rule id="5.02"> $CR ÷ </rule>
    168 				<rule id="5.03"> $LF ÷ </rule>
    169 				<rule id="5.04"> $NL ÷ </rule>
    170 				<!-- LB 6  Do not break before hard line breaks. -->
    171 				<rule id="6"> × ( $BK | $CR | $LF | $NL ) </rule>
    172 				<!-- LB 7  Do not break before spaces or zero-width space. -->
    173 				<rule id="7.01"> × $SP </rule>
    174 				<rule id="7.02"> × $ZW </rule>
    175 				<!-- LB 8  Break before any character following a zero-width space, even if one or more spaces intervene. -->
    176 				<rule id="8"> $ZW $SP* ÷ </rule>
    177 				<!-- LB 8a  Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences) -->
    178 				<rule id="8.1"> $ZWJ_O × </rule>
    179 				<!-- LB 9  Do not break a combining character sequence; treat it as if it has the LB class of the base character -->
    180 				<!-- in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -->
    181 				<rule id="9"> $Spec2_ × $CM </rule>
    182 				<!-- WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AL $CM*)! -->
    183 				<!-- LB 11  Do not break before or after WORD JOINER and related characters. -->
    184 				<rule id="11.01"> × $WJ </rule>
    185 				<rule id="11.02"> $WJ × </rule>
    186 				<!-- LB 12  Do not break after NBSP and related characters. -->
    187 				<rule id="12"> $GL × </rule>
    188 				<rule id="12.1"> $Spec3a_ × $GL </rule>
    189 				<rule id="12.2"> $Spec3b_ $CM+ × $GL </rule>
    190 				<rule id="12.3"> ^ $CM+ × $GL </rule>
    191 				<!-- LB 13  Do not break before ] or ! or ; or /, even after spaces. -->
    192 				<!-- Using customization 7. -->
    193 				<rule id="13.01"> × $EX </rule>
    194 				<rule id="13.02"> $Spec4_ × ($CL | $CP | $IS | $SY) </rule>
    195 				<rule id="13.03"> $Spec4_ $CM+ × ($CL | $CP | $IS | $SY) </rule>
    196 				<rule id="13.04"> ^ $CM+ × ($CL | $CP | $IS | $SY) </rule>
    197 				<!-- LB 14  Do not break after [, even after spaces. -->
    198 				<rule id="14"> $OP $SP* × </rule>
    199 				<!-- LB 15  Do not break within "[, even with intervening spaces. -->
    200 				<rule id="15"> $QU $SP* × $OP </rule>
    201 				<!-- LB 16  Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. -->
    202 				<rule id="16"> ($CL | $CP) $SP* × $NS </rule>
    203 				<!-- LB 17  Do not break within ‘——’ (two Break_Both characters), even with intervening spaces. -->
    204 				<rule id="17"> $B2 $SP* × $B2 </rule>
    205 				<!-- LB 18  Break after spaces. -->
    206 				<rule id="18"> $SP ÷ </rule>
    207 				<!-- LB 19  Do not break before or after ". -->
    208 				<rule id="19.01"> × $QU </rule>
    209 				<rule id="19.02"> $QU × </rule>
    210 				<!-- LB 20  Break before and after unresolved CB. -->
    211 				<rule id="20.01"> ÷ $CB </rule>
    212 				<rule id="20.02"> $CB ÷ </rule>
    213 				<!-- LB 20.9  Don't break between Hyphens and Letters when there is a break preceding the hyphen. -->
    214 				<!-- Originally added as a Finnish tailoring, now promoted to default CLDR/ICU behavior. -->
    215 				<!-- Must be before LB 21. Note: this is not default UAX-14 behaviour. See ICU issue ICU-8151. -->
    216 				<!-- (Unlike in ICU, here we just check a limited set of known breaks, ignoring some cases like LB 14). -->
    217 				<rule id="20.09"> $Spec5_ ($HY | $HH) × $AL </rule>
    218 				<!-- LB 21  Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents. -->
    219 				<rule id="21.01"> × $BA </rule>
    220 				<rule id="21.02"> × $HY </rule>
    221 				<rule id="21.03"> × $NS </rule>
    222 				<rule id="21.04"> $BB × </rule>
    223 				<!-- LB 21a  Don't break after Hebrew + Hyphen. -->
    224 				<rule id="21.1"> $HL ($HY | $BA) × </rule>
    225 				<!-- LB 21b  Don't break between Solidus and Hebrew letters. -->
    226 				<rule id="21.2"> $SY × $HL </rule>
    227 				<!-- LB 22  Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis. -->
    228 				<rule id="22.01"> ($AL | $HL) × $IN </rule>
    229 				<rule id="22.02"> $EX × $IN </rule>
    230 				<rule id="22.03"> ($ID | $EB | $EM) × $IN </rule>
    231 				<rule id="22.04"> $IN × $IN </rule>
    232 				<rule id="22.05"> $NU × $IN </rule>
    233 				<!-- LB 23  Do not break between digits and letters. -->
    234 				<rule id="23.02"> ($AL | $HL) × $NU </rule>
    235 				<rule id="23.03"> $NU × ($AL | $HL) </rule>
    236 				<!-- LB 23a  Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes. -->
    237 				<rule id="23.12"> $PR × ($ID | $EB | $EM) </rule>
    238 				<rule id="23.13"> ($ID | $EB | $EM) × $PO </rule>
    239 				<!-- LB 24  Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix. -->
    240 				<rule id="24.02"> ($PR | $PO) × ($AL | $HL) </rule>
    241 				<rule id="24.03"> ($AL | $HL) × ($PR | $PO) </rule>
    242 				<!-- Using customization 7 -->
    243 				<!-- LB 25 Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ? -->
    244 				<!-- Insert × every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after -->
    245 				<rule id="25.01"> ($PR | $PO) × ( $OP | $HY )? $NU </rule>
    246 				<rule id="25.02"> ( $OP | $HY ) × $NU </rule>
    247 				<rule id="25.03"> $NU × ($NU | $SY | $IS) </rule>
    248 				<rule id="25.04"> $NU ($NU | $SY | $IS)* × ($NU | $SY | $IS | $CL | $CP) </rule>
    249 				<rule id="25.05"> $NU ($NU | $SY | $IS)* ($CL | $CP)? × ($PO | $PR) </rule>
    250 				<!-- LB 26 Do not break a Korean syllable. -->
        				<!-- Alternations on each side of the operator are parenthesized so they cannot be read as spanning it. -->
    251 				<rule id="26.01"> $JL × ($JL | $JV | $H2 | $H3) </rule>
    252 				<rule id="26.02"> ($JV | $H2) × ($JV | $JT) </rule>
    253 				<rule id="26.03"> ($JT | $H3) × $JT </rule>
    254 				<!-- LB 27 Treat a Korean Syllable Block the same as ID. -->
    255 				<rule id="27.01"> ($JL | $JV | $JT | $H2 | $H3) × $IN </rule>
    256 				<rule id="27.02"> ($JL | $JV | $JT | $H2 | $H3) × $PO </rule>
    257 				<rule id="27.03"> $PR × ($JL | $JV | $JT | $H2 | $H3) </rule>
    258 				<!-- LB 28  Do not break between alphabetics ("at"). -->
    259 				<rule id="28"> ($AL | $HL) × ($AL | $HL) </rule>
    260 				<!-- LB 29  Do not break between numeric punctuation and alphabetics ("e.g."). -->
    261 				<rule id="29"> $IS × ($AL | $HL) </rule>
    262 				<!-- LB 30  Do not break between letters, numbers or ordinary symbols and opening or closing punctuation. -->
    263 				<rule id="30.01"> ($AL | $HL | $NU) × $OP </rule>
    264 				<rule id="30.02"> $CP × ($AL | $HL | $NU) </rule>
    265 				<!-- LB 30a  Break between two Regional Indicators if and only if there is an even number of them before the point being considered. -->
    266 				<rule id="30.11"> ^ ($RI $RI)* $RI × $RI </rule>
    267 				<rule id="30.12"> [^$RI] ($RI $RI)* $RI × $RI </rule>
    268 				<rule id="30.13"> $RI ÷ $RI </rule>
        				<!-- LB 30b  Do not break between an emoji base and an emoji modifier. -->
    269 				<rule id="30.2"> $EB × $EM </rule>
    270 			</segmentRules>
    271 		</segmentation>
    272 		<segmentation type="SentenceBreak">
        			<!-- Sentence boundaries (UAX #29). -->
        			<!-- Rule notation: the text left of the operator must match before the candidate position and the text right of it after; -->
        			<!-- "×" means do not break at that position, "÷" means a break is allowed. Rules are evaluated in ascending id order. -->
    273 			<variables>
    274 				<variable id="$CR">\p{Sentence_Break=CR}</variable>
    275 				<variable id="$LF">\p{Sentence_Break=LF}</variable>
    276 				<variable id="$Extend">\p{Sentence_Break=Extend}</variable>
    277 				<variable id="$Format">\p{Sentence_Break=Format}</variable>
    278 				<variable id="$Sep">\p{Sentence_Break=Sep}</variable>
    279 				<variable id="$Sp">\p{Sentence_Break=Sp}</variable>
    280 				<variable id="$Lower">\p{Sentence_Break=Lower}</variable>
    281 				<variable id="$Upper">\p{Sentence_Break=Upper}</variable>
    282 				<variable id="$OLetter">\p{Sentence_Break=OLetter}</variable>
    283 				<variable id="$Numeric">\p{Sentence_Break=Numeric}</variable>
    284 				<variable id="$ATerm">\p{Sentence_Break=ATerm}</variable>
    285 				<variable id="$STerm">\p{Sentence_Break=STerm}</variable>
    286 				<variable id="$Close">\p{Sentence_Break=Close}</variable>
    287 				<variable id="$SContinue">\p{Sentence_Break=SContinue}</variable>
    288 				<variable id="$Any">.</variable>
    289 				<!-- Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need. -->
    290 				<!-- WARNING: For Rule 5, now add format and extend to everything but Sep, Format, and Extend -->
    291 				<variable id="$FE">[$Format $Extend]</variable>
    292 				<!-- Special rules -->
        				<!-- $NotPreLower_ must be built here, while the class variables are still plain sets (before the $FE* wrapping below). -->
    293 				<variable id="$NotPreLower_">[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]</variable>
    294 				<variable id="$Sp">($Sp $FE*)</variable>
    295 				<variable id="$Lower">($Lower $FE*)</variable>
    296 				<variable id="$Upper">($Upper $FE*)</variable>
    297 				<variable id="$OLetter">($OLetter $FE*)</variable>
    298 				<variable id="$Numeric">($Numeric $FE*)</variable>
    299 				<variable id="$ATerm">($ATerm $FE*)</variable>
    300 				<variable id="$STerm">($STerm $FE*)</variable>
    301 				<variable id="$Close">($Close $FE*)</variable>
    302 				<variable id="$SContinue">($SContinue $FE*)</variable>
    303 				<!-- Macros -->
    304 				<variable id="$ParaSep">($Sep | $CR | $LF)</variable>
    305 				<variable id="$SATerm">($STerm | $ATerm)</variable>
    306 			</variables>
    307 			<segmentRules>
    308 				<!-- Rules -->
    309 				<!-- Break at the start and end of text, unless the text is empty. -->
    310 				<!-- Do not break within CRLF. -->
    311 				<rule id="3"> $CR × $LF </rule>
    312 				<!-- Break after paragraph separators. -->
    313 				<rule id="4"> $ParaSep ÷ </rule>
    314 				<!-- Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
    315 				<!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
    316 				<!-- AND add format and extend in all variables definitions that appear after this point! -->
    317 				<rule id="5"> × [$Format $Extend] </rule>
    318 				<!-- Do not break after full stop in certain contexts. [See note below.] -->
    319 				<!-- Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, -->
    320 				<!-- is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. -->
    321 				<!-- For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence. -->
    322 				<rule id="6"> $ATerm × $Numeric </rule>
    323 				<rule id="7"> ($Upper | $Lower) $ATerm × $Upper </rule>
    324 				<rule id="8"> $ATerm $Close* $Sp* × $NotPreLower_* $Lower </rule>
    325 				<rule id="8.1"> $SATerm $Close* $Sp* × ($SContinue | $SATerm) </rule>
    326 				<!-- Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator. -->
    327 				<rule id="9"> $SATerm $Close* × ( $Close | $Sp | $ParaSep ) </rule>
    328 				<!-- Note the fix to $Sp*, $Sep? -->
    329 				<rule id="10"> $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) </rule>
    330 				<rule id="11"> $SATerm $Close* $Sp* $ParaSep? ÷ </rule>
    331 				<!-- Otherwise, do not break -->
    332 				<rule id="998"> × $Any </rule>
    333 			</segmentRules>
    334 			<suppressions type="standard">
    335 				<!-- root suppression is empty. -->
    336 			</suppressions>
    337 		</segmentation>
    338 		<segmentation type="WordBreak">
        			<!-- Word boundaries (UAX #29). -->
        			<!-- Rule notation: the text left of the operator must match before the candidate position and the text right of it after; -->
        			<!-- "×" means do not break at that position, "÷" means a break is allowed. Rules are evaluated in ascending id order. -->
    339 			<variables>
    340 				<variable id="$CR">\p{Word_Break=CR}</variable>
    341 				<variable id="$LF">\p{Word_Break=LF}</variable>
    342 				<variable id="$Newline">\p{Word_Break=Newline}</variable>
    343 				<variable id="$Extend">\p{Word_Break=Extend}</variable>
    344 				<!-- Now normal variables -->
    345 				<variable id="$Format">\p{Word_Break=Format}</variable>
    346 				<variable id="$Katakana">\p{Word_Break=Katakana}</variable>
    347 				<variable id="$ALetter">\p{Word_Break=ALetter}</variable>
    348 				<variable id="$MidLetter">\p{Word_Break=MidLetter}</variable>
    349 				<variable id="$MidNum">\p{Word_Break=MidNum}</variable>
    350 				<variable id="$MidNumLet">\p{Word_Break=MidNumLet}</variable>
    351 				<variable id="$Numeric">\p{Word_Break=Numeric}</variable>
    352 				<variable id="$ExtendNumLet">\p{Word_Break=ExtendNumLet}</variable>
    353 				<variable id="$RI">\p{Word_Break=Regional_Indicator}</variable>
    354 				<variable id="$Hebrew_Letter">\p{Word_Break=Hebrew_Letter}</variable>
    355 				<variable id="$Double_Quote">\p{Word_Break=Double_Quote}</variable>
    356 				<variable id="$Single_Quote">\p{Word_Break=Single_Quote}</variable>
    357 				<variable id="$ZWJ">\p{Word_Break=ZWJ}</variable>
    358 				<variable id="$ExtPict">\p{Extended_Pictographic}</variable>
    359 				<variable id="$WSegSpace">\p{Word_Break=WSegSpace}</variable>
    360 				<!-- Macros -->
    361 				<variable id="$AHLetter">($ALetter | $Hebrew_Letter)</variable>
    362 				<variable id="$MidNumLetQ">($MidNumLet | $Single_Quote)</variable>
    363 				<!-- WARNING: For Rule 4: Fixes for GC, Format -->
    364 				<!-- Add format and extend to everything -->
    365 				<variable id="$FE">[$Format $Extend $ZWJ]</variable>
    366 				<!-- Special rules -->
    367 				<variable id="$NotBreak_">[^ $Newline $CR $LF ]</variable>
        				<!-- The redefinitions below append $FE* to each class. $ZWJ, $ExtPict and $WSegSpace are left unwrapped. -->
    368 				<variable id="$Katakana">($Katakana $FE*)</variable>
    369 				<variable id="$ALetter">($ALetter $FE*)</variable>
    370 				<variable id="$MidLetter">($MidLetter $FE*)</variable>
    371 				<variable id="$MidNum">($MidNum $FE*)</variable>
    372 				<variable id="$MidNumLet">($MidNumLet $FE*)</variable>
    373 				<variable id="$Numeric">($Numeric $FE*)</variable>
    374 				<variable id="$ExtendNumLet">($ExtendNumLet $FE*)</variable>
    375 				<variable id="$RI">($RI $FE*)</variable>
    376 				<variable id="$Hebrew_Letter">($Hebrew_Letter $FE*)</variable>
    377 				<variable id="$Double_Quote">($Double_Quote $FE*)</variable>
    378 				<variable id="$Single_Quote">($Single_Quote $FE*)</variable>
    379 				<variable id="$AHLetter">($AHLetter $FE*)</variable>
    380 				<variable id="$MidNumLetQ">($MidNumLetQ $FE*)</variable>
    381 			</variables>
    382 			<segmentRules>
    383 				<!-- Rules -->
    384 				<!-- Break at the start and end of text, unless the text is empty. -->
    385 				<!-- Do not break within CRLF. -->
    386 				<rule id="3"> $CR × $LF </rule>
    387 				<!-- Otherwise break before and after Newlines (including CR and LF) -->
    388 				<rule id="3.1"> ($Newline | $CR | $LF) ÷ </rule>
    389 				<rule id="3.2"> ÷ ($Newline | $CR | $LF) </rule>
    390 				<!-- Do not break within emoji zwj sequences. -->
    391 				<rule id="3.3"> $ZWJ × $ExtPict </rule>
        				<!-- Keep horizontal whitespace together. -->
    392 				<rule id="3.4"> $WSegSpace × $WSegSpace </rule>
    393 				<!-- Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
    394 				<!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
    395 				<!-- AND add format and extend in all variables definitions that appear after this point! -->
    396 				<rule id="4"> $NotBreak_ × [$Format $Extend $ZWJ] </rule>
    397 				<!-- Vanilla rules -->
    398 				<!-- Do not break between most letters. -->
    399 				<rule id="5"> $AHLetter × $AHLetter </rule>
    400 				<!-- Do not break letters across certain punctuation. -->
    401 				<rule id="6"> $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter </rule>
    402 				<rule id="7"> $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter </rule>
    403 				<rule id="7.1"> $Hebrew_Letter × $Single_Quote </rule>
    404 				<rule id="7.2"> $Hebrew_Letter × $Double_Quote $Hebrew_Letter </rule>
    405 				<rule id="7.3"> $Hebrew_Letter $Double_Quote × $Hebrew_Letter </rule>
    406 				<!-- Do not break within sequences of digits, or digits adjacent to letters (3a, or A3). -->
    407 				<rule id="8"> $Numeric × $Numeric </rule>
    408 				<rule id="9"> $AHLetter × $Numeric </rule>
    409 				<rule id="10"> $Numeric × $AHLetter </rule>
    410 				<!-- Do not break within sequences, such as 3.2 or 3,456.789. -->
    411 				<rule id="11"> $Numeric ($MidNum | $MidNumLetQ) × $Numeric </rule>
    412 				<rule id="12"> $Numeric × ($MidNum | $MidNumLetQ) $Numeric </rule>
    413 				<!-- Do not break between Katakana. -->
    414 				<rule id="13"> $Katakana × $Katakana </rule>
    415 				<!-- Do not break from extenders. -->
    416 				<rule id="13.1"> ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet </rule>
    417 				<rule id="13.2"> $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) </rule>
    418 				<!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
    419 				<rule id="15"> ^ ($RI $RI)* $RI × $RI </rule>
    420 				<rule id="16"> [^$RI] ($RI $RI)* $RI × $RI </rule>
    421 			</segmentRules>
    422 			<!-- Otherwise, break everywhere (including around ideographs). -->
    423 		</segmentation>
    424 	</segmentations>
    425 </ldml>
    426