<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!--
Copyright 1991-2015 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<!--
Root segmentation data: boundary rules for grapheme clusters, line breaks, sentences and words.

Reading the rules:
  * Each <rule> has the form "before OP after". The operator × means "do not break here";
    the operator ÷ means "a break is allowed here". An empty side matches any context.
  * Rules are identified by their numeric id, which follows the rule numbering of the
    corresponding Unicode annex (UAX #29 for grapheme/word/sentence, UAX #14 for line).
  * A <variable> may be declared more than once with the same id. A later declaration
    refers to the earlier value of that id, so declaration order inside <variables> matters.
-->
<ldml>
  <identity>
    <version number="$Revision: 14457 $"/>
    <language type="root"/>
  </identity>
  <segmentations>
    <segmentation type="GraphemeClusterBreak">
      <variables>
        <variable id="$CR">\p{Grapheme_Cluster_Break=CR}</variable>
        <variable id="$LF">\p{Grapheme_Cluster_Break=LF}</variable>
        <variable id="$Control">\p{Grapheme_Cluster_Break=Control}</variable>
        <variable id="$Extend">\p{Grapheme_Cluster_Break=Extend}</variable>
        <variable id="$ZWJ">\p{Grapheme_Cluster_Break=ZWJ}</variable>
        <variable id="$RI">\p{Grapheme_Cluster_Break=Regional_Indicator}</variable>
        <variable id="$Prepend">\p{Grapheme_Cluster_Break=Prepend}</variable>
        <variable id="$SpacingMark">\p{Grapheme_Cluster_Break=SpacingMark}</variable>
        <variable id="$L">\p{Grapheme_Cluster_Break=L}</variable>
        <variable id="$V">\p{Grapheme_Cluster_Break=V}</variable>
        <variable id="$T">\p{Grapheme_Cluster_Break=T}</variable>
        <variable id="$LV">\p{Grapheme_Cluster_Break=LV}</variable>
        <variable id="$LVT">\p{Grapheme_Cluster_Break=LVT}</variable>
        <variable id="$ExtPict">\p{Extended_Pictographic}</variable>
        <variable id="$ExtCccZwj">[[$Extend-\p{ccc=0}] $ZWJ]</variable>
      </variables>
      <segmentRules>
        <!-- Rules -->
        <!-- Break at the start and end of text, unless the text is empty. -->
        <!-- Do not break between a CR and LF. Otherwise, break before and after controls. -->
        <rule id="3"> $CR × $LF </rule>
        <rule id="4"> ( $Control | $CR | $LF ) ÷ </rule>
        <rule id="5"> ÷ ( $Control | $CR | $LF ) </rule>
        <!-- Do not break Hangul syllable sequences. -->
        <rule id="6"> $L × ( $L | $V | $LV | $LVT ) </rule>
        <rule id="7"> ( $LV | $V ) × ( $V | $T ) </rule>
        <rule id="8"> ( $LVT | $T) × $T </rule>
        <!-- Do not break before extending characters or ZWJ. -->
        <rule id="9"> × ($Extend | $ZWJ) </rule>
        <!-- Only for extended grapheme clusters: Do not break before SpacingMarks, or after Prepend characters. -->
        <rule id="9.1"> × $SpacingMark </rule>
        <rule id="9.2"> $Prepend × </rule>
        <!-- Do not break within emoji modifier sequences or emoji zwj sequences. -->
        <rule id="11"> $ExtPict $Extend* $ZWJ × $ExtPict </rule>
        <!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
        <rule id="12"> ^ ($RI $RI)* $RI × $RI </rule>
        <rule id="13"> [^$RI] ($RI $RI)* $RI × $RI </rule>
      </segmentRules>
      <!-- Otherwise, break everywhere. -->
    </segmentation>
    <segmentation type="LineBreak">
      <variables>
        <!-- Variables -->
        <variable id="$AI">\p{Line_Break=Ambiguous}</variable>
        <variable id="$AL">\p{Line_Break=Alphabetic}</variable>
        <variable id="$B2">\p{Line_Break=Break_Both}</variable>
        <variable id="$BA">\p{Line_Break=Break_After}</variable>
        <variable id="$BB">\p{Line_Break=Break_Before}</variable>
        <variable id="$BK">\p{Line_Break=Mandatory_Break}</variable>
        <variable id="$CB">\p{Line_Break=Contingent_Break}</variable>
        <variable id="$CL">\p{Line_Break=Close_Punctuation}</variable>
        <variable id="$CP">\p{Line_Break=CP}</variable>
        <variable id="$CM1">\p{Line_Break=Combining_Mark}</variable>
        <variable id="$CR">\p{Line_Break=Carriage_Return}</variable>
        <variable id="$EX">\p{Line_Break=Exclamation}</variable>
        <variable id="$GL">\p{Line_Break=Glue}</variable>
        <variable id="$H2">\p{Line_Break=H2}</variable>
        <variable id="$H3">\p{Line_Break=H3}</variable>
        <variable id="$HL">\p{Line_Break=HL}</variable>
        <variable id="$HY">\p{Line_Break=Hyphen}</variable>
        <variable id="$ID">\p{Line_Break=Ideographic}</variable>
        <variable id="$IN">\p{Line_Break=Inseparable}</variable>
        <variable id="$IS">\p{Line_Break=Infix_Numeric}</variable>
        <variable id="$JL">\p{Line_Break=JL}</variable>
        <variable id="$JT">\p{Line_Break=JT}</variable>
        <variable id="$JV">\p{Line_Break=JV}</variable>
        <variable id="$LF">\p{Line_Break=Line_Feed}</variable>
        <variable id="$NL">\p{Line_Break=Next_Line}</variable>
        <variable id="$NS">\p{Line_Break=Nonstarter}</variable>
        <variable id="$NU">\p{Line_Break=Numeric}</variable>
        <variable id="$OP">\p{Line_Break=Open_Punctuation}</variable>
        <variable id="$PO">\p{Line_Break=Postfix_Numeric}</variable>
        <variable id="$PR">\p{Line_Break=Prefix_Numeric}</variable>
        <variable id="$QU">\p{Line_Break=Quotation}</variable>
        <variable id="$SA">\p{Line_Break=Complex_Context}</variable>
        <variable id="$SG">\p{Line_Break=Surrogate}</variable>
        <variable id="$SP">\p{Line_Break=Space}</variable>
        <variable id="$SY">\p{Line_Break=Break_Symbols}</variable>
        <variable id="$WJ">\p{Line_Break=Word_Joiner}</variable>
        <variable id="$XX">\p{Line_Break=Unknown}</variable>
        <variable id="$ZW">\p{Line_Break=ZWSpace}</variable>
        <variable id="$CJ">\p{Line_Break=Conditional_Japanese_Starter}</variable>
        <variable id="$RI">\p{Line_Break=Regional_Indicator}</variable>
        <variable id="$EB">\p{Line_Break=E_Base}</variable>
        <variable id="$EM">\p{Line_Break=E_Modifier}</variable>
        <!-- $ZWJ_O keeps the bare ZWJ class; $ZWJ is redefined below to absorb trailing combining marks. -->
        <variable id="$ZWJ_O">\p{Line_Break=ZWJ}</variable>
        <variable id="$ZWJ">\p{Line_Break=ZWJ}</variable>
        <!-- Macros -->
        <variable id="$CM">[$CM1 $ZWJ]</variable>
        <!-- LB 1 Assign a line breaking class to each code point of the input. -->
        <!-- Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm. -->
        <!-- NOTE: CB is ok to fall through, but must handle others here. -->
        <variable id="$AL">[$AI $AL $SG $XX $SA]</variable>
        <variable id="$NS">[$NS $CJ]</variable>
        <!-- WARNING: Fixes for Rule 9 -->
        <!-- Treat X (CM | ZWJ)* as if it were X. -->
        <!-- Where X is any line break class except SP, BK, CR, LF, NL or ZW. -->
        <variable id="$X">$CM*</variable>
        <!-- Macros -->
        <variable id="$Spec1_">[$SP $BK $CR $LF $NL $ZW]</variable>
        <variable id="$Spec2_">[^ $SP $BK $CR $LF $NL $ZW]</variable>
        <variable id="$Spec3a_">[^ $SP $BA $HY $CM]</variable>
        <variable id="$Spec3b_">[^ $BA $HY $CM]</variable>
        <variable id="$Spec4_">[^ $NU $CM]</variable>
        <variable id="$Spec5_">[$BK $CB $CR $LF $NL $SP $ZW]</variable>
        <!-- From here on each class is redefined as "class followed by $X", i.e. followed by any run of combining marks. -->
        <variable id="$AI">($AI $X)</variable>
        <variable id="$AL">($AL $X)</variable>
        <variable id="$B2">($B2 $X)</variable>
        <variable id="$BA">($BA $X)</variable>
        <variable id="$BB">($BB $X)</variable>
        <variable id="$CB">($CB $X)</variable>
        <variable id="$CL">($CL $X)</variable>
        <variable id="$CP">($CP $X)</variable>
        <variable id="$CM">($CM $X)</variable>
        <variable id="$EX">($EX $X)</variable>
        <variable id="$GL">($GL $X)</variable>
        <variable id="$H2">($H2 $X)</variable>
        <variable id="$H3">($H3 $X)</variable>
        <variable id="$HL">($HL $X)</variable>
        <variable id="$HY">($HY $X)</variable>
        <variable id="$ID">($ID $X)</variable>
        <variable id="$IN">($IN $X)</variable>
        <variable id="$IS">($IS $X)</variable>
        <variable id="$JL">($JL $X)</variable>
        <variable id="$JT">($JT $X)</variable>
        <variable id="$JV">($JV $X)</variable>
        <variable id="$NS">($NS $X)</variable>
        <variable id="$NU">($NU $X)</variable>
        <variable id="$OP">($OP $X)</variable>
        <variable id="$PO">($PO $X)</variable>
        <variable id="$PR">($PR $X)</variable>
        <variable id="$QU">($QU $X)</variable>
        <variable id="$SA">($SA $X)</variable>
        <variable id="$SG">($SG $X)</variable>
        <variable id="$SY">($SY $X)</variable>
        <variable id="$WJ">($WJ $X)</variable>
        <variable id="$XX">($XX $X)</variable>
        <variable id="$RI">($RI $X)</variable>
        <variable id="$EB">($EB $X)</variable>
        <variable id="$EM">($EM $X)</variable>
        <variable id="$ZWJ">($ZWJ $X)</variable>
        <!-- OUT OF ORDER ON PURPOSE -->
        <!-- LB 10 Treat any remaining combining mark as AL. -->
        <!-- The look-behind's "<" must be written as an entity to keep this file well-formed. -->
        <variable id="$AL">($AL | ^ $CM | (?&lt;=$Spec1_) $CM)</variable>
      </variables>
      <segmentRules>
        <!-- Rules -->
        <!-- LB 4 Always break after hard line breaks (but never between CR and LF). -->
        <rule id="4"> $BK ÷ </rule>
        <!-- LB 5 Treat CR followed by LF, as well as CR, LF and NL as hard line breaks. -->
        <rule id="5.01"> $CR × $LF </rule>
        <rule id="5.02"> $CR ÷ </rule>
        <rule id="5.03"> $LF ÷ </rule>
        <rule id="5.04"> $NL ÷ </rule>
        <!-- LB 6 Do not break before hard line breaks. -->
        <rule id="6"> × ( $BK | $CR | $LF | $NL ) </rule>
        <!-- LB 7 Do not break before spaces or zero-width space. -->
        <rule id="7.01"> × $SP </rule>
        <rule id="7.02"> × $ZW </rule>
        <!-- LB 8 Break before any character following a zero-width space, even if one or more spaces intervene. -->
        <rule id="8"> $ZW $SP* ÷ </rule>
        <!-- LB 8a Don't break between ZWJ and IDs (for use in Emoji ZWJ sequences) -->
        <rule id="8.1"> $ZWJ_O × </rule>
        <!-- LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character -->
        <!-- in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -->
        <rule id="9"> $Spec2_ × $CM </rule>
        <!-- WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AL $CM*)! -->
        <!-- LB 11 Do not break before or after WORD JOINER and related characters. -->
        <rule id="11.01"> × $WJ </rule>
        <rule id="11.02"> $WJ × </rule>
        <!-- LB 12 Do not break after NBSP and related characters. -->
        <rule id="12"> $GL × </rule>
        <rule id="12.1"> $Spec3a_ × $GL </rule>
        <rule id="12.2"> $Spec3b_ $CM+ × $GL </rule>
        <rule id="12.3"> ^ $CM+ × $GL </rule>
        <!-- LB 13 Do not break before ] or ! or ; or /, even after spaces. -->
        <!-- Using customization 7. -->
        <rule id="13.01"> × $EX </rule>
        <rule id="13.02"> $Spec4_ × ($CL | $CP | $IS | $SY) </rule>
        <rule id="13.03"> $Spec4_ $CM+ × ($CL | $CP | $IS | $SY) </rule>
        <rule id="13.04"> ^ $CM+ × ($CL | $CP | $IS | $SY) </rule>
        <!-- LB 14 Do not break after [, even after spaces. -->
        <rule id="14"> $OP $SP* × </rule>
        <!-- LB 15 Do not break between a quotation mark and following opening punctuation (as in "[), even with intervening spaces. -->
        <rule id="15"> $QU $SP* × $OP </rule>
        <!-- LB 16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. -->
        <rule id="16"> ($CL | $CP) $SP* × $NS </rule>
        <!-- LB 17 Do not break between two B2 characters (for example a pair of em dashes), even with intervening spaces. -->
        <rule id="17"> $B2 $SP* × $B2 </rule>
        <!-- LB 18 Break after spaces. -->
        <rule id="18"> $SP ÷ </rule>
        <!-- LB 19 Do not break before or after quotation marks. -->
        <rule id="19.01"> × $QU </rule>
        <rule id="19.02"> $QU × </rule>
        <!-- LB 20 Break before and after unresolved CB. -->
        <rule id="20.01"> ÷ $CB </rule>
        <rule id="20.02"> $CB ÷ </rule>
        <!-- LB 20.9 Don't break between Hyphens and Letters when there is a break preceding the hyphen. -->
        <!-- Originally added as a Finnish tailoring, now promoted to default CLDR/ICU behavior. -->
        <!-- Must be before LB 21. Note: this is not default UAX-14 behaviour. See ICU issue ICU-8151. -->
        <!-- (Unlike in ICU, here we just check a limited set of known breaks, ignoring some cases like LB 14). -->
        <!-- NOTE(review): $HH is not declared in this segmentation's variables; confirm where it is defined or add a definition. -->
        <rule id="20.09"> $Spec5_ ($HY | $HH) × $AL </rule>
        <!-- LB 21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents. -->
        <rule id="21.01"> × $BA </rule>
        <rule id="21.02"> × $HY </rule>
        <rule id="21.03"> × $NS </rule>
        <rule id="21.04"> $BB × </rule>
        <!-- LB 21a Don't break after Hebrew + Hyphen. -->
        <rule id="21.1"> $HL ($HY | $BA) × </rule>
        <!-- LB 21b Don't break between Solidus and Hebrew letters. -->
        <rule id="21.2"> $SY × $HL </rule>
        <!-- LB 22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis. -->
        <rule id="22.01"> ($AL | $HL) × $IN </rule>
        <rule id="22.02"> $EX × $IN </rule>
        <rule id="22.03"> ($ID | $EB | $EM) × $IN </rule>
        <rule id="22.04"> $IN × $IN </rule>
        <rule id="22.05"> $NU × $IN </rule>
        <!-- LB 23 Do not break between digits and letters. -->
        <rule id="23.02"> ($AL | $HL) × $NU </rule>
        <rule id="23.03"> $NU × ($AL | $HL) </rule>
        <!-- LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes. -->
        <rule id="23.12"> $PR × ($ID | $EB | $EM) </rule>
        <rule id="23.13"> ($ID | $EB | $EM) × $PO </rule>
        <!-- LB 24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix. -->
        <rule id="24.02"> ($PR | $PO) × ($AL | $HL) </rule>
        <rule id="24.03"> ($AL | $HL) × ($PR | $PO) </rule>
        <!-- Using customization 7 -->
        <!-- LB Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ? -->
        <!-- Insert every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after -->
        <rule id="25.01"> ($PR | $PO) × ( $OP | $HY )? $NU </rule>
        <rule id="25.02"> ( $OP | $HY ) × $NU </rule>
        <rule id="25.03"> $NU × ($NU | $SY | $IS) </rule>
        <rule id="25.04"> $NU ($NU | $SY | $IS)* × ($NU | $SY | $IS | $CL | $CP) </rule>
        <rule id="25.05"> $NU ($NU | $SY | $IS)* ($CL | $CP)? × ($PO | $PR) </rule>
        <!-- LB 26 Do not break a Korean syllable. -->
        <!-- The alternations on each side of the operator are intentionally left unparenthesised, as in the source data. -->
        <rule id="26.01"> $JL × $JL | $JV | $H2 | $H3 </rule>
        <rule id="26.02"> $JV | $H2 × $JV | $JT </rule>
        <rule id="26.03"> $JT | $H3 × $JT </rule>
        <!-- LB 27 Treat a Korean Syllable Block the same as ID. -->
        <rule id="27.01"> $JL | $JV | $JT | $H2 | $H3 × $IN </rule>
        <rule id="27.02"> $JL | $JV | $JT | $H2 | $H3 × $PO </rule>
        <rule id="27.03"> $PR × $JL | $JV | $JT | $H2 | $H3 </rule>
        <!-- LB 28 Do not break between alphabetics ("at"). -->
        <rule id="28"> ($AL | $HL) × ($AL | $HL) </rule>
        <!-- LB 29 Do not break between numeric punctuation and alphabetics ("e.g."). -->
        <rule id="29"> $IS × ($AL | $HL) </rule>
        <!-- LB 30 Do not break between letters, numbers or ordinary symbols and opening or closing punctuation. -->
        <rule id="30.01"> ($AL | $HL | $NU) × $OP </rule>
        <rule id="30.02"> $CP × ($AL | $HL | $NU) </rule>
        <!-- LB 30a Break between two Regional Indicators if and only if there is an even number of them before the point being considered. -->
        <rule id="30.11"> ^ ($RI $RI)* $RI × $RI </rule>
        <rule id="30.12"> [^$RI] ($RI $RI)* $RI × $RI </rule>
        <rule id="30.13"> $RI ÷ $RI </rule>
        <!-- LB 30b Do not break between an emoji base and an emoji modifier. -->
        <rule id="30.2"> $EB × $EM </rule>
      </segmentRules>
    </segmentation>
    <segmentation type="SentenceBreak">
      <variables>
        <variable id="$CR">\p{Sentence_Break=CR}</variable>
        <variable id="$LF">\p{Sentence_Break=LF}</variable>
        <variable id="$Extend">\p{Sentence_Break=Extend}</variable>
        <variable id="$Format">\p{Sentence_Break=Format}</variable>
        <variable id="$Sep">\p{Sentence_Break=Sep}</variable>
        <variable id="$Sp">\p{Sentence_Break=Sp}</variable>
        <variable id="$Lower">\p{Sentence_Break=Lower}</variable>
        <variable id="$Upper">\p{Sentence_Break=Upper}</variable>
        <variable id="$OLetter">\p{Sentence_Break=OLetter}</variable>
        <variable id="$Numeric">\p{Sentence_Break=Numeric}</variable>
        <variable id="$ATerm">\p{Sentence_Break=ATerm}</variable>
        <variable id="$STerm">\p{Sentence_Break=STerm}</variable>
        <variable id="$Close">\p{Sentence_Break=Close}</variable>
        <variable id="$SContinue">\p{Sentence_Break=SContinue}</variable>
        <variable id="$Any">.</variable>
        <!-- Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need. -->
        <!-- WARNING: For Rule 5, now add format and extend to everything but Sep, Format, and Extend -->
        <variable id="$FE">[$Format $Extend]</variable>
        <!-- Special rules -->
        <variable id="$NotPreLower_">[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]</variable>
        <variable id="$Sp">($Sp $FE*)</variable>
        <variable id="$Lower">($Lower $FE*)</variable>
        <variable id="$Upper">($Upper $FE*)</variable>
        <variable id="$OLetter">($OLetter $FE*)</variable>
        <variable id="$Numeric">($Numeric $FE*)</variable>
        <variable id="$ATerm">($ATerm $FE*)</variable>
        <variable id="$STerm">($STerm $FE*)</variable>
        <variable id="$Close">($Close $FE*)</variable>
        <variable id="$SContinue">($SContinue $FE*)</variable>
        <!-- Macros -->
        <variable id="$ParaSep">($Sep | $CR | $LF)</variable>
        <variable id="$SATerm">($STerm | $ATerm)</variable>
      </variables>
      <segmentRules>
        <!-- Rules -->
        <!-- Break at the start and end of text, unless the text is empty. -->
        <!-- Do not break within CRLF. -->
        <rule id="3"> $CR × $LF </rule>
        <!-- Break after paragraph separators. -->
        <rule id="4"> $ParaSep ÷ </rule>
        <!-- Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
        <!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
        <!-- AND add format and extend in all variables definitions that appear after this point! -->
        <rule id="5"> × [$Format $Extend] </rule>
        <!-- Do not break after full stop in certain contexts. [See note below.] -->
        <!-- Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, -->
        <!-- is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. -->
        <!-- For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence. -->
        <rule id="6"> $ATerm × $Numeric </rule>
        <rule id="7"> ($Upper | $Lower) $ATerm × $Upper </rule>
        <rule id="8"> $ATerm $Close* $Sp* × $NotPreLower_* $Lower </rule>
        <rule id="8.1"> $SATerm $Close* $Sp* × ($SContinue | $SATerm) </rule>
        <!-- Break after sentence terminators, but include closing punctuation, trailing spaces, and any paragraph separator. [See note below.] Include closing punctuation, trailing spaces, and (optionally) a paragraph separator. -->
        <rule id="9"> $SATerm $Close* × ( $Close | $Sp | $ParaSep ) </rule>
        <!-- Note the fix to $Sp*, $Sep? -->
        <rule id="10"> $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) </rule>
        <rule id="11"> $SATerm $Close* $Sp* $ParaSep? ÷ </rule>
        <!-- Otherwise, do not break -->
        <rule id="998"> × $Any </rule>
      </segmentRules>
      <suppressions type="standard">
        <!-- root suppression is empty. -->
      </suppressions>
    </segmentation>
    <segmentation type="WordBreak">
      <variables>
        <variable id="$CR">\p{Word_Break=CR}</variable>
        <variable id="$LF">\p{Word_Break=LF}</variable>
        <variable id="$Newline">\p{Word_Break=Newline}</variable>
        <variable id="$Extend">\p{Word_Break=Extend}</variable>
        <!-- Now normal variables -->
        <variable id="$Format">\p{Word_Break=Format}</variable>
        <variable id="$Katakana">\p{Word_Break=Katakana}</variable>
        <variable id="$ALetter">\p{Word_Break=ALetter}</variable>
        <variable id="$MidLetter">\p{Word_Break=MidLetter}</variable>
        <variable id="$MidNum">\p{Word_Break=MidNum}</variable>
        <variable id="$MidNumLet">\p{Word_Break=MidNumLet}</variable>
        <variable id="$Numeric">\p{Word_Break=Numeric}</variable>
        <variable id="$ExtendNumLet">\p{Word_Break=ExtendNumLet}</variable>
        <variable id="$RI">\p{Word_Break=Regional_Indicator}</variable>
        <variable id="$Hebrew_Letter">\p{Word_Break=Hebrew_Letter}</variable>
        <variable id="$Double_Quote">\p{Word_Break=Double_Quote}</variable>
        <variable id="$Single_Quote">\p{Word_Break=Single_Quote}</variable>
        <variable id="$ZWJ">\p{Word_Break=ZWJ}</variable>
        <variable id="$ExtPict">\p{Extended_Pictographic}</variable>
        <variable id="$WSegSpace">\p{Word_Break=WSegSpace}</variable>
        <!-- Macros -->
        <variable id="$AHLetter">($ALetter | $Hebrew_Letter)</variable>
        <variable id="$MidNumLetQ">($MidNumLet | $Single_Quote)</variable>
        <!-- WARNING: For Rule 4: Fixes for GC, Format -->
        <!-- Add format and extend to everything -->
        <variable id="$FE">[$Format $Extend $ZWJ]</variable>
        <!-- Special rules -->
        <variable id="$NotBreak_">[^ $Newline $CR $LF ]</variable>
        <variable id="$Katakana">($Katakana $FE*)</variable>
        <variable id="$ALetter">($ALetter $FE*)</variable>
        <variable id="$MidLetter">($MidLetter $FE*)</variable>
        <variable id="$MidNum">($MidNum $FE*)</variable>
        <variable id="$MidNumLet">($MidNumLet $FE*)</variable>
        <variable id="$Numeric">($Numeric $FE*)</variable>
        <variable id="$ExtendNumLet">($ExtendNumLet $FE*)</variable>
        <variable id="$RI">($RI $FE*)</variable>
        <variable id="$Hebrew_Letter">($Hebrew_Letter $FE*)</variable>
        <variable id="$Double_Quote">($Double_Quote $FE*)</variable>
        <variable id="$Single_Quote">($Single_Quote $FE*)</variable>
        <variable id="$AHLetter">($AHLetter $FE*)</variable>
        <variable id="$MidNumLetQ">($MidNumLetQ $FE*)</variable>
      </variables>
      <segmentRules>
        <!-- Rules -->
        <!-- Break at the start and end of text, unless the text is empty. -->
        <!-- Do not break within CRLF. -->
        <rule id="3"> $CR × $LF </rule>
        <!-- Otherwise break before and after Newlines (including CR and LF) -->
        <rule id="3.1"> ($Newline | $CR | $LF) ÷ </rule>
        <rule id="3.2"> ÷ ($Newline | $CR | $LF) </rule>
        <!-- Do not break within emoji zwj sequences. -->
        <rule id="3.3"> $ZWJ × $ExtPict </rule>
        <!-- Do not break between adjacent WSegSpace characters. -->
        <rule id="3.4"> $WSegSpace × $WSegSpace </rule>
        <!-- Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
        <!-- WARNING: Implemented as don't break before format (except after linebreaks), -->
        <!-- AND add format and extend in all variables definitions that appear after this point! -->
        <rule id="4"> $NotBreak_ × [$Format $Extend $ZWJ] </rule>
        <!-- Vanilla rules -->
        <!-- Do not break between most letters. -->
        <rule id="5"> $AHLetter × $AHLetter </rule>
        <!-- Do not break letters across certain punctuation. -->
        <rule id="6"> $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter </rule>
        <rule id="7"> $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter </rule>
        <rule id="7.1"> $Hebrew_Letter × $Single_Quote </rule>
        <rule id="7.2"> $Hebrew_Letter × $Double_Quote $Hebrew_Letter </rule>
        <rule id="7.3"> $Hebrew_Letter $Double_Quote × $Hebrew_Letter </rule>
        <!-- Do not break within sequences of digits, or digits adjacent to letters (3a, or A3). -->
        <rule id="8"> $Numeric × $Numeric </rule>
        <rule id="9"> $AHLetter × $Numeric </rule>
        <rule id="10"> $Numeric × $AHLetter </rule>
        <!-- Do not break within sequences, such as 3.2 or 3,456.789. -->
        <rule id="11"> $Numeric ($MidNum | $MidNumLetQ) × $Numeric </rule>
        <rule id="12"> $Numeric × ($MidNum | $MidNumLetQ) $Numeric </rule>
        <!-- Do not break between Katakana. -->
        <rule id="13"> $Katakana × $Katakana </rule>
        <!-- Do not break from extenders. -->
        <rule id="13.1"> ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet </rule>
        <rule id="13.2"> $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) </rule>
        <!-- Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. -->
        <rule id="15"> ^ ($RI $RI)* $RI × $RI </rule>
        <rule id="16"> [^$RI] ($RI $RI)* $RI × $RI </rule>
      </segmentRules>
      <!-- Otherwise, break everywhere (including around ideographs). -->
    </segmentation>
  </segmentations>
</ldml>