<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12137 $"/>
	<transforms>
		<transform source="Han" target="Spacedhan" direction="both" visibility="internal">
			<tRule>
# Only intended for internal use
# NOTE(review): the non-ASCII literals in this rule set (the rule operators, the
# explicit ranges in the first filter, and the full-width/ideographic punctuation)
# were restored after being stripped from a damaged copy; confirm them against the
# canonical CLDR data before release.
#
# Rule operators used below: "→" applies in the forward direction (Han to Spacedhan)
# only, "←" in the reverse direction only, and "↔" in both directions.
#
# Make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
:: fullwidth-halfwidth;
# Ideographic full stop and ASCII period map to each other.
。 ↔ '.';
$terminalPunct = [\.\,\:\;\?\!．，：？！。、；[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
# (the empty {} matches the boundary itself, so the space is inserted and nothing is replaced)
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
# remove spacing between ideographs and other letters
# (reverse direction only: the space inside { } is replaced by nothing)
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
			</tRule>
		</transform>
	</transforms>
</supplementalData>