<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12137 $"/>
	<transforms>
		<!--
		Han <-> Spacedhan: the forward rules insert a space at the boundaries between
		ideographs / punctuation and letters; the reverse rules delete such spaces again.
		Rule text inside tRule is whitespace- and character-sensitive: keep it at column 0.
		-->
		<transform source="Han" target="Spacedhan" direction="both" visibility="internal">
			<tRule>
# Only intended for internal use
#
# NOTE(review): this file was recovered from a listing that had dropped every
# non-ASCII character. The rule operators, the explicit members of the first
# filter set and the source character of the full-stop rule were restored by hand;
# confirm them against the upstream CLDR copy before relying on them.
#
# Make sure Han are normalized, including characters that contain them.
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
# Explicit members, by code point (please verify when regenerating):
#   U+3192..U+319F  U+3220..U+3247  U+3280..U+32B0  U+32C0..U+32CB
#   U+3358..U+3370  U+337B..U+337F  U+33E0..U+33FE
#   U+1F210..U+1F212  U+1F214..U+1F23A  U+1F240..U+1F248  U+1F250  U+1F251
:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
# Fold width variants before the punctuation sets below are consulted.
:: fullwidth-halfwidth;
# Map the ideographic full stop to an ASCII period.
# NOTE(review): source character presumed to be U+3002 - confirm.
。 → '.';
# Terminal punctuation: ASCII sentence punctuation plus the general categories
# Pe (close punctuation) and Pf (final quote punctuation).
# NOTE(review): non-ASCII members may also have been lost from this set - confirm.
$terminalPunct = [\.\,\:\;\?\![:Pe:][:Pf:]];
# Initial punctuation: general categories Ps (open) and Pi (initial quote).
# The variable is only used inside a set, where the two sets form a union.
$initialPunct = [:Ps:][:Pi:];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
# ("{}" marks an empty match between the two contexts, so the space is inserted there)
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
# remove spacing between ideographs and other letters
# (reverse direction only: the braced space is matched and replaced with nothing)
← [:Ideographic:] { ' ' } [:Letter:] ;
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
			</tRule>
		</transform>
	</transforms>
</supplementalData>