Home | History | Annotate | Download | only in testdata
      1 # Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # *******************************************************************************
      4 # * Copyright (C) 2001-2003, International Business Machines
      5 # * Corporation and others.  All Rights Reserved.
      6 # *******************************************************************************
      7 #
      8 # test4.ucm
      9 #
     10 # Test file for MBCS conversion with four-byte codepage data.
     11 
     12 <code_set_name>     "test4"
     13 <mb_cur_max>        4
     14 <mb_cur_min>        1
     15 <uconv_class>       "MBCS"
     16 
     17 # Both <subchar> and <subchar1> are single-byte sequences, which does not
     18 # make sense but works: <subchar1> was added for tests, without changing
     19 # the old tests to use a new <subchar> -- markus 20031028
     20 <subchar>           \xff
     21 <subchar1>          \xe1
     22 <icu:state>         0, 1:1, 5-9, e1, ff
     23 <icu:state>         2:2
     24 <icu:state>         3:3
     25 <icu:state>         a-f.p, ff
     26 
     27 CHARMAP
     28 
     29 # fromUnicode result is a zero byte (\x00) for a code point other than U+0000
     30 <U20ac>     \x00 |0
     31 
     32 # a fallback (|1) from a non-zero code point to the zero byte is possible with an extension table
     33 <U20ad>     \x00 |1
     34 
     35 # nothing special
     36 <U0005>     \x05 |0
     37 
     38 # toUnicode result is a direct fallback (|3, reverse fallback)
     39 <U0006>     \x06 |3
     40 
     41 # toUnicode result is direct non-BMP code point
     42 <U101234>   \x07 |0
     43 <Ufebcd>    \x08 |3
     44 
     45 #unassigned \x09
     46 
     47 # toUnicode result is surrogate pair: test real pair, single unit, unassigned
     48 <U23456>    \x01\x02\x03\x0a |0
     49 <U000b>     \x01\x02\x03\x0b |0
     50 #unassigned \x01\x02\x03\x0c
     51 <U34567>    \x01\x02\x03\x0d |3
     52 <U000e>     \x01\x02\x03\x0e |3
     53 #unassigned \x01\x02\x03\x0f
     54 
     55 # <subchar1> non-mapping
     56 <U50005>    \xe1 |2
     57 # add a mapping that turns the above's Unicode side into a prefix
     58 <U50005><U60006> \x06 |1
     59 
     60 # many bytes in total, and many bytes per UChar
     61 <U30ab><U309a> \x01\x02\x03\x0a\x01\x02\x03\x0b\x01\x02\x03\x0c\x01\x02\x03\x0d\x01\x02\x03\x0e\x01\x02\x03\x0f\x01\x02\x03\x0a\x05\x06\x07 |0
     62 
     63 # many UChars in total, and many UChars per byte
     64 <U304b><U309a><U304d><U309a><U304f><U309a><U3051><U309a><U3053><U309a><U30ab><U309a><U30ad><U309a><U30af><U309a><U30b1><U309a><U0300> \x08\x09 |0
     65 
     66 END CHARMAP
     67