Home | History | Annotate | Download | only in mappings
      1 # Copyright (C) 2017 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 #
      4 # Name:             GSM 03.38 to Unicode
      5 # Unicode version:  3.0
      6 # Table version:    2.0
      7 # Date:             2009 Nov 10
      8 # Authors:          Ken Whistler
      9 #                   Kent Karlsson
     10 #                   Markus Kuhn
     11 #
     12 # Source:           http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
     13 # See there for the license and for a description of the charset.
     14 # Formatted into ICU .ucm format by Markus Scherer on 2006-nov-02.
     15 # Updated to table version 2.0 by Fredrik Roubert on 2017-feb-08.
     16 # Mappings that are commented out in the Unicode file are turned into
     17 # fallbacks (|1); all others are turned into round-trips (|0).
     18 # Multi-byte mappings are preserved as multi-single-byte character mappings,
     19 # using ICU's m:n conversion capability.
     20 #
     21 # The substitution character is not documented in the Unicode file; \x3F ("?")
     22 # is chosen here because \x1A is a graphic character in GSM 03.38 (U+039E).
     23 #
     24 # Other deviations from the Unicode file:
     25 # a)
     26 # The GSM standard specifies that one or two ESC bytes (\x1B), if not followed
     27 # by a recognized final byte, be mapped to spaces (that is, reverse fallbacks
     28 # to U+0020).
     29 # The Unicode file round-trips a single \x1B to U+00A0 (NBSP) and has no mapping
     30 # for \x1B\x1B.
     31 # (Reverse fallbacks to U+00A0 would result in Unicode text that cannot be
     32 # converted back to GSM 03.38. A round-trip for U+00A0 would add a character
     33 # that is not mappable in the standard.)
     34 #
     35 # See the ietf-charsets list email "Re: GSM 03.38 substitution character?"
     36 # at http://mail.apps.ietf.org/ietf/charsets/msg01696.html
     37 #
     38 # b)
     39 # The GSM standard maps U+00C7 capital C-cedilla to \x09 but the Unicode file
     40 # contains and documents a "fix" to map U+00E7 small c-cedilla instead, based on
     41 # an interpretation of the intent of the standard. Prevailing implementations
     42 # in mobile phones follow the standard.
     43 #
     44 # This file follows the GSM standard.
     45 #
     46 # See the GSM standard at
     47 # http://www.3gpp.org/ftp/Specs/archive/03_series/03.38/0338-720.zip
     48 #
     49 # For problems with the table format please submit a bug
     50 # at http://www.icu-project.org/ .
     51 # For issues with the mappings please contact Unicode
     52 # at http://www.unicode.org/reporting.html
     53 
     54 <code_set_name>     "gsm-03.38-2009"
     55 <char_name_mask>    "AXXXX"
     56 <mb_cur_max>        1
     57 <mb_cur_min>        1
     58 <uconv_class>       "SBCS"
     59 <icu:state>         0-7f
     60 <subchar>           \x3F
     61 <icu:charsetFamily> "ASCII"
     62 
     63 CHARMAP
     64 <U0000> \x00 |1
     65 <U000A> \x0A |0
     66 <U000C> \x1B\x0A |0
     67 <U000D> \x0D |0
     68 <U0020> \x20 |0
     69 <U0020> \x1B |3
     70 <U0020> \x1B\x1B |3
     71 <U0021> \x21 |0
     72 <U0022> \x22 |0
     73 <U0023> \x23 |0
     74 <U0024> \x02 |0
     75 <U0025> \x25 |0
     76 <U0026> \x26 |0
     77 <U0027> \x27 |0
     78 <U0028> \x28 |0
     79 <U0029> \x29 |0
     80 <U002A> \x2A |0
     81 <U002B> \x2B |0
     82 <U002C> \x2C |0
     83 <U002D> \x2D |0
     84 <U002E> \x2E |0
     85 <U002F> \x2F |0
     86 <U0030> \x30 |0
     87 <U0031> \x31 |0
     88 <U0032> \x32 |0
     89 <U0033> \x33 |0
     90 <U0034> \x34 |0
     91 <U0035> \x35 |0
     92 <U0036> \x36 |0
     93 <U0037> \x37 |0
     94 <U0038> \x38 |0
     95 <U0039> \x39 |0
     96 <U003A> \x3A |0
     97 <U003B> \x3B |0
     98 <U003C> \x3C |0
     99 <U003D> \x3D |0
    100 <U003E> \x3E |0
    101 <U003F> \x3F |0
    102 <U0040> \x00 |0
    103 <U0041> \x41 |0
    104 <U0042> \x42 |0
    105 <U0043> \x43 |0
    106 <U0044> \x44 |0
    107 <U0045> \x45 |0
    108 <U0046> \x46 |0
    109 <U0047> \x47 |0
    110 <U0048> \x48 |0
    111 <U0049> \x49 |0
    112 <U004A> \x4A |0
    113 <U004B> \x4B |0
    114 <U004C> \x4C |0
    115 <U004D> \x4D |0
    116 <U004E> \x4E |0
    117 <U004F> \x4F |0
    118 <U0050> \x50 |0
    119 <U0051> \x51 |0
    120 <U0052> \x52 |0
    121 <U0053> \x53 |0
    122 <U0054> \x54 |0
    123 <U0055> \x55 |0
    124 <U0056> \x56 |0
    125 <U0057> \x57 |0
    126 <U0058> \x58 |0
    127 <U0059> \x59 |0
    128 <U005A> \x5A |0
    129 <U005B> \x1B\x3C |0
    130 <U005C> \x1B\x2F |0
    131 <U005D> \x1B\x3E |0
    132 <U005E> \x1B\x14 |0
    133 <U005F> \x11 |0
    134 <U0061> \x61 |0
    135 <U0062> \x62 |0
    136 <U0063> \x63 |0
    137 <U0064> \x64 |0
    138 <U0065> \x65 |0
    139 <U0066> \x66 |0
    140 <U0067> \x67 |0
    141 <U0068> \x68 |0
    142 <U0069> \x69 |0
    143 <U006A> \x6A |0
    144 <U006B> \x6B |0
    145 <U006C> \x6C |0
    146 <U006D> \x6D |0
    147 <U006E> \x6E |0
    148 <U006F> \x6F |0
    149 <U0070> \x70 |0
    150 <U0071> \x71 |0
    151 <U0072> \x72 |0
    152 <U0073> \x73 |0
    153 <U0074> \x74 |0
    154 <U0075> \x75 |0
    155 <U0076> \x76 |0
    156 <U0077> \x77 |0
    157 <U0078> \x78 |0
    158 <U0079> \x79 |0
    159 <U007A> \x7A |0
    160 <U007B> \x1B\x28 |0
    161 <U007C> \x1B\x40 |0
    162 <U007D> \x1B\x29 |0
    163 <U007E> \x1B\x3D |0
    164 <U00A1> \x40 |0
    165 <U00A3> \x01 |0
    166 <U00A4> \x24 |0
    167 <U00A5> \x03 |0
    168 <U00A7> \x5F |0
    169 <U00BF> \x60 |0
    170 <U00C4> \x5B |0
    171 <U00C5> \x0E |0
    172 <U00C6> \x1C |0
    173 <U00C7> \x09 |0
    174 <U00C9> \x1F |0
    175 <U00D1> \x5D |0
    176 <U00D6> \x5C |0
    177 <U00D8> \x0B |0
    178 <U00DC> \x5E |0
    179 <U00DF> \x1E |0
    180 <U00E0> \x7F |0
    181 <U00E4> \x7B |0
    182 <U00E5> \x0F |0
    183 <U00E6> \x1D |0
    184 <U00E8> \x04 |0
    185 <U00E9> \x05 |0
    186 <U00EC> \x07 |0
    187 <U00F1> \x7D |0
    188 <U00F2> \x08 |0
    189 <U00F6> \x7C |0
    190 <U00F8> \x0C |0
    191 <U00F9> \x06 |0
    192 <U00FC> \x7E |0
    193 <U0391> \x41 |1
    194 <U0392> \x42 |1
    195 <U0393> \x13 |0
    196 <U0394> \x10 |0
    197 <U0395> \x45 |1
    198 <U0396> \x5A |1
    199 <U0397> \x48 |1
    200 <U0398> \x19 |0
    201 <U0399> \x49 |1
    202 <U039A> \x4B |1
    203 <U039B> \x14 |0
    204 <U039C> \x4D |1
    205 <U039D> \x4E |1
    206 <U039E> \x1A |0
    207 <U039F> \x4F |1
    208 <U03A0> \x16 |0
    209 <U03A1> \x50 |1
    210 <U03A3> \x18 |0
    211 <U03A4> \x54 |1
    212 <U03A5> \x59 |1
    213 <U03A6> \x12 |0
    214 <U03A7> \x58 |1
    215 <U03A8> \x17 |0
    216 <U03A9> \x15 |0
    217 <U20AC> \x1B\x65 |0
    218 END CHARMAP
    219