Home | History | Annotate | Download | only in cjkcodecs
      1 /*
      2  * _codecs_hk.c: Codecs collection for encodings from Hong Kong
      3  *
      4  * Written by Hye-Shik Chang <perky (at) FreeBSD.org>
      5  */
      6 
      7 #define USING_IMPORTED_MAPS
      8 
      9 #include "cjkcodecs.h"
     10 #include "mappings_hk.h"
     11 
     12 /*
     13  * BIG5HKSCS codec
         *
         * The plain Big5 tables are not defined in this file.  The two pointers
         * below start out NULL and are passed by address to
         * IMPORT_MAP(tw, big5, ...) from CODEC_INIT(big5hkscs), which is
         * expected to fill them in before the encoder or decoder is used.
     14  */
     15 
     16 static const encode_map *big5_encmap = NULL;
     17 static const decode_map *big5_decmap = NULL;
     18 
     19 CODEC_INIT(big5hkscs)
     20 {
            /*
             * Codec initialiser: imports the Big5 maps the first time it runs.
             * Returns 0 on success (including every call after a successful
             * import) and -1 when IMPORT_MAP yields a non-zero result.
             */

            /* One-shot guard; once it is 1 the import is never evaluated again. */
     21     static int initialized = 0;
     22 
            /*
             * && short-circuits, so IMPORT_MAP only runs while the flag is 0.
             * On failure the flag stays 0, so the next call retries the import.
             */
     23     if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
     24         return -1;
     25     initialized = 1;
     26     return 0;
     27 }
     28 
     29 /*
     30  * HKSCS 2004 has four Unicode pairs that each map to one big5hkscs code:
     31  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
     32  *  U+00CA U+030C -> 8864
     33  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
     34  *  U+00EA U+030C -> 88a5
     35  * These are handled by hand-written code rather than by the mapping tables.
         *
         * The entries are ordered to match the index the encoder computes:
         * bit 1 is set when the base character is U+00EA (clear for U+00CA) and
         * bit 0 is set when the combining mark is U+030C (clear for U+0304).
     36  */
     37 static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
     38 
     39 ENCODER(big5hkscs)
     40 {
            /*
             * Encode Unicode input as Big5-HKSCS.
             *
             * Characters below 0x80 are copied as a single byte.  Every other
             * encodable character becomes two bytes: the high byte of `code`,
             * then its low byte.
             *
             * Returns 0 once all input has been consumed, MBERR_TOOFEW when a
             * possible two-character pair cannot be decided yet, and a positive
             * count when a character has no mapping.
             * NOTE(review): the positive count is presumably the number of input
             * units to report as unencodable; confirm against cjkcodecs.h.
             */
     41     while (inleft > 0) {
     42         ucs4_t c = **inbuf;
     43         DBCHAR code;
     44         Py_ssize_t insize;
     45 
                /* Below 0x80: one input unit in, the same value out as one byte. */
     46         if (c < 0x80) {
     47             REQUIRE_OUTBUF(1)
     48             **outbuf = (unsigned char)c;
     49             NEXT(1, 1)
     50             continue;
     51         }
     52 
                /*
                 * NOTE(review): DECODE_SURROGATE may rewrite c (presumably by
                 * joining a surrogate pair); its definition is not in this file.
                 * insize is the number of input units NEXT() advances by below.
                 */
     53         DECODE_SURROGATE(c)
     54         insize = GET_INSIZE(c);
     55 
                /* Everything from here on emits exactly two bytes. */
     56         REQUIRE_OUTBUF(2)
     57 
     58         if (c < 0x10000) {
                    /* The HKSCS BMP map is consulted first, then plain Big5. */
     59             TRYMAP_ENC(big5hkscs_bmp, code, c) {
                        /* MULTIC marks a character that may start a pair. */
     60                 if (code == MULTIC) {
                            /*
                             * Masking bit 0x20 makes the first test accept 0x00ca
                             * and 0x00ea; masking bit 0x08 makes the second accept
                             * 0x0304 and 0x030c.
                             */
     61                     if (inleft >= 2 &&
     62                         ((c & 0xffdf) == 0x00ca) &&
     63                         (((*inbuf)[1] & 0xfff7) == 0x0304)) {
                                /*
                                 * Index 0..3: bit 1 comes from c (set for 0xea),
                                 * bit 0 from the mark (set for 0x030c).
                                 */
     64                         code = big5hkscs_pairenc_table[
     65                             ((c >> 4) |
     66                              ((*inbuf)[1] >> 3)) & 3];
                                /* Both characters of the pair are consumed. */
     67                         insize = 2;
     68                     }
                            /*
                             * No following character is available and the caller
                             * has not set MBENC_FLUSH: report MBERR_TOOFEW rather
                             * than encoding the character on its own.
                             */
     69                     else if (inleft < 2 &&
     70                              !(flags & MBENC_FLUSH))
     71                         return MBERR_TOOFEW;
                            /* Not a pair: use the stand-alone code. */
     72                     else {
     73                         if (c == 0xca)
     74                             code = 0x8866;
     75                         else /* c == 0xea */
     76                             code = 0x88a7;
     77                     }
     78                 }
     79             }
                    /* Not in the HKSCS BMP map: try Big5; otherwise fail with 1. */
     80             else TRYMAP_ENC(big5, code, c);
     81             else return 1;
     82         }
                /* 0x10000..0x1ffff: no table is consulted for this range. */
     83         else if (c < 0x20000)
     84             return insize;
                /* 0x20000..0x2ffff: looked up by the low 16 bits of c. */
     85         else if (c < 0x30000) {
     86             TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
     87             else return insize;
     88         }
                /* 0x30000 and above: no table is consulted. */
     89         else
     90             return insize;
     91 
                /* Emit high byte then low byte, then advance input and output. */
     92         OUT1(code >> 8)
     93         OUT2(code & 0xFF)
     94         NEXT(insize, 2)
     95     }
     96 
     97     return 0;
     98 }
     99 
    100 #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
        /*
         * BH2S turns a (lead byte, trail byte) pair into a linear index that is
         * 0 at (0x87, 0x40) and advances by 0xfe - 0x40 + 1 (= 191) per lead
         * byte.  The decoder below uses it to address the hint bit tables.
         */
    101 
    102 DECODER(big5hkscs)
    103 {
            /*
             * Decode Big5-HKSCS bytes to Unicode.
             *
             * Bytes below 0x80 are copied through.  Anything else is treated as
             * a two-byte sequence and resolved in this order: plain Big5 (except
             * where HKSCS takes precedence), the big5hkscs map, then the four
             * hand-coded sequences that decode to two code points each.
             *
             * Returns 0 when the input is exhausted, MBERR_INTERNAL when a mapped
             * sequence lies outside all hint tables, and 2 for an unmapped
             * two-byte sequence.
             * NOTE(review): the 2 is presumably the number of undecodable bytes;
             * confirm against cjkcodecs.h.
             */
    104     while (inleft > 0) {
    105         unsigned char c = IN1;
    106         ucs4_t decoded;
    107 
    108         REQUIRE_OUTBUF(1)
    109 
                /* Below 0x80: copied unchanged, one byte in and one unit out. */
    110         if (c < 0x80) {
    111             OUT1(c)
    112             NEXT(1, 1)
    113             continue;
    114         }
    115 
                /* Everything else needs a trail byte. */
    116         REQUIRE_INBUF(2)
    117 
                /*
                 * Plain Big5 is tried first unless the lead byte is 0xc7 or 0xc8,
                 * or it is 0xc6 with a trail byte of 0xa1 or above.  (c < 0xc7 is
                 * only reached when c is within 0xc6..0xc8, so it means c == 0xc6.)
                 * A Big5 miss falls through to the HKSCS lookup below.
                 */
    118         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
    119             TRYMAP_DEC(big5, **outbuf, c, IN2) {
    120                 NEXT(2, 1)
    121                 continue;
    122             }
    123         }
    124 
    125         TRYMAP_DEC(big5hkscs, decoded, c, IN2)
    126         {
                    /* Linear position of this sequence, used to find its hint bit. */
    127             int s = BH2S(c, IN2);
    128             const unsigned char *hintbase;
    129 
    130             assert(0x87 <= c && c <= 0xfe);
    131             assert(0x40 <= IN2 && IN2 <= 0xfe);
    132 
                    /*
                     * Three ranges have a hint table each; s is rebased to the
                     * start of its range.  The numeric suffixes match the range
                     * starts: BH2S(0xc6, 0xa1) == 12130, BH2S(0xf9, 0xd6) == 21924.
                     */
    133             if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
    134                     hintbase = big5hkscs_phint_0;
    135                     s -= BH2S(0x87, 0x40);
    136             }
    137             else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
    138                     hintbase = big5hkscs_phint_12130;
    139                     s -= BH2S(0xc6, 0xa1);
    140             }
    141             else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
    142                     hintbase = big5hkscs_phint_21924;
    143                     s -= BH2S(0xf9, 0xd6);
    144             }
                    /* A mapped sequence outside every hinted range. */
    145             else
    146                     return MBERR_INTERNAL;
    147 
                    /*
                     * One hint bit per position (byte s >> 3, bit s & 7).  A set
                     * bit means the result is decoded | 0x20000, written with
                     * WRITEUCS4; a clear bit means decoded is written as is.
                     */
    148             if (hintbase[s >> 3] & (1 << (s & 7))) {
    149                     WRITEUCS4(decoded | 0x20000)
    150                     NEXT_IN(2)
    151             }
    152             else {
    153                     OUT1(decoded)
    154                     NEXT(2, 1)
    155             }
    156             continue;
    157         }
    158 
                /*
                 * The four sequences that decode to two code points each; these
                 * mirror big5hkscs_pairenc_table on the encoding side.
                 */
    159         switch ((c << 8) | IN2) {
    160         case 0x8862: WRITE2(0x00ca, 0x0304); break;
    161         case 0x8864: WRITE2(0x00ca, 0x030c); break;
    162         case 0x88a3: WRITE2(0x00ea, 0x0304); break;
    163         case 0x88a5: WRITE2(0x00ea, 0x030c); break;
    164         default: return 2;
    165         }
    166 
    167         NEXT(2, 2) /* all decoded code points are pairs, above. */
    168     }
    169 
    170     return 0;
    171 }
    172 
    173 
    174 BEGIN_MAPPINGS_LIST
        /*
         * Maps declared by this module: big5hkscs is listed with
         * MAPPING_DECONLY, and the BMP and non-BMP HKSCS maps with
         * MAPPING_ENCONLY.  The plain big5 map is not listed here; it is
         * imported in CODEC_INIT(big5hkscs) instead.
         * NOTE(review): presumably DECONLY/ENCONLY register a decode-only or
         * encode-only table; the macros are defined outside this file.
         */
    175   MAPPING_DECONLY(big5hkscs)
    176   MAPPING_ENCONLY(big5hkscs_bmp)
    177   MAPPING_ENCONLY(big5hkscs_nonbmp)
    178 END_MAPPINGS_LIST
    179 
    180 BEGIN_CODECS_LIST
        /*
         * big5hkscs is the only codec declared by this module.
         * NOTE(review): the _STATELESS_WINIT variant presumably registers a
         * codec without per-stream state but with an init hook, i.e.
         * CODEC_INIT(big5hkscs) above; confirm in cjkcodecs.h.
         */
    181   CODEC_STATELESS_WINIT(big5hkscs)
    182 END_CODECS_LIST
    183 
    184 I_AM_A_MODULE_FOR(hk)
        /*
         * NOTE(review): presumably expands to the extension-module entry point
         * for the "hk" codec collection, exposing the mapping and codec lists
         * above; the macro is defined outside this file.
         */
    185