Home | History | Annotate | Download | only in cjkcodecs
      1 /*
      2  * _codecs_hk.c: Codecs collection for encodings from Hong Kong
      3  *
      4  * Written by Hye-Shik Chang <perky (at) FreeBSD.org>
      5  */
      6 
      7 #define USING_IMPORTED_MAPS
      8 
      9 #include "cjkcodecs.h"
     10 #include "mappings_hk.h"
     11 
     12 /*
     13  * BIG5HKSCS codec
     14  */
     15 
     16 static const encode_map *big5_encmap = NULL;
     17 static const decode_map *big5_decmap = NULL;
     18 
     19 CODEC_INIT(big5hkscs)
     20 {
     21     static int initialized = 0;
     22 
     23     if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
     24         return -1;
     25     initialized = 1;
     26     return 0;
     27 }
     28 
     29 /*
     30  * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
     31  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
     32  *  U+00CA U+030C -> 8864
     33  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
     34  *  U+00EA U+030C -> 88a5
     35  * These are handled by not mapping tables but a hand-written code.
     36  */
     37 static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
     38 
     39 ENCODER(big5hkscs)
     40 {
     41     while (*inpos < inlen) {
     42         Py_UCS4 c = INCHAR1;
     43         DBCHAR code;
     44         Py_ssize_t insize;
     45 
     46         if (c < 0x80) {
     47             REQUIRE_OUTBUF(1);
     48             **outbuf = (unsigned char)c;
     49             NEXT(1, 1);
     50             continue;
     51         }
     52 
     53         insize = 1;
     54         REQUIRE_OUTBUF(2);
     55 
     56         if (c < 0x10000) {
     57             if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
     58                 if (code == MULTIC) {
     59                     Py_UCS4 c2;
     60                     if (inlen - *inpos >= 2)
     61                         c2 = INCHAR2;
     62                     else
     63                         c2 = 0;
     64 
     65                     if (inlen - *inpos >= 2 &&
     66                         ((c & 0xffdf) == 0x00ca) &&
     67                         ((c2 & 0xfff7) == 0x0304)) {
     68                         code = big5hkscs_pairenc_table[
     69                             ((c >> 4) |
     70                              (c2 >> 3)) & 3];
     71                         insize = 2;
     72                     }
     73                     else if (inlen - *inpos < 2 &&
     74                              !(flags & MBENC_FLUSH))
     75                         return MBERR_TOOFEW;
     76                     else {
     77                         if (c == 0xca)
     78                             code = 0x8866;
     79                         else /* c == 0xea */
     80                             code = 0x88a7;
     81                     }
     82                 }
     83             }
     84             else if (TRYMAP_ENC(big5, code, c))
     85                 ;
     86             else
     87                 return 1;
     88         }
     89         else if (c < 0x20000)
     90             return insize;
     91         else if (c < 0x30000) {
     92             if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
     93                 ;
     94             else
     95                 return insize;
     96         }
     97         else
     98             return insize;
     99 
    100         OUTBYTE1(code >> 8);
    101         OUTBYTE2(code & 0xFF);
    102         NEXT(insize, 2);
    103     }
    104 
    105     return 0;
    106 }
    107 
    108 #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
    109 
    110 DECODER(big5hkscs)
    111 {
    112     while (inleft > 0) {
    113         unsigned char c = INBYTE1;
    114         Py_UCS4 decoded;
    115 
    116         if (c < 0x80) {
    117             OUTCHAR(c);
    118             NEXT_IN(1);
    119             continue;
    120         }
    121 
    122         REQUIRE_INBUF(2);
    123 
    124         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
    125             if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
    126                 OUTCHAR(decoded);
    127                 NEXT_IN(2);
    128                 continue;
    129             }
    130         }
    131 
    132         if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
    133         {
    134             int s = BH2S(c, INBYTE2);
    135             const unsigned char *hintbase;
    136 
    137             assert(0x87 <= c && c <= 0xfe);
    138             assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
    139 
    140             if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
    141                     hintbase = big5hkscs_phint_0;
    142                     s -= BH2S(0x87, 0x40);
    143             }
    144             else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
    145                     hintbase = big5hkscs_phint_12130;
    146                     s -= BH2S(0xc6, 0xa1);
    147             }
    148             else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
    149                     hintbase = big5hkscs_phint_21924;
    150                     s -= BH2S(0xf9, 0xd6);
    151             }
    152             else
    153                     return MBERR_INTERNAL;
    154 
    155             if (hintbase[s >> 3] & (1 << (s & 7))) {
    156                     OUTCHAR(decoded | 0x20000);
    157                     NEXT_IN(2);
    158             }
    159             else {
    160                     OUTCHAR(decoded);
    161                     NEXT_IN(2);
    162             }
    163             continue;
    164         }
    165 
    166         switch ((c << 8) | INBYTE2) {
    167         case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
    168         case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
    169         case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
    170         case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
    171         default: return 1;
    172         }
    173 
    174         NEXT_IN(2); /* all decoded code points are pairs, above. */
    175     }
    176 
    177     return 0;
    178 }
    179 
    180 
    181 BEGIN_MAPPINGS_LIST
    182   MAPPING_DECONLY(big5hkscs)
    183   MAPPING_ENCONLY(big5hkscs_bmp)
    184   MAPPING_ENCONLY(big5hkscs_nonbmp)
    185 END_MAPPINGS_LIST
    186 
    187 BEGIN_CODECS_LIST
    188   CODEC_STATELESS_WINIT(big5hkscs)
    189 END_CODECS_LIST
    190 
    191 I_AM_A_MODULE_FOR(hk)
    192