1 /* 2 * cjkcodecs.h: common header for cjkcodecs 3 * 4 * Written by Hye-Shik Chang <perky (at) FreeBSD.org> 5 */ 6 7 #ifndef _CJKCODECS_H_ 8 #define _CJKCODECS_H_ 9 10 #define PY_SSIZE_T_CLEAN 11 #include "Python.h" 12 #include "multibytecodec.h" 13 14 15 /* a unicode "undefined" code point */ 16 #define UNIINV 0xFFFE 17 18 /* internal-use DBCS code points which aren't used by any charsets */ 19 #define NOCHAR 0xFFFF 20 #define MULTIC 0xFFFE 21 #define DBCINV 0xFFFD 22 23 /* shorter macros to save source size of mapping tables */ 24 #define U UNIINV 25 #define N NOCHAR 26 #define M MULTIC 27 #define D DBCINV 28 29 struct dbcs_index { 30 const ucs2_t *map; 31 unsigned char bottom, top; 32 }; 33 typedef struct dbcs_index decode_map; 34 35 struct widedbcs_index { 36 const Py_UCS4 *map; 37 unsigned char bottom, top; 38 }; 39 typedef struct widedbcs_index widedecode_map; 40 41 struct unim_index { 42 const DBCHAR *map; 43 unsigned char bottom, top; 44 }; 45 typedef struct unim_index encode_map; 46 47 struct unim_index_bytebased { 48 const unsigned char *map; 49 unsigned char bottom, top; 50 }; 51 52 struct dbcs_map { 53 const char *charset; 54 const struct unim_index *encmap; 55 const struct dbcs_index *decmap; 56 }; 57 58 struct pair_encodemap { 59 Py_UCS4 uniseq; 60 DBCHAR code; 61 }; 62 63 static const MultibyteCodec *codec_list; 64 static const struct dbcs_map *mapping_list; 65 66 #define CODEC_INIT(encoding) \ 67 static int encoding##_codec_init(const void *config) 68 69 #define ENCODER_INIT(encoding) \ 70 static int encoding##_encode_init( \ 71 MultibyteCodec_State *state, const void *config) 72 #define ENCODER(encoding) \ 73 static Py_ssize_t encoding##_encode( \ 74 MultibyteCodec_State *state, const void *config, \ 75 int kind, void *data, \ 76 Py_ssize_t *inpos, Py_ssize_t inlen, \ 77 unsigned char **outbuf, Py_ssize_t outleft, int flags) 78 #define ENCODER_RESET(encoding) \ 79 static Py_ssize_t encoding##_encode_reset( \ 80 MultibyteCodec_State *state, const void *config, \ 81 unsigned char **outbuf, Py_ssize_t outleft) 82 83 #define DECODER_INIT(encoding) \ 84 static int encoding##_decode_init( \ 85 MultibyteCodec_State *state, const void *config) 86 #define DECODER(encoding) \ 87 static Py_ssize_t encoding##_decode( \ 88 MultibyteCodec_State *state, const void *config, \ 89 const unsigned char **inbuf, Py_ssize_t inleft, \ 90 _PyUnicodeWriter *writer) 91 #define DECODER_RESET(encoding) \ 92 static Py_ssize_t encoding##_decode_reset( \ 93 MultibyteCodec_State *state, const void *config) 94 95 #define NEXT_IN(i) \ 96 do { \ 97 (*inbuf) += (i); \ 98 (inleft) -= (i); \ 99 } while (0) 100 #define NEXT_INCHAR(i) \ 101 do { \ 102 (*inpos) += (i); \ 103 } while (0) 104 #define NEXT_OUT(o) \ 105 do { \ 106 (*outbuf) += (o); \ 107 (outleft) -= (o); \ 108 } while (0) 109 #define NEXT(i, o) \ 110 do { \ 111 NEXT_INCHAR(i); \ 112 NEXT_OUT(o); \ 113 } while (0) 114 115 #define REQUIRE_INBUF(n) \ 116 do { \ 117 if (inleft < (n)) \ 118 return MBERR_TOOFEW; \ 119 } while (0) 120 121 #define REQUIRE_OUTBUF(n) \ 122 do { \ 123 if (outleft < (n)) \ 124 return MBERR_TOOSMALL; \ 125 } while (0) 126 127 #define INBYTE1 ((*inbuf)[0]) 128 #define INBYTE2 ((*inbuf)[1]) 129 #define INBYTE3 ((*inbuf)[2]) 130 #define INBYTE4 ((*inbuf)[3]) 131 132 #define INCHAR1 (PyUnicode_READ(kind, data, *inpos)) 133 #define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1)) 134 135 #define OUTCHAR(c) \ 136 do { \ 137 if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \ 138 return MBERR_EXCEPTION; \ 139 } while (0) 140 141 #define OUTCHAR2(c1, c2) \ 142 do { \ 143 Py_UCS4 _c1 = (c1); \ 144 Py_UCS4 _c2 = (c2); \ 145 if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0) \ 146 return MBERR_EXCEPTION; \ 147 PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \ 148 PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \ 149 writer->pos += 2; \ 150 } while (0) 151 152 #define OUTBYTE1(c) \ 153 do { ((*outbuf)[0]) = (c); } while (0) 154 #define OUTBYTE2(c) \ 155 do { ((*outbuf)[1]) = (c); } while (0) 156 #define OUTBYTE3(c) \ 157 do { ((*outbuf)[2]) = (c); } while (0) 158 #define OUTBYTE4(c) \ 159 do { ((*outbuf)[3]) = (c); } while (0) 160 161 #define WRITEBYTE1(c1) \ 162 do { \ 163 REQUIRE_OUTBUF(1); \ 164 (*outbuf)[0] = (c1); \ 165 } while (0) 166 #define WRITEBYTE2(c1, c2) \ 167 do { \ 168 REQUIRE_OUTBUF(2); \ 169 (*outbuf)[0] = (c1); \ 170 (*outbuf)[1] = (c2); \ 171 } while (0) 172 #define WRITEBYTE3(c1, c2, c3) \ 173 do { \ 174 REQUIRE_OUTBUF(3); \ 175 (*outbuf)[0] = (c1); \ 176 (*outbuf)[1] = (c2); \ 177 (*outbuf)[2] = (c3); \ 178 } while (0) 179 #define WRITEBYTE4(c1, c2, c3, c4) \ 180 do { \ 181 REQUIRE_OUTBUF(4); \ 182 (*outbuf)[0] = (c1); \ 183 (*outbuf)[1] = (c2); \ 184 (*outbuf)[2] = (c3); \ 185 (*outbuf)[3] = (c4); \ 186 } while (0) 187 188 #define _TRYMAP_ENC(m, assi, val) \ 189 ((m)->map != NULL && (val) >= (m)->bottom && \ 190 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 191 (m)->bottom]) != NOCHAR) 192 #define TRYMAP_ENC(charset, assi, uni) \ 193 _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) 194 195 #define _TRYMAP_DEC(m, assi, val) \ 196 ((m)->map != NULL && \ 197 (val) >= (m)->bottom && \ 198 (val)<= (m)->top && \ 199 ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV) 200 #define TRYMAP_DEC(charset, assi, c1, c2) \ 201 _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) 202 203 #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { 204 #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, 205 #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, 206 #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, 207 #define END_MAPPINGS_LIST \ 208 {"", NULL, NULL} }; \ 209 static const struct dbcs_map *mapping_list = \ 210 (const struct dbcs_map *)_mapping_list; 211 212 #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { 213 #define _STATEFUL_METHODS(enc) \ 214 enc##_encode, \ 215 enc##_encode_init, \ 216 enc##_encode_reset, \ 217 enc##_decode, \ 218 enc##_decode_init, \ 219 enc##_decode_reset, 220 #define _STATELESS_METHODS(enc) \ 221 enc##_encode, NULL, NULL, \ 222 enc##_decode, NULL, NULL, 223 #define CODEC_STATEFUL(enc) { \ 224 #enc, NULL, NULL, \ 225 _STATEFUL_METHODS(enc) \ 226 }, 227 #define CODEC_STATELESS(enc) { \ 228 #enc, NULL, NULL, \ 229 _STATELESS_METHODS(enc) \ 230 }, 231 #define CODEC_STATELESS_WINIT(enc) { \ 232 #enc, NULL, \ 233 enc##_codec_init, \ 234 _STATELESS_METHODS(enc) \ 235 }, 236 #define END_CODECS_LIST \ 237 {"", NULL,} }; \ 238 static const MultibyteCodec *codec_list = \ 239 (const MultibyteCodec *)_codec_list; 240 241 242 243 static PyObject * 244 getmultibytecodec(void) 245 { 246 static PyObject *cofunc = NULL; 247 248 if (cofunc == NULL) { 249 PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); 250 if (mod == NULL) 251 return NULL; 252 cofunc = PyObject_GetAttrString(mod, "__create_codec"); 253 Py_DECREF(mod); 254 } 255 return cofunc; 256 } 257 258 static PyObject * 259 getcodec(PyObject *self, PyObject *encoding) 260 { 261 PyObject *codecobj, *r, *cofunc; 262 const MultibyteCodec *codec; 263 const char *enc; 264 265 if (!PyUnicode_Check(encoding)) { 266 PyErr_SetString(PyExc_TypeError, 267 "encoding name must be a string."); 268 return NULL; 269 } 270 enc = PyUnicode_AsUTF8(encoding); 271 if (enc == NULL) 272 return NULL; 273 274 cofunc = getmultibytecodec(); 275 if (cofunc == NULL) 276 return NULL; 277 278 for (codec = codec_list; codec->encoding[0]; codec++) 279 if (strcmp(codec->encoding, enc) == 0) 280 break; 281 282 if (codec->encoding[0] == '\0') { 283 PyErr_SetString(PyExc_LookupError, 284 "no such codec is supported."); 285 return NULL; 286 } 287 288 codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); 289 if (codecobj == NULL) 290 return NULL; 291 292 r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); 293 Py_DECREF(codecobj); 294 295 return r; 296 } 297 298 static struct PyMethodDef __methods[] = { 299 {"getcodec", (PyCFunction)getcodec, METH_O, ""}, 300 {NULL, NULL}, 301 }; 302 303 static int 304 register_maps(PyObject *module) 305 { 306 const struct dbcs_map *h; 307 308 for (h = mapping_list; h->charset[0] != '\0'; h++) { 309 char mhname[256] = "__map_"; 310 int r; 311 strcpy(mhname + sizeof("__map_") - 1, h->charset); 312 r = PyModule_AddObject(module, mhname, 313 PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); 314 if (r == -1) 315 return -1; 316 } 317 return 0; 318 } 319 320 #ifdef USING_BINARY_PAIR_SEARCH 321 static DBCHAR 322 find_pairencmap(ucs2_t body, ucs2_t modifier, 323 const struct pair_encodemap *haystack, int haystacksize) 324 { 325 int pos, min, max; 326 Py_UCS4 value = body << 16 | modifier; 327 328 min = 0; 329 max = haystacksize; 330 331 for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) { 332 if (value < haystack[pos].uniseq) { 333 if (max != pos) { 334 max = pos; 335 continue; 336 } 337 } 338 else if (value > haystack[pos].uniseq) { 339 if (min != pos) { 340 min = pos; 341 continue; 342 } 343 } 344 break; 345 } 346 347 if (value == haystack[pos].uniseq) { 348 return haystack[pos].code; 349 } 350 return DBCINV; 351 } 352 #endif 353 354 #ifdef USING_IMPORTED_MAPS 355 #define IMPORT_MAP(locale, charset, encmap, decmap) \ 356 importmap("_codecs_" #locale, "__map_" #charset, \ 357 (const void**)encmap, (const void**)decmap) 358 359 static int 360 importmap(const char *modname, const char *symbol, 361 const void **encmap, const void **decmap) 362 { 363 PyObject *o, *mod; 364 365 mod = PyImport_ImportModule(modname); 366 if (mod == NULL) 367 return -1; 368 369 o = PyObject_GetAttrString(mod, symbol); 370 if (o == NULL) 371 goto errorexit; 372 else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { 373 PyErr_SetString(PyExc_ValueError, 374 "map data must be a Capsule."); 375 goto errorexit; 376 } 377 else { 378 struct dbcs_map *map; 379 map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); 380 if (encmap != NULL) 381 *encmap = map->encmap; 382 if (decmap != NULL) 383 *decmap = map->decmap; 384 Py_DECREF(o); 385 } 386 387 Py_DECREF(mod); 388 return 0; 389 390 errorexit: 391 Py_DECREF(mod); 392 return -1; 393 } 394 #endif 395 396 #define I_AM_A_MODULE_FOR(loc) \ 397 static struct PyModuleDef __module = { \ 398 PyModuleDef_HEAD_INIT, \ 399 "_codecs_"#loc, \ 400 NULL, \ 401 0, \ 402 __methods, \ 403 NULL, \ 404 NULL, \ 405 NULL, \ 406 NULL \ 407 }; \ 408 PyMODINIT_FUNC \ 409 PyInit__codecs_##loc(void) \ 410 { \ 411 PyObject *m = PyModule_Create(&__module); \ 412 if (m != NULL) \ 413 (void)register_maps(m); \ 414 return m; \ 415 } 416 417 #endif 418