1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "autodetect.h" 18 19 struct CharRange { 20 uint16_t first; 21 uint16_t last; 22 }; 23 24 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*x)) 25 26 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT 27 static const CharRange kShiftJISRanges[] = { 28 { 0x8140, 0x817E }, 29 { 0x8180, 0x81AC }, 30 { 0x81B8, 0x81BF }, 31 { 0x81C8, 0x81CE }, 32 { 0x81DA, 0x81E8 }, 33 { 0x81F0, 0x81F7 }, 34 { 0x81FC, 0x81FC }, 35 { 0x824F, 0x8258 }, 36 { 0x8260, 0x8279 }, 37 { 0x8281, 0x829A }, 38 { 0x829F, 0x82F1 }, 39 { 0x8340, 0x837E }, 40 { 0x8380, 0x8396 }, 41 { 0x839F, 0x83B6 }, 42 { 0x83BF, 0x83D6 }, 43 { 0x8440, 0x8460 }, 44 { 0x8470, 0x847E }, 45 { 0x8480, 0x8491 }, 46 { 0x849F, 0x84BE }, 47 { 0x8740, 0x875D }, 48 { 0x875F, 0x8775 }, 49 { 0x877E, 0x877E }, 50 { 0x8780, 0x879C }, 51 { 0x889F, 0x88FC }, 52 { 0x8940, 0x897E }, 53 { 0x8980, 0x89FC }, 54 { 0x8A40, 0x8A7E }, 55 { 0x8A80, 0x8AFC }, 56 { 0x8B40, 0x8B7E }, 57 { 0x8B80, 0x8BFC }, 58 { 0x8C40, 0x8C7E }, 59 { 0x8C80, 0x8CFC }, 60 { 0x8D40, 0x8D7E }, 61 { 0x8D80, 0x8DFC }, 62 { 0x8E40, 0x8E7E }, 63 { 0x8E80, 0x8EFC }, 64 { 0x8F40, 0x8F7E }, 65 { 0x8F80, 0x8FFC }, 66 { 0x9040, 0x907E }, 67 { 0x9080, 0x90FC }, 68 { 0x9140, 0x917E }, 69 { 0x9180, 0x91FC }, 70 { 0x9240, 0x927E }, 71 { 0x9280, 0x92FC }, 72 { 0x9340, 0x937E }, 73 { 0x9380, 0x93FC }, 74 { 0x9440, 0x947E }, 75 { 0x9480, 0x94FC }, 76 { 0x9540, 0x957E }, 77 { 0x9580, 0x95FC }, 78 { 0x9640, 0x967E }, 79 { 0x9680, 0x96FC }, 80 { 0x9740, 0x977E }, 81 { 0x9780, 0x97FC }, 82 { 0x9840, 0x9872 }, 83 { 0x989F, 0x98FC }, 84 { 0x9940, 0x997E }, 85 { 0x9980, 0x99FC }, 86 { 0x9A40, 0x9A7E }, 87 { 0x9A80, 0x9AFC }, 88 { 0x9B40, 0x9B7E }, 89 { 0x9B80, 0x9BFC }, 90 { 0x9C40, 0x9C7E }, 91 { 0x9C80, 0x9CFC }, 92 { 0x9D40, 0x9D7E }, 93 { 0x9D80, 0x9DFC }, 94 { 0x9E40, 0x9E7E }, 95 { 0x9E80, 0x9EFC }, 96 { 0x9F40, 0x9F7E }, 97 { 0x9F80, 0x9FFC }, 98 { 0xE040, 0xE07E }, 99 { 0xE080, 0xE0FC }, 100 { 0xE140, 0xE17E }, 101 { 0xE180, 0xE1FC }, 102 { 0xE240, 0xE27E }, 103 { 0xE280, 0xE2FC }, 104 { 0xE340, 0xE37E }, 105 { 0xE380, 0xE3FC }, 106 { 0xE440, 0xE47E }, 107 { 0xE480, 0xE4FC }, 108 { 0xE540, 0xE57E }, 109 { 0xE580, 0xE5FC }, 110 { 0xE640, 0xE67E }, 111 { 0xE680, 0xE6FC }, 112 { 0xE740, 0xE77E }, 113 { 0xE780, 0xE7FC }, 114 { 0xE840, 0xE87E }, 115 { 0xE880, 0xE8FC }, 116 { 0xE940, 0xE97E }, 117 { 0xE980, 0xE9FC }, 118 { 0xEA40, 0xEA7E }, 119 { 0xEA80, 0xEAA4 }, 120 { 0xED40, 0xED7E }, 121 { 0xED80, 0xEDFC }, 122 { 0xEE40, 0xEE7E }, 123 { 0xEE80, 0xEEEC }, 124 { 0xEEEF, 0xEEFC }, 125 { 0xFA40, 0xFA7E }, 126 { 0xFA80, 0xFAFC }, 127 { 0xFB40, 0xFB7E }, 128 { 0xFB80, 0xFBFC }, 129 { 0xFC40, 0xFC4B }, 130 }; 131 132 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT 133 static const CharRange kGBKRanges[] = { 134 { 0x8140, 0x817E }, 135 { 0x8180, 0x81FE }, 136 { 0x8240, 0x827E }, 137 { 0x8280, 0x82FE }, 138 { 0x8340, 0x837E }, 139 { 0x8380, 0x83FE }, 140 { 0x8440, 0x847E }, 141 { 0x8480, 0x84FE }, 142 { 0x8540, 0x857E }, 143 { 0x8580, 0x85FE }, 144 { 0x8640, 0x867E }, 145 { 0x8680, 0x86FE }, 146 { 0x8740, 0x877E }, 147 { 0x8780, 0x87FE }, 148 { 0x8840, 0x887E }, 149 { 0x8880, 0x88FE }, 150 { 0x8940, 0x897E }, 151 { 0x8980, 0x89FE }, 152 { 0x8A40, 0x8A7E }, 153 { 0x8A80, 0x8AFE }, 154 { 0x8B40, 0x8B7E }, 155 { 0x8B80, 0x8BFE }, 156 { 0x8C40, 0x8C7E }, 157 { 0x8C80, 0x8CFE }, 158 { 0x8D40, 0x8D7E }, 159 { 0x8D80, 0x8DFE }, 160 { 0x8E40, 0x8E7E }, 161 { 0x8E80, 0x8EFE }, 162 { 0x8F40, 0x8F7E }, 163 { 0x8F80, 0x8FFE }, 164 { 0x9040, 0x907E }, 165 { 0x9080, 0x90FE }, 166 { 0x9140, 0x917E }, 167 { 0x9180, 0x91FE }, 168 { 0x9240, 0x927E }, 169 { 0x9280, 0x92FE }, 170 { 0x9340, 0x937E }, 171 { 0x9380, 0x93FE }, 172 { 0x9440, 0x947E }, 173 { 0x9480, 0x94FE }, 174 { 0x9540, 0x957E }, 175 { 0x9580, 0x95FE }, 176 { 0x9640, 0x967E }, 177 { 0x9680, 0x96FE }, 178 { 0x9740, 0x977E }, 179 { 0x9780, 0x97FE }, 180 { 0x9840, 0x987E }, 181 { 0x9880, 0x98FE }, 182 { 0x9940, 0x997E }, 183 { 0x9980, 0x99FE }, 184 { 0x9A40, 0x9A7E }, 185 { 0x9A80, 0x9AFE }, 186 { 0x9B40, 0x9B7E }, 187 { 0x9B80, 0x9BFE }, 188 { 0x9C40, 0x9C7E }, 189 { 0x9C80, 0x9CFE }, 190 { 0x9D40, 0x9D7E }, 191 { 0x9D80, 0x9DFE }, 192 { 0x9E40, 0x9E7E }, 193 { 0x9E80, 0x9EFE }, 194 { 0x9F40, 0x9F7E }, 195 { 0x9F80, 0x9FFE }, 196 { 0xA040, 0xA07E }, 197 { 0xA080, 0xA0FE }, 198 { 0xA1A1, 0xA1FE }, 199 { 0xA2A1, 0xA2AA }, 200 { 0xA2B1, 0xA2E2 }, 201 { 0xA2E5, 0xA2EE }, 202 { 0xA2F1, 0xA2FC }, 203 { 0xA3A1, 0xA3FE }, 204 { 0xA4A1, 0xA4F3 }, 205 { 0xA5A1, 0xA5F6 }, 206 { 0xA6A1, 0xA6B8 }, 207 { 0xA6C1, 0xA6D8 }, 208 { 0xA6E0, 0xA6EB }, 209 { 0xA6EE, 0xA6F2 }, 210 { 0xA6F4, 0xA6F5 }, 211 { 0xA7A1, 0xA7C1 }, 212 { 0xA7D1, 0xA7F1 }, 213 { 0xA840, 0xA87E }, 214 { 0xA880, 0xA895 }, 215 { 0xA8A1, 0xA8BB }, 216 { 0xA8BD, 0xA8BE }, 217 { 0xA8C0, 0xA8C0 }, 218 { 0xA8C5, 0xA8E9 }, 219 { 0xA940, 0xA957 }, 220 { 0xA959, 0xA95A }, 221 { 0xA95C, 0xA95C }, 222 { 0xA960, 0xA97E }, 223 { 0xA980, 0xA988 }, 224 { 0xA996, 0xA996 }, 225 { 0xA9A4, 0xA9EF }, 226 { 0xAA40, 0xAA7E }, 227 { 0xAA80, 0xAAA0 }, 228 { 0xAB40, 0xAB7E }, 229 { 0xAB80, 0xABA0 }, 230 { 0xAC40, 0xAC7E }, 231 { 0xAC80, 0xACA0 }, 232 { 0xAD40, 0xAD7E }, 233 { 0xAD80, 0xADA0 }, 234 { 0xAE40, 0xAE7E }, 235 { 0xAE80, 0xAEA0 }, 236 { 0xAF40, 0xAF7E }, 237 { 0xAF80, 0xAFA0 }, 238 { 0xB040, 0xB07E }, 239 { 0xB080, 0xB0FE }, 240 { 0xB140, 0xB17E }, 241 { 0xB180, 0xB1FE }, 242 { 0xB240, 0xB27E }, 243 { 0xB280, 0xB2FE }, 244 { 0xB340, 0xB37E }, 245 { 0xB380, 0xB3FE }, 246 { 0xB440, 0xB47E }, 247 { 0xB480, 0xB4FE }, 248 { 0xB540, 0xB57E }, 249 { 0xB580, 0xB5FE }, 250 { 0xB640, 0xB67E }, 251 { 0xB680, 0xB6FE }, 252 { 0xB740, 0xB77E }, 253 { 0xB780, 0xB7FE }, 254 { 0xB840, 0xB87E }, 255 { 0xB880, 0xB8FE }, 256 { 0xB940, 0xB97E }, 257 { 0xB980, 0xB9FE }, 258 { 0xBA40, 0xBA7E }, 259 { 0xBA80, 0xBAFE }, 260 { 0xBB40, 0xBB7E }, 261 { 0xBB80, 0xBBFE }, 262 { 0xBC40, 0xBC7E }, 263 { 0xBC80, 0xBCFE }, 264 { 0xBD40, 0xBD7E }, 265 { 0xBD80, 0xBDFE }, 266 { 0xBE40, 0xBE7E }, 267 { 0xBE80, 0xBEFE }, 268 { 0xBF40, 0xBF7E }, 269 { 0xBF80, 0xBFFE }, 270 { 0xC040, 0xC07E }, 271 { 0xC080, 0xC0FE }, 272 { 0xC140, 0xC17E }, 273 { 0xC180, 0xC1FE }, 274 { 0xC240, 0xC27E }, 275 { 0xC280, 0xC2FE }, 276 { 0xC340, 0xC37E }, 277 { 0xC380, 0xC3FE }, 278 { 0xC440, 0xC47E }, 279 { 0xC480, 0xC4FE }, 280 { 0xC540, 0xC57E }, 281 { 0xC580, 0xC5FE }, 282 { 0xC640, 0xC67E }, 283 { 0xC680, 0xC6FE }, 284 { 0xC740, 0xC77E }, 285 { 0xC780, 0xC7FE }, 286 { 0xC840, 0xC87E }, 287 { 0xC880, 0xC8FE }, 288 { 0xC940, 0xC97E }, 289 { 0xC980, 0xC9FE }, 290 { 0xCA40, 0xCA7E }, 291 { 0xCA80, 0xCAFE }, 292 { 0xCB40, 0xCB7E }, 293 { 0xCB80, 0xCBFE }, 294 { 0xCC40, 0xCC7E }, 295 { 0xCC80, 0xCCFE }, 296 { 0xCD40, 0xCD7E }, 297 { 0xCD80, 0xCDFE }, 298 { 0xCE40, 0xCE7E }, 299 { 0xCE80, 0xCEFE }, 300 { 0xCF40, 0xCF7E }, 301 { 0xCF80, 0xCFFE }, 302 { 0xD040, 0xD07E }, 303 { 0xD080, 0xD0FE }, 304 { 0xD140, 0xD17E }, 305 { 0xD180, 0xD1FE }, 306 { 0xD240, 0xD27E }, 307 { 0xD280, 0xD2FE }, 308 { 0xD340, 0xD37E }, 309 { 0xD380, 0xD3FE }, 310 { 0xD440, 0xD47E }, 311 { 0xD480, 0xD4FE }, 312 { 0xD540, 0xD57E }, 313 { 0xD580, 0xD5FE }, 314 { 0xD640, 0xD67E }, 315 { 0xD680, 0xD6FE }, 316 { 0xD740, 0xD77E }, 317 { 0xD780, 0xD7F9 }, 318 { 0xD840, 0xD87E }, 319 { 0xD880, 0xD8FE }, 320 { 0xD940, 0xD97E }, 321 { 0xD980, 0xD9FE }, 322 { 0xDA40, 0xDA7E }, 323 { 0xDA80, 0xDAFE }, 324 { 0xDB40, 0xDB7E }, 325 { 0xDB80, 0xDBFE }, 326 { 0xDC40, 0xDC7E }, 327 { 0xDC80, 0xDCFE }, 328 { 0xDD40, 0xDD7E }, 329 { 0xDD80, 0xDDFE }, 330 { 0xDE40, 0xDE7E }, 331 { 0xDE80, 0xDEFE }, 332 { 0xDF40, 0xDF7E }, 333 { 0xDF80, 0xDFFE }, 334 { 0xE040, 0xE07E }, 335 { 0xE080, 0xE0FE }, 336 { 0xE140, 0xE17E }, 337 { 0xE180, 0xE1FE }, 338 { 0xE240, 0xE27E }, 339 { 0xE280, 0xE2FE }, 340 { 0xE340, 0xE37E }, 341 { 0xE380, 0xE3FE }, 342 { 0xE440, 0xE47E }, 343 { 0xE480, 0xE4FE }, 344 { 0xE540, 0xE57E }, 345 { 0xE580, 0xE5FE }, 346 { 0xE640, 0xE67E }, 347 { 0xE680, 0xE6FE }, 348 { 0xE740, 0xE77E }, 349 { 0xE780, 0xE7FE }, 350 { 0xE840, 0xE87E }, 351 { 0xE880, 0xE8FE }, 352 { 0xE940, 0xE97E }, 353 { 0xE980, 0xE9FE }, 354 { 0xEA40, 0xEA7E }, 355 { 0xEA80, 0xEAFE }, 356 { 0xEB40, 0xEB7E }, 357 { 0xEB80, 0xEBFE }, 358 { 0xEC40, 0xEC7E }, 359 { 0xEC80, 0xECFE }, 360 { 0xED40, 0xED7E }, 361 { 0xED80, 0xEDFE }, 362 { 0xEE40, 0xEE7E }, 363 { 0xEE80, 0xEEFE }, 364 { 0xEF40, 0xEF7E }, 365 { 0xEF80, 0xEFFE }, 366 { 0xF040, 0xF07E }, 367 { 0xF080, 0xF0FE }, 368 { 0xF140, 0xF17E }, 369 { 0xF180, 0xF1FE }, 370 { 0xF240, 0xF27E }, 371 { 0xF280, 0xF2FE }, 372 { 0xF340, 0xF37E }, 373 { 0xF380, 0xF3FE }, 374 { 0xF440, 0xF47E }, 375 { 0xF480, 0xF4FE }, 376 { 0xF540, 0xF57E }, 377 { 0xF580, 0xF5FE }, 378 { 0xF640, 0xF67E }, 379 { 0xF680, 0xF6FE }, 380 { 0xF740, 0xF77E }, 381 { 0xF780, 0xF7FE }, 382 { 0xF840, 0xF87E }, 383 { 0xF880, 0xF8A0 }, 384 { 0xF940, 0xF97E }, 385 { 0xF980, 0xF9A0 }, 386 { 0xFA40, 0xFA7E }, 387 { 0xFA80, 0xFAA0 }, 388 { 0xFB40, 0xFB7E }, 389 { 0xFB80, 0xFBA0 }, 390 { 0xFC40, 0xFC7E }, 391 { 0xFC80, 0xFCA0 }, 392 { 0xFD40, 0xFD7E }, 393 { 0xFD80, 0xFDA0 }, 394 { 0xFE40, 0xFE4F }, 395 }; 396 397 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT 398 static const CharRange kEUCKRRanges[] = { 399 { 0x8141, 0x815A }, 400 { 0x8161, 0x817A }, 401 { 0x8181, 0x81FE }, 402 { 0x8241, 0x825A }, 403 { 0x8261, 0x827A }, 404 { 0x8281, 0x82FE }, 405 { 0x8341, 0x835A }, 406 { 0x8361, 0x837A }, 407 { 0x8381, 0x83FE }, 408 { 0x8441, 0x845A }, 409 { 0x8461, 0x847A }, 410 { 0x8481, 0x84FE }, 411 { 0x8541, 0x855A }, 412 { 0x8561, 0x857A }, 413 { 0x8581, 0x85FE }, 414 { 0x8641, 0x865A }, 415 { 0x8661, 0x867A }, 416 { 0x8681, 0x86FE }, 417 { 0x8741, 0x875A }, 418 { 0x8761, 0x877A }, 419 { 0x8781, 0x87FE }, 420 { 0x8841, 0x885A }, 421 { 0x8861, 0x887A }, 422 { 0x8881, 0x88FE }, 423 { 0x8941, 0x895A }, 424 { 0x8961, 0x897A }, 425 { 0x8981, 0x89FE }, 426 { 0x8A41, 0x8A5A }, 427 { 0x8A61, 0x8A7A }, 428 { 0x8A81, 0x8AFE }, 429 { 0x8B41, 0x8B5A }, 430 { 0x8B61, 0x8B7A }, 431 { 0x8B81, 0x8BFE }, 432 { 0x8C41, 0x8C5A }, 433 { 0x8C61, 0x8C7A }, 434 { 0x8C81, 0x8CFE }, 435 { 0x8D41, 0x8D5A }, 436 { 0x8D61, 0x8D7A }, 437 { 0x8D81, 0x8DFE }, 438 { 0x8E41, 0x8E5A }, 439 { 0x8E61, 0x8E7A }, 440 { 0x8E81, 0x8EFE }, 441 { 0x8F41, 0x8F5A }, 442 { 0x8F61, 0x8F7A }, 443 { 0x8F81, 0x8FFE }, 444 { 0x9041, 0x905A }, 445 { 0x9061, 0x907A }, 446 { 0x9081, 0x90FE }, 447 { 0x9141, 0x915A }, 448 { 0x9161, 0x917A }, 449 { 0x9181, 0x91FE }, 450 { 0x9241, 0x925A }, 451 { 0x9261, 0x927A }, 452 { 0x9281, 0x92FE }, 453 { 0x9341, 0x935A }, 454 { 0x9361, 0x937A }, 455 { 0x9381, 0x93FE }, 456 { 0x9441, 0x945A }, 457 { 0x9461, 0x947A }, 458 { 0x9481, 0x94FE }, 459 { 0x9541, 0x955A }, 460 { 0x9561, 0x957A }, 461 { 0x9581, 0x95FE }, 462 { 0x9641, 0x965A }, 463 { 0x9661, 0x967A }, 464 { 0x9681, 0x96FE }, 465 { 0x9741, 0x975A }, 466 { 0x9761, 0x977A }, 467 { 0x9781, 0x97FE }, 468 { 0x9841, 0x985A }, 469 { 0x9861, 0x987A }, 470 { 0x9881, 0x98FE }, 471 { 0x9941, 0x995A }, 472 { 0x9961, 0x997A }, 473 { 0x9981, 0x99FE }, 474 { 0x9A41, 0x9A5A }, 475 { 0x9A61, 0x9A7A }, 476 { 0x9A81, 0x9AFE }, 477 { 0x9B41, 0x9B5A }, 478 { 0x9B61, 0x9B7A }, 479 { 0x9B81, 0x9BFE }, 480 { 0x9C41, 0x9C5A }, 481 { 0x9C61, 0x9C7A }, 482 { 0x9C81, 0x9CFE }, 483 { 0x9D41, 0x9D5A }, 484 { 0x9D61, 0x9D7A }, 485 { 0x9D81, 0x9DFE }, 486 { 0x9E41, 0x9E5A }, 487 { 0x9E61, 0x9E7A }, 488 { 0x9E81, 0x9EFE }, 489 { 0x9F41, 0x9F5A }, 490 { 0x9F61, 0x9F7A }, 491 { 0x9F81, 0x9FFE }, 492 { 0xA041, 0xA05A }, 493 { 0xA061, 0xA07A }, 494 { 0xA081, 0xA0FE }, 495 { 0xA141, 0xA15A }, 496 { 0xA161, 0xA17A }, 497 { 0xA181, 0xA1FE }, 498 { 0xA241, 0xA25A }, 499 { 0xA261, 0xA27A }, 500 { 0xA281, 0xA2E7 }, 501 { 0xA341, 0xA35A }, 502 { 0xA361, 0xA37A }, 503 { 0xA381, 0xA3FE }, 504 { 0xA441, 0xA45A }, 505 { 0xA461, 0xA47A }, 506 { 0xA481, 0xA4FE }, 507 { 0xA541, 0xA55A }, 508 { 0xA561, 0xA57A }, 509 { 0xA581, 0xA5AA }, 510 { 0xA5B0, 0xA5B9 }, 511 { 0xA5C1, 0xA5D8 }, 512 { 0xA5E1, 0xA5F8 }, 513 { 0xA641, 0xA65A }, 514 { 0xA661, 0xA67A }, 515 { 0xA681, 0xA6E4 }, 516 { 0xA741, 0xA75A }, 517 { 0xA761, 0xA77A }, 518 { 0xA781, 0xA7EF }, 519 { 0xA841, 0xA85A }, 520 { 0xA861, 0xA87A }, 521 { 0xA881, 0xA8A4 }, 522 { 0xA8A6, 0xA8A6 }, 523 { 0xA8A8, 0xA8AF }, 524 { 0xA8B1, 0xA8FE }, 525 { 0xA941, 0xA95A }, 526 { 0xA961, 0xA97A }, 527 { 0xA981, 0xA9FE }, 528 { 0xAA41, 0xAA5A }, 529 { 0xAA61, 0xAA7A }, 530 { 0xAA81, 0xAAF3 }, 531 { 0xAB41, 0xAB5A }, 532 { 0xAB61, 0xAB7A }, 533 { 0xAB81, 0xABF6 }, 534 { 0xAC41, 0xAC5A }, 535 { 0xAC61, 0xAC7A }, 536 { 0xAC81, 0xACC1 }, 537 { 0xACD1, 0xACF1 }, 538 { 0xAD41, 0xAD5A }, 539 { 0xAD61, 0xAD7A }, 540 { 0xAD81, 0xADA0 }, 541 { 0xAE41, 0xAE5A }, 542 { 0xAE61, 0xAE7A }, 543 { 0xAE81, 0xAEA0 }, 544 { 0xAF41, 0xAF5A }, 545 { 0xAF61, 0xAF7A }, 546 { 0xAF81, 0xAFA0 }, 547 { 0xB041, 0xB05A }, 548 { 0xB061, 0xB07A }, 549 { 0xB081, 0xB0FE }, 550 { 0xB141, 0xB15A }, 551 { 0xB161, 0xB17A }, 552 { 0xB181, 0xB1FE }, 553 { 0xB241, 0xB25A }, 554 { 0xB261, 0xB27A }, 555 { 0xB281, 0xB2FE }, 556 { 0xB341, 0xB35A }, 557 { 0xB361, 0xB37A }, 558 { 0xB381, 0xB3FE }, 559 { 0xB441, 0xB45A }, 560 { 0xB461, 0xB47A }, 561 { 0xB481, 0xB4FE }, 562 { 0xB541, 0xB55A }, 563 { 0xB561, 0xB57A }, 564 { 0xB581, 0xB5FE }, 565 { 0xB641, 0xB65A }, 566 { 0xB661, 0xB67A }, 567 { 0xB681, 0xB6FE }, 568 { 0xB741, 0xB75A }, 569 { 0xB761, 0xB77A }, 570 { 0xB781, 0xB7FE }, 571 { 0xB841, 0xB85A }, 572 { 0xB861, 0xB87A }, 573 { 0xB881, 0xB8FE }, 574 { 0xB941, 0xB95A }, 575 { 0xB961, 0xB97A }, 576 { 0xB981, 0xB9FE }, 577 { 0xBA41, 0xBA5A }, 578 { 0xBA61, 0xBA7A }, 579 { 0xBA81, 0xBAFE }, 580 { 0xBB41, 0xBB5A }, 581 { 0xBB61, 0xBB7A }, 582 { 0xBB81, 0xBBFE }, 583 { 0xBC41, 0xBC5A }, 584 { 0xBC61, 0xBC7A }, 585 { 0xBC81, 0xBCFE }, 586 { 0xBD41, 0xBD5A }, 587 { 0xBD61, 0xBD7A }, 588 { 0xBD81, 0xBDFE }, 589 { 0xBE41, 0xBE5A }, 590 { 0xBE61, 0xBE7A }, 591 { 0xBE81, 0xBEFE }, 592 { 0xBF41, 0xBF5A }, 593 { 0xBF61, 0xBF7A }, 594 { 0xBF81, 0xBFFE }, 595 { 0xC041, 0xC05A }, 596 { 0xC061, 0xC07A }, 597 { 0xC081, 0xC0FE }, 598 { 0xC141, 0xC15A }, 599 { 0xC161, 0xC17A }, 600 { 0xC181, 0xC1FE }, 601 { 0xC241, 0xC25A }, 602 { 0xC261, 0xC27A }, 603 { 0xC281, 0xC2FE }, 604 { 0xC341, 0xC35A }, 605 { 0xC361, 0xC37A }, 606 { 0xC381, 0xC3FE }, 607 { 0xC441, 0xC45A }, 608 { 0xC461, 0xC47A }, 609 { 0xC481, 0xC4FE }, 610 { 0xC541, 0xC55A }, 611 { 0xC561, 0xC57A }, 612 { 0xC581, 0xC5FE }, 613 { 0xC641, 0xC652 }, 614 { 0xC6A1, 0xC6FE }, 615 { 0xC7A1, 0xC7FE }, 616 { 0xC8A1, 0xC8FE }, 617 { 0xCAA1, 0xCAFE }, 618 { 0xCBA1, 0xCBFE }, 619 { 0xCCA1, 0xCCFE }, 620 { 0xCDA1, 0xCDFE }, 621 { 0xCEA1, 0xCEFE }, 622 { 0xCFA1, 0xCFFE }, 623 { 0xD0A1, 0xD0FE }, 624 { 0xD1A1, 0xD1FE }, 625 { 0xD2A1, 0xD2FE }, 626 { 0xD3A1, 0xD3FE }, 627 { 0xD4A1, 0xD4FE }, 628 { 0xD5A1, 0xD5FE }, 629 { 0xD6A1, 0xD6FE }, 630 { 0xD7A1, 0xD7FE }, 631 { 0xD8A1, 0xD8FE }, 632 { 0xD9A1, 0xD9FE }, 633 { 0xDAA1, 0xDAFE }, 634 { 0xDBA1, 0xDBFE }, 635 { 0xDCA1, 0xDCFE }, 636 { 0xDDA1, 0xDDFE }, 637 { 0xDEA1, 0xDEFE }, 638 { 0xDFA1, 0xDFFE }, 639 { 0xE0A1, 0xE0FE }, 640 { 0xE1A1, 0xE1FE }, 641 { 0xE2A1, 0xE2FE }, 642 { 0xE3A1, 0xE3FE }, 643 { 0xE4A1, 0xE4FE }, 644 { 0xE5A1, 0xE5FE }, 645 { 0xE6A1, 0xE6FE }, 646 { 0xE7A1, 0xE7FE }, 647 { 0xE8A1, 0xE8FE }, 648 { 0xE9A1, 0xE9FE }, 649 { 0xEAA1, 0xEAFE }, 650 { 0xEBA1, 0xEBFE }, 651 { 0xECA1, 0xECFE }, 652 { 0xEDA1, 0xEDFE }, 653 { 0xEEA1, 0xEEFE }, 654 { 0xEFA1, 0xEFFE }, 655 { 0xF0A1, 0xF0FE }, 656 { 0xF1A1, 0xF1FE }, 657 { 0xF2A1, 0xF2FE }, 658 { 0xF3A1, 0xF3FE }, 659 { 0xF4A1, 0xF4FE }, 660 { 0xF5A1, 0xF5FE }, 661 { 0xF6A1, 0xF6FE }, 662 { 0xF7A1, 0xF7FE }, 663 { 0xF8A1, 0xF8FE }, 664 { 0xF9A1, 0xF9FE }, 665 { 0xFAA1, 0xFAFE }, 666 { 0xFBA1, 0xFBFE }, 667 { 0xFCA1, 0xFCFE }, 668 { 0xFDA1, 0xFDFE }, 669 }; 670 671 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT 672 static const CharRange kBig5Ranges[] = { 673 { 0xA140, 0xA17E }, 674 { 0xA1A1, 0xA1FE }, 675 { 0xA240, 0xA27E }, 676 { 0xA2A1, 0xA2FE }, 677 { 0xA340, 0xA37E }, 678 { 0xA3A1, 0xA3BF }, 679 { 0xA3E1, 0xA3E1 }, 680 { 0xA440, 0xA47E }, 681 { 0xA4A1, 0xA4FE }, 682 { 0xA540, 0xA57E }, 683 { 0xA5A1, 0xA5FE }, 684 { 0xA640, 0xA67E }, 685 { 0xA6A1, 0xA6FE }, 686 { 0xA740, 0xA77E }, 687 { 0xA7A1, 0xA7FE }, 688 { 0xA840, 0xA87E }, 689 { 0xA8A1, 0xA8FE }, 690 { 0xA940, 0xA97E }, 691 { 0xA9A1, 0xA9FE }, 692 { 0xAA40, 0xAA7E }, 693 { 0xAAA1, 0xAAFE }, 694 { 0xAB40, 0xAB7E }, 695 { 0xABA1, 0xABFE }, 696 { 0xAC40, 0xAC7E }, 697 { 0xACA1, 0xACFE }, 698 { 0xAD40, 0xAD7E }, 699 { 0xADA1, 0xADFE }, 700 { 0xAE40, 0xAE7E }, 701 { 0xAEA1, 0xAEFE }, 702 { 0xAF40, 0xAF7E }, 703 { 0xAFA1, 0xAFFE }, 704 { 0xB040, 0xB07E }, 705 { 0xB0A1, 0xB0FE }, 706 { 0xB140, 0xB17E }, 707 { 0xB1A1, 0xB1FE }, 708 { 0xB240, 0xB27E }, 709 { 0xB2A1, 0xB2FE }, 710 { 0xB340, 0xB37E }, 711 { 0xB3A1, 0xB3FE }, 712 { 0xB440, 0xB47E }, 713 { 0xB4A1, 0xB4FE }, 714 { 0xB540, 0xB57E }, 715 { 0xB5A1, 0xB5FE }, 716 { 0xB640, 0xB67E }, 717 { 0xB6A1, 0xB6FE }, 718 { 0xB740, 0xB77E }, 719 { 0xB7A1, 0xB7FE }, 720 { 0xB840, 0xB87E }, 721 { 0xB8A1, 0xB8FE }, 722 { 0xB940, 0xB97E }, 723 { 0xB9A1, 0xB9FE }, 724 { 0xBA40, 0xBA7E }, 725 { 0xBAA1, 0xBAFE }, 726 { 0xBB40, 0xBB7E }, 727 { 0xBBA1, 0xBBFE }, 728 { 0xBC40, 0xBC7E }, 729 { 0xBCA1, 0xBCFE }, 730 { 0xBD40, 0xBD7E }, 731 { 0xBDA1, 0xBDFE }, 732 { 0xBE40, 0xBE7E }, 733 { 0xBEA1, 0xBEFE }, 734 { 0xBF40, 0xBF7E }, 735 { 0xBFA1, 0xBFFE }, 736 { 0xC040, 0xC07E }, 737 { 0xC0A1, 0xC0FE }, 738 { 0xC140, 0xC17E }, 739 { 0xC1A1, 0xC1FE }, 740 { 0xC240, 0xC27E }, 741 { 0xC2A1, 0xC2FE }, 742 { 0xC340, 0xC37E }, 743 { 0xC3A1, 0xC3FE }, 744 { 0xC440, 0xC47E }, 745 { 0xC4A1, 0xC4FE }, 746 { 0xC540, 0xC57E }, 747 { 0xC5A1, 0xC5FE }, 748 { 0xC640, 0xC67E }, 749 { 0xC940, 0xC97E }, 750 { 0xC9A1, 0xC9FE }, 751 { 0xCA40, 0xCA7E }, 752 { 0xCAA1, 0xCAFE }, 753 { 0xCB40, 0xCB7E }, 754 { 0xCBA1, 0xCBFE }, 755 { 0xCC40, 0xCC7E }, 756 { 0xCCA1, 0xCCFE }, 757 { 0xCD40, 0xCD7E }, 758 { 0xCDA1, 0xCDFE }, 759 { 0xCE40, 0xCE7E }, 760 { 0xCEA1, 0xCEFE }, 761 { 0xCF40, 0xCF7E }, 762 { 0xCFA1, 0xCFFE }, 763 { 0xD040, 0xD07E }, 764 { 0xD0A1, 0xD0FE }, 765 { 0xD140, 0xD17E }, 766 { 0xD1A1, 0xD1FE }, 767 { 0xD240, 0xD27E }, 768 { 0xD2A1, 0xD2FE }, 769 { 0xD340, 0xD37E }, 770 { 0xD3A1, 0xD3FE }, 771 { 0xD440, 0xD47E }, 772 { 0xD4A1, 0xD4FE }, 773 { 0xD540, 0xD57E }, 774 { 0xD5A1, 0xD5FE }, 775 { 0xD640, 0xD67E }, 776 { 0xD6A1, 0xD6FE }, 777 { 0xD740, 0xD77E }, 778 { 0xD7A1, 0xD7FE }, 779 { 0xD840, 0xD87E }, 780 { 0xD8A1, 0xD8FE }, 781 { 0xD940, 0xD97E }, 782 { 0xD9A1, 0xD9FE }, 783 { 0xDA40, 0xDA7E }, 784 { 0xDAA1, 0xDAFE }, 785 { 0xDB40, 0xDB7E }, 786 { 0xDBA1, 0xDBFE }, 787 { 0xDC40, 0xDC7E }, 788 { 0xDCA1, 0xDCFE }, 789 { 0xDD40, 0xDD7E }, 790 { 0xDDA1, 0xDDFE }, 791 { 0xDE40, 0xDE7E }, 792 { 0xDEA1, 0xDEFE }, 793 { 0xDF40, 0xDF7E }, 794 { 0xDFA1, 0xDFFE }, 795 { 0xE040, 0xE07E }, 796 { 0xE0A1, 0xE0FE }, 797 { 0xE140, 0xE17E }, 798 { 0xE1A1, 0xE1FE }, 799 { 0xE240, 0xE27E }, 800 { 0xE2A1, 0xE2FE }, 801 { 0xE340, 0xE37E }, 802 { 0xE3A1, 0xE3FE }, 803 { 0xE440, 0xE47E }, 804 { 0xE4A1, 0xE4FE }, 805 { 0xE540, 0xE57E }, 806 { 0xE5A1, 0xE5FE }, 807 { 0xE640, 0xE67E }, 808 { 0xE6A1, 0xE6FE }, 809 { 0xE740, 0xE77E }, 810 { 0xE7A1, 0xE7FE }, 811 { 0xE840, 0xE87E }, 812 { 0xE8A1, 0xE8FE }, 813 { 0xE940, 0xE97E }, 814 { 0xE9A1, 0xE9FE }, 815 { 0xEA40, 0xEA7E }, 816 { 0xEAA1, 0xEAFE }, 817 { 0xEB40, 0xEB7E }, 818 { 0xEBA1, 0xEBFE }, 819 { 0xEC40, 0xEC7E }, 820 { 0xECA1, 0xECFE }, 821 { 0xED40, 0xED7E }, 822 { 0xEDA1, 0xEDFE }, 823 { 0xEE40, 0xEE7E }, 824 { 0xEEA1, 0xEEFE }, 825 { 0xEF40, 0xEF7E }, 826 { 0xEFA1, 0xEFFE }, 827 { 0xF040, 0xF07E }, 828 { 0xF0A1, 0xF0FE }, 829 { 0xF140, 0xF17E }, 830 { 0xF1A1, 0xF1FE }, 831 { 0xF240, 0xF27E }, 832 { 0xF2A1, 0xF2FE }, 833 { 0xF340, 0xF37E }, 834 { 0xF3A1, 0xF3FE }, 835 { 0xF440, 0xF47E }, 836 { 0xF4A1, 0xF4FE }, 837 { 0xF540, 0xF57E }, 838 { 0xF5A1, 0xF5FE }, 839 { 0xF640, 0xF67E }, 840 { 0xF6A1, 0xF6FE }, 841 { 0xF740, 0xF77E }, 842 { 0xF7A1, 0xF7FE }, 843 { 0xF840, 0xF87E }, 844 { 0xF8A1, 0xF8FE }, 845 { 0xF940, 0xF97E }, 846 { 0xF9A1, 0xF9FE }, 847 }; 848 849 static bool charMatchesEncoding(int ch, const CharRange* encodingRanges, int rangeCount) { 850 // Use binary search to see if the character is contained in the encoding 851 int low = 0; 852 int high = rangeCount; 853 854 while (low < high) { 855 int i = (low + high) / 2; 856 const CharRange* range = &encodingRanges[i]; 857 if (ch >= range->first && ch <= range->last) 858 return true; 859 if (ch > range->last) 860 low = i + 1; 861 else 862 high = i; 863 } 864 865 return false; 866 } 867 868 extern uint32_t findPossibleEncodings(int ch) 869 { 870 // ASCII matches everything 871 if (ch < 256) return kEncodingAll; 872 873 int result = kEncodingNone; 874 875 if (charMatchesEncoding(ch, kShiftJISRanges, ARRAY_SIZE(kShiftJISRanges))) 876 result |= kEncodingShiftJIS; 877 if (charMatchesEncoding(ch, kGBKRanges, ARRAY_SIZE(kGBKRanges))) 878 result |= kEncodingGBK; 879 if (charMatchesEncoding(ch, kBig5Ranges, ARRAY_SIZE(kBig5Ranges))) 880 result |= kEncodingBig5; 881 if (charMatchesEncoding(ch, kEUCKRRanges, ARRAY_SIZE(kEUCKRRanges))) 882 result |= kEncodingEUCKR; 883 884 return result; 885 } 886