/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/* Generator program: builds two 65536-entry membership tables from the
   range lists below and prints them to stdout as C source, in the form of
   a shared bitmap array (namingBitmap) plus two per-page index arrays
   (nmstrtPages and namePages). */

#include <string.h>
#include <stdio.h>
#include <stddef.h>

/* Geometry of the tables built and printed by this program. */
enum {
  TABLE_SIZE = 65536,    /* entries (code points) in one membership table */
  CHARS_PER_PAGE = 256,  /* code points summarised by one page index */
  PAGES_PER_TABLE = 256, /* TABLE_SIZE / CHARS_PER_PAGE */
  BITS_PER_WORD = 32,    /* bits packed into each printed bitmap word */
  WORDS_PER_PAGE = 8     /* CHARS_PER_PAGE / BITS_PER_WORD */
};

/* An inclusive range [start, end] of code points.  An entry written with
   a single initializer leaves `end` at 0, which setTab() treats as "just
   the one code point `start`". */
struct range {
  int start;
  int end;
};

/* Characters that may start a name. */
struct range nmstrt[] = {
  { '_' },
  { ':' },
  /* BaseChar */
  { 0x0041, 0x005a },
  { 0x0061, 0x007a },
  { 0x00c0, 0x00d6 },
  { 0x00d8, 0x00f6 },
  { 0x00f8, 0x00ff },
  { 0x0100, 0x0131 },
  { 0x0134, 0x013e },
  { 0x0141, 0x0148 },
  { 0x014a, 0x017e },
  { 0x0180, 0x01c3 },
  { 0x01cd, 0x01f0 },
  { 0x01f4, 0x01f5 },
  { 0x01fa, 0x0217 },
  { 0x0250, 0x02a8 },
  { 0x02bb, 0x02c1 },
  { 0x0386 },
  { 0x0388, 0x038a },
  { 0x038c },
  { 0x038e, 0x03a1 },
  { 0x03a3, 0x03ce },
  { 0x03d0, 0x03d6 },
  { 0x03da },
  { 0x03dc },
  { 0x03de },
  { 0x03e0 },
  { 0x03e2, 0x03f3 },
  { 0x0401, 0x040c },
  { 0x040e, 0x044f },
  { 0x0451, 0x045c },
  { 0x045e, 0x0481 },
  { 0x0490, 0x04c4 },
  { 0x04c7, 0x04c8 },
  { 0x04cb, 0x04cc },
  { 0x04d0, 0x04eb },
  { 0x04ee, 0x04f5 },
  { 0x04f8, 0x04f9 },
  { 0x0531, 0x0556 },
  { 0x0559 },
  { 0x0561, 0x0586 },
  { 0x05d0, 0x05ea },
  { 0x05f0, 0x05f2 },
  { 0x0621, 0x063a },
  { 0x0641, 0x064a },
  { 0x0671, 0x06b7 },
  { 0x06ba, 0x06be },
  { 0x06c0, 0x06ce },
  { 0x06d0, 0x06d3 },
  { 0x06d5 },
  { 0x06e5, 0x06e6 },
  { 0x0905, 0x0939 },
  { 0x093d },
  { 0x0958, 0x0961 },
  { 0x0985, 0x098c },
  { 0x098f, 0x0990 },
  { 0x0993, 0x09a8 },
  { 0x09aa, 0x09b0 },
  { 0x09b2 },
  { 0x09b6, 0x09b9 },
  { 0x09dc, 0x09dd },
  { 0x09df, 0x09e1 },
  { 0x09f0, 0x09f1 },
  { 0x0a05, 0x0a0a },
  { 0x0a0f, 0x0a10 },
  { 0x0a13, 0x0a28 },
  { 0x0a2a, 0x0a30 },
  { 0x0a32, 0x0a33 },
  { 0x0a35, 0x0a36 },
  { 0x0a38, 0x0a39 },
  { 0x0a59, 0x0a5c },
  { 0x0a5e },
  { 0x0a72, 0x0a74 },
  { 0x0a85, 0x0a8b },
  { 0x0a8d },
  { 0x0a8f, 0x0a91 },
  { 0x0a93, 0x0aa8 },
  { 0x0aaa, 0x0ab0 },
  { 0x0ab2, 0x0ab3 },
  { 0x0ab5, 0x0ab9 },
  { 0x0abd },
  { 0x0ae0 },
  { 0x0b05, 0x0b0c },
  { 0x0b0f, 0x0b10 },
  { 0x0b13, 0x0b28 },
  { 0x0b2a, 0x0b30 },
  { 0x0b32, 0x0b33 },
  { 0x0b36, 0x0b39 },
  { 0x0b3d },
  { 0x0b5c, 0x0b5d },
  { 0x0b5f, 0x0b61 },
  { 0x0b85, 0x0b8a },
  { 0x0b8e, 0x0b90 },
  { 0x0b92, 0x0b95 },
  { 0x0b99, 0x0b9a },
  { 0x0b9c },
  { 0x0b9e, 0x0b9f },
  { 0x0ba3, 0x0ba4 },
  { 0x0ba8, 0x0baa },
  { 0x0bae, 0x0bb5 },
  { 0x0bb7, 0x0bb9 },
  { 0x0c05, 0x0c0c },
  { 0x0c0e, 0x0c10 },
  { 0x0c12, 0x0c28 },
  { 0x0c2a, 0x0c33 },
  { 0x0c35, 0x0c39 },
  { 0x0c60, 0x0c61 },
  { 0x0c85, 0x0c8c },
  { 0x0c8e, 0x0c90 },
  { 0x0c92, 0x0ca8 },
  { 0x0caa, 0x0cb3 },
  { 0x0cb5, 0x0cb9 },
  { 0x0cde },
  { 0x0ce0, 0x0ce1 },
  { 0x0d05, 0x0d0c },
  { 0x0d0e, 0x0d10 },
  { 0x0d12, 0x0d28 },
  { 0x0d2a, 0x0d39 },
  { 0x0d60, 0x0d61 },
  { 0x0e01, 0x0e2e },
  { 0x0e30 },
  { 0x0e32, 0x0e33 },
  { 0x0e40, 0x0e45 },
  { 0x0e81, 0x0e82 },
  { 0x0e84 },
  { 0x0e87, 0x0e88 },
  { 0x0e8a },
  { 0x0e8d },
  { 0x0e94, 0x0e97 },
  { 0x0e99, 0x0e9f },
  { 0x0ea1, 0x0ea3 },
  { 0x0ea5 },
  { 0x0ea7 },
  { 0x0eaa, 0x0eab },
  { 0x0ead, 0x0eae },
  { 0x0eb0 },
  { 0x0eb2, 0x0eb3 },
  { 0x0ebd },
  { 0x0ec0, 0x0ec4 },
  { 0x0f40, 0x0f47 },
  { 0x0f49, 0x0f69 },
  { 0x10a0, 0x10c5 },
  { 0x10d0, 0x10f6 },
  { 0x1100 },
  { 0x1102, 0x1103 },
  { 0x1105, 0x1107 },
  { 0x1109 },
  { 0x110b, 0x110c },
  { 0x110e, 0x1112 },
  { 0x113c },
  { 0x113e },
  { 0x1140 },
  { 0x114c },
  { 0x114e },
  { 0x1150 },
  { 0x1154, 0x1155 },
  { 0x1159 },
  { 0x115f, 0x1161 },
  { 0x1163 },
  { 0x1165 },
  { 0x1167 },
  { 0x1169 },
  { 0x116d, 0x116e },
  { 0x1172, 0x1173 },
  { 0x1175 },
  { 0x119e },
  { 0x11a8 },
  { 0x11ab },
  { 0x11ae, 0x11af },
  { 0x11b7, 0x11b8 },
  { 0x11ba },
  { 0x11bc, 0x11c2 },
  { 0x11eb },
  { 0x11f0 },
  { 0x11f9 },
  { 0x1e00, 0x1e9b },
  { 0x1ea0, 0x1ef9 },
  { 0x1f00, 0x1f15 },
  { 0x1f18, 0x1f1d },
  { 0x1f20, 0x1f45 },
  { 0x1f48, 0x1f4d },
  { 0x1f50, 0x1f57 },
  { 0x1f59 },
  { 0x1f5b },
  { 0x1f5d },
  { 0x1f5f, 0x1f7d },
  { 0x1f80, 0x1fb4 },
  { 0x1fb6, 0x1fbc },
  { 0x1fbe },
  { 0x1fc2, 0x1fc4 },
  { 0x1fc6, 0x1fcc },
  { 0x1fd0, 0x1fd3 },
  { 0x1fd6, 0x1fdb },
  { 0x1fe0, 0x1fec },
  { 0x1ff2, 0x1ff4 },
  { 0x1ff6, 0x1ffc },
  { 0x2126 },
  { 0x212a, 0x212b },
  { 0x212e },
  { 0x2180, 0x2182 },
  { 0x3041, 0x3094 },
  { 0x30a1, 0x30fa },
  { 0x3105, 0x312c },
  { 0xac00, 0xd7a3 },
  /* Ideographic */
  { 0x4e00, 0x9fa5 },
  { 0x3007 },
  { 0x3021, 0x3029 },
};

/* name chars that are not name start chars */
struct range name[] = {
  { '.' },
  { '-' },
  /* CombiningChar */
  { 0x0300, 0x0345 },
  { 0x0360, 0x0361 },
  { 0x0483, 0x0486 },
  { 0x0591, 0x05a1 },
  { 0x05a3, 0x05b9 },
  { 0x05bb, 0x05bd },
  { 0x05bf },
  { 0x05c1, 0x05c2 },
  { 0x05c4 },
  { 0x064b, 0x0652 },
  { 0x0670 },
  { 0x06d6, 0x06dc },
  { 0x06dd, 0x06df },
  { 0x06e0, 0x06e4 },
  { 0x06e7, 0x06e8 },
  { 0x06ea, 0x06ed },
  { 0x0901, 0x0903 },
  { 0x093c },
  { 0x093e, 0x094c },
  { 0x094d },
  { 0x0951, 0x0954 },
  { 0x0962, 0x0963 },
  { 0x0981, 0x0983 },
  { 0x09bc },
  { 0x09be },
  { 0x09bf },
  { 0x09c0, 0x09c4 },
  { 0x09c7, 0x09c8 },
  { 0x09cb, 0x09cd },
  { 0x09d7 },
  { 0x09e2, 0x09e3 },
  { 0x0a02 },
  { 0x0a3c },
  { 0x0a3e },
  { 0x0a3f },
  { 0x0a40, 0x0a42 },
  { 0x0a47, 0x0a48 },
  { 0x0a4b, 0x0a4d },
  { 0x0a70, 0x0a71 },
  { 0x0a81, 0x0a83 },
  { 0x0abc },
  { 0x0abe, 0x0ac5 },
  { 0x0ac7, 0x0ac9 },
  { 0x0acb, 0x0acd },
  { 0x0b01, 0x0b03 },
  { 0x0b3c },
  { 0x0b3e, 0x0b43 },
  { 0x0b47, 0x0b48 },
  { 0x0b4b, 0x0b4d },
  { 0x0b56, 0x0b57 },
  { 0x0b82, 0x0b83 },
  { 0x0bbe, 0x0bc2 },
  { 0x0bc6, 0x0bc8 },
  { 0x0bca, 0x0bcd },
  { 0x0bd7 },
  { 0x0c01, 0x0c03 },
  { 0x0c3e, 0x0c44 },
  { 0x0c46, 0x0c48 },
  { 0x0c4a, 0x0c4d },
  { 0x0c55, 0x0c56 },
  { 0x0c82, 0x0c83 },
  { 0x0cbe, 0x0cc4 },
  { 0x0cc6, 0x0cc8 },
  { 0x0cca, 0x0ccd },
  { 0x0cd5, 0x0cd6 },
  { 0x0d02, 0x0d03 },
  { 0x0d3e, 0x0d43 },
  { 0x0d46, 0x0d48 },
  { 0x0d4a, 0x0d4d },
  { 0x0d57 },
  { 0x0e31 },
  { 0x0e34, 0x0e3a },
  { 0x0e47, 0x0e4e },
  { 0x0eb1 },
  { 0x0eb4, 0x0eb9 },
  { 0x0ebb, 0x0ebc },
  { 0x0ec8, 0x0ecd },
  { 0x0f18, 0x0f19 },
  { 0x0f35 },
  { 0x0f37 },
  { 0x0f39 },
  { 0x0f3e },
  { 0x0f3f },
  { 0x0f71, 0x0f84 },
  { 0x0f86, 0x0f8b },
  { 0x0f90, 0x0f95 },
  { 0x0f97 },
  { 0x0f99, 0x0fad },
  { 0x0fb1, 0x0fb7 },
  { 0x0fb9 },
  { 0x20d0, 0x20dc },
  { 0x20e1 },
  { 0x302a, 0x302f },
  { 0x3099 },
  { 0x309a },
  /* Digit */
  { 0x0030, 0x0039 },
  { 0x0660, 0x0669 },
  { 0x06f0, 0x06f9 },
  { 0x0966, 0x096f },
  { 0x09e6, 0x09ef },
  { 0x0a66, 0x0a6f },
  { 0x0ae6, 0x0aef },
  { 0x0b66, 0x0b6f },
  { 0x0be7, 0x0bef },
  { 0x0c66, 0x0c6f },
  { 0x0ce6, 0x0cef },
  { 0x0d66, 0x0d6f },
  { 0x0e50, 0x0e59 },
  { 0x0ed0, 0x0ed9 },
  { 0x0f20, 0x0f29 },
  /* Extender */
  { 0xb7 },
  { 0x02d0 },
  { 0x02d1 },
  { 0x0387 },
  { 0x0640 },
  { 0x0e46 },
  { 0x0ec6 },
  { 0x3005 },
  { 0x3031, 0x3035 },
  { 0x309d, 0x309e },
  { 0x30fc, 0x30fe },
};

/* Mark every code point covered by `ranges` in the membership table.

   tab     - table indexed by code point; covered entries are set to 1,
             all other entries are left untouched.  Must be large enough
             for the highest code point listed in `ranges`.
   ranges  - array of ranges to mark.
   nRanges - number of elements in `ranges`. */
static void
setTab(char *tab, const struct range *ranges, size_t nRanges)
{
  size_t i;
  for (i = 0; i < nRanges; i++) {
    int j;
    if (ranges[i].end) {
      for (j = ranges[i].start; j <= ranges[i].end; j++)
        tab[j] = 1;
    }
    else {
      /* `end` left at 0: the entry names a single code point. */
      tab[ranges[i].start] = 1;
    }
  }
}

/* Print the generated C source for the tables to stdout.

   tab - two consecutive TABLE_SIZE-entry membership tables: the first is
         emitted through nmstrtPages, the second through namePages.

   Each table is split into pages of CHARS_PER_PAGE entries.  A page whose
   entries are all 0 or all 1 is given index 0 or 1, referring to the two
   fixed bitmaps printed first (all zero bits, all one bits).  Any other
   page gets a freshly printed bitmap of WORDS_PER_PAGE words, except that
   a page of the second table identical to the same page of the first
   table reuses that page's index instead of emitting a duplicate. */
static void
printTabs(const char *tab)
{
  int nBitmaps = 2; /* indices 0 and 1 are the fixed bitmaps below */
  int i, j, k;
  unsigned char pageIndex[2 * PAGES_PER_TABLE];

  printf("static const unsigned namingBitmap[] = {\n"
         "0x00000000, 0x00000000, 0x00000000, 0x00000000,\n"
         "0x00000000, 0x00000000, 0x00000000, 0x00000000,\n"
         "0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n"
         "0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n");
  for (i = 0; i < 2 * PAGES_PER_TABLE; i++) {
    const char *page = tab + i * CHARS_PER_PAGE;
    /* kind is 0 or 1 for a uniform page, -1 for a mixed one. */
    int kind = page[0];
    for (j = 1; j < CHARS_PER_PAGE; j++)
      if (page[j] != kind) {
        kind = -1;
        break;
      }
    if (i >= PAGES_PER_TABLE
        && memcmp(tab + (i - PAGES_PER_TABLE) * CHARS_PER_PAGE, page,
                  CHARS_PER_PAGE) == 0)
      pageIndex[i] = pageIndex[i - PAGES_PER_TABLE];
    else if (kind == -1) {
      pageIndex[i] = (unsigned char)nBitmaps++;
      for (j = 0; j < WORDS_PER_PAGE; j++) {
        unsigned val = 0;
        for (k = 0; k < BITS_PER_WORD; k++) {
          /* Unsigned constant: shifting a signed 1 left by 31 would be
             undefined behaviour. */
          if (page[j * BITS_PER_WORD + k])
            val |= (1u << k);
        }
        printf("0x%08X,", val);
        /* Four words per output line. */
        putchar((((j + 1) & 3) == 0) ? '\n' : ' ');
      }
    }
    else
      pageIndex[i] = (unsigned char)kind;
  }
  printf("};\n");
  printf("static const unsigned char nmstrtPages[] = {\n");
  for (i = 0; i < 2 * PAGES_PER_TABLE; i++) {
    /* The second half of pageIndex belongs to the second table. */
    if (i == PAGES_PER_TABLE)
      printf("};\nstatic const unsigned char namePages[] = {\n");
    printf("0x%02X,", pageIndex[i]);
    /* Eight indices per output line. */
    putchar((((i + 1) & 7) == 0) ? '\n' : ' ');
  }
  printf("};\n");
}

/* Build both membership tables and print them.  The second table starts
   as a copy of the first and then has the `name` ranges added, so it
   covers every entry of `nmstrt` as well. */
int
main(void)
{
  /* Static storage keeps 128 KiB off the stack and is zero-initialized,
     so no explicit clearing is needed before the first setTab() call. */
  static char tab[2 * TABLE_SIZE];

  setTab(tab, nmstrt, sizeof(nmstrt) / sizeof(nmstrt[0]));
  memcpy(tab + TABLE_SIZE, tab, TABLE_SIZE);
  setTab(tab + TABLE_SIZE, name, sizeof(name) / sizeof(name[0]));
  printTabs(tab);
  return 0;
}