Home | History | Annotate | Download | only in gennmtab
      1 /*
      2                             __  __            _
      3                          ___\ \/ /_ __   __ _| |_
      4                         / _ \\  /| '_ \ / _` | __|
      5                        |  __//  \| |_) | (_| | |_
      6                         \___/_/\_\ .__/ \__,_|\__|
      7                                  |_| XML parser
      8 
      9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
     10    Copyright (c) 2000-2017 Expat development team
     11    Licensed under the MIT license:
     12 
     13    Permission is  hereby granted,  free of charge,  to any  person obtaining
     14    a  copy  of  this  software   and  associated  documentation  files  (the
     15    "Software"),  to  deal in  the  Software  without restriction,  including
     16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
     17    distribute, sublicense, and/or sell copies of the Software, and to permit
     18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
     19    following conditions:
     20 
     21    The above copyright  notice and this permission notice  shall be included
     22    in all copies or substantial portions of the Software.
     23 
     24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
     25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
     26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
     27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
     28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
     29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     30    USE OR OTHER DEALINGS IN THE SOFTWARE.
     31 */
     32 
     33 #include <string.h>
     34 #include <stdio.h>
     35 #include <stddef.h>
     36 
     37 struct range {
     38   int start;
     39   int end;
     40 };
     41 
/*
 * Code points flagged in the first half of the table built by main()
 * (printTabs() later reports that half as nmstrtPages).
 *
 * Each entry is either { start, end } (inclusive) or { single }; in the
 * latter form `end` is implicitly 0, which setTab() interprets as a
 * single code point.
 *
 * NOTE(review): the group names below (BaseChar, Ideographic) look like
 * the XML 1.0 character-class productions -- confirm against the spec
 * before editing any value.
 */
struct range nmstrt[] = {
  { '_' },
  { ':' },
  /* BaseChar */
  { 0x0041, 0x005a },
  { 0x0061, 0x007a },
  { 0x00c0, 0x00d6 },
  { 0x00d8, 0x00f6 },
  { 0x00f8, 0x00ff },
  { 0x0100, 0x0131 },
  { 0x0134, 0x013e },
  { 0x0141, 0x0148 },
  { 0x014a, 0x017e },
  { 0x0180, 0x01c3 },
  { 0x01cd, 0x01f0 },
  { 0x01f4, 0x01f5 },
  { 0x01fa, 0x0217 },
  { 0x0250, 0x02a8 },
  { 0x02bb, 0x02c1 },
  { 0x0386 },
  { 0x0388, 0x038a },
  { 0x038c },
  { 0x038e, 0x03a1 },
  { 0x03a3, 0x03ce },
  { 0x03d0, 0x03d6 },
  { 0x03da },
  { 0x03dc },
  { 0x03de },
  { 0x03e0 },
  { 0x03e2, 0x03f3 },
  { 0x0401, 0x040c },
  { 0x040e, 0x044f },
  { 0x0451, 0x045c },
  { 0x045e, 0x0481 },
  { 0x0490, 0x04c4 },
  { 0x04c7, 0x04c8 },
  { 0x04cb, 0x04cc },
  { 0x04d0, 0x04eb },
  { 0x04ee, 0x04f5 },
  { 0x04f8, 0x04f9 },
  { 0x0531, 0x0556 },
  { 0x0559 },
  { 0x0561, 0x0586 },
  { 0x05d0, 0x05ea },
  { 0x05f0, 0x05f2 },
  { 0x0621, 0x063a },
  { 0x0641, 0x064a },
  { 0x0671, 0x06b7 },
  { 0x06ba, 0x06be },
  { 0x06c0, 0x06ce },
  { 0x06d0, 0x06d3 },
  { 0x06d5 },
  { 0x06e5, 0x06e6 },
  { 0x0905, 0x0939 },
  { 0x093d },
  { 0x0958, 0x0961 },
  { 0x0985, 0x098c },
  { 0x098f, 0x0990 },
  { 0x0993, 0x09a8 },
  { 0x09aa, 0x09b0 },
  { 0x09b2 },
  { 0x09b6, 0x09b9 },
  { 0x09dc, 0x09dd },
  { 0x09df, 0x09e1 },
  { 0x09f0, 0x09f1 },
  { 0x0a05, 0x0a0a },
  { 0x0a0f, 0x0a10 },
  { 0x0a13, 0x0a28 },
  { 0x0a2a, 0x0a30 },
  { 0x0a32, 0x0a33 },
  { 0x0a35, 0x0a36 },
  { 0x0a38, 0x0a39 },
  { 0x0a59, 0x0a5c },
  { 0x0a5e },
  { 0x0a72, 0x0a74 },
  { 0x0a85, 0x0a8b },
  { 0x0a8d },
  { 0x0a8f, 0x0a91 },
  { 0x0a93, 0x0aa8 },
  { 0x0aaa, 0x0ab0 },
  { 0x0ab2, 0x0ab3 },
  { 0x0ab5, 0x0ab9 },
  { 0x0abd },
  { 0x0ae0 },
  { 0x0b05, 0x0b0c },
  { 0x0b0f, 0x0b10 },
  { 0x0b13, 0x0b28 },
  { 0x0b2a, 0x0b30 },
  { 0x0b32, 0x0b33 },
  { 0x0b36, 0x0b39 },
  { 0x0b3d },
  { 0x0b5c, 0x0b5d },
  { 0x0b5f, 0x0b61 },
  { 0x0b85, 0x0b8a },
  { 0x0b8e, 0x0b90 },
  { 0x0b92, 0x0b95 },
  { 0x0b99, 0x0b9a },
  { 0x0b9c },
  { 0x0b9e, 0x0b9f },
  { 0x0ba3, 0x0ba4 },
  { 0x0ba8, 0x0baa },
  { 0x0bae, 0x0bb5 },
  { 0x0bb7, 0x0bb9 },
  { 0x0c05, 0x0c0c },
  { 0x0c0e, 0x0c10 },
  { 0x0c12, 0x0c28 },
  { 0x0c2a, 0x0c33 },
  { 0x0c35, 0x0c39 },
  { 0x0c60, 0x0c61 },
  { 0x0c85, 0x0c8c },
  { 0x0c8e, 0x0c90 },
  { 0x0c92, 0x0ca8 },
  { 0x0caa, 0x0cb3 },
  { 0x0cb5, 0x0cb9 },
  { 0x0cde },
  { 0x0ce0, 0x0ce1 },
  { 0x0d05, 0x0d0c },
  { 0x0d0e, 0x0d10 },
  { 0x0d12, 0x0d28 },
  { 0x0d2a, 0x0d39 },
  { 0x0d60, 0x0d61 },
  { 0x0e01, 0x0e2e },
  { 0x0e30 },
  { 0x0e32, 0x0e33 },
  { 0x0e40, 0x0e45 },
  { 0x0e81, 0x0e82 },
  { 0x0e84 },
  { 0x0e87, 0x0e88 },
  { 0x0e8a },
  { 0x0e8d },
  { 0x0e94, 0x0e97 },
  { 0x0e99, 0x0e9f },
  { 0x0ea1, 0x0ea3 },
  { 0x0ea5 },
  { 0x0ea7 },
  { 0x0eaa, 0x0eab },
  { 0x0ead, 0x0eae },
  { 0x0eb0 },
  { 0x0eb2, 0x0eb3 },
  { 0x0ebd },
  { 0x0ec0, 0x0ec4 },
  { 0x0f40, 0x0f47 },
  { 0x0f49, 0x0f69 },
  { 0x10a0, 0x10c5 },
  { 0x10d0, 0x10f6 },
  { 0x1100 },
  { 0x1102, 0x1103 },
  { 0x1105, 0x1107 },
  { 0x1109 },
  { 0x110b, 0x110c },
  { 0x110e, 0x1112 },
  { 0x113c },
  { 0x113e },
  { 0x1140 },
  { 0x114c },
  { 0x114e },
  { 0x1150 },
  { 0x1154, 0x1155 },
  { 0x1159 },
  { 0x115f, 0x1161 },
  { 0x1163 },
  { 0x1165 },
  { 0x1167 },
  { 0x1169 },
  { 0x116d, 0x116e },
  { 0x1172, 0x1173 },
  { 0x1175 },
  { 0x119e },
  { 0x11a8 },
  { 0x11ab },
  { 0x11ae, 0x11af },
  { 0x11b7, 0x11b8 },
  { 0x11ba },
  { 0x11bc, 0x11c2 },
  { 0x11eb },
  { 0x11f0 },
  { 0x11f9 },
  { 0x1e00, 0x1e9b },
  { 0x1ea0, 0x1ef9 },
  { 0x1f00, 0x1f15 },
  { 0x1f18, 0x1f1d },
  { 0x1f20, 0x1f45 },
  { 0x1f48, 0x1f4d },
  { 0x1f50, 0x1f57 },
  { 0x1f59 },
  { 0x1f5b },
  { 0x1f5d },
  { 0x1f5f, 0x1f7d },
  { 0x1f80, 0x1fb4 },
  { 0x1fb6, 0x1fbc },
  { 0x1fbe },
  { 0x1fc2, 0x1fc4 },
  { 0x1fc6, 0x1fcc },
  { 0x1fd0, 0x1fd3 },
  { 0x1fd6, 0x1fdb },
  { 0x1fe0, 0x1fec },
  { 0x1ff2, 0x1ff4 },
  { 0x1ff6, 0x1ffc },
  { 0x2126 },
  { 0x212a, 0x212b },
  { 0x212e },
  { 0x2180, 0x2182 },
  { 0x3041, 0x3094 },
  { 0x30a1, 0x30fa },
  { 0x3105, 0x312c },
  { 0xac00, 0xd7a3 },
  /* Ideographic */
  { 0x4e00, 0x9fa5 },
  { 0x3007 },
  { 0x3021, 0x3029 },
};
    253 
/*
 * Name chars that are not name start chars.
 *
 * main() applies these on top of a copy of the name-start flags, so the
 * second half of the table (reported by printTabs() as namePages) ends up
 * holding the union of both sets.
 *
 * Entry format: { start, end } (inclusive) or { single }, where the
 * omitted `end` is implicitly 0 and setTab() marks only `start`.
 *
 * NOTE(review): the group names below (CombiningChar, Digit, Extender)
 * look like the XML 1.0 character-class productions -- confirm against
 * the spec before editing any value.
 */
struct range name[] = {
  { '.' },
  { '-' },
  /* CombiningChar */
  { 0x0300, 0x0345 },
  { 0x0360, 0x0361 },
  { 0x0483, 0x0486 },
  { 0x0591, 0x05a1 },
  { 0x05a3, 0x05b9 },
  { 0x05bb, 0x05bd },
  { 0x05bf },
  { 0x05c1, 0x05c2 },
  { 0x05c4 },
  { 0x064b, 0x0652 },
  { 0x0670 },
  { 0x06d6, 0x06dc },
  { 0x06dd, 0x06df },
  { 0x06e0, 0x06e4 },
  { 0x06e7, 0x06e8 },
  { 0x06ea, 0x06ed },
  { 0x0901, 0x0903 },
  { 0x093c },
  { 0x093e, 0x094c },
  { 0x094d },
  { 0x0951, 0x0954 },
  { 0x0962, 0x0963 },
  { 0x0981, 0x0983 },
  { 0x09bc },
  { 0x09be },
  { 0x09bf },
  { 0x09c0, 0x09c4 },
  { 0x09c7, 0x09c8 },
  { 0x09cb, 0x09cd },
  { 0x09d7 },
  { 0x09e2, 0x09e3 },
  { 0x0a02 },
  { 0x0a3c },
  { 0x0a3e },
  { 0x0a3f },
  { 0x0a40, 0x0a42 },
  { 0x0a47, 0x0a48 },
  { 0x0a4b, 0x0a4d },
  { 0x0a70, 0x0a71 },
  { 0x0a81, 0x0a83 },
  { 0x0abc },
  { 0x0abe, 0x0ac5 },
  { 0x0ac7, 0x0ac9 },
  { 0x0acb, 0x0acd },
  { 0x0b01, 0x0b03 },
  { 0x0b3c },
  { 0x0b3e, 0x0b43 },
  { 0x0b47, 0x0b48 },
  { 0x0b4b, 0x0b4d },
  { 0x0b56, 0x0b57 },
  { 0x0b82, 0x0b83 },
  { 0x0bbe, 0x0bc2 },
  { 0x0bc6, 0x0bc8 },
  { 0x0bca, 0x0bcd },
  { 0x0bd7 },
  { 0x0c01, 0x0c03 },
  { 0x0c3e, 0x0c44 },
  { 0x0c46, 0x0c48 },
  { 0x0c4a, 0x0c4d },
  { 0x0c55, 0x0c56 },
  { 0x0c82, 0x0c83 },
  { 0x0cbe, 0x0cc4 },
  { 0x0cc6, 0x0cc8 },
  { 0x0cca, 0x0ccd },
  { 0x0cd5, 0x0cd6 },
  { 0x0d02, 0x0d03 },
  { 0x0d3e, 0x0d43 },
  { 0x0d46, 0x0d48 },
  { 0x0d4a, 0x0d4d },
  { 0x0d57 },
  { 0x0e31 },
  { 0x0e34, 0x0e3a },
  { 0x0e47, 0x0e4e },
  { 0x0eb1 },
  { 0x0eb4, 0x0eb9 },
  { 0x0ebb, 0x0ebc },
  { 0x0ec8, 0x0ecd },
  { 0x0f18, 0x0f19 },
  { 0x0f35 },
  { 0x0f37 },
  { 0x0f39 },
  { 0x0f3e },
  { 0x0f3f },
  { 0x0f71, 0x0f84 },
  { 0x0f86, 0x0f8b },
  { 0x0f90, 0x0f95 },
  { 0x0f97 },
  { 0x0f99, 0x0fad },
  { 0x0fb1, 0x0fb7 },
  { 0x0fb9 },
  { 0x20d0, 0x20dc },
  { 0x20e1 },
  { 0x302a, 0x302f },
  { 0x3099 },
  { 0x309a },
  /* Digit */
  { 0x0030, 0x0039 },
  { 0x0660, 0x0669 },
  { 0x06f0, 0x06f9 },
  { 0x0966, 0x096f },
  { 0x09e6, 0x09ef },
  { 0x0a66, 0x0a6f },
  { 0x0ae6, 0x0aef },
  { 0x0b66, 0x0b6f },
  { 0x0be7, 0x0bef },
  { 0x0c66, 0x0c6f },
  { 0x0ce6, 0x0cef },
  { 0x0d66, 0x0d6f },
  { 0x0e50, 0x0e59 },
  { 0x0ed0, 0x0ed9 },
  { 0x0f20, 0x0f29 },
  /* Extender */
  { 0xb7 },
  { 0x02d0 },
  { 0x02d1 },
  { 0x0387 },
  { 0x0640 },
  { 0x0e46 },
  { 0x0ec6 },
  { 0x3005 },
  { 0x3031, 0x3035 },
  { 0x309d, 0x309e },
  { 0x30fc, 0x30fe },
};
    383 
    384 static void
    385 setTab(char *tab, struct range *ranges, size_t nRanges)
    386 {
    387   size_t i;
    388   int j;
    389   for (i = 0; i < nRanges; i++) {
    390     if (ranges[i].end) {
    391       for (j = ranges[i].start; j <= ranges[i].end; j++)
    392         tab[j] = 1;
    393     }
    394     else
    395       tab[ranges[i].start] = 1;
    396   }
    397 }
    398 
    399 static void
    400 printTabs(char *tab)
    401 {
    402   int nBitmaps = 2;
    403   int i, j, k;
    404   unsigned char pageIndex[512];
    405 
    406   printf(
    407 "static const unsigned namingBitmap[] = {\n\
    408 0x00000000, 0x00000000, 0x00000000, 0x00000000,\n\
    409 0x00000000, 0x00000000, 0x00000000, 0x00000000,\n\
    410 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n\
    411 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n");
    412   for (i = 0; i < 512; i++) {
    413     int kind = tab[i*256];
    414     for (j = 1; j < 256; j++)
    415       if (tab[i*256 +j] != kind) {
    416         kind = -1;
    417         break;
    418       }
    419     if (i >= 256 &&  memcmp(tab + (i - 256)*256, tab + i*256, 256) == 0)
    420       pageIndex[i] = pageIndex[i - 256];
    421     else if (kind == -1) {
    422       pageIndex[i] = nBitmaps++;
    423       for (j = 0; j < 8; j++) {
    424         unsigned val = 0;
    425         for (k = 0; k < 32; k++) {
    426           if (tab[i*256 + j*32 +k])
    427             val |= (1 << k);
    428         }
    429         printf("0x%08X,", val);
    430         putchar((((j + 1) & 3) == 0) ? '\n' : ' ');
    431       }
    432     }
    433     else
    434       pageIndex[i] = kind;
    435   }
    436   printf("};\n");
    437   printf("static const unsigned char nmstrtPages[] = {\n");
    438   for (i = 0; i < 512; i++) {
    439     if (i == 256)
    440       printf("};\nstatic const unsigned char namePages[] = {\n");
    441     printf("0x%02X,", pageIndex[i]);
    442     putchar((((i + 1) & 7) == 0) ? '\n' : ' ');
    443   }
    444   printf("};\n");
    445 }
    446 
    447 int
    448 main()
    449 {
    450   char tab[2*65536];
    451   memset(tab, 0, 65536);
    452   setTab(tab, nmstrt, sizeof(nmstrt)/sizeof(nmstrt[0]));
    453   memcpy(tab + 65536, tab, 65536);
    454   setTab(tab + 65536, name, sizeof(name)/sizeof(name[0]));
    455   printTabs(tab);
    456   return 0;
    457 }
    458