Home | History | Annotate | Download | only in expat
      1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
      2    See the file COPYING for copying permission.
      3 */
      4 
      5 #ifdef COMPILED_FROM_DSP
      6 #include "winconfig.h"
      7 #elif defined(MACOS_CLASSIC)
      8 #include "macconfig.h"
      9 #elif defined(__amigaos4__)
     10 #include "amigaconfig.h"
     11 #else
     12 #ifdef HAVE_EXPAT_CONFIG_H
     13 #include <expat_config.h>
     14 #endif
     15 #endif /* ndef COMPILED_FROM_DSP */
     16 
     17 #include <stddef.h>
     18 
     19 #include "expat_external.h"
     20 #include "internal.h"
     21 #include "xmlrole.h"
     22 #include "ascii.h"
     23 
/* This code doesn't check:

 that ',' and '|' are not mixed within a single model group
 the content of literals

*/
     30 
     31 static const char KW_ANY[] = {
     32     ASCII_A, ASCII_N, ASCII_Y, '\0' };
     33 static const char KW_ATTLIST[] = {
     34     ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
     35 static const char KW_CDATA[] = {
     36     ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
     37 static const char KW_DOCTYPE[] = {
     38     ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
     39 static const char KW_ELEMENT[] = {
     40     ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
     41 static const char KW_EMPTY[] = {
     42     ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
     43 static const char KW_ENTITIES[] = {
     44     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
     45     '\0' };
     46 static const char KW_ENTITY[] = {
     47     ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
     48 static const char KW_FIXED[] = {
     49     ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
     50 static const char KW_ID[] = {
     51     ASCII_I, ASCII_D, '\0' };
     52 static const char KW_IDREF[] = {
     53     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
     54 static const char KW_IDREFS[] = {
     55     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
     56 static const char KW_IGNORE[] = {
     57     ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
     58 static const char KW_IMPLIED[] = {
     59     ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
     60 static const char KW_INCLUDE[] = {
     61     ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
     62 static const char KW_NDATA[] = {
     63     ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
     64 static const char KW_NMTOKEN[] = {
     65     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
     66 static const char KW_NMTOKENS[] = {
     67     ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
     68     '\0' };
     69 static const char KW_NOTATION[] =
     70     { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
     71       '\0' };
     72 static const char KW_PCDATA[] = {
     73     ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
     74 static const char KW_PUBLIC[] = {
     75     ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
     76 static const char KW_REQUIRED[] = {
     77     ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
     78     '\0' };
     79 static const char KW_SYSTEM[] = {
     80     ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
     81 
     82 #ifndef MIN_BYTES_PER_CHAR
     83 #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
     84 #endif
     85 
     86 #ifdef XML_DTD
     87 #define setTopLevel(state) \
     88   ((state)->handler = ((state)->documentEntity \
     89                        ? internalSubset \
     90                        : externalSubset1))
     91 #else /* not XML_DTD */
     92 #define setTopLevel(state) ((state)->handler = internalSubset)
     93 #endif /* not XML_DTD */
     94 
     95 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
     96                                    int tok,
     97                                    const char *ptr,
     98                                    const char *end,
     99                                    const ENCODING *enc);
    100 
    101 static PROLOG_HANDLER
    102   prolog0, prolog1, prolog2,
    103   doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
    104   internalSubset,
    105   entity0, entity1, entity2, entity3, entity4, entity5, entity6,
    106   entity7, entity8, entity9, entity10,
    107   notation0, notation1, notation2, notation3, notation4,
    108   attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
    109   attlist7, attlist8, attlist9,
    110   element0, element1, element2, element3, element4, element5, element6,
    111   element7,
    112 #ifdef XML_DTD
    113   externalSubset0, externalSubset1,
    114   condSect0, condSect1, condSect2,
    115 #endif /* XML_DTD */
    116   declClose,
    117   error;
    118 
    119 static int FASTCALL common(PROLOG_STATE *state, int tok);
    120 
    121 static int PTRCALL
    122 prolog0(PROLOG_STATE *state,
    123         int tok,
    124         const char *ptr,
    125         const char *end,
    126         const ENCODING *enc)
    127 {
    128   switch (tok) {
    129   case XML_TOK_PROLOG_S:
    130     state->handler = prolog1;
    131     return XML_ROLE_NONE;
    132   case XML_TOK_XML_DECL:
    133     state->handler = prolog1;
    134     return XML_ROLE_XML_DECL;
    135   case XML_TOK_PI:
    136     state->handler = prolog1;
    137     return XML_ROLE_PI;
    138   case XML_TOK_COMMENT:
    139     state->handler = prolog1;
    140     return XML_ROLE_COMMENT;
    141   case XML_TOK_BOM:
    142     return XML_ROLE_NONE;
    143   case XML_TOK_DECL_OPEN:
    144     if (!XmlNameMatchesAscii(enc,
    145                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    146                              end,
    147                              KW_DOCTYPE))
    148       break;
    149     state->handler = doctype0;
    150     return XML_ROLE_DOCTYPE_NONE;
    151   case XML_TOK_INSTANCE_START:
    152     state->handler = error;
    153     return XML_ROLE_INSTANCE_START;
    154   }
    155   return common(state, tok);
    156 }
    157 
    158 static int PTRCALL
    159 prolog1(PROLOG_STATE *state,
    160         int tok,
    161         const char *ptr,
    162         const char *end,
    163         const ENCODING *enc)
    164 {
    165   switch (tok) {
    166   case XML_TOK_PROLOG_S:
    167     return XML_ROLE_NONE;
    168   case XML_TOK_PI:
    169     return XML_ROLE_PI;
    170   case XML_TOK_COMMENT:
    171     return XML_ROLE_COMMENT;
    172   case XML_TOK_BOM:
    173     return XML_ROLE_NONE;
    174   case XML_TOK_DECL_OPEN:
    175     if (!XmlNameMatchesAscii(enc,
    176                              ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    177                              end,
    178                              KW_DOCTYPE))
    179       break;
    180     state->handler = doctype0;
    181     return XML_ROLE_DOCTYPE_NONE;
    182   case XML_TOK_INSTANCE_START:
    183     state->handler = error;
    184     return XML_ROLE_INSTANCE_START;
    185   }
    186   return common(state, tok);
    187 }
    188 
    189 static int PTRCALL
    190 prolog2(PROLOG_STATE *state,
    191         int tok,
    192         const char *ptr,
    193         const char *end,
    194         const ENCODING *enc)
    195 {
    196   switch (tok) {
    197   case XML_TOK_PROLOG_S:
    198     return XML_ROLE_NONE;
    199   case XML_TOK_PI:
    200     return XML_ROLE_PI;
    201   case XML_TOK_COMMENT:
    202     return XML_ROLE_COMMENT;
    203   case XML_TOK_INSTANCE_START:
    204     state->handler = error;
    205     return XML_ROLE_INSTANCE_START;
    206   }
    207   return common(state, tok);
    208 }
    209 
    210 static int PTRCALL
    211 doctype0(PROLOG_STATE *state,
    212          int tok,
    213          const char *ptr,
    214          const char *end,
    215          const ENCODING *enc)
    216 {
    217   switch (tok) {
    218   case XML_TOK_PROLOG_S:
    219     return XML_ROLE_DOCTYPE_NONE;
    220   case XML_TOK_NAME:
    221   case XML_TOK_PREFIXED_NAME:
    222     state->handler = doctype1;
    223     return XML_ROLE_DOCTYPE_NAME;
    224   }
    225   return common(state, tok);
    226 }
    227 
    228 static int PTRCALL
    229 doctype1(PROLOG_STATE *state,
    230          int tok,
    231          const char *ptr,
    232          const char *end,
    233          const ENCODING *enc)
    234 {
    235   switch (tok) {
    236   case XML_TOK_PROLOG_S:
    237     return XML_ROLE_DOCTYPE_NONE;
    238   case XML_TOK_OPEN_BRACKET:
    239     state->handler = internalSubset;
    240     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
    241   case XML_TOK_DECL_CLOSE:
    242     state->handler = prolog2;
    243     return XML_ROLE_DOCTYPE_CLOSE;
    244   case XML_TOK_NAME:
    245     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
    246       state->handler = doctype3;
    247       return XML_ROLE_DOCTYPE_NONE;
    248     }
    249     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
    250       state->handler = doctype2;
    251       return XML_ROLE_DOCTYPE_NONE;
    252     }
    253     break;
    254   }
    255   return common(state, tok);
    256 }
    257 
    258 static int PTRCALL
    259 doctype2(PROLOG_STATE *state,
    260          int tok,
    261          const char *ptr,
    262          const char *end,
    263          const ENCODING *enc)
    264 {
    265   switch (tok) {
    266   case XML_TOK_PROLOG_S:
    267     return XML_ROLE_DOCTYPE_NONE;
    268   case XML_TOK_LITERAL:
    269     state->handler = doctype3;
    270     return XML_ROLE_DOCTYPE_PUBLIC_ID;
    271   }
    272   return common(state, tok);
    273 }
    274 
    275 static int PTRCALL
    276 doctype3(PROLOG_STATE *state,
    277          int tok,
    278          const char *ptr,
    279          const char *end,
    280          const ENCODING *enc)
    281 {
    282   switch (tok) {
    283   case XML_TOK_PROLOG_S:
    284     return XML_ROLE_DOCTYPE_NONE;
    285   case XML_TOK_LITERAL:
    286     state->handler = doctype4;
    287     return XML_ROLE_DOCTYPE_SYSTEM_ID;
    288   }
    289   return common(state, tok);
    290 }
    291 
    292 static int PTRCALL
    293 doctype4(PROLOG_STATE *state,
    294          int tok,
    295          const char *ptr,
    296          const char *end,
    297          const ENCODING *enc)
    298 {
    299   switch (tok) {
    300   case XML_TOK_PROLOG_S:
    301     return XML_ROLE_DOCTYPE_NONE;
    302   case XML_TOK_OPEN_BRACKET:
    303     state->handler = internalSubset;
    304     return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
    305   case XML_TOK_DECL_CLOSE:
    306     state->handler = prolog2;
    307     return XML_ROLE_DOCTYPE_CLOSE;
    308   }
    309   return common(state, tok);
    310 }
    311 
    312 static int PTRCALL
    313 doctype5(PROLOG_STATE *state,
    314          int tok,
    315          const char *ptr,
    316          const char *end,
    317          const ENCODING *enc)
    318 {
    319   switch (tok) {
    320   case XML_TOK_PROLOG_S:
    321     return XML_ROLE_DOCTYPE_NONE;
    322   case XML_TOK_DECL_CLOSE:
    323     state->handler = prolog2;
    324     return XML_ROLE_DOCTYPE_CLOSE;
    325   }
    326   return common(state, tok);
    327 }
    328 
    329 static int PTRCALL
    330 internalSubset(PROLOG_STATE *state,
    331                int tok,
    332                const char *ptr,
    333                const char *end,
    334                const ENCODING *enc)
    335 {
    336   switch (tok) {
    337   case XML_TOK_PROLOG_S:
    338     return XML_ROLE_NONE;
    339   case XML_TOK_DECL_OPEN:
    340     if (XmlNameMatchesAscii(enc,
    341                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    342                             end,
    343                             KW_ENTITY)) {
    344       state->handler = entity0;
    345       return XML_ROLE_ENTITY_NONE;
    346     }
    347     if (XmlNameMatchesAscii(enc,
    348                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    349                             end,
    350                             KW_ATTLIST)) {
    351       state->handler = attlist0;
    352       return XML_ROLE_ATTLIST_NONE;
    353     }
    354     if (XmlNameMatchesAscii(enc,
    355                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    356                             end,
    357                             KW_ELEMENT)) {
    358       state->handler = element0;
    359       return XML_ROLE_ELEMENT_NONE;
    360     }
    361     if (XmlNameMatchesAscii(enc,
    362                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
    363                             end,
    364                             KW_NOTATION)) {
    365       state->handler = notation0;
    366       return XML_ROLE_NOTATION_NONE;
    367     }
    368     break;
    369   case XML_TOK_PI:
    370     return XML_ROLE_PI;
    371   case XML_TOK_COMMENT:
    372     return XML_ROLE_COMMENT;
    373   case XML_TOK_PARAM_ENTITY_REF:
    374     return XML_ROLE_PARAM_ENTITY_REF;
    375   case XML_TOK_CLOSE_BRACKET:
    376     state->handler = doctype5;
    377     return XML_ROLE_DOCTYPE_NONE;
    378   case XML_TOK_NONE:
    379     return XML_ROLE_NONE;
    380   }
    381   return common(state, tok);
    382 }
    383 
    384 #ifdef XML_DTD
    385 
    386 static int PTRCALL
    387 externalSubset0(PROLOG_STATE *state,
    388                 int tok,
    389                 const char *ptr,
    390                 const char *end,
    391                 const ENCODING *enc)
    392 {
    393   state->handler = externalSubset1;
    394   if (tok == XML_TOK_XML_DECL)
    395     return XML_ROLE_TEXT_DECL;
    396   return externalSubset1(state, tok, ptr, end, enc);
    397 }
    398 
    399 static int PTRCALL
    400 externalSubset1(PROLOG_STATE *state,
    401                 int tok,
    402                 const char *ptr,
    403                 const char *end,
    404                 const ENCODING *enc)
    405 {
    406   switch (tok) {
    407   case XML_TOK_COND_SECT_OPEN:
    408     state->handler = condSect0;
    409     return XML_ROLE_NONE;
    410   case XML_TOK_COND_SECT_CLOSE:
    411     if (state->includeLevel == 0)
    412       break;
    413     state->includeLevel -= 1;
    414     return XML_ROLE_NONE;
    415   case XML_TOK_PROLOG_S:
    416     return XML_ROLE_NONE;
    417   case XML_TOK_CLOSE_BRACKET:
    418     break;
    419   case XML_TOK_NONE:
    420     if (state->includeLevel)
    421       break;
    422     return XML_ROLE_NONE;
    423   default:
    424     return internalSubset(state, tok, ptr, end, enc);
    425   }
    426   return common(state, tok);
    427 }
    428 
    429 #endif /* XML_DTD */
    430 
    431 static int PTRCALL
    432 entity0(PROLOG_STATE *state,
    433         int tok,
    434         const char *ptr,
    435         const char *end,
    436         const ENCODING *enc)
    437 {
    438   switch (tok) {
    439   case XML_TOK_PROLOG_S:
    440     return XML_ROLE_ENTITY_NONE;
    441   case XML_TOK_PERCENT:
    442     state->handler = entity1;
    443     return XML_ROLE_ENTITY_NONE;
    444   case XML_TOK_NAME:
    445     state->handler = entity2;
    446     return XML_ROLE_GENERAL_ENTITY_NAME;
    447   }
    448   return common(state, tok);
    449 }
    450 
    451 static int PTRCALL
    452 entity1(PROLOG_STATE *state,
    453         int tok,
    454         const char *ptr,
    455         const char *end,
    456         const ENCODING *enc)
    457 {
    458   switch (tok) {
    459   case XML_TOK_PROLOG_S:
    460     return XML_ROLE_ENTITY_NONE;
    461   case XML_TOK_NAME:
    462     state->handler = entity7;
    463     return XML_ROLE_PARAM_ENTITY_NAME;
    464   }
    465   return common(state, tok);
    466 }
    467 
    468 static int PTRCALL
    469 entity2(PROLOG_STATE *state,
    470         int tok,
    471         const char *ptr,
    472         const char *end,
    473         const ENCODING *enc)
    474 {
    475   switch (tok) {
    476   case XML_TOK_PROLOG_S:
    477     return XML_ROLE_ENTITY_NONE;
    478   case XML_TOK_NAME:
    479     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
    480       state->handler = entity4;
    481       return XML_ROLE_ENTITY_NONE;
    482     }
    483     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
    484       state->handler = entity3;
    485       return XML_ROLE_ENTITY_NONE;
    486     }
    487     break;
    488   case XML_TOK_LITERAL:
    489     state->handler = declClose;
    490     state->role_none = XML_ROLE_ENTITY_NONE;
    491     return XML_ROLE_ENTITY_VALUE;
    492   }
    493   return common(state, tok);
    494 }
    495 
    496 static int PTRCALL
    497 entity3(PROLOG_STATE *state,
    498         int tok,
    499         const char *ptr,
    500         const char *end,
    501         const ENCODING *enc)
    502 {
    503   switch (tok) {
    504   case XML_TOK_PROLOG_S:
    505     return XML_ROLE_ENTITY_NONE;
    506   case XML_TOK_LITERAL:
    507     state->handler = entity4;
    508     return XML_ROLE_ENTITY_PUBLIC_ID;
    509   }
    510   return common(state, tok);
    511 }
    512 
    513 static int PTRCALL
    514 entity4(PROLOG_STATE *state,
    515         int tok,
    516         const char *ptr,
    517         const char *end,
    518         const ENCODING *enc)
    519 {
    520   switch (tok) {
    521   case XML_TOK_PROLOG_S:
    522     return XML_ROLE_ENTITY_NONE;
    523   case XML_TOK_LITERAL:
    524     state->handler = entity5;
    525     return XML_ROLE_ENTITY_SYSTEM_ID;
    526   }
    527   return common(state, tok);
    528 }
    529 
    530 static int PTRCALL
    531 entity5(PROLOG_STATE *state,
    532         int tok,
    533         const char *ptr,
    534         const char *end,
    535         const ENCODING *enc)
    536 {
    537   switch (tok) {
    538   case XML_TOK_PROLOG_S:
    539     return XML_ROLE_ENTITY_NONE;
    540   case XML_TOK_DECL_CLOSE:
    541     setTopLevel(state);
    542     return XML_ROLE_ENTITY_COMPLETE;
    543   case XML_TOK_NAME:
    544     if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
    545       state->handler = entity6;
    546       return XML_ROLE_ENTITY_NONE;
    547     }
    548     break;
    549   }
    550   return common(state, tok);
    551 }
    552 
    553 static int PTRCALL
    554 entity6(PROLOG_STATE *state,
    555         int tok,
    556         const char *ptr,
    557         const char *end,
    558         const ENCODING *enc)
    559 {
    560   switch (tok) {
    561   case XML_TOK_PROLOG_S:
    562     return XML_ROLE_ENTITY_NONE;
    563   case XML_TOK_NAME:
    564     state->handler = declClose;
    565     state->role_none = XML_ROLE_ENTITY_NONE;
    566     return XML_ROLE_ENTITY_NOTATION_NAME;
    567   }
    568   return common(state, tok);
    569 }
    570 
    571 static int PTRCALL
    572 entity7(PROLOG_STATE *state,
    573         int tok,
    574         const char *ptr,
    575         const char *end,
    576         const ENCODING *enc)
    577 {
    578   switch (tok) {
    579   case XML_TOK_PROLOG_S:
    580     return XML_ROLE_ENTITY_NONE;
    581   case XML_TOK_NAME:
    582     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
    583       state->handler = entity9;
    584       return XML_ROLE_ENTITY_NONE;
    585     }
    586     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
    587       state->handler = entity8;
    588       return XML_ROLE_ENTITY_NONE;
    589     }
    590     break;
    591   case XML_TOK_LITERAL:
    592     state->handler = declClose;
    593     state->role_none = XML_ROLE_ENTITY_NONE;
    594     return XML_ROLE_ENTITY_VALUE;
    595   }
    596   return common(state, tok);
    597 }
    598 
    599 static int PTRCALL
    600 entity8(PROLOG_STATE *state,
    601         int tok,
    602         const char *ptr,
    603         const char *end,
    604         const ENCODING *enc)
    605 {
    606   switch (tok) {
    607   case XML_TOK_PROLOG_S:
    608     return XML_ROLE_ENTITY_NONE;
    609   case XML_TOK_LITERAL:
    610     state->handler = entity9;
    611     return XML_ROLE_ENTITY_PUBLIC_ID;
    612   }
    613   return common(state, tok);
    614 }
    615 
    616 static int PTRCALL
    617 entity9(PROLOG_STATE *state,
    618         int tok,
    619         const char *ptr,
    620         const char *end,
    621         const ENCODING *enc)
    622 {
    623   switch (tok) {
    624   case XML_TOK_PROLOG_S:
    625     return XML_ROLE_ENTITY_NONE;
    626   case XML_TOK_LITERAL:
    627     state->handler = entity10;
    628     return XML_ROLE_ENTITY_SYSTEM_ID;
    629   }
    630   return common(state, tok);
    631 }
    632 
    633 static int PTRCALL
    634 entity10(PROLOG_STATE *state,
    635          int tok,
    636          const char *ptr,
    637          const char *end,
    638          const ENCODING *enc)
    639 {
    640   switch (tok) {
    641   case XML_TOK_PROLOG_S:
    642     return XML_ROLE_ENTITY_NONE;
    643   case XML_TOK_DECL_CLOSE:
    644     setTopLevel(state);
    645     return XML_ROLE_ENTITY_COMPLETE;
    646   }
    647   return common(state, tok);
    648 }
    649 
    650 static int PTRCALL
    651 notation0(PROLOG_STATE *state,
    652           int tok,
    653           const char *ptr,
    654           const char *end,
    655           const ENCODING *enc)
    656 {
    657   switch (tok) {
    658   case XML_TOK_PROLOG_S:
    659     return XML_ROLE_NOTATION_NONE;
    660   case XML_TOK_NAME:
    661     state->handler = notation1;
    662     return XML_ROLE_NOTATION_NAME;
    663   }
    664   return common(state, tok);
    665 }
    666 
    667 static int PTRCALL
    668 notation1(PROLOG_STATE *state,
    669           int tok,
    670           const char *ptr,
    671           const char *end,
    672           const ENCODING *enc)
    673 {
    674   switch (tok) {
    675   case XML_TOK_PROLOG_S:
    676     return XML_ROLE_NOTATION_NONE;
    677   case XML_TOK_NAME:
    678     if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
    679       state->handler = notation3;
    680       return XML_ROLE_NOTATION_NONE;
    681     }
    682     if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
    683       state->handler = notation2;
    684       return XML_ROLE_NOTATION_NONE;
    685     }
    686     break;
    687   }
    688   return common(state, tok);
    689 }
    690 
    691 static int PTRCALL
    692 notation2(PROLOG_STATE *state,
    693           int tok,
    694           const char *ptr,
    695           const char *end,
    696           const ENCODING *enc)
    697 {
    698   switch (tok) {
    699   case XML_TOK_PROLOG_S:
    700     return XML_ROLE_NOTATION_NONE;
    701   case XML_TOK_LITERAL:
    702     state->handler = notation4;
    703     return XML_ROLE_NOTATION_PUBLIC_ID;
    704   }
    705   return common(state, tok);
    706 }
    707 
    708 static int PTRCALL
    709 notation3(PROLOG_STATE *state,
    710           int tok,
    711           const char *ptr,
    712           const char *end,
    713           const ENCODING *enc)
    714 {
    715   switch (tok) {
    716   case XML_TOK_PROLOG_S:
    717     return XML_ROLE_NOTATION_NONE;
    718   case XML_TOK_LITERAL:
    719     state->handler = declClose;
    720     state->role_none = XML_ROLE_NOTATION_NONE;
    721     return XML_ROLE_NOTATION_SYSTEM_ID;
    722   }
    723   return common(state, tok);
    724 }
    725 
    726 static int PTRCALL
    727 notation4(PROLOG_STATE *state,
    728           int tok,
    729           const char *ptr,
    730           const char *end,
    731           const ENCODING *enc)
    732 {
    733   switch (tok) {
    734   case XML_TOK_PROLOG_S:
    735     return XML_ROLE_NOTATION_NONE;
    736   case XML_TOK_LITERAL:
    737     state->handler = declClose;
    738     state->role_none = XML_ROLE_NOTATION_NONE;
    739     return XML_ROLE_NOTATION_SYSTEM_ID;
    740   case XML_TOK_DECL_CLOSE:
    741     setTopLevel(state);
    742     return XML_ROLE_NOTATION_NO_SYSTEM_ID;
    743   }
    744   return common(state, tok);
    745 }
    746 
    747 static int PTRCALL
    748 attlist0(PROLOG_STATE *state,
    749          int tok,
    750          const char *ptr,
    751          const char *end,
    752          const ENCODING *enc)
    753 {
    754   switch (tok) {
    755   case XML_TOK_PROLOG_S:
    756     return XML_ROLE_ATTLIST_NONE;
    757   case XML_TOK_NAME:
    758   case XML_TOK_PREFIXED_NAME:
    759     state->handler = attlist1;
    760     return XML_ROLE_ATTLIST_ELEMENT_NAME;
    761   }
    762   return common(state, tok);
    763 }
    764 
    765 static int PTRCALL
    766 attlist1(PROLOG_STATE *state,
    767          int tok,
    768          const char *ptr,
    769          const char *end,
    770          const ENCODING *enc)
    771 {
    772   switch (tok) {
    773   case XML_TOK_PROLOG_S:
    774     return XML_ROLE_ATTLIST_NONE;
    775   case XML_TOK_DECL_CLOSE:
    776     setTopLevel(state);
    777     return XML_ROLE_ATTLIST_NONE;
    778   case XML_TOK_NAME:
    779   case XML_TOK_PREFIXED_NAME:
    780     state->handler = attlist2;
    781     return XML_ROLE_ATTRIBUTE_NAME;
    782   }
    783   return common(state, tok);
    784 }
    785 
    786 static int PTRCALL
    787 attlist2(PROLOG_STATE *state,
    788          int tok,
    789          const char *ptr,
    790          const char *end,
    791          const ENCODING *enc)
    792 {
    793   switch (tok) {
    794   case XML_TOK_PROLOG_S:
    795     return XML_ROLE_ATTLIST_NONE;
    796   case XML_TOK_NAME:
    797     {
    798       static const char * const types[] = {
    799         KW_CDATA,
    800         KW_ID,
    801         KW_IDREF,
    802         KW_IDREFS,
    803         KW_ENTITY,
    804         KW_ENTITIES,
    805         KW_NMTOKEN,
    806         KW_NMTOKENS,
    807       };
    808       int i;
    809       for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
    810         if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
    811           state->handler = attlist8;
    812           return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
    813         }
    814     }
    815     if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
    816       state->handler = attlist5;
    817       return XML_ROLE_ATTLIST_NONE;
    818     }
    819     break;
    820   case XML_TOK_OPEN_PAREN:
    821     state->handler = attlist3;
    822     return XML_ROLE_ATTLIST_NONE;
    823   }
    824   return common(state, tok);
    825 }
    826 
    827 static int PTRCALL
    828 attlist3(PROLOG_STATE *state,
    829          int tok,
    830          const char *ptr,
    831          const char *end,
    832          const ENCODING *enc)
    833 {
    834   switch (tok) {
    835   case XML_TOK_PROLOG_S:
    836     return XML_ROLE_ATTLIST_NONE;
    837   case XML_TOK_NMTOKEN:
    838   case XML_TOK_NAME:
    839   case XML_TOK_PREFIXED_NAME:
    840     state->handler = attlist4;
    841     return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
    842   }
    843   return common(state, tok);
    844 }
    845 
    846 static int PTRCALL
    847 attlist4(PROLOG_STATE *state,
    848          int tok,
    849          const char *ptr,
    850          const char *end,
    851          const ENCODING *enc)
    852 {
    853   switch (tok) {
    854   case XML_TOK_PROLOG_S:
    855     return XML_ROLE_ATTLIST_NONE;
    856   case XML_TOK_CLOSE_PAREN:
    857     state->handler = attlist8;
    858     return XML_ROLE_ATTLIST_NONE;
    859   case XML_TOK_OR:
    860     state->handler = attlist3;
    861     return XML_ROLE_ATTLIST_NONE;
    862   }
    863   return common(state, tok);
    864 }
    865 
    866 static int PTRCALL
    867 attlist5(PROLOG_STATE *state,
    868          int tok,
    869          const char *ptr,
    870          const char *end,
    871          const ENCODING *enc)
    872 {
    873   switch (tok) {
    874   case XML_TOK_PROLOG_S:
    875     return XML_ROLE_ATTLIST_NONE;
    876   case XML_TOK_OPEN_PAREN:
    877     state->handler = attlist6;
    878     return XML_ROLE_ATTLIST_NONE;
    879   }
    880   return common(state, tok);
    881 }
    882 
    883 static int PTRCALL
    884 attlist6(PROLOG_STATE *state,
    885          int tok,
    886          const char *ptr,
    887          const char *end,
    888          const ENCODING *enc)
    889 {
    890   switch (tok) {
    891   case XML_TOK_PROLOG_S:
    892     return XML_ROLE_ATTLIST_NONE;
    893   case XML_TOK_NAME:
    894     state->handler = attlist7;
    895     return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
    896   }
    897   return common(state, tok);
    898 }
    899 
    900 static int PTRCALL
    901 attlist7(PROLOG_STATE *state,
    902          int tok,
    903          const char *ptr,
    904          const char *end,
    905          const ENCODING *enc)
    906 {
    907   switch (tok) {
    908   case XML_TOK_PROLOG_S:
    909     return XML_ROLE_ATTLIST_NONE;
    910   case XML_TOK_CLOSE_PAREN:
    911     state->handler = attlist8;
    912     return XML_ROLE_ATTLIST_NONE;
    913   case XML_TOK_OR:
    914     state->handler = attlist6;
    915     return XML_ROLE_ATTLIST_NONE;
    916   }
    917   return common(state, tok);
    918 }
    919 
    920 /* default value */
    921 static int PTRCALL
    922 attlist8(PROLOG_STATE *state,
    923          int tok,
    924          const char *ptr,
    925          const char *end,
    926          const ENCODING *enc)
    927 {
    928   switch (tok) {
    929   case XML_TOK_PROLOG_S:
    930     return XML_ROLE_ATTLIST_NONE;
    931   case XML_TOK_POUND_NAME:
    932     if (XmlNameMatchesAscii(enc,
    933                             ptr + MIN_BYTES_PER_CHAR(enc),
    934                             end,
    935                             KW_IMPLIED)) {
    936       state->handler = attlist1;
    937       return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
    938     }
    939     if (XmlNameMatchesAscii(enc,
    940                             ptr + MIN_BYTES_PER_CHAR(enc),
    941                             end,
    942                             KW_REQUIRED)) {
    943       state->handler = attlist1;
    944       return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
    945     }
    946     if (XmlNameMatchesAscii(enc,
    947                             ptr + MIN_BYTES_PER_CHAR(enc),
    948                             end,
    949                             KW_FIXED)) {
    950       state->handler = attlist9;
    951       return XML_ROLE_ATTLIST_NONE;
    952     }
    953     break;
    954   case XML_TOK_LITERAL:
    955     state->handler = attlist1;
    956     return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
    957   }
    958   return common(state, tok);
    959 }
    960 
    961 static int PTRCALL
    962 attlist9(PROLOG_STATE *state,
    963          int tok,
    964          const char *ptr,
    965          const char *end,
    966          const ENCODING *enc)
    967 {
    968   switch (tok) {
    969   case XML_TOK_PROLOG_S:
    970     return XML_ROLE_ATTLIST_NONE;
    971   case XML_TOK_LITERAL:
    972     state->handler = attlist1;
    973     return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
    974   }
    975   return common(state, tok);
    976 }
    977 
    978 static int PTRCALL
    979 element0(PROLOG_STATE *state,
    980          int tok,
    981          const char *ptr,
    982          const char *end,
    983          const ENCODING *enc)
    984 {
    985   switch (tok) {
    986   case XML_TOK_PROLOG_S:
    987     return XML_ROLE_ELEMENT_NONE;
    988   case XML_TOK_NAME:
    989   case XML_TOK_PREFIXED_NAME:
    990     state->handler = element1;
    991     return XML_ROLE_ELEMENT_NAME;
    992   }
    993   return common(state, tok);
    994 }
    995 
    996 static int PTRCALL
    997 element1(PROLOG_STATE *state,
    998          int tok,
    999          const char *ptr,
   1000          const char *end,
   1001          const ENCODING *enc)
   1002 {
   1003   switch (tok) {
   1004   case XML_TOK_PROLOG_S:
   1005     return XML_ROLE_ELEMENT_NONE;
   1006   case XML_TOK_NAME:
   1007     if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
   1008       state->handler = declClose;
   1009       state->role_none = XML_ROLE_ELEMENT_NONE;
   1010       return XML_ROLE_CONTENT_EMPTY;
   1011     }
   1012     if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
   1013       state->handler = declClose;
   1014       state->role_none = XML_ROLE_ELEMENT_NONE;
   1015       return XML_ROLE_CONTENT_ANY;
   1016     }
   1017     break;
   1018   case XML_TOK_OPEN_PAREN:
   1019     state->handler = element2;
   1020     state->level = 1;
   1021     return XML_ROLE_GROUP_OPEN;
   1022   }
   1023   return common(state, tok);
   1024 }
   1025 
   1026 static int PTRCALL
   1027 element2(PROLOG_STATE *state,
   1028          int tok,
   1029          const char *ptr,
   1030          const char *end,
   1031          const ENCODING *enc)
   1032 {
   1033   switch (tok) {
   1034   case XML_TOK_PROLOG_S:
   1035     return XML_ROLE_ELEMENT_NONE;
   1036   case XML_TOK_POUND_NAME:
   1037     if (XmlNameMatchesAscii(enc,
   1038                             ptr + MIN_BYTES_PER_CHAR(enc),
   1039                             end,
   1040                             KW_PCDATA)) {
   1041       state->handler = element3;
   1042       return XML_ROLE_CONTENT_PCDATA;
   1043     }
   1044     break;
   1045   case XML_TOK_OPEN_PAREN:
   1046     state->level = 2;
   1047     state->handler = element6;
   1048     return XML_ROLE_GROUP_OPEN;
   1049   case XML_TOK_NAME:
   1050   case XML_TOK_PREFIXED_NAME:
   1051     state->handler = element7;
   1052     return XML_ROLE_CONTENT_ELEMENT;
   1053   case XML_TOK_NAME_QUESTION:
   1054     state->handler = element7;
   1055     return XML_ROLE_CONTENT_ELEMENT_OPT;
   1056   case XML_TOK_NAME_ASTERISK:
   1057     state->handler = element7;
   1058     return XML_ROLE_CONTENT_ELEMENT_REP;
   1059   case XML_TOK_NAME_PLUS:
   1060     state->handler = element7;
   1061     return XML_ROLE_CONTENT_ELEMENT_PLUS;
   1062   }
   1063   return common(state, tok);
   1064 }
   1065 
   1066 static int PTRCALL
   1067 element3(PROLOG_STATE *state,
   1068          int tok,
   1069          const char *ptr,
   1070          const char *end,
   1071          const ENCODING *enc)
   1072 {
   1073   switch (tok) {
   1074   case XML_TOK_PROLOG_S:
   1075     return XML_ROLE_ELEMENT_NONE;
   1076   case XML_TOK_CLOSE_PAREN:
   1077     state->handler = declClose;
   1078     state->role_none = XML_ROLE_ELEMENT_NONE;
   1079     return XML_ROLE_GROUP_CLOSE;
   1080   case XML_TOK_CLOSE_PAREN_ASTERISK:
   1081     state->handler = declClose;
   1082     state->role_none = XML_ROLE_ELEMENT_NONE;
   1083     return XML_ROLE_GROUP_CLOSE_REP;
   1084   case XML_TOK_OR:
   1085     state->handler = element4;
   1086     return XML_ROLE_ELEMENT_NONE;
   1087   }
   1088   return common(state, tok);
   1089 }
   1090 
   1091 static int PTRCALL
   1092 element4(PROLOG_STATE *state,
   1093          int tok,
   1094          const char *ptr,
   1095          const char *end,
   1096          const ENCODING *enc)
   1097 {
   1098   switch (tok) {
   1099   case XML_TOK_PROLOG_S:
   1100     return XML_ROLE_ELEMENT_NONE;
   1101   case XML_TOK_NAME:
   1102   case XML_TOK_PREFIXED_NAME:
   1103     state->handler = element5;
   1104     return XML_ROLE_CONTENT_ELEMENT;
   1105   }
   1106   return common(state, tok);
   1107 }
   1108 
   1109 static int PTRCALL
   1110 element5(PROLOG_STATE *state,
   1111          int tok,
   1112          const char *ptr,
   1113          const char *end,
   1114          const ENCODING *enc)
   1115 {
   1116   switch (tok) {
   1117   case XML_TOK_PROLOG_S:
   1118     return XML_ROLE_ELEMENT_NONE;
   1119   case XML_TOK_CLOSE_PAREN_ASTERISK:
   1120     state->handler = declClose;
   1121     state->role_none = XML_ROLE_ELEMENT_NONE;
   1122     return XML_ROLE_GROUP_CLOSE_REP;
   1123   case XML_TOK_OR:
   1124     state->handler = element4;
   1125     return XML_ROLE_ELEMENT_NONE;
   1126   }
   1127   return common(state, tok);
   1128 }
   1129 
   1130 static int PTRCALL
   1131 element6(PROLOG_STATE *state,
   1132          int tok,
   1133          const char *ptr,
   1134          const char *end,
   1135          const ENCODING *enc)
   1136 {
   1137   switch (tok) {
   1138   case XML_TOK_PROLOG_S:
   1139     return XML_ROLE_ELEMENT_NONE;
   1140   case XML_TOK_OPEN_PAREN:
   1141     state->level += 1;
   1142     return XML_ROLE_GROUP_OPEN;
   1143   case XML_TOK_NAME:
   1144   case XML_TOK_PREFIXED_NAME:
   1145     state->handler = element7;
   1146     return XML_ROLE_CONTENT_ELEMENT;
   1147   case XML_TOK_NAME_QUESTION:
   1148     state->handler = element7;
   1149     return XML_ROLE_CONTENT_ELEMENT_OPT;
   1150   case XML_TOK_NAME_ASTERISK:
   1151     state->handler = element7;
   1152     return XML_ROLE_CONTENT_ELEMENT_REP;
   1153   case XML_TOK_NAME_PLUS:
   1154     state->handler = element7;
   1155     return XML_ROLE_CONTENT_ELEMENT_PLUS;
   1156   }
   1157   return common(state, tok);
   1158 }
   1159 
   1160 static int PTRCALL
   1161 element7(PROLOG_STATE *state,
   1162          int tok,
   1163          const char *ptr,
   1164          const char *end,
   1165          const ENCODING *enc)
   1166 {
   1167   switch (tok) {
   1168   case XML_TOK_PROLOG_S:
   1169     return XML_ROLE_ELEMENT_NONE;
   1170   case XML_TOK_CLOSE_PAREN:
   1171     state->level -= 1;
   1172     if (state->level == 0) {
   1173       state->handler = declClose;
   1174       state->role_none = XML_ROLE_ELEMENT_NONE;
   1175     }
   1176     return XML_ROLE_GROUP_CLOSE;
   1177   case XML_TOK_CLOSE_PAREN_ASTERISK:
   1178     state->level -= 1;
   1179     if (state->level == 0) {
   1180       state->handler = declClose;
   1181       state->role_none = XML_ROLE_ELEMENT_NONE;
   1182     }
   1183     return XML_ROLE_GROUP_CLOSE_REP;
   1184   case XML_TOK_CLOSE_PAREN_QUESTION:
   1185     state->level -= 1;
   1186     if (state->level == 0) {
   1187       state->handler = declClose;
   1188       state->role_none = XML_ROLE_ELEMENT_NONE;
   1189     }
   1190     return XML_ROLE_GROUP_CLOSE_OPT;
   1191   case XML_TOK_CLOSE_PAREN_PLUS:
   1192     state->level -= 1;
   1193     if (state->level == 0) {
   1194       state->handler = declClose;
   1195       state->role_none = XML_ROLE_ELEMENT_NONE;
   1196     }
   1197     return XML_ROLE_GROUP_CLOSE_PLUS;
   1198   case XML_TOK_COMMA:
   1199     state->handler = element6;
   1200     return XML_ROLE_GROUP_SEQUENCE;
   1201   case XML_TOK_OR:
   1202     state->handler = element6;
   1203     return XML_ROLE_GROUP_CHOICE;
   1204   }
   1205   return common(state, tok);
   1206 }
   1207 
   1208 #ifdef XML_DTD
   1209 
   1210 static int PTRCALL
   1211 condSect0(PROLOG_STATE *state,
   1212           int tok,
   1213           const char *ptr,
   1214           const char *end,
   1215           const ENCODING *enc)
   1216 {
   1217   switch (tok) {
   1218   case XML_TOK_PROLOG_S:
   1219     return XML_ROLE_NONE;
   1220   case XML_TOK_NAME:
   1221     if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
   1222       state->handler = condSect1;
   1223       return XML_ROLE_NONE;
   1224     }
   1225     if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
   1226       state->handler = condSect2;
   1227       return XML_ROLE_NONE;
   1228     }
   1229     break;
   1230   }
   1231   return common(state, tok);
   1232 }
   1233 
   1234 static int PTRCALL
   1235 condSect1(PROLOG_STATE *state,
   1236           int tok,
   1237           const char *ptr,
   1238           const char *end,
   1239           const ENCODING *enc)
   1240 {
   1241   switch (tok) {
   1242   case XML_TOK_PROLOG_S:
   1243     return XML_ROLE_NONE;
   1244   case XML_TOK_OPEN_BRACKET:
   1245     state->handler = externalSubset1;
   1246     state->includeLevel += 1;
   1247     return XML_ROLE_NONE;
   1248   }
   1249   return common(state, tok);
   1250 }
   1251 
   1252 static int PTRCALL
   1253 condSect2(PROLOG_STATE *state,
   1254           int tok,
   1255           const char *ptr,
   1256           const char *end,
   1257           const ENCODING *enc)
   1258 {
   1259   switch (tok) {
   1260   case XML_TOK_PROLOG_S:
   1261     return XML_ROLE_NONE;
   1262   case XML_TOK_OPEN_BRACKET:
   1263     state->handler = externalSubset1;
   1264     return XML_ROLE_IGNORE_SECT;
   1265   }
   1266   return common(state, tok);
   1267 }
   1268 
   1269 #endif /* XML_DTD */
   1270 
   1271 static int PTRCALL
   1272 declClose(PROLOG_STATE *state,
   1273           int tok,
   1274           const char *ptr,
   1275           const char *end,
   1276           const ENCODING *enc)
   1277 {
   1278   switch (tok) {
   1279   case XML_TOK_PROLOG_S:
   1280     return state->role_none;
   1281   case XML_TOK_DECL_CLOSE:
   1282     setTopLevel(state);
   1283     return state->role_none;
   1284   }
   1285   return common(state, tok);
   1286 }
   1287 
   1288 static int PTRCALL
   1289 error(PROLOG_STATE *state,
   1290       int tok,
   1291       const char *ptr,
   1292       const char *end,
   1293       const ENCODING *enc)
   1294 {
   1295   return XML_ROLE_NONE;
   1296 }
   1297 
   1298 static int FASTCALL
   1299 common(PROLOG_STATE *state, int tok)
   1300 {
   1301 #ifdef XML_DTD
   1302   if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
   1303     return XML_ROLE_INNER_PARAM_ENTITY_REF;
   1304 #endif
   1305   state->handler = error;
   1306   return XML_ROLE_ERROR;
   1307 }
   1308 
   1309 void
   1310 XmlPrologStateInit(PROLOG_STATE *state)
   1311 {
   1312   state->handler = prolog0;
   1313 #ifdef XML_DTD
   1314   state->documentEntity = 1;
   1315   state->includeLevel = 0;
   1316   state->inEntityValue = 0;
   1317 #endif /* XML_DTD */
   1318 }
   1319 
   1320 #ifdef XML_DTD
   1321 
   1322 void
   1323 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
   1324 {
   1325   state->handler = externalSubset0;
   1326   state->documentEntity = 0;
   1327   state->includeLevel = 0;
   1328 }
   1329 
   1330 #endif /* XML_DTD */
   1331