Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2008 Esmertec AG.
      3  * Copyright (C) 2008 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include <setjmp.h>
     21 #include <assert.h>
     22 #include "wbxml_parser.h"
     23 #include "csp13_data.h"
     24 #ifdef SUPPORT_SYNCML
     25 #include "syncml_data.h"
     26 #endif
     27 
     28 #ifdef PLATFORM_ANDROID
     29 extern "C" void *bsearch(const void *key, const void *base0, size_t nmemb,
     30         size_t size, int (*compar)(const void *, const void *));
     31 #endif
     32 
     33 #define ARRAY_SIZE(a)   (sizeof(a) / sizeof(a[0]))
     34 
     35 //#define WBXML_DEBUG 1
     36 
     37 /* Major TODO items:
     38    - Attribute value tokens (not used by IMPS CSP)
     39    - EXT_* except EXT_T_0 (not used by IMPS CSP)
     40    - PI (not used by IMPS CSP)
     41    - cleanups
     42 
     43    Other TODO:
     44    - Support more public ID? Only IMPS is supported now.
     45    - Support other charsets than UTF-8
     46  */
     47 
     48 static int compareTokenData(const void * t1, const void * t2)
     49 {
     50     return ((TokenData *)t1)->token - ((TokenData *)t2)->token;
     51 }
     52 
     53 static int compareAttrData(const void * t1, const void * t2)
     54 {
     55     return ((AttrData *)t1)->token - ((AttrData *)t2)->token;
     56 }
     57 
     58 static bool isTagStart(int token)
     59 {
     60     if (token == TOKEN_SWITCH_PAGE)
     61         return true;
     62 
     63     token &= 0x3f;
     64     return (token >= TOKEN_LITERAL && token < TOKEN_EXT_I_0);
     65 }
     66 
     67 static bool isAttrStart(int token)
     68 {
     69     return (token >= TOKEN_LITERAL && token < TOKEN_EXT_I_0) ||
     70         (token > TOKEN_LITERAL_C && token < 0x80);
     71 }
     72 
     73 WbxmlParser::WbxmlParser(uint32_t transportEncoding) :
     74     mTransportEncoding(transportEncoding)
     75 {
     76     reset();
     77 }
     78 
     79 WbxmlParser::~WbxmlParser()
     80 {
     81 }
     82 
     83 void WbxmlParser::reset(void)
     84 {
     85     mContentHandler = NULL;
     86 
     87     mExternalChunk = NULL;
     88     mExternalChunkLen = 0;
     89     mLastChunk.clear();
     90     mDataOffset = 0;
     91     mIsDataEnd = false;
     92 
     93     mStartElemStack.clear();
     94     mStringTable.clear();
     95 
     96     mCurrTagPage = mCurrAttrPage = 0;
     97     mPublicId = 0;
     98 
     99     mState = EXPECT_HEADER;
    100     mLastError = ERROR_NO_ERROR;
    101 }
    102 
    103 void WbxmlParser::setContentHandler(WbxmlContentHandler * handler)
    104 {
    105     mContentHandler = handler;
    106 }
    107 
    108 int WbxmlParser::parse(const char * data, uint32_t dataLen, bool end)
    109 {
    110     if (data == NULL) {
    111         mLastError = ERROR_INVALID_DATA;
    112         return WBXML_STATUS_ERROR;
    113     }
    114 
    115     // All temporary C++ varaibles must be declared before setjmp to make
    116     // sure they get properly destructed after longjmp.
    117     vector<Attribute> attribs;
    118     Attribute attrib;
    119     string tagName;
    120     string characters;
    121     string opaque;
    122 
    123 #ifdef WBXML_DEBUG
    124     printf("\nparse dataLen %d; end %d; readPos %d; availData %d\n",
    125         dataLen, end, getReadPos(), availDataSize());
    126 #endif
    127     appendData(data, dataLen, end);
    128     volatile int readPos = getReadPos();
    129     int setjmpRet;
    130     switch (setjmpRet = setjmp(mJmpbuf)) {
    131         case 0:
    132             break;
    133 
    134         case ERROR_NEED_MORE_DATA:
    135             if (!mIsDataEnd) {
    136 #ifdef WBXML_DEBUG
    137                 printf("\nneed more data: readPos %d\n", readPos);
    138 #endif
    139                 setReadPos(readPos);
    140                 saveRemainingData();
    141                 return WBXML_STATUS_OK;
    142             } else {
    143 #ifdef WBXML_DEBUG
    144                 printf("wbxml parser error: unexpected data end\n");
    145 #endif
    146                 mLastError = ERROR_NEED_MORE_DATA;
    147                 return WBXML_STATUS_ERROR;
    148             }
    149             break;
    150 
    151         case ERROR_UNSUPPORTED_PUBID:
    152         case ERROR_UNSUPPORTED_CHARSET:
    153         case ERROR_INVALID_STRING_TABLE:
    154         case ERROR_INVALID_STRING_TABLE_REFERENCE:
    155         case ERROR_INVALID_EXT_TOKEN:
    156         case ERROR_INVALID_MBUINT:
    157         case ERROR_INVALID_ENTITY:
    158         case ERROR_UNRECOGNIZED_TAG:
    159         case ERROR_UNRECOGNIZED_ATTR:
    160         case ERROR_MISSING_ATTR:
    161         case ERROR_MISSING_TOKEN_END:
    162 #ifdef WBXML_DEBUG
    163             printf("wbxml parser error %d\n", setjmpRet);
    164 #endif
    165             mLastError = ParserError(setjmpRet);
    166             return WBXML_STATUS_ERROR;
    167             break;
    168 
    169         case ERROR_NOT_SUPPORTED_YET:
    170             printf("wbxml parser error: Not implemented feature.\n");
    171             mLastError = ParserError(setjmpRet);
    172             return WBXML_STATUS_ERROR;
    173             break;
    174 
    175         default:
    176             printf("wbxml parser error: Impossible execution path.\n");
    177             mLastError = ParserError(setjmpRet);
    178             return WBXML_STATUS_ERROR;
    179             break;
    180     }
    181 
    182     for (;;) {
    183         // save readPos for error recovery
    184         readPos = getReadPos();
    185 
    186         switch (mState) {
    187             case EXPECT_HEADER:
    188                 mDocVersion = readByte();
    189 
    190                 mPublicId = readMbuint32();
    191                 if (mPublicId != 0) {
    192                     if (!selectTokenMapping(mPublicId)) {
    193 #ifdef WBXML_DEBUG
    194                         printf("wbxml parser error: unsupported public id \n");
    195 #endif
    196                         longjmp(mJmpbuf, ERROR_UNSUPPORTED_PUBID);
    197                     }
    198                 } else {
    199                     mPublicId = -readMbuint32();
    200                 }
    201                 mCharset = readMbuint32();
    202                 if (!mCharset) {
    203                     mCharset = mTransportEncoding;
    204                     if (!mCharset) {
    205                         mCharset = CHARSET_UTF8;
    206                     }
    207                 }
    208                 // TODO: support more charsets other than UTF-8
    209                 if (mCharset != CHARSET_UTF8) {
    210 #ifdef WBXML_DEBUG
    211                     printf("wbxml parser error: unsupported charset\n");
    212 #endif
    213                     longjmp(mJmpbuf, ERROR_UNSUPPORTED_CHARSET);
    214                 }
    215 
    216                 // now advance to next state
    217                 if (mContentHandler) {
    218                     mContentHandler->handlePublicId(mPublicId);
    219                 }
    220                 mState = EXPECT_STRING_TABLE;
    221                 break;
    222 
    223             case EXPECT_STRING_TABLE:
    224             {
    225                 uint32_t len = readMbuint32();
    226                 if (availDataSize() < len) {
    227                     longjmp(mJmpbuf, ERROR_NEED_MORE_DATA);
    228                 }
    229                 mStringTable.clear();
    230                 // TODO: optimize this
    231                 while (len--) {
    232                     mStringTable += readByte();
    233                 }
    234                 if (mStringTable.size()) {
    235                     if (mStringTable[mStringTable.size() - 1] != 0) {
    236                         // must have an ending \0
    237                         //TODO:the byte array returned by SCTS does not contain '\0' at the
    238                         //end,should this be fixed accordingly?
    239 #ifdef WBXML_DEBUG
    240                         printf("wbxml parser error: invalid string table\n");
    241 #endif
    242                         longjmp(mJmpbuf, ERROR_INVALID_STRING_TABLE);
    243                     }
    244                 }
    245                 mState = EXPECT_BODY_START;
    246                 if (mPublicId <= 0) {
    247                     const char * s = mStringTable.c_str() + (-mPublicId);
    248 #ifdef SUPPORT_SYNCML
    249                     if (strcmp(s, "-//SYNCML//DTD SyncML 1.2//EN") == 0) {
    250                         mPublicId = PUBLICID_SYNCML_1_2;
    251                     } else if (strcmp(s, "-//SYNCML//DTD SyncML 1.1//EN") == 0) {
    252                         mPublicId = PUBLICID_SYNCML_1_1;
    253                     } else if (strcmp(s, "-//SYNCML//DTD SyncML 1.0//EN") == 0) {
    254                         mPublicId = PUBLICID_SYNCML_1_0;
    255                     }
    256 #endif
    257                     if ((mPublicId <= 0) || !selectTokenMapping(mPublicId)) {
    258                         longjmp(mJmpbuf, ERROR_UNSUPPORTED_PUBID);
    259                     }
    260                 }
    261                 break;
    262             }
    263 
    264             case EXPECT_BODY_START:
    265                 //TODO: handle possible PIs
    266                 mState = EXPECT_ELEMENT_START;
    267                 break;
    268 
    269             case EXPECT_ELEMENT_START:
    270             {
    271                 int stag = readByte();
    272                 const char * name;
    273                 if ((stag & 0x3f) == TOKEN_LITERAL) {
    274                     name = resolveStrTableRef();
    275                 } else {
    276                     if (stag == TOKEN_SWITCH_PAGE) {
    277                         mCurrTagPage = readByte();
    278                         stag = readByte();
    279                     }
    280                     name = lookupTagName(stag);
    281                 }
    282                 if (name == NULL) {
    283 #ifdef WBXML_DEBUG
    284                     printf("wbxml parser error: unrecognized tag\n");
    285 #endif
    286                     longjmp(mJmpbuf, ERROR_UNRECOGNIZED_TAG);
    287                 }
    288                 attribs.clear();
    289                 if (stag & 0x80) {
    290                     // followed by 1 or more attributes
    291                     while (peekByte() != TOKEN_END) {
    292                         readAttribute(&attrib);
    293                         attribs.push_back(attrib);
    294                     }
    295                     if (!attribs.size()) {
    296 #ifdef WBXML_DEBUG
    297                         printf("wbxml parser error: missing attributes\n");
    298 #endif
    299                         longjmp(mJmpbuf, ERROR_MISSING_ATTR);
    300                     }
    301                     // TOKEN_END
    302                     readByte();
    303                 }
    304                 if (mContentHandler) {
    305                     mContentHandler->startElement(name, attribs);
    306                 }
    307                 if (stag & 0x40) {
    308                     mState = EXPECT_CONTENT;
    309                 } else {
    310                     mState = ELEMENT_END;
    311                 }
    312                 tagName = name;
    313                 mStartElemStack.push_back(name);
    314                 break;
    315             }
    316 
    317             case EXPECT_CONTENT:
    318             {
    319                 int byte = peekByte();
    320                 if (byte == TOKEN_SWITCH_PAGE) {
    321                     readByte();
    322                     mCurrTagPage = readByte();
    323                     byte = peekByte();
    324                 }
    325                 if (isTagStart(byte) || byte == TOKEN_END) {
    326                     if (characters.size() && mContentHandler) {
    327                         mContentHandler->characters(characters.c_str(), characters.size());
    328                         characters.clear();
    329                     }
    330                     if (byte == TOKEN_END) {
    331                         mState = EXPECT_ELEMENT_END;
    332                     } else {
    333                         mState = EXPECT_ELEMENT_START;
    334                     }
    335                 } else {
    336                     // TODO: handle extension and pi
    337                     switch (byte) {
    338                         case TOKEN_ENTITY:
    339                         case TOKEN_STR_I:
    340                         case TOKEN_STR_T:
    341                             readString(characters);
    342                             break;
    343 
    344                         case TOKEN_EXT_T_0:
    345                         {
    346                             readByte();
    347                             uint32_t valueToken = readMbuint32();
    348                             if (mPublicId == PUBLICID_IMPS_1_1
    349                                     || mPublicId == PUBLICID_IMPS_1_2
    350                                     || mPublicId == PUBLICID_IMPS_1_3) {
    351                                 TokenData t = {valueToken, NULL};
    352                                 const TokenData * res = (TokenData *)bsearch(&t,
    353                                         csp13ExtValueTokens, ARRAY_SIZE(csp13ExtValueTokens),
    354                                         sizeof(csp13ExtValueTokens[0]), compareTokenData);
    355                                 if (res) {
    356                                     characters.append(res->tagName);
    357                                 } else {
    358                                     longjmp(mJmpbuf, ERROR_INVALID_EXT_TOKEN);
    359                                 }
    360                             } else {
    361                                 printf ("Token 0x%x\n", byte);
    362                                 longjmp(mJmpbuf, ERROR_NOT_SUPPORTED_YET);
    363                             }
    364                             break;
    365                         }
    366 
    367                         case TOKEN_OPAQUE:
    368                         {
    369                             readByte();
    370                             uint32_t opaqueDataLen = readMbuint32();
    371                             opaque.clear();
    372                             while (opaqueDataLen--) {
    373                                 opaque += (char)readByte();
    374                             }
    375                             if (mContentHandler) {
    376                                 mContentHandler->opaque(opaque.c_str(), opaque.size());
    377                             }
    378                             break;
    379                         }
    380 
    381                         default:
    382                             printf ("Token 0x%x\n", byte);
    383                             longjmp(mJmpbuf, ERROR_NOT_SUPPORTED_YET);
    384                             break;
    385                     }
    386                 }
    387                 break;
    388             }
    389 
    390             case EXPECT_ELEMENT_END:
    391                 if (readByte() != TOKEN_END) {
    392 #ifdef WBXML_DEBUG
    393                     printf("wbxml parser error: TOKEN_END expected\n");
    394 #endif
    395                     longjmp(mJmpbuf, ERROR_MISSING_TOKEN_END);
    396                 }
    397                 mState = ELEMENT_END;
    398                 break;
    399 
    400             case ELEMENT_END:
    401                 assert(!mStartElemStack.empty());
    402 
    403                 tagName = mStartElemStack.back();
    404                 mStartElemStack.pop_back();
    405                 if (mContentHandler) {
    406                     mContentHandler->endElement(tagName.c_str());
    407                 }
    408                 if (mStartElemStack.empty()) {
    409                     mState = EXPECT_BODY_END;
    410                 } else {
    411                     mState = EXPECT_CONTENT;
    412                 }
    413                 break;
    414 
    415             case EXPECT_BODY_END:
    416                 // TODO: handle possible PIs
    417 
    418                 // we're done
    419                 return WBXML_STATUS_OK;
    420                 break;
    421         }
    422     }
    423 }
    424 
    425 /*
    426  * We don't make a copy of the data chunk for the current parse() until
    427  * it returns.
    428  * The remaining data will be saved in saveRemainingData() before parse()
    429  * returns.
    430  */
    431 void WbxmlParser::appendData(const char * data, uint32_t len, bool end)
    432 {
    433     mExternalChunk = data;
    434     mExternalChunkLen = len;
    435     mIsDataEnd = end;
    436 }
    437 
    438 void WbxmlParser::saveRemainingData()
    439 {
    440     if (mDataOffset > mLastChunk.size()) {
    441         uint32_t offsetToExtChunk = mDataOffset - mLastChunk.size();
    442         assert(offsetToExtChunk <= mExternalChunkLen);
    443         mLastChunk.assign(mExternalChunk + offsetToExtChunk,
    444                 mExternalChunkLen - offsetToExtChunk);
    445         mDataOffset = 0;
    446     } else {
    447         mLastChunk.append(mExternalChunk, mExternalChunkLen);
    448     }
    449     mExternalChunk = NULL;
    450     mExternalChunkLen = 0;
    451 }
    452 
    453 int WbxmlParser::readByte()
    454 {
    455     if (mDataOffset < mLastChunk.size()) {
    456 #ifdef WBXML_DEBUG
    457         printf ("rb 0x%x; ", (unsigned char)mLastChunk[mDataOffset]);
    458 #endif
    459         return (unsigned char)mLastChunk[mDataOffset++];
    460     } else {
    461         uint32_t offsetToExtChunk = mDataOffset - mLastChunk.size();
    462         if (offsetToExtChunk < mExternalChunkLen) {
    463             mDataOffset++;
    464 #ifdef WBXML_DEBUG
    465             printf ("rb 0x%x; ", (unsigned char)mExternalChunk[offsetToExtChunk]);
    466 #endif
    467             return (unsigned char)mExternalChunk[offsetToExtChunk];
    468         }
    469         longjmp(mJmpbuf, ERROR_NEED_MORE_DATA);
    470     }
    471 }
    472 
    473 int WbxmlParser::peekByte()
    474 {
    475     if (mDataOffset < mLastChunk.size()) {
    476         return (unsigned char)mLastChunk[mDataOffset];
    477     } else {
    478         uint32_t offsetToExtChunk = mDataOffset - mLastChunk.size();
    479         if (offsetToExtChunk < mExternalChunkLen) {
    480             return (unsigned char)mExternalChunk[offsetToExtChunk];
    481         }
    482         longjmp(mJmpbuf, ERROR_NEED_MORE_DATA);
    483     }
    484 }
    485 
    486 uint32_t WbxmlParser::readMbuint32()
    487 {
    488     uint32_t value = 0;
    489     uint32_t byte;
    490     do {
    491         if ((value >> 25) != 0) {
    492             // would go overflow. not a valid uint32.
    493             longjmp(mJmpbuf, ERROR_INVALID_MBUINT);
    494         }
    495         byte = readByte();
    496         value = (value << 7) | (byte & 0x7f);
    497     } while (byte & 0x80);
    498     return value;
    499 }
    500 
    501 /**
    502  * Read STR_I | STR_T | ENTITY and *append* to str.
    503  * Yes this looks ugly...
    504  */
    505 void WbxmlParser::readString(string & str)
    506 {
    507     int byte = readByte();
    508     switch (byte) {
    509         case TOKEN_STR_I:
    510             //TODO: assuming UTF-8
    511             while ((byte = readByte()) != 0) {
    512                 str += (char)byte;
    513             }
    514             break;
    515 
    516         case TOKEN_ENTITY:
    517         {
    518             uint32_t ch = readMbuint32();
    519             //TODO: assuming UTF-8 for now.
    520             if (ch <= 0x7f) {
    521                 str += (char)ch;
    522             } else if (ch <= 0x7ff) {
    523                 str += (char)((ch >> 6) | 0xc0);
    524                 str += (char)((ch & 0x3f) | 0x80);
    525             } else if (ch <= 0xffff) {
    526                 str += (char)((ch >> 12) | 0xe0);
    527                 str += (char)(((ch >> 6) & 0x3f) | 0x80);
    528                 str += (char)((ch & 0x3f) | 0x80);
    529             } else if (ch <= 0x10ffff) {
    530                 // 010000 - 10FFFF
    531                 str += (char)((ch >> 18) | 0xf0);
    532                 str += (char)(((ch >> 12) & 0x3f) | 0x80);
    533                 str += (char)(((ch >> 6) & 0x3f) | 0x80);
    534                 str += (char)((ch & 0x3f) | 0x80);
    535             } else {
    536                 // not a valid UCS-4 character
    537                 longjmp(mJmpbuf, ERROR_INVALID_ENTITY);
    538             }
    539             break;
    540         }
    541 
    542         case TOKEN_STR_T:
    543         {
    544             const char * s = resolveStrTableRef();
    545             str.append(s, strlen(s));
    546             break;
    547         }
    548 
    549         default:
    550             // impossible
    551             printf ("Unknown token 0x%02x\n", byte);
    552             longjmp(mJmpbuf, ERROR_NOT_SUPPORTED_YET);
    553             break;
    554     }
    555 }
    556 
    557 const char * WbxmlParser::resolveStrTableRef(void)
    558 {
    559     uint32_t offset = readMbuint32();
    560     if (offset >= mStringTable.size()) {
    561         longjmp(mJmpbuf, ERROR_INVALID_STRING_TABLE_REFERENCE);
    562     }
    563     return mStringTable.c_str() + offset;
    564 }
    565 
    566 bool WbxmlParser::selectTokenMapping(int publicId)
    567 {
    568     switch (publicId) {
    569         case PUBLICID_IMPS_1_3:
    570         case PUBLICID_IMPS_1_2:
    571         case PUBLICID_IMPS_1_1:
    572             mTagPages = csp13TagPages;
    573             mNumTagPages = ARRAY_SIZE(csp13TagPages);
    574             mAttrPages = csp13AttrPages;
    575             mNumAttrPages = ARRAY_SIZE(csp13AttrPages);
    576             break;
    577 
    578 #ifdef SUPPORT_SYNCML
    579         case PUBLICID_SYNCML_1_0:
    580         case PUBLICID_SYNCML_1_1:
    581         case PUBLICID_SYNCML_1_2:
    582         case PUBLICID_SYNCML_METINF_1_2:
    583             mTagPages = syncmlTagPages;
    584             mNumTagPages = ARRAY_SIZE(syncmlTagPages);
    585             mAttrPages = NULL;
    586             mNumAttrPages = 0;
    587             break;
    588 
    589         case PUBLICID_SYNCML_DEVINF_1_2:
    590             mTagPages = syncmlDevInfTagPages;
    591             mNumTagPages = ARRAY_SIZE(syncmlDevInfTagPages);
    592             mAttrPages = NULL;
    593             mNumAttrPages = 0;
    594             break;
    595 #endif
    596         default:
    597             return false;
    598     }
    599     return true;
    600 }
    601 
    602 const char * WbxmlParser::lookupTagName(int tag) const
    603 {
    604     tag = tag & 0x3f;
    605 
    606     // TODO: optimize this
    607     if (mCurrTagPage >= mNumTagPages) {
    608         return NULL;
    609     }
    610     const TagCodePage * page = &mTagPages[mCurrTagPage];
    611     if (page == NULL) {
    612         return NULL;
    613     }
    614 
    615     TokenData t = {tag, NULL};
    616     const TokenData * res = (TokenData *)bsearch(&t, page->tags, page->numTokens,
    617             sizeof(TokenData), compareTokenData);
    618     if (res) {
    619         return res->tagName;
    620     }
    621 
    622     return NULL;
    623 }
    624 
    625 const char * WbxmlParser::lookupAttrName(int token, const char **prefix) const
    626 {
    627     // TODO: optimize this
    628     if (mCurrAttrPage >= mNumAttrPages) {
    629         return NULL;
    630     }
    631     const AttrCodePage * page = &mAttrPages[mCurrAttrPage];
    632     if (page == NULL) {
    633         return NULL;
    634     }
    635 
    636     AttrData t = {token, NULL, NULL};
    637     const AttrData * res = (AttrData *)bsearch(&t, page->attrs, page->numTokens,
    638             sizeof(AttrData), compareAttrData);
    639     if (res) {
    640         if (prefix) {
    641             *prefix = res->attrValuePrefix;
    642         }
    643         return res->attrName;
    644     }
    645 
    646     return NULL;
    647 }
    648 
    649 void WbxmlParser::readAttribute(Attribute * attrib)
    650 {
    651     // attribute start: attrib start token, LITERAL or END
    652     int attrStart = readByte();
    653     const char * name;
    654     const char * valuePrefix = NULL;
    655 
    656     if (attrStart == TOKEN_LITERAL) {
    657         name = resolveStrTableRef();
    658     } else {
    659         if (attrStart == TOKEN_SWITCH_PAGE) {
    660             mCurrAttrPage = readByte();
    661             attrStart = readByte();
    662         }
    663         name = lookupAttrName(attrStart, &valuePrefix);
    664     }
    665     if (name == NULL) {
    666         longjmp(mJmpbuf, ERROR_UNRECOGNIZED_ATTR);
    667     }
    668     attrib->name = name;
    669     attrib->value = "";
    670     if (valuePrefix != NULL) {
    671         attrib->value = valuePrefix;
    672     }
    673 
    674     // now attribute value: zero or more value, string, entity or extension tokens
    675     for (;;) {
    676         int valueToken = peekByte();
    677         if (isAttrStart(valueToken) || valueToken == TOKEN_END) {
    678             // An attribute start token, a LITERAL token or the END token
    679             // indicates the end of an attribute value.
    680             return;
    681         }
    682         switch (valueToken) {
    683             case TOKEN_ENTITY:
    684             case TOKEN_STR_I:
    685             case TOKEN_STR_T:
    686                 readString(attrib->value);
    687                 break;
    688 
    689             case TOKEN_EXT_I_0:
    690             case TOKEN_EXT_I_1:
    691             case TOKEN_EXT_I_2:
    692             case TOKEN_EXT_0:
    693             case TOKEN_EXT_1:
    694             case TOKEN_EXT_2:
    695                 //TODO: document type specific
    696                 printf ("Unsupported Token 0x%x\n", valueToken);
    697                 longjmp(mJmpbuf, ERROR_NOT_SUPPORTED_YET);
    698                 break;
    699 
    700             default:
    701                 //TODO
    702                 printf ("Unknown Token 0x%x\n", valueToken);
    703                 longjmp(mJmpbuf, ERROR_NOT_SUPPORTED_YET);
    704                 break;
    705         }
    706     }
    707 }
    708 
    709