Home | History | Annotate | Download | only in include
      1 /*
      2  * Copyright (C) 2007 Esmertec AG.
      3  * Copyright (C) 2007 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 #ifndef WBXML_PARSER_H
     19 #define WBXML_PARSER_H
     20 
     21 #include <setjmp.h>
     22 #include <stdint.h>
     23 #include "wbxml_const.h"
     24 #include "wbxml_stl.h"
     25 #include "wbxml_tabledef.h"
     26 
     27 struct Attribute
     28 {
     29     string name;
     30     string value;
     31 };
     32 
     33 class WbxmlContentHandler
     34 {
     35 public:
     36     virtual ~WbxmlContentHandler() {}
     37     virtual void handlePublicId(uint32_t id) = 0;
     38     virtual void startElement(const char * name, const vector<Attribute> & attribs) = 0;
     39     virtual void endElement(const char * name) = 0;
     40     virtual void characters(const char * data, int len) = 0;
     41     virtual void opaque(const char * data, int len) = 0;
     42 };
     43 
     44 class DefaultWbxmlContentHandler: public WbxmlContentHandler
     45 {
     46 public:
     47     DefaultWbxmlContentHandler()
     48     {
     49         mPublicId = -1;
     50     }
     51 
     52     void handlePublicId(uint32_t id)
     53     {
     54         mPublicId = id;
     55     }
     56 
     57     // @return public ID or -1 if no public ID seen
     58     int getPublicId(void) const
     59     {
     60         return mPublicId;
     61     }
     62 
     63     void startElement(const char * name, const vector<Attribute> & attribs)
     64     {
     65     }
     66 
     67     void endElement(const char * name)
     68     {
     69     }
     70 
     71     void characters(const char * data, int len)
     72     {
     73     }
     74 
     75     void opaque(const char * data, int len)
     76     {
     77     }
     78 
     79 private:
     80     int mPublicId;
     81 };
     82 
     83 class WbxmlParser
     84 {
     85 public:
     86     WbxmlParser(uint32_t transportEncoding);
     87     ~WbxmlParser();
     88 
     89     void setContentHandler(WbxmlContentHandler * handler);
     90 
     91     //void setTokenMappings(uint32_t publicId, TagTable tagTable, AttrTable attrTable);
     92     int parse(const char * data, uint32_t len, bool end);
     93 
     94     void reset(void);
     95 
     96     int getError(void) const
     97     {
     98         return mLastError;
     99     }
    100 
    101 private:
    102     enum ParserState {
    103         EXPECT_HEADER,
    104         EXPECT_STRING_TABLE,
    105         EXPECT_BODY_START,
    106         EXPECT_ELEMENT_START,
    107         EXPECT_ELEMENT_END,
    108         ELEMENT_END,
    109         EXPECT_CONTENT,
    110         EXPECT_BODY_END,
    111     };
    112     enum ParserError {
    113         ERROR_NO_ERROR = 0,
    114         ERROR_INVALID_DATA = 1,
    115         ERROR_NEED_MORE_DATA,
    116         ERROR_UNSUPPORTED_PUBID,
    117         ERROR_UNSUPPORTED_CHARSET,
    118         ERROR_INVALID_STRING_TABLE,
    119         ERROR_INVALID_STRING_TABLE_REFERENCE,
    120         ERROR_INVALID_EXT_TOKEN,
    121         ERROR_INVALID_MBUINT,
    122         ERROR_INVALID_ENTITY,
    123         ERROR_UNRECOGNIZED_TAG,
    124         ERROR_UNRECOGNIZED_ATTR,
    125         ERROR_MISSING_ATTR,
    126         ERROR_MISSING_TOKEN_END,
    127         ERROR_NOT_SUPPORTED_YET   = 999,
    128     };
    129 
    130     int readByte();
    131     int peekByte();
    132     uint32_t readMbuint32();
    133     void readString(string & str);
    134     const char * resolveStrTableRef(void);
    135 
    136     const char * lookupTagName(int tag) const;
    137     const char * lookupAttrName(int tag, const char **valuePrefix) const;
    138     void readAttribute(Attribute * attrib);
    139 
    140     jmp_buf mJmpbuf;
    141 
    142     string mLastChunk;
    143     const char * mExternalChunk;
    144     uint32_t mExternalChunkLen;
    145     uint32_t mDataOffset;
    146     bool mIsDataEnd;
    147 
    148     int getReadPos(void) const
    149     {
    150         return mDataOffset;
    151     }
    152     void setReadPos(int pos)
    153     {
    154         mDataOffset = pos;
    155     }
    156     void appendData(const char * data, uint32_t len, bool end);
    157     void saveRemainingData();
    158     uint32_t availDataSize(void) const
    159     {
    160         return mLastChunk.size() + mExternalChunkLen - mDataOffset;
    161     }
    162 
    163     bool selectTokenMapping(int publicId);
    164 
    165     const TagCodePage * mTagPages;
    166     uint32_t mNumTagPages;
    167     const AttrCodePage * mAttrPages;
    168     uint32_t mNumAttrPages;
    169 
    170     uint32_t mTransportEncoding;
    171     WbxmlContentHandler * mContentHandler;
    172 
    173     vector<string> mStartElemStack;
    174     string mStringTable;
    175     uint32_t mCurrTagPage;
    176     uint32_t mCurrAttrPage;
    177 
    178     ParserState mState;
    179     ParserError mLastError;
    180 
    181     int mDocVersion;
    182     uint32_t mPublicId;
    183     uint32_t mCharset;
    184 };
    185 
    186 #endif
    187 
    188