1 /* 2 * Copyright (C) 2007 Esmertec AG. 3 * Copyright (C) 2007 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #ifndef WBXML_PARSER_H 19 #define WBXML_PARSER_H 20 21 #include <setjmp.h> 22 #include <stdint.h> 23 #include "wbxml_const.h" 24 #include "wbxml_stl.h" 25 #include "wbxml_tabledef.h" 26 27 struct Attribute 28 { 29 string name; 30 string value; 31 }; 32 33 class WbxmlContentHandler 34 { 35 public: 36 virtual ~WbxmlContentHandler() {} 37 virtual void handlePublicId(uint32_t id) = 0; 38 virtual void startElement(const char * name, const vector<Attribute> & attribs) = 0; 39 virtual void endElement(const char * name) = 0; 40 virtual void characters(const char * data, int len) = 0; 41 virtual void opaque(const char * data, int len) = 0; 42 }; 43 44 class DefaultWbxmlContentHandler: public WbxmlContentHandler 45 { 46 public: 47 DefaultWbxmlContentHandler() 48 { 49 mPublicId = -1; 50 } 51 52 void handlePublicId(uint32_t id) 53 { 54 mPublicId = id; 55 } 56 57 // @return public ID or -1 if no public ID seen 58 int getPublicId(void) const 59 { 60 return mPublicId; 61 } 62 63 void startElement(const char * name, const vector<Attribute> & attribs) 64 { 65 } 66 67 void endElement(const char * name) 68 { 69 } 70 71 void characters(const char * data, int len) 72 { 73 } 74 75 void opaque(const char * data, int len) 76 { 77 } 78 79 private: 80 int mPublicId; 81 }; 82 83 class WbxmlParser 84 { 85 public: 86 WbxmlParser(uint32_t transportEncoding); 87 ~WbxmlParser(); 88 89 void setContentHandler(WbxmlContentHandler * handler); 90 91 //void setTokenMappings(uint32_t publicId, TagTable tagTable, AttrTable attrTable); 92 int parse(const char * data, uint32_t len, bool end); 93 94 void reset(void); 95 96 int getError(void) const 97 { 98 return mLastError; 99 } 100 101 private: 102 enum ParserState { 103 EXPECT_HEADER, 104 EXPECT_STRING_TABLE, 105 EXPECT_BODY_START, 106 EXPECT_ELEMENT_START, 107 EXPECT_ELEMENT_END, 108 ELEMENT_END, 109 EXPECT_CONTENT, 110 EXPECT_BODY_END, 111 }; 112 enum ParserError { 113 ERROR_NO_ERROR = 0, 114 ERROR_INVALID_DATA = 1, 115 ERROR_NEED_MORE_DATA, 116 ERROR_UNSUPPORTED_PUBID, 117 ERROR_UNSUPPORTED_CHARSET, 118 ERROR_INVALID_STRING_TABLE, 119 ERROR_INVALID_STRING_TABLE_REFERENCE, 120 ERROR_INVALID_EXT_TOKEN, 121 ERROR_INVALID_MBUINT, 122 ERROR_INVALID_ENTITY, 123 ERROR_UNRECOGNIZED_TAG, 124 ERROR_UNRECOGNIZED_ATTR, 125 ERROR_MISSING_ATTR, 126 ERROR_MISSING_TOKEN_END, 127 ERROR_NOT_SUPPORTED_YET = 999, 128 }; 129 130 int readByte(); 131 int peekByte(); 132 uint32_t readMbuint32(); 133 void readString(string & str); 134 const char * resolveStrTableRef(void); 135 136 const char * lookupTagName(int tag) const; 137 const char * lookupAttrName(int tag, const char **valuePrefix) const; 138 void readAttribute(Attribute * attrib); 139 140 jmp_buf mJmpbuf; 141 142 string mLastChunk; 143 const char * mExternalChunk; 144 uint32_t mExternalChunkLen; 145 uint32_t mDataOffset; 146 bool mIsDataEnd; 147 148 int getReadPos(void) const 149 { 150 return mDataOffset; 151 } 152 void setReadPos(int pos) 153 { 154 mDataOffset = pos; 155 } 156 void appendData(const char * data, uint32_t len, bool end); 157 void saveRemainingData(); 158 uint32_t availDataSize(void) const 159 { 160 return mLastChunk.size() + mExternalChunkLen - mDataOffset; 161 } 162 163 bool selectTokenMapping(int publicId); 164 165 const TagCodePage * mTagPages; 166 uint32_t mNumTagPages; 167 const AttrCodePage * mAttrPages; 168 uint32_t mNumAttrPages; 169 170 uint32_t mTransportEncoding; 171 WbxmlContentHandler * mContentHandler; 172 173 vector<string> mStartElemStack; 174 string mStringTable; 175 uint32_t mCurrTagPage; 176 uint32_t mCurrAttrPage; 177 178 ParserState mState; 179 ParserError mLastError; 180 181 int mDocVersion; 182 uint32_t mPublicId; 183 uint32_t mCharset; 184 }; 185 186 #endif 187 188