1 #ifndef _XEXMLPARSER_HPP 2 #define _XEXMLPARSER_HPP 3 /*------------------------------------------------------------------------- 4 * drawElements Quality Program Test Executor 5 * ------------------------------------------ 6 * 7 * Copyright 2014 The Android Open Source Project 8 * 9 * Licensed under the Apache License, Version 2.0 (the "License"); 10 * you may not use this file except in compliance with the License. 11 * You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 * 21 *//*! 22 * \file 23 * \brief XML Parser. 24 * 25 * \todo [2012-06-07 pyry] Not supported / handled properly: 26 * - xml namespaces (<ns:Element>) 27 * - backslash escapes in strings 28 * - " -style escapes 29 * - utf-8 30 *//*--------------------------------------------------------------------*/ 31 32 #include "xeDefs.hpp" 33 #include "deRingBuffer.hpp" 34 35 #include <string> 36 #include <map> 37 38 namespace xe 39 { 40 namespace xml 41 { 42 43 enum Token 44 { 45 TOKEN_INCOMPLETE = 0, //!< Not enough data to determine token. 46 TOKEN_END_OF_STRING, //!< End of document string. 47 TOKEN_DATA, //!< Block of data (anything outside tags). 48 TOKEN_COMMENT, //!< <!-- comment --> 49 TOKEN_IDENTIFIER, //!< Identifier (in tags). 50 TOKEN_STRING, //!< String (in tags). 51 TOKEN_TAG_START, //!< < 52 TOKEN_TAG_END, //!< > 53 TOKEN_END_TAG_START, //!< </ 54 TOKEN_EMPTY_ELEMENT_END, //!< /> 55 TOKEN_PROCESSING_INSTRUCTION_START, //!< <? 56 TOKEN_PROCESSING_INSTRUCTION_END, //!< ?> 57 TOKEN_EQUAL, //!< = 58 TOKEN_ENTITY, //!< Entity reference, such as & 59 60 TOKEN_LAST 61 }; 62 63 enum Element 64 { 65 ELEMENT_INCOMPLETE = 0, //!< Incomplete element. 66 ELEMENT_START, //!< Element start. 67 ELEMENT_END, //!< Element end. 68 ELEMENT_DATA, //!< Data element. 69 ELEMENT_END_OF_STRING, //!< End of document string. 70 71 ELEMENT_LAST 72 }; 73 74 const char* getTokenName (Token token); 75 76 // \todo [2012-10-17 pyry] Add line number etc. 77 class ParseError : public xe::ParseError 78 { 79 public: 80 ParseError (const std::string& message) : xe::ParseError(message) {} 81 }; 82 83 class Tokenizer 84 { 85 public: 86 Tokenizer (void); 87 ~Tokenizer (void); 88 89 void clear (void); //!< Resets tokenizer to initial state. 90 91 void feed (const deUint8* bytes, int numBytes); 92 void advance (void); 93 94 Token getToken (void) const { return m_curToken; } 95 int getTokenLen (void) const { return m_curTokenLen; } 96 deUint8 getTokenByte (int offset) const { DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); return m_buf.peekBack(offset); } 97 void getTokenStr (std::string& dst) const; 98 void appendTokenStr (std::string& dst) const; 99 100 void getString (std::string& dst) const; 101 102 private: 103 Tokenizer (const Tokenizer& other); 104 Tokenizer& operator= (const Tokenizer& other); 105 106 int getChar (int offset) const; 107 108 void error (const std::string& what); 109 110 enum State 111 { 112 STATE_DATA = 0, 113 STATE_TAG, 114 STATE_IDENTIFIER, 115 STATE_VALUE, 116 STATE_COMMENT, 117 STATE_ENTITY, 118 119 STATE_LAST 120 }; 121 122 enum 123 { 124 END_OF_STRING = 0, //!< End of string (0). 125 END_OF_BUFFER = 0xffffffff //!< End of current data buffer. 126 }; 127 128 Token m_curToken; //!< Current token. 129 int m_curTokenLen; //!< Length of current token. 130 131 State m_state; //!< Tokenization state. 132 133 de::RingBuffer<deUint8> m_buf; 134 }; 135 136 class Parser 137 { 138 public: 139 typedef std::map<std::string, std::string> AttributeMap; 140 typedef AttributeMap::const_iterator AttributeIter; 141 142 Parser (void); 143 ~Parser (void); 144 145 void clear (void); //!< Resets parser to initial state. 146 147 void feed (const deUint8* bytes, int numBytes); 148 void advance (void); 149 150 Element getElement (void) const { return m_element; } 151 152 // For ELEMENT_START / ELEMENT_END. 153 const char* getElementName (void) const { return m_elementName.c_str(); } 154 155 // For ELEMENT_START. 156 bool hasAttribute (const char* name) const { return m_attributes.find(name) != m_attributes.end(); } 157 const char* getAttribute (const char* name) const { return m_attributes.find(name)->second.c_str(); } 158 const AttributeMap& attributes (void) const { return m_attributes; } 159 160 // For ELEMENT_DATA. 161 int getDataSize (void) const; 162 deUint8 getDataByte (int offset) const; 163 void getDataStr (std::string& dst) const; 164 void appendDataStr (std::string& dst) const; 165 166 private: 167 Parser (const Parser& other); 168 Parser& operator= (const Parser& other); 169 170 void parseEntityValue (void); 171 172 void error (const std::string& what); 173 174 enum State 175 { 176 STATE_DATA = 0, //!< Initial state - assuming data or tag open. 177 STATE_ENTITY, //!< Parsed entity is stored - overrides data. 178 STATE_IN_PROCESSING_INSTRUCTION, //!< In processing instruction. 179 STATE_START_TAG_OPEN, //!< Start tag open. 180 STATE_END_TAG_OPEN, //!< End tag open. 181 STATE_EXPECTING_END_TAG_CLOSE, //!< Expecting end tag close. 182 STATE_ATTRIBUTE_LIST, //!< Expecting attribute list. 183 STATE_EXPECTING_ATTRIBUTE_EQ, //!< Got attribute name, expecting =. 184 STATE_EXPECTING_ATTRIBUTE_VALUE, //!< Expecting attribute value. 185 STATE_YIELD_EMPTY_ELEMENT_END, //!< Empty element: start has been reported but not end. 186 187 STATE_LAST 188 }; 189 190 Tokenizer m_tokenizer; 191 192 Element m_element; 193 std::string m_elementName; 194 AttributeMap m_attributes; 195 196 State m_state; 197 std::string m_attribName; 198 std::string m_entityValue; //!< Data override, such as entity value. 199 }; 200 201 // Inline implementations 202 203 inline void Tokenizer::getTokenStr (std::string& dst) const 204 { 205 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); 206 dst.resize(m_curTokenLen); 207 for (int ndx = 0; ndx < m_curTokenLen; ndx++) 208 dst[ndx] = m_buf.peekBack(ndx); 209 } 210 211 inline void Tokenizer::appendTokenStr (std::string& dst) const 212 { 213 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); 214 215 size_t oldLen = dst.size(); 216 dst.resize(oldLen+m_curTokenLen); 217 218 for (int ndx = 0; ndx < m_curTokenLen; ndx++) 219 dst[oldLen+ndx] = m_buf.peekBack(ndx); 220 } 221 222 inline int Parser::getDataSize (void) const 223 { 224 if (m_state != STATE_ENTITY) 225 return m_tokenizer.getTokenLen(); 226 else 227 return (int)m_entityValue.size(); 228 } 229 230 inline deUint8 Parser::getDataByte (int offset) const 231 { 232 if (m_state != STATE_ENTITY) 233 return m_tokenizer.getTokenByte(offset); 234 else 235 return (deUint8)m_entityValue[offset]; 236 } 237 238 inline void Parser::getDataStr (std::string& dst) const 239 { 240 if (m_state != STATE_ENTITY) 241 return m_tokenizer.getTokenStr(dst); 242 else 243 dst = m_entityValue; 244 } 245 246 inline void Parser::appendDataStr (std::string& dst) const 247 { 248 if (m_state != STATE_ENTITY) 249 return m_tokenizer.appendTokenStr(dst); 250 else 251 dst += m_entityValue; 252 } 253 254 } // xml 255 } // xe 256 257 #endif // _XEXMLPARSER_HPP 258