Home | History | Annotate | Download | only in executor
      1 #ifndef _XEXMLPARSER_HPP
      2 #define _XEXMLPARSER_HPP
      3 /*-------------------------------------------------------------------------
      4  * drawElements Quality Program Test Executor
      5  * ------------------------------------------
      6  *
      7  * Copyright 2014 The Android Open Source Project
      8  *
      9  * Licensed under the Apache License, Version 2.0 (the "License");
     10  * you may not use this file except in compliance with the License.
     11  * You may obtain a copy of the License at
     12  *
     13  *      http://www.apache.org/licenses/LICENSE-2.0
     14  *
     15  * Unless required by applicable law or agreed to in writing, software
     16  * distributed under the License is distributed on an "AS IS" BASIS,
     17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     18  * See the License for the specific language governing permissions and
     19  * limitations under the License.
     20  *
     21  *//*!
     22  * \file
     23  * \brief XML Parser.
     24  *
     25  * \todo [2012-06-07 pyry] Not supported / handled properly:
     26  *  - xml namespaces (<ns:Element>)
     27  *  - backslash escapes in strings
     28  *  - &quot; -style escapes
     29  *  - utf-8
     30  *//*--------------------------------------------------------------------*/
     31 
     32 #include "xeDefs.hpp"
     33 #include "deRingBuffer.hpp"
     34 
     35 #include <string>
     36 #include <map>
     37 
     38 namespace xe
     39 {
     40 namespace xml
     41 {
     42 
     43 enum Token
     44 {
     45 	TOKEN_INCOMPLETE = 0,					//!< Not enough data to determine token.
     46 	TOKEN_END_OF_STRING,					//!< End of document string.
     47 	TOKEN_DATA,								//!< Block of data (anything outside tags).
     48 	TOKEN_COMMENT,							//!< <!-- comment -->
     49 	TOKEN_IDENTIFIER,						//!< Identifier (in tags).
     50 	TOKEN_STRING,							//!< String (in tags).
     51 	TOKEN_TAG_START,						//!< <
     52 	TOKEN_TAG_END,							//!< >
     53 	TOKEN_END_TAG_START,					//!< </
     54 	TOKEN_EMPTY_ELEMENT_END,				//!< />
     55 	TOKEN_PROCESSING_INSTRUCTION_START,		//!< <?
     56 	TOKEN_PROCESSING_INSTRUCTION_END,		//!< ?>
     57 	TOKEN_EQUAL,							//!< =
     58 	TOKEN_ENTITY,							//!< Entity reference, such as &amp;
     59 
     60 	TOKEN_LAST
     61 };
     62 
     63 enum Element
     64 {
     65 	ELEMENT_INCOMPLETE = 0,	//!< Incomplete element.
     66 	ELEMENT_START,			//!< Element start.
     67 	ELEMENT_END,			//!< Element end.
     68 	ELEMENT_DATA,			//!< Data element.
     69 	ELEMENT_END_OF_STRING,	//!< End of document string.
     70 
     71 	ELEMENT_LAST
     72 };
     73 
//! Returns a C string associated with \a token. The definition is not in
//! this header; presumably the string is a human-readable token name with
//! static lifetime - confirm against the implementation file.
const char* getTokenName (Token token);
     75 
     76 // \todo [2012-10-17 pyry] Add line number etc.
     77 class ParseError : public xe::ParseError
     78 {
     79 public:
     80 	ParseError (const std::string& message) : xe::ParseError(message) {}
     81 };
     82 
     83 class Tokenizer
     84 {
     85 public:
     86 						Tokenizer			(void);
     87 						~Tokenizer			(void);
     88 
     89 	void				clear				(void);		//!< Resets tokenizer to initial state.
     90 
     91 	void				feed				(const deUint8* bytes, int numBytes);
     92 	void				advance				(void);
     93 
     94 	Token				getToken			(void) const		{ return m_curToken;	}
     95 	int					getTokenLen			(void) const		{ return m_curTokenLen;	}
     96 	deUint8				getTokenByte		(int offset) const	{ DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); return m_buf.peekBack(offset); }
     97 	void				getTokenStr			(std::string& dst) const;
     98 	void				appendTokenStr		(std::string& dst) const;
     99 
    100 	void				getString			(std::string& dst) const;
    101 
    102 private:
    103 						Tokenizer			(const Tokenizer& other);
    104 	Tokenizer&			operator=			(const Tokenizer& other);
    105 
    106 	int					getChar				(int offset) const;
    107 
    108 	void				error				(const std::string& what);
    109 
    110 	enum State
    111 	{
    112 		STATE_DATA = 0,
    113 		STATE_TAG,
    114 		STATE_IDENTIFIER,
    115 		STATE_VALUE,
    116 		STATE_COMMENT,
    117 		STATE_ENTITY,
    118 
    119 		STATE_LAST
    120 	};
    121 
    122 	enum
    123 	{
    124 		END_OF_STRING	= 0,			//!< End of string (0).
    125 		END_OF_BUFFER	= 0xffffffff	//!< End of current data buffer.
    126 	};
    127 
    128 	Token						m_curToken;			//!< Current token.
    129 	int							m_curTokenLen;		//!< Length of current token.
    130 
    131 	State						m_state;			//!< Tokenization state.
    132 
    133 	de::RingBuffer<deUint8>		m_buf;
    134 };
    135 
    136 class Parser
    137 {
    138 public:
    139 	typedef std::map<std::string, std::string>		AttributeMap;
    140 	typedef AttributeMap::const_iterator			AttributeIter;
    141 
    142 						Parser				(void);
    143 						~Parser				(void);
    144 
    145 	void				clear				(void);		//!< Resets parser to initial state.
    146 
    147 	void				feed				(const deUint8* bytes, int numBytes);
    148 	void				advance				(void);
    149 
    150 	Element				getElement			(void) const						{ return m_element;										}
    151 
    152 	// For ELEMENT_START / ELEMENT_END.
    153 	const char*			getElementName		(void) const						{ return m_elementName.c_str();							}
    154 
    155 	// For ELEMENT_START.
    156 	bool				hasAttribute		(const char* name) const			{ return m_attributes.find(name) != m_attributes.end();	}
    157 	const char*			getAttribute		(const char* name) const			{ return m_attributes.find(name)->second.c_str();		}
    158 	const AttributeMap&	attributes			(void) const						{ return m_attributes;									}
    159 
    160 	// For ELEMENT_DATA.
    161 	int					getDataSize			(void) const;
    162 	deUint8				getDataByte			(int offset) const;
    163 	void				getDataStr			(std::string& dst) const;
    164 	void				appendDataStr		(std::string& dst) const;
    165 
    166 private:
    167 						Parser				(const Parser& other);
    168 	Parser&				operator=			(const Parser& other);
    169 
    170 	void				parseEntityValue	(void);
    171 
    172 	void				error				(const std::string& what);
    173 
    174 	enum State
    175 	{
    176 		STATE_DATA = 0,						//!< Initial state - assuming data or tag open.
    177 		STATE_ENTITY,						//!< Parsed entity is stored - overrides data.
    178 		STATE_IN_PROCESSING_INSTRUCTION,	//!< In processing instruction.
    179 		STATE_START_TAG_OPEN,				//!< Start tag open.
    180 		STATE_END_TAG_OPEN,					//!< End tag open.
    181 		STATE_EXPECTING_END_TAG_CLOSE,		//!< Expecting end tag close.
    182 		STATE_ATTRIBUTE_LIST,				//!< Expecting attribute list.
    183 		STATE_EXPECTING_ATTRIBUTE_EQ,		//!< Got attribute name, expecting =.
    184 		STATE_EXPECTING_ATTRIBUTE_VALUE,	//!< Expecting attribute value.
    185 		STATE_YIELD_EMPTY_ELEMENT_END,		//!< Empty element: start has been reported but not end.
    186 
    187 		STATE_LAST
    188 	};
    189 
    190 	Tokenizer			m_tokenizer;
    191 
    192 	Element				m_element;
    193 	std::string			m_elementName;
    194 	AttributeMap		m_attributes;
    195 
    196 	State				m_state;
    197 	std::string			m_attribName;
    198 	std::string			m_entityValue;		//!< Data override, such as entity value.
    199 };
    200 
    201 // Inline implementations
    202 
    203 inline void Tokenizer::getTokenStr (std::string& dst) const
    204 {
    205 	DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
    206 	dst.resize(m_curTokenLen);
    207 	for (int ndx = 0; ndx < m_curTokenLen; ndx++)
    208 		dst[ndx] = m_buf.peekBack(ndx);
    209 }
    210 
    211 inline void Tokenizer::appendTokenStr (std::string& dst) const
    212 {
    213 	DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
    214 
    215 	size_t oldLen = dst.size();
    216 	dst.resize(oldLen+m_curTokenLen);
    217 
    218 	for (int ndx = 0; ndx < m_curTokenLen; ndx++)
    219 		dst[oldLen+ndx] = m_buf.peekBack(ndx);
    220 }
    221 
    222 inline int Parser::getDataSize (void) const
    223 {
    224 	if (m_state != STATE_ENTITY)
    225 		return m_tokenizer.getTokenLen();
    226 	else
    227 		return (int)m_entityValue.size();
    228 }
    229 
    230 inline deUint8 Parser::getDataByte (int offset) const
    231 {
    232 	if (m_state != STATE_ENTITY)
    233 		return m_tokenizer.getTokenByte(offset);
    234 	else
    235 		return (deUint8)m_entityValue[offset];
    236 }
    237 
    238 inline void Parser::getDataStr (std::string& dst) const
    239 {
    240 	if (m_state != STATE_ENTITY)
    241 		return m_tokenizer.getTokenStr(dst);
    242 	else
    243 		dst = m_entityValue;
    244 }
    245 
    246 inline void Parser::appendDataStr (std::string& dst) const
    247 {
    248 	if (m_state != STATE_ENTITY)
    249 		return m_tokenizer.appendTokenStr(dst);
    250 	else
    251 		dst += m_entityValue;
    252 }
    253 
    254 } // xml
    255 } // xe
    256 
    257 #endif // _XEXMLPARSER_HPP
    258