Home | History | Annotate | Download | only in executor
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Test Executor
      3  * ------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief XML Parser.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "xeXMLParser.hpp"
     25 #include "deInt32.h"
     26 
     27 namespace xe
     28 {
     29 namespace xml
     30 {
     31 
     32 enum
     33 {
     34 	TOKENIZER_INITIAL_BUFFER_SIZE	= 1024
     35 };
     36 
     37 static inline bool isIdentifierStartChar (int ch)
     38 {
     39 	return de::inRange<int>(ch, 'a', 'z') || de::inRange<int>(ch, 'A', 'Z');
     40 }
     41 
     42 static inline bool isIdentifierChar (int ch)
     43 {
     44 	return isIdentifierStartChar(ch) || de::inRange<int>(ch, '0', '9') || (ch == '-') || (ch == '_');
     45 }
     46 
     47 static inline bool isWhitespaceChar (int ch)
     48 {
     49 	return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
     50 }
     51 
     52 static int getNextBufferSize (int curSize, int minNewSize)
     53 {
     54 	return de::max(curSize*2, 1<<deLog2Ceil32(minNewSize));
     55 }
     56 
     57 Tokenizer::Tokenizer (void)
     58 	: m_curToken	(TOKEN_INCOMPLETE)
     59 	, m_curTokenLen	(0)
     60 	, m_state		(STATE_DATA)
     61 	, m_buf			(TOKENIZER_INITIAL_BUFFER_SIZE)
     62 {
     63 }
     64 
     65 Tokenizer::~Tokenizer (void)
     66 {
     67 }
     68 
     69 void Tokenizer::clear (void)
     70 {
     71 	m_curToken		= TOKEN_INCOMPLETE;
     72 	m_curTokenLen	= 0;
     73 	m_state			= STATE_DATA;
     74 	m_buf.clear();
     75 }
     76 
     77 void Tokenizer::error (const std::string& what)
     78 {
     79 	throw ParseError(what);
     80 }
     81 
     82 void Tokenizer::feed (const deUint8* bytes, int numBytes)
     83 {
     84 	// Grow buffer if necessary.
     85 	if (m_buf.getNumFree() < numBytes)
     86 	{
     87 		m_buf.resize(getNextBufferSize(m_buf.getSize(), m_buf.getNumElements()+numBytes));
     88 	}
     89 
     90 	// Append to front.
     91 	m_buf.pushFront(bytes, numBytes);
     92 
     93 	// If we haven't parsed complete token, re-try after data feed.
     94 	if (m_curToken == TOKEN_INCOMPLETE)
     95 		advance();
     96 }
     97 
     98 int Tokenizer::getChar (int offset) const
     99 {
    100 	DE_ASSERT(de::inRange(offset, 0, m_buf.getNumElements()));
    101 
    102 	if (offset < m_buf.getNumElements())
    103 		return m_buf.peekBack(offset);
    104 	else
    105 		return END_OF_BUFFER;
    106 }
    107 
    108 void Tokenizer::advance (void)
    109 {
    110 	if (m_curToken != TOKEN_INCOMPLETE)
    111 	{
    112 		// Parser should not try to advance beyond end of string.
    113 		DE_ASSERT(m_curToken != TOKEN_END_OF_STRING);
    114 
    115 		// If current token is tag end, change state to data.
    116 		if (m_curToken == TOKEN_TAG_END						||
    117 			m_curToken == TOKEN_EMPTY_ELEMENT_END			||
    118 			m_curToken == TOKEN_PROCESSING_INSTRUCTION_END	||
    119 			m_curToken == TOKEN_COMMENT						||
    120 			m_curToken == TOKEN_ENTITY)
    121 			m_state = STATE_DATA;
    122 
    123 		// Advance buffer by length of last token.
    124 		m_buf.popBack(m_curTokenLen);
    125 
    126 		// Reset state.
    127 		m_curToken		= TOKEN_INCOMPLETE;
    128 		m_curTokenLen	= 0;
    129 
    130 		// If we hit end of string here, report it as end of string.
    131 		if (getChar(0) == END_OF_STRING)
    132 		{
    133 			m_curToken		= TOKEN_END_OF_STRING;
    134 			m_curTokenLen	= 1;
    135 			return;
    136 		}
    137 	}
    138 
    139 	int curChar = getChar(m_curTokenLen);
    140 
    141 	for (;;)
    142 	{
    143 		if (m_state == STATE_DATA)
    144 		{
    145 			// Advance until we hit end of buffer or tag start and treat that as data token.
    146 			if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER || curChar == '<' || curChar == '&')
    147 			{
    148 				if (curChar == '<')
    149 					m_state = STATE_TAG;
    150 				else if (curChar == '&')
    151 					m_state = STATE_ENTITY;
    152 
    153 				if (m_curTokenLen > 0)
    154 				{
    155 					// Report data token.
    156 					m_curToken = TOKEN_DATA;
    157 					return;
    158 				}
    159 				else if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER)
    160 				{
    161 					// Just return incomplete token, no data parsed.
    162 					return;
    163 				}
    164 				else
    165 				{
    166 					DE_ASSERT(m_state == STATE_TAG || m_state == STATE_ENTITY);
    167 					continue;
    168 				}
    169 			}
    170 		}
    171 		else
    172 		{
    173 			// Eat all whitespace if present.
    174 			if (m_curTokenLen == 0)
    175 			{
    176 				while (isWhitespaceChar(curChar))
    177 				{
    178 					m_buf.popBack();
    179 					curChar = getChar(0);
    180 				}
    181 			}
    182 
    183 			// Handle end of string / buffer.
    184 			if (curChar == END_OF_STRING)
    185 				error("Unexpected end of string");
    186 			else if (curChar == (int)END_OF_BUFFER)
    187 			{
    188 				DE_ASSERT(m_curToken == TOKEN_INCOMPLETE);
    189 				return;
    190 			}
    191 
    192 			if (m_curTokenLen == 0)
    193 			{
    194 				// Expect start of identifier, value or special tag token.
    195 				if (curChar == '\'' || curChar == '"')
    196 					m_state = STATE_VALUE;
    197 				else if (isIdentifierStartChar(curChar))
    198 					m_state = STATE_IDENTIFIER;
    199 				else if (curChar == '<' || curChar == '?' || curChar == '/')
    200 					m_state = STATE_TAG;
    201 				else if (curChar == '&')
    202 					DE_ASSERT(m_state == STATE_ENTITY);
    203 				else if (curChar == '=')
    204 				{
    205 					m_curToken		= TOKEN_EQUAL;
    206 					m_curTokenLen	= 1;
    207 					return;
    208 				}
    209 				else if (curChar == '>')
    210 				{
    211 					m_curToken		= TOKEN_TAG_END;
    212 					m_curTokenLen	= 1;
    213 					return;
    214 				}
    215 				else
    216 					error("Unexpected character");
    217 			}
    218 			else if (m_state == STATE_IDENTIFIER)
    219 			{
    220 				if (!isIdentifierChar(curChar))
    221 				{
    222 					m_curToken = TOKEN_IDENTIFIER;
    223 					return;
    224 				}
    225 			}
    226 			else if (m_state == STATE_VALUE)
    227 			{
    228 				// \todo [2012-06-07 pyry] Escapes.
    229 				if (curChar == '\'' || curChar == '"')
    230 				{
    231 					// \todo [2012-10-17 pyry] Should we actually do the check against getChar(0)?
    232 					if (curChar != getChar(0))
    233 						error("Mismatched quote");
    234 					m_curToken		 = TOKEN_STRING;
    235 					m_curTokenLen	+= 1;
    236 					return;
    237 				}
    238 			}
    239 			else if (m_state == STATE_COMMENT)
    240 			{
    241 				DE_ASSERT(m_curTokenLen >= 2); // 2 characters have been parsed if we are in comment state.
    242 
    243 				if (m_curTokenLen <= 3)
    244 				{
    245 					if (curChar != '-')
    246 						error("Invalid comment start");
    247 				}
    248 				else
    249 				{
    250 					int prev2 = m_curTokenLen > 5 ? getChar(m_curTokenLen-2) : 0;
    251 					int prev1 = m_curTokenLen > 4 ? getChar(m_curTokenLen-1) : 0;
    252 
    253 					if (prev2 == '-' && prev1 == '-')
    254 					{
    255 						if (curChar != '>')
    256 							error("Invalid comment end");
    257 						m_curToken		 = TOKEN_COMMENT;
    258 						m_curTokenLen	+= 1;
    259 						return;
    260 					}
    261 				}
    262 			}
    263 			else if (m_state == STATE_ENTITY)
    264 			{
    265 				if (m_curTokenLen >= 1)
    266 				{
    267 					if (curChar == ';')
    268 					{
    269 						m_curToken		 = TOKEN_ENTITY;
    270 						m_curTokenLen	+= 1;
    271 						return;
    272 					}
    273 					else if (!de::inRange<int>(curChar, '0', '9')	&&
    274 							 !de::inRange<int>(curChar, 'a', 'z')	&&
    275 							 !de::inRange<int>(curChar, 'A', 'Z'))
    276 						error("Invalid entity");
    277 				}
    278 			}
    279 			else
    280 			{
    281 				// Special tokens are at most 2 characters.
    282 				DE_ASSERT(m_state == STATE_TAG && m_curTokenLen == 1);
    283 
    284 				int prevChar = getChar(m_curTokenLen-1);
    285 
    286 				if (prevChar == '<')
    287 				{
    288 					// Tag start.
    289 					if (curChar == '/')
    290 					{
    291 						m_curToken		= TOKEN_END_TAG_START;
    292 						m_curTokenLen	= 2;
    293 						return;
    294 					}
    295 					else if (curChar == '?')
    296 					{
    297 						m_curToken		= TOKEN_PROCESSING_INSTRUCTION_START;
    298 						m_curTokenLen	= 2;
    299 						return;
    300 					}
    301 					else if (curChar == '!')
    302 					{
    303 						m_state = STATE_COMMENT;
    304 					}
    305 					else
    306 					{
    307 						m_curToken		= TOKEN_TAG_START;
    308 						m_curTokenLen	= 1;
    309 						return;
    310 					}
    311 				}
    312 				else if (prevChar == '?')
    313 				{
    314 					if (curChar != '>')
    315 						error("Invalid processing instruction end");
    316 					m_curToken		= TOKEN_PROCESSING_INSTRUCTION_END;
    317 					m_curTokenLen	= 2;
    318 					return;
    319 				}
    320 				else if (prevChar == '/')
    321 				{
    322 					if (curChar != '>')
    323 						error("Invalid empty element end");
    324 					m_curToken		= TOKEN_EMPTY_ELEMENT_END;
    325 					m_curTokenLen	= 2;
    326 					return;
    327 				}
    328 				else
    329 					error("Could not parse special token");
    330 			}
    331 		}
    332 
    333 		m_curTokenLen	+= 1;
    334 		curChar			 = getChar(m_curTokenLen);
    335 	}
    336 }
    337 
    338 void Tokenizer::getString (std::string& dst) const
    339 {
    340 	DE_ASSERT(m_curToken == TOKEN_STRING);
    341 	dst.resize(m_curTokenLen-2);
    342 	for (int ndx = 0; ndx < m_curTokenLen-2; ndx++)
    343 		dst[ndx] = m_buf.peekBack(ndx+1);
    344 }
    345 
    346 Parser::Parser (void)
    347 	: m_element		(ELEMENT_INCOMPLETE)
    348 	, m_state		(STATE_DATA)
    349 {
    350 }
    351 
    352 Parser::~Parser (void)
    353 {
    354 }
    355 
    356 void Parser::clear (void)
    357 {
    358 	m_tokenizer.clear();
    359 	m_elementName.clear();
    360 	m_attributes.clear();
    361 	m_attribName.clear();
    362 	m_entityValue.clear();
    363 
    364 	m_element	= ELEMENT_INCOMPLETE;
    365 	m_state		= STATE_DATA;
    366 }
    367 
    368 void Parser::error (const std::string& what)
    369 {
    370 	throw ParseError(what);
    371 }
    372 
    373 void Parser::feed (const deUint8* bytes, int numBytes)
    374 {
    375 	m_tokenizer.feed(bytes, numBytes);
    376 
    377 	if (m_element == ELEMENT_INCOMPLETE)
    378 		advance();
    379 }
    380 
    381 void Parser::advance (void)
    382 {
    383 	if (m_element == ELEMENT_START)
    384 		m_attributes.clear();
    385 
    386 	// \note No token is advanced when element end is reported.
    387 	if (m_state == STATE_YIELD_EMPTY_ELEMENT_END)
    388 	{
    389 		DE_ASSERT(m_element == ELEMENT_START);
    390 		m_element	= ELEMENT_END;
    391 		m_state		= STATE_DATA;
    392 		return;
    393 	}
    394 
    395 	if (m_element != ELEMENT_INCOMPLETE)
    396 	{
    397 		m_tokenizer.advance();
    398 		m_element = ELEMENT_INCOMPLETE;
    399 	}
    400 
    401 	for (;;)
    402 	{
    403 		Token curToken = m_tokenizer.getToken();
    404 
    405 		// Skip comments.
    406 		while (curToken == TOKEN_COMMENT)
    407 		{
    408 			m_tokenizer.advance();
    409 			curToken = m_tokenizer.getToken();
    410 		}
    411 
    412 		if (curToken == TOKEN_INCOMPLETE)
    413 		{
    414 			DE_ASSERT(m_element == ELEMENT_INCOMPLETE);
    415 			return;
    416 		}
    417 
    418 		switch (m_state)
    419 		{
    420 			case STATE_ENTITY:
    421 				m_state = STATE_DATA;
    422 				// Fall-through to STATE_DATA processing.
    423 
    424 			case STATE_DATA:
    425 				switch (curToken)
    426 				{
    427 					case TOKEN_DATA:
    428 						m_element = ELEMENT_DATA;
    429 						return;
    430 
    431 					case TOKEN_END_OF_STRING:
    432 						m_element = ELEMENT_END_OF_STRING;
    433 						return;
    434 
    435 					case TOKEN_TAG_START:
    436 						m_state = STATE_START_TAG_OPEN;
    437 						break;
    438 
    439 					case TOKEN_END_TAG_START:
    440 						m_state = STATE_END_TAG_OPEN;
    441 						break;
    442 
    443 					case TOKEN_PROCESSING_INSTRUCTION_START:
    444 						m_state = STATE_IN_PROCESSING_INSTRUCTION;
    445 						break;
    446 
    447 					case TOKEN_ENTITY:
    448 						m_state		= STATE_ENTITY;
    449 						m_element	= ELEMENT_DATA;
    450 						parseEntityValue();
    451 						return;
    452 
    453 					default:
    454 						error("Unexpected token");
    455 				}
    456 				break;
    457 
    458 			case STATE_IN_PROCESSING_INSTRUCTION:
    459 				if (curToken == TOKEN_PROCESSING_INSTRUCTION_END)
    460 					m_state = STATE_DATA;
    461 				else
    462 					if (curToken != TOKEN_IDENTIFIER && curToken != TOKEN_EQUAL && curToken != TOKEN_STRING)
    463 						error("Unexpected token in processing instruction");
    464 				break;
    465 
    466 			case STATE_START_TAG_OPEN:
    467 				if (curToken != TOKEN_IDENTIFIER)
    468 					error("Expected identifier");
    469 				m_tokenizer.getTokenStr(m_elementName);
    470 				m_state = STATE_ATTRIBUTE_LIST;
    471 				break;
    472 
    473 			case STATE_END_TAG_OPEN:
    474 				if (curToken != TOKEN_IDENTIFIER)
    475 					error("Expected identifier");
    476 				m_tokenizer.getTokenStr(m_elementName);
    477 				m_state = STATE_EXPECTING_END_TAG_CLOSE;
    478 				break;
    479 
    480 			case STATE_EXPECTING_END_TAG_CLOSE:
    481 				if (curToken != TOKEN_TAG_END)
    482 					error("Expected tag end");
    483 				m_state		= STATE_DATA;
    484 				m_element	= ELEMENT_END;
    485 				return;
    486 
    487 			case STATE_ATTRIBUTE_LIST:
    488 				if (curToken == TOKEN_IDENTIFIER)
    489 				{
    490 					m_tokenizer.getTokenStr(m_attribName);
    491 					m_state = STATE_EXPECTING_ATTRIBUTE_EQ;
    492 				}
    493 				else if (curToken == TOKEN_EMPTY_ELEMENT_END)
    494 				{
    495 					m_state		= STATE_YIELD_EMPTY_ELEMENT_END;
    496 					m_element	= ELEMENT_START;
    497 					return;
    498 				}
    499 				else if (curToken == TOKEN_TAG_END)
    500 				{
    501 					m_state		= STATE_DATA;
    502 					m_element	= ELEMENT_START;
    503 					return;
    504 				}
    505 				else
    506 					error("Unexpected token");
    507 				break;
    508 
    509 			case STATE_EXPECTING_ATTRIBUTE_EQ:
    510 				if (curToken != TOKEN_EQUAL)
    511 					error("Expected '='");
    512 				m_state = STATE_EXPECTING_ATTRIBUTE_VALUE;
    513 				break;
    514 
    515 			case STATE_EXPECTING_ATTRIBUTE_VALUE:
    516 				if (curToken != TOKEN_STRING)
    517 					error("Expected value");
    518 				if (hasAttribute(m_attribName.c_str()))
    519 					error("Duplicate attribute");
    520 
    521 				m_tokenizer.getString(m_attributes[m_attribName]);
    522 				m_state = STATE_ATTRIBUTE_LIST;
    523 				break;
    524 
    525 			default:
    526 				DE_ASSERT(false);
    527 		}
    528 
    529 		m_tokenizer.advance();
    530 	}
    531 }
    532 
    533 static char getEntityValue (const std::string& entity)
    534 {
    535 	static const struct
    536 	{
    537 		const char*		name;
    538 		char			value;
    539 	} s_entities[] =
    540 	{
    541 			{ "&lt;",			'<' },
    542 			{ "&gt;",			'>' },
    543 			{ "&amp;",			'&' },
    544 			{ "&apos;",			'\''},
    545 			{ "&quot;",			'"' },
    546 	};
    547 
    548 	for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_entities); ndx++)
    549 	{
    550 		if (entity == s_entities[ndx].name)
    551 			return s_entities[ndx].value;
    552 	}
    553 
    554 	return 0;
    555 }
    556 
    557 void Parser::parseEntityValue (void)
    558 {
    559 	DE_ASSERT(m_state == STATE_ENTITY && m_tokenizer.getToken() == TOKEN_ENTITY);
    560 
    561 	std::string entity;
    562 	m_tokenizer.getTokenStr(entity);
    563 
    564 	const char value = getEntityValue(entity);
    565 	if (value == 0)
    566 		error("Invalid entity '" + entity + "'");
    567 
    568 	m_entityValue.resize(1);
    569 	m_entityValue[0] = value;
    570 }
    571 
    572 } // xml
    573 } // xe
    574