1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "../../include/fxcrt/fx_xml.h" 8 #include "xml_int.h" 9 CXML_Parser::~CXML_Parser() 10 { 11 if (m_bOwnedStream) { 12 m_pDataAcc->Release(); 13 } 14 } 15 FX_BOOL CXML_Parser::Init(FX_LPBYTE pBuffer, size_t size) 16 { 17 if (m_pAllocator) { 18 m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataBufAcc(pBuffer, size, m_pAllocator); 19 } else { 20 m_pDataAcc = FX_NEW CXML_DataBufAcc(pBuffer, size, NULL); 21 } 22 if (!m_pDataAcc) { 23 return FALSE; 24 } 25 return Init(TRUE); 26 } 27 FX_BOOL CXML_Parser::Init(IFX_FileRead *pFileRead) 28 { 29 if (m_pAllocator) { 30 m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataStmAcc(pFileRead, m_pAllocator); 31 } else { 32 m_pDataAcc = FX_NEW CXML_DataStmAcc(pFileRead, NULL); 33 } 34 if (!m_pDataAcc) { 35 return FALSE; 36 } 37 return Init(TRUE); 38 } 39 FX_BOOL CXML_Parser::Init(IFX_BufferRead *pBuffer) 40 { 41 if (!pBuffer) { 42 return FALSE; 43 } 44 m_pDataAcc = pBuffer; 45 return Init(FALSE); 46 } 47 FX_BOOL CXML_Parser::Init(FX_BOOL bOwndedStream) 48 { 49 m_bOwnedStream = bOwndedStream; 50 m_nOffset = 0; 51 return ReadNextBlock(); 52 } 53 FX_BOOL CXML_Parser::ReadNextBlock() 54 { 55 if (!m_pDataAcc->ReadNextBlock()) { 56 return FALSE; 57 } 58 m_pBuffer = m_pDataAcc->GetBlockBuffer(); 59 m_dwBufferSize = m_pDataAcc->GetBlockSize(); 60 m_nBufferOffset = m_pDataAcc->GetBlockOffset(); 61 m_dwIndex = 0; 62 return m_dwBufferSize > 0; 63 } 64 FX_BOOL CXML_Parser::IsEOF() 65 { 66 if (!m_pDataAcc->IsEOF()) { 67 return FALSE; 68 } 69 return m_dwIndex >= m_dwBufferSize; 70 } 71 #define FXCRTM_XML_CHARTYPE_Normal 0x00 72 #define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 73 #define FXCRTM_XML_CHARTYPE_Letter 0x02 74 #define FXCRTM_XML_CHARTYPE_Digital 0x04 75 #define FXCRTM_XML_CHARTYPE_NameIntro 0x08 76 #define FXCRTM_XML_CHARTYPE_NameChar 0x10 77 #define FXCRTM_XML_CHARTYPE_HexDigital 0x20 78 #define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 79 #define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 80 #define FXCRTM_XML_CHARTYPE_HexChar 0x60 81 FX_BYTE g_FXCRT_XML_ByteTypes[256] = { 82 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 83 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 84 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, 85 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 86 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 87 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, 88 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 89 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 90 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 91 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 92 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 93 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 94 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 95 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 96 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 97 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x01, 0x01, 98 }; 99 FX_BOOL g_FXCRT_XML_IsWhiteSpace(FX_BYTE ch) 100 { 101 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar) != 0; 102 } 103 FX_BOOL g_FXCRT_XML_IsLetter(FX_BYTE ch) 104 { 105 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Letter) != 0; 106 } 107 FX_BOOL g_FXCRT_XML_IsDigital(FX_BYTE ch) 108 { 109 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital) != 0; 110 } 111 FX_BOOL g_FXCRT_XML_IsNameIntro(FX_BYTE ch) 112 { 113 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro) != 0; 114 } 115 FX_BOOL g_FXCRT_XML_IsNameChar(FX_BYTE ch) 116 { 117 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar) != 0; 118 } 119 FX_BOOL g_FXCRT_XML_IsHexChar(FX_BYTE ch) 120 { 121 return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar) != 0; 122 } 123 void CXML_Parser::SkipWhiteSpaces() 124 { 125 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 126 if (IsEOF()) { 127 return; 128 } 129 do { 130 while (m_dwIndex < m_dwBufferSize && g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { 131 m_dwIndex ++; 132 } 133 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 134 if (m_dwIndex < m_dwBufferSize || IsEOF()) { 135 break; 136 } 137 } while (ReadNextBlock()); 138 } 139 void CXML_Parser::GetName(CFX_ByteStringL &space, CFX_ByteStringL &name) 140 { 141 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 142 if (IsEOF()) { 143 return; 144 } 145 CFX_ByteTextBuf buf(m_pAllocator); 146 FX_BYTE ch; 147 do { 148 while (m_dwIndex < m_dwBufferSize) { 149 ch = m_pBuffer[m_dwIndex]; 150 if (ch == ':') { 151 buf.GetByteStringL(space); 152 buf.Clear(); 153 } else if (g_FXCRT_XML_IsNameChar(ch)) { 154 buf.AppendChar(ch); 155 } else { 156 break; 157 } 158 m_dwIndex ++; 159 } 160 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 161 if (m_dwIndex < m_dwBufferSize || IsEOF()) { 162 break; 163 } 164 } while (ReadNextBlock()); 165 buf.GetByteStringL(name); 166 } 167 void CXML_Parser::SkipLiterals(FX_BSTR str) 168 { 169 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 170 if (IsEOF()) { 171 return; 172 } 173 FX_INT32 i = 0, iLen = str.GetLength(); 174 do { 175 while (m_dwIndex < m_dwBufferSize) { 176 if (str.GetAt(i) != m_pBuffer[m_dwIndex ++]) { 177 i = 0; 178 } else { 179 i ++; 180 if (i == iLen) { 181 break; 182 } 183 } 184 } 185 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 186 if (i == iLen) { 187 return; 188 } 189 if (m_dwIndex < m_dwBufferSize || IsEOF()) { 190 break; 191 } 192 } while (ReadNextBlock()); 193 while (!m_pDataAcc->IsEOF()) { 194 ReadNextBlock(); 195 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwBufferSize; 196 } 197 m_dwIndex = m_dwBufferSize; 198 } 199 FX_DWORD CXML_Parser::GetCharRef() 200 { 201 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 202 if (IsEOF()) { 203 return 0; 204 } 205 FX_BYTE ch; 206 FX_INT32 iState = 0; 207 CFX_ByteTextBuf buf(m_pAllocator); 208 FX_DWORD code = 0; 209 do { 210 while (m_dwIndex < m_dwBufferSize) { 211 ch = m_pBuffer[m_dwIndex]; 212 switch (iState) { 213 case 0: 214 if (ch == '#') { 215 m_dwIndex ++; 216 iState = 2; 217 break; 218 } 219 iState = 1; 220 case 1: 221 m_dwIndex ++; 222 if (ch == ';') { 223 CFX_ByteStringC ref = buf.GetByteString(); 224 if (ref == FX_BSTRC("gt")) { 225 code = '>'; 226 } else if (ref == FX_BSTRC("lt")) { 227 code = '<'; 228 } else if (ref == FX_BSTRC("amp")) { 229 code = '&'; 230 } else if (ref == FX_BSTRC("apos")) { 231 code = '\''; 232 } else if (ref == FX_BSTRC("quot")) { 233 code = '"'; 234 } 235 iState = 10; 236 break; 237 } 238 buf.AppendByte(ch); 239 break; 240 case 2: 241 if (ch == 'x') { 242 m_dwIndex ++; 243 iState = 4; 244 break; 245 } 246 iState = 3; 247 case 3: 248 m_dwIndex ++; 249 if (ch == ';') { 250 iState = 10; 251 break; 252 } 253 if (g_FXCRT_XML_IsDigital(ch)) { 254 code = code * 10 + ch - '0'; 255 } 256 break; 257 case 4: 258 m_dwIndex ++; 259 if (ch == ';') { 260 iState = 10; 261 break; 262 } 263 FX_BYTE nHex = g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; 264 if (nHex) { 265 if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { 266 code = (code << 4) + ch - '0'; 267 } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { 268 code = (code << 4) + ch - 87; 269 } else { 270 code = (code << 4) + ch - 55; 271 } 272 } 273 break; 274 } 275 if (iState == 10) { 276 break; 277 } 278 } 279 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 280 if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { 281 break; 282 } 283 } while (ReadNextBlock()); 284 return code; 285 } 286 void CXML_Parser::GetAttrValue(CFX_WideStringL &value) 287 { 288 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 289 if (IsEOF()) { 290 return; 291 } 292 CFX_UTF8Decoder decoder(m_pAllocator); 293 FX_BYTE mark = 0, ch; 294 do { 295 while (m_dwIndex < m_dwBufferSize) { 296 ch = m_pBuffer[m_dwIndex]; 297 if (mark == 0) { 298 if (ch != '\'' && ch != '"') { 299 return; 300 } 301 mark = ch; 302 m_dwIndex ++; 303 ch = 0; 304 continue; 305 } 306 m_dwIndex ++; 307 if (ch == mark) { 308 break; 309 } 310 if (ch == '&') { 311 decoder.AppendChar(GetCharRef()); 312 if (IsEOF()) { 313 decoder.GetResult(value); 314 return; 315 } 316 } else { 317 decoder.Input(ch); 318 } 319 } 320 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 321 if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) { 322 break; 323 } 324 } while (ReadNextBlock()); 325 decoder.GetResult(value); 326 } 327 void CXML_Parser::GetTagName(CFX_ByteStringL &space, CFX_ByteStringL &name, FX_BOOL &bEndTag, FX_BOOL bStartTag) 328 { 329 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 330 if (IsEOF()) { 331 return; 332 } 333 bEndTag = FALSE; 334 FX_BYTE ch; 335 FX_INT32 iState = bStartTag ? 1 : 0; 336 do { 337 while (m_dwIndex < m_dwBufferSize) { 338 ch = m_pBuffer[m_dwIndex]; 339 switch (iState) { 340 case 0: 341 m_dwIndex ++; 342 if (ch != '<') { 343 break; 344 } 345 iState = 1; 346 break; 347 case 1: 348 if (ch == '?') { 349 m_dwIndex ++; 350 SkipLiterals(FX_BSTRC("?>")); 351 iState = 0; 352 break; 353 } else if (ch == '!') { 354 m_dwIndex ++; 355 SkipLiterals(FX_BSTRC("-->")); 356 iState = 0; 357 break; 358 } 359 if (ch == '/') { 360 m_dwIndex ++; 361 GetName(space, name); 362 bEndTag = TRUE; 363 } else { 364 GetName(space, name); 365 bEndTag = FALSE; 366 } 367 return; 368 } 369 } 370 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 371 if (m_dwIndex < m_dwBufferSize || IsEOF()) { 372 break; 373 } 374 } while (ReadNextBlock()); 375 } 376 CXML_Element* CXML_Parser::ParseElement(CXML_Element* pParent, FX_BOOL bStartTag) 377 { 378 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 379 if (IsEOF()) { 380 return NULL; 381 } 382 CFX_ByteStringL tag_name, tag_space; 383 FX_BOOL bEndTag; 384 GetTagName(tag_space, tag_name, bEndTag, bStartTag); 385 if (tag_name.IsEmpty() || bEndTag) { 386 tag_space.Empty(m_pAllocator); 387 return NULL; 388 } 389 CXML_Element* pElement; 390 if (m_pAllocator) { 391 pElement = FX_NewAtAllocator(m_pAllocator)CXML_Element(m_pAllocator); 392 } else { 393 pElement = FX_NEW CXML_Element; 394 } 395 if (pElement) { 396 pElement->m_pParent = pParent; 397 pElement->SetTag(tag_space, tag_name); 398 } 399 tag_space.Empty(m_pAllocator); 400 tag_name.Empty(m_pAllocator); 401 if (!pElement) { 402 return NULL; 403 } 404 do { 405 CFX_ByteStringL attr_space, attr_name; 406 while (m_dwIndex < m_dwBufferSize) { 407 SkipWhiteSpaces(); 408 if (IsEOF()) { 409 break; 410 } 411 if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) { 412 break; 413 } 414 attr_space.Empty(m_pAllocator); 415 attr_name.Empty(m_pAllocator); 416 GetName(attr_space, attr_name); 417 SkipWhiteSpaces(); 418 if (IsEOF()) { 419 break; 420 } 421 if (m_pBuffer[m_dwIndex] != '=') { 422 break; 423 } 424 m_dwIndex ++; 425 SkipWhiteSpaces(); 426 if (IsEOF()) { 427 break; 428 } 429 CFX_WideStringL attr_value; 430 GetAttrValue(attr_value); 431 pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value, m_pAllocator); 432 attr_value.Empty(m_pAllocator); 433 } 434 attr_space.Empty(m_pAllocator); 435 attr_name.Empty(m_pAllocator); 436 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 437 if (m_dwIndex < m_dwBufferSize || IsEOF()) { 438 break; 439 } 440 } while (ReadNextBlock()); 441 SkipWhiteSpaces(); 442 if (IsEOF()) { 443 return pElement; 444 } 445 FX_BYTE ch = m_pBuffer[m_dwIndex ++]; 446 if (ch == '/') { 447 m_dwIndex ++; 448 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 449 return pElement; 450 } 451 if (ch != '>') { 452 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 453 if (m_pAllocator) { 454 FX_DeleteAtAllocator(pElement, m_pAllocator, CXML_Element); 455 } else { 456 delete pElement; 457 } 458 return NULL; 459 } 460 SkipWhiteSpaces(); 461 if (IsEOF()) { 462 return pElement; 463 } 464 CFX_UTF8Decoder decoder(m_pAllocator); 465 CFX_WideTextBuf content(m_pAllocator); 466 FX_BOOL bCDATA = FALSE; 467 FX_INT32 iState = 0; 468 do { 469 while (m_dwIndex < m_dwBufferSize) { 470 ch = m_pBuffer[m_dwIndex ++]; 471 switch (iState) { 472 case 0: 473 if (ch == '<') { 474 iState = 1; 475 } else if (ch == '&') { 476 decoder.ClearStatus(); 477 decoder.AppendChar(GetCharRef()); 478 } else { 479 decoder.Input(ch); 480 } 481 break; 482 case 1: 483 if (ch == '!') { 484 iState = 2; 485 } else if (ch == '?') { 486 SkipLiterals(FX_BSTRC("?>")); 487 SkipWhiteSpaces(); 488 iState = 0; 489 } else if (ch == '/') { 490 CFX_ByteStringL space, name; 491 GetName(space, name); 492 space.Empty(m_pAllocator); 493 name.Empty(m_pAllocator); 494 SkipWhiteSpaces(); 495 m_dwIndex ++; 496 iState = 10; 497 } else { 498 content << decoder.GetResult(); 499 CFX_WideStringL dataStr; 500 content.GetWideStringL(dataStr); 501 if (!bCDATA && !m_bSaveSpaceChars) { 502 dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n"); 503 } 504 InsertContentSegment(bCDATA, dataStr, pElement); 505 dataStr.Empty(m_pAllocator); 506 content.Clear(); 507 decoder.Clear(); 508 bCDATA = FALSE; 509 iState = 0; 510 m_dwIndex --; 511 CXML_Element* pSubElement = ParseElement(pElement, TRUE); 512 if (pSubElement == NULL) { 513 break; 514 } 515 pSubElement->m_pParent = pElement; 516 pElement->m_Children.Add((FX_LPVOID)CXML_Element::Element); 517 pElement->m_Children.Add(pSubElement); 518 SkipWhiteSpaces(); 519 } 520 break; 521 case 2: 522 if (ch == '[') { 523 SkipLiterals(FX_BSTRC("]]>")); 524 } else if (ch == '-') { 525 m_dwIndex ++; 526 SkipLiterals(FX_BSTRC("-->")); 527 } else { 528 SkipLiterals(FX_BSTRC(">")); 529 } 530 decoder.Clear(); 531 SkipWhiteSpaces(); 532 iState = 0; 533 break; 534 } 535 if (iState == 10) { 536 break; 537 } 538 } 539 m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex; 540 if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { 541 break; 542 } 543 } while (ReadNextBlock()); 544 content << decoder.GetResult(); 545 CFX_WideStringL dataStr; 546 content.GetWideStringL(dataStr); 547 if (!m_bSaveSpaceChars) { 548 dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n"); 549 } 550 InsertContentSegment(bCDATA, dataStr, pElement); 551 dataStr.Empty(m_pAllocator); 552 content.Clear(); 553 decoder.Clear(); 554 bCDATA = FALSE; 555 return pElement; 556 } 557 void CXML_Parser::InsertContentSegment(FX_BOOL bCDATA, FX_WSTR content, CXML_Element* pElement) 558 { 559 if (content.IsEmpty()) { 560 return; 561 } 562 CXML_Content* pContent; 563 if (m_pAllocator) { 564 pContent = FX_NewAtAllocator(m_pAllocator)CXML_Content; 565 } else { 566 pContent = FX_NEW CXML_Content; 567 } 568 if (!pContent) { 569 return; 570 } 571 pContent->Set(bCDATA, content, m_pAllocator); 572 pElement->m_Children.Add((FX_LPVOID)CXML_Element::Content); 573 pElement->m_Children.Add(pContent); 574 } 575 static CXML_Element* XML_ContinueParse(CXML_Parser &parser, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize) 576 { 577 parser.m_bSaveSpaceChars = bSaveSpaceChars; 578 CXML_Element* pElement = parser.ParseElement(NULL, FALSE); 579 if (pParsedSize) { 580 *pParsedSize = parser.m_nOffset; 581 } 582 return pElement; 583 } 584 CXML_Element* CXML_Element::Parse(const void* pBuffer, size_t size, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator) 585 { 586 CXML_Parser parser(pAllocator); 587 if (!parser.Init((FX_LPBYTE)pBuffer, size)) { 588 return NULL; 589 } 590 return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize); 591 } 592 CXML_Element* CXML_Element::Parse(IFX_FileRead *pFile, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator) 593 { 594 CXML_Parser parser(pAllocator); 595 if (!parser.Init(pFile)) { 596 return NULL; 597 } 598 return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize); 599 } 600 CXML_Element* CXML_Element::Parse(IFX_BufferRead *pBuffer, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator) 601 { 602 CXML_Parser parser(pAllocator); 603 if (!parser.Init(pBuffer)) { 604 return NULL; 605 } 606 return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize); 607 } 608 CXML_Element::CXML_Element(IFX_Allocator* pAllocator) 609 : m_pParent(NULL) 610 , m_QSpaceName() 611 , m_TagName() 612 , m_AttrMap() 613 , m_Children(pAllocator) 614 { 615 } 616 CXML_Element::CXML_Element(FX_BSTR qSpace, FX_BSTR tagName, IFX_Allocator* pAllocator) 617 : m_pParent(NULL) 618 , m_QSpaceName() 619 , m_TagName() 620 , m_AttrMap() 621 , m_Children(pAllocator) 622 { 623 m_QSpaceName.Set(qSpace, pAllocator); 624 m_TagName.Set(tagName, pAllocator); 625 } 626 CXML_Element::CXML_Element(FX_BSTR qTagName, IFX_Allocator* pAllocator) 627 : m_pParent(NULL) 628 , m_QSpaceName() 629 , m_TagName() 630 , m_AttrMap() 631 , m_Children(pAllocator) 632 { 633 SetTag(qTagName); 634 } 635 CXML_Element::~CXML_Element() 636 { 637 Empty(); 638 } 639 void CXML_Element::Empty() 640 { 641 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 642 m_QSpaceName.Empty(pAllocator); 643 m_TagName.Empty(pAllocator); 644 m_AttrMap.RemoveAll(pAllocator); 645 RemoveChildren(); 646 } 647 void CXML_Element::RemoveChildren() 648 { 649 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 650 for (int i = 0; i < m_Children.GetSize(); i += 2) { 651 ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i); 652 if (type == Content) { 653 CXML_Content* content = (CXML_Content*)m_Children.GetAt(i + 1); 654 if (pAllocator) { 655 FX_DeleteAtAllocator(content, pAllocator, CXML_Content); 656 } else { 657 delete content; 658 } 659 } else if (type == Element) { 660 CXML_Element* child = (CXML_Element*)m_Children.GetAt(i + 1); 661 child->RemoveChildren(); 662 if (pAllocator) { 663 FX_DeleteAtAllocator(child, pAllocator, CXML_Element); 664 } else { 665 delete child; 666 } 667 } 668 } 669 m_Children.RemoveAll(); 670 } 671 CFX_ByteString CXML_Element::GetTagName(FX_BOOL bQualified) const 672 { 673 if (!bQualified || m_QSpaceName.IsEmpty()) { 674 return m_TagName; 675 } 676 CFX_ByteString bsTag = m_QSpaceName; 677 bsTag += ":"; 678 bsTag += m_TagName; 679 return bsTag; 680 } 681 void CXML_Element::GetTagName(CFX_ByteStringL &tagName, FX_BOOL bQualified) const 682 { 683 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 684 if (!bQualified || m_QSpaceName.IsEmpty()) { 685 tagName.Set(m_TagName, pAllocator); 686 return; 687 } 688 FX_LPSTR str = tagName.AllocBuffer(m_QSpaceName.GetLength() + m_TagName.GetLength() + 2, pAllocator); 689 if (!str) { 690 return; 691 } 692 FXSYS_memcpy32(str, m_QSpaceName.GetCStr(), m_QSpaceName.GetLength()); 693 str += m_QSpaceName.GetLength(); 694 *str = ':'; 695 str ++; 696 FXSYS_memcpy32(str, m_TagName.GetCStr(), m_TagName.GetLength()); 697 str += m_TagName.GetLength(); 698 *str = '\0'; 699 } 700 CFX_ByteString CXML_Element::GetNamespace(FX_BOOL bQualified) const 701 { 702 if (bQualified) { 703 return m_QSpaceName; 704 } 705 return GetNamespaceURI(m_QSpaceName); 706 } 707 void CXML_Element::GetNamespace(CFX_ByteStringL &nameSpace, FX_BOOL bQualified) const 708 { 709 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 710 if (bQualified) { 711 nameSpace.Set(m_QSpaceName, pAllocator); 712 return; 713 } 714 GetNamespaceURI(m_QSpaceName, nameSpace); 715 } 716 CFX_ByteString CXML_Element::GetNamespaceURI(FX_BSTR qName) const 717 { 718 const CFX_WideStringL* pwsSpace; 719 const CXML_Element *pElement = this; 720 do { 721 if (qName.IsEmpty()) { 722 pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns")); 723 } else { 724 pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName); 725 } 726 if (pwsSpace) { 727 break; 728 } 729 pElement = pElement->GetParent(); 730 } while(pElement); 731 return pwsSpace ? FX_UTF8Encode(*pwsSpace) : CFX_ByteString(); 732 } 733 void CXML_Element::GetNamespaceURI(FX_BSTR qName, CFX_ByteStringL &uri) const 734 { 735 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 736 const CFX_WideStringL* pwsSpace; 737 const CXML_Element *pElement = this; 738 do { 739 if (qName.IsEmpty()) { 740 pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns")); 741 } else { 742 pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName); 743 } 744 if (pwsSpace) { 745 break; 746 } 747 pElement = pElement->GetParent(); 748 } while(pElement); 749 if (pwsSpace) { 750 FX_UTF8Encode(pwsSpace->GetPtr(), pwsSpace->GetLength(), uri, pAllocator); 751 } 752 } 753 void CXML_Element::GetAttrByIndex(int index, CFX_ByteString& space, CFX_ByteString& name, CFX_WideString& value) const 754 { 755 if (index < 0 || index >= m_AttrMap.GetSize()) { 756 return; 757 } 758 CXML_AttrItem& item = m_AttrMap.GetAt(index); 759 space = item.m_QSpaceName; 760 name = item.m_AttrName; 761 value = item.m_Value; 762 } 763 void CXML_Element::GetAttrByIndex(int index, CFX_ByteStringL &space, CFX_ByteStringL &name, CFX_WideStringL &value) const 764 { 765 if (index < 0 || index >= m_AttrMap.GetSize()) { 766 return; 767 } 768 IFX_Allocator* pAllocator = m_Children.m_pAllocator; 769 CXML_AttrItem& item = m_AttrMap.GetAt(index); 770 space.Set(item.m_QSpaceName, pAllocator); 771 name.Set(item.m_AttrName, pAllocator); 772 value.Set(item.m_Value, pAllocator); 773 } 774 FX_BOOL CXML_Element::HasAttr(FX_BSTR name) const 775 { 776 CFX_ByteStringC bsSpace, bsName; 777 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 778 return m_AttrMap.Lookup(bsSpace, bsName) != NULL; 779 } 780 FX_BOOL CXML_Element::GetAttrValue(FX_BSTR name, CFX_WideString& attribute) const 781 { 782 CFX_ByteStringC bsSpace, bsName; 783 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 784 const CFX_WideStringL* pValue = m_AttrMap.Lookup(bsSpace, bsName); 785 if (pValue) { 786 attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength()); 787 return TRUE; 788 } 789 return FALSE; 790 } 791 const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR name) const 792 { 793 CFX_ByteStringC bsSpace, bsName; 794 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 795 return m_AttrMap.Lookup(bsSpace, bsName); 796 } 797 FX_BOOL CXML_Element::GetAttrValue(FX_BSTR space, FX_BSTR name, CFX_WideString& attribute) const 798 { 799 const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name); 800 if (pValue) { 801 attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength()); 802 return TRUE; 803 } 804 return FALSE; 805 } 806 const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR space, FX_BSTR name) const 807 { 808 return m_AttrMap.Lookup(space, name); 809 } 810 FX_BOOL CXML_Element::GetAttrInteger(FX_BSTR name, int& attribute) const 811 { 812 CFX_ByteStringC bsSpace, bsName; 813 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 814 const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(bsSpace, bsName); 815 if (pwsValue) { 816 attribute = pwsValue->GetInteger(); 817 return TRUE; 818 } 819 return FALSE; 820 } 821 FX_BOOL CXML_Element::GetAttrInteger(FX_BSTR space, FX_BSTR name, int& attribute) const 822 { 823 const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(space, name); 824 if (pwsValue) { 825 attribute = pwsValue->GetInteger(); 826 return TRUE; 827 } 828 return FALSE; 829 } 830 FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR name, FX_FLOAT& attribute) const 831 { 832 CFX_ByteStringC bsSpace, bsName; 833 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 834 return GetAttrFloat(bsSpace, bsName, attribute); 835 } 836 FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR space, FX_BSTR name, FX_FLOAT& attribute) const 837 { 838 CFX_WideString value; 839 const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name); 840 if (pValue) { 841 attribute = pValue->GetFloat(); 842 return TRUE; 843 } 844 return FALSE; 845 } 846 FX_DWORD CXML_Element::CountChildren() const 847 { 848 return m_Children.GetSize() / 2; 849 } 850 CXML_Element::ChildType CXML_Element::GetChildType(FX_DWORD index) const 851 { 852 index <<= 1; 853 if (index >= (FX_DWORD)m_Children.GetSize()) { 854 return Invalid; 855 } 856 return (ChildType)(FX_UINTPTR)m_Children.GetAt(index); 857 } 858 CFX_WideString CXML_Element::GetContent(FX_DWORD index) const 859 { 860 index <<= 1; 861 if (index >= (FX_DWORD)m_Children.GetSize() || 862 (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) { 863 return CFX_WideString(); 864 } 865 CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1); 866 if (pContent) { 867 return pContent->m_Content; 868 } 869 return CFX_WideString(); 870 } 871 const CFX_WideStringL* CXML_Element::GetContentPtr(FX_DWORD index) const 872 { 873 index <<= 1; 874 if (index >= (FX_DWORD)m_Children.GetSize() || 875 (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) { 876 return NULL; 877 } 878 CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1); 879 if (pContent) { 880 return &pContent->m_Content; 881 } 882 return NULL; 883 } 884 CXML_Element* CXML_Element::GetElement(FX_DWORD index) const 885 { 886 index <<= 1; 887 if (index >= (FX_DWORD)m_Children.GetSize() || 888 (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Element) { 889 return NULL; 890 } 891 return (CXML_Element*)m_Children.GetAt(index + 1); 892 } 893 FX_DWORD CXML_Element::CountElements(FX_BSTR space, FX_BSTR tag) const 894 { 895 int count = 0; 896 for (int i = 0; i < m_Children.GetSize(); i += 2) { 897 ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i); 898 if (type != Element) { 899 continue; 900 } 901 CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1); 902 if ((space.IsEmpty() || pKid->m_QSpaceName == space) && pKid->m_TagName == tag) { 903 count ++; 904 } 905 } 906 return count; 907 } 908 CXML_Element* CXML_Element::GetElement(FX_BSTR space, FX_BSTR tag, int index) const 909 { 910 if (index < 0) { 911 return NULL; 912 } 913 for (int i = 0; i < m_Children.GetSize(); i += 2) { 914 ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i); 915 if (type != Element) { 916 continue; 917 } 918 CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1); 919 if ((!space.IsEmpty() && pKid->m_QSpaceName != space) || pKid->m_TagName != tag) { 920 continue; 921 } 922 if (index -- == 0) { 923 return pKid; 924 } 925 } 926 return NULL; 927 } 928 FX_DWORD CXML_Element::FindElement(CXML_Element *pChild) const 929 { 930 for (int i = 0; i < m_Children.GetSize(); i += 2) { 931 if ((ChildType)(FX_UINTPTR)m_Children.GetAt(i) == Element && 932 (CXML_Element*)m_Children.GetAt(i + 1) == pChild) { 933 return (FX_DWORD)(i >> 1); 934 } 935 } 936 return (FX_DWORD) - 1; 937 } 938 const CFX_WideStringL* CXML_AttrMap::Lookup(FX_BSTR space, FX_BSTR name) const 939 { 940 if (m_pMap == NULL) { 941 return NULL; 942 } 943 for (int i = 0; i < m_pMap->GetSize(); i ++) { 944 CXML_AttrItem& item = GetAt(i); 945 if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) { 946 return &item.m_Value; 947 } 948 } 949 return NULL; 950 } 951 void CXML_AttrMap::SetAt(FX_BSTR space, FX_BSTR name, FX_WSTR value, IFX_Allocator* pAllocator) 952 { 953 for (int i = 0; i < GetSize(); i ++) { 954 CXML_AttrItem& item = GetAt(i); 955 if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) { 956 item.m_Value.Set(value, pAllocator); 957 return; 958 } 959 } 960 if (!m_pMap) { 961 if (pAllocator) { 962 m_pMap = FX_NewAtAllocator(pAllocator)CFX_ObjectArray<CXML_AttrItem>(pAllocator); 963 } else { 964 m_pMap = FX_NEW CFX_ObjectArray<CXML_AttrItem>; 965 } 966 } 967 if (!m_pMap) { 968 return; 969 } 970 CXML_AttrItem* pItem = (CXML_AttrItem*)m_pMap->AddSpace(); 971 if (!pItem) { 972 return; 973 } 974 pItem->m_QSpaceName.Set(space, pAllocator); 975 pItem->m_AttrName.Set(name, pAllocator); 976 pItem->m_Value.Set(value, pAllocator); 977 } 978 void CXML_AttrMap::RemoveAt(FX_BSTR space, FX_BSTR name, IFX_Allocator* pAllocator) 979 { 980 if (m_pMap == NULL) { 981 return; 982 } 983 for (int i = 0; i < m_pMap->GetSize(); i ++) { 984 CXML_AttrItem& item = GetAt(i); 985 if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) { 986 item.Empty(pAllocator); 987 m_pMap->RemoveAt(i); 988 return; 989 } 990 } 991 } 992 int CXML_AttrMap::GetSize() const 993 { 994 return m_pMap == NULL ? 0 : m_pMap->GetSize(); 995 } 996 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const 997 { 998 ASSERT(m_pMap != NULL); 999 return (*m_pMap)[index]; 1000 } 1001 void CXML_AttrMap::RemoveAll(IFX_Allocator* pAllocator) 1002 { 1003 if (!m_pMap) { 1004 return; 1005 } 1006 for (int i = 0; i < m_pMap->GetSize(); i ++) { 1007 CXML_AttrItem& item = (*m_pMap)[i]; 1008 item.Empty(pAllocator); 1009 } 1010 m_pMap->RemoveAll(); 1011 if (pAllocator) { 1012 FX_DeleteAtAllocator(m_pMap, pAllocator, CFX_ObjectArray<CXML_AttrItem>); 1013 } else { 1014 delete m_pMap; 1015 } 1016 m_pMap = NULL; 1017 } 1018