Home | History | Annotate | Download | only in fxcrt
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include <algorithm>
      8 #include <memory>
      9 #include <vector>
     10 
     11 #include "core/fxcrt/fx_ext.h"
     12 #include "core/fxcrt/fx_xml.h"
     13 #include "core/fxcrt/xml_int.h"
     14 #include "third_party/base/ptr_util.h"
     15 #include "third_party/base/stl_util.h"
     16 
     17 namespace {
     18 
// Bit flags describing one byte value. Each entry of g_FXCRT_XML_ByteTypes is
// an OR-combination of these flags.
#define FXCRTM_XML_CHARTYPE_Normal 0x00
#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01
#define FXCRTM_XML_CHARTYPE_Letter 0x02
#define FXCRTM_XML_CHARTYPE_Digital 0x04
#define FXCRTM_XML_CHARTYPE_NameIntro 0x08
#define FXCRTM_XML_CHARTYPE_NameChar 0x10
// The two bits selected by HexChar form a small field rather than independent
// flags: 0x20 marks a decimal digit, 0x40 a lower-case hex letter and 0x60 an
// upper-case hex letter. HexUpperLetter and HexChar therefore share a value.
#define FXCRTM_XML_CHARTYPE_HexDigital 0x20
#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40
#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60
#define FXCRTM_XML_CHARTYPE_HexChar 0x60
     29 
// Classification table indexed by byte value; every entry is a combination of
// the FXCRTM_XML_CHARTYPE_* flags.
//   0x00-0x20          : 0x01 (space)
//   '-' and '.'        : 0x10 (name char only)
//   '0'-'9'            : 0x34 (digit, name char, hex digit)
//   ':'                : 0x08 (name intro only)
//   'A'-'F' / 'a'-'f'  : 0x7A / 0x5A (letter, name intro, name char, hex)
//   other ASCII letters: 0x1A (letter, name intro, name char)
//   '_'                : 0x18 (name intro, name char)
//   0x80-0xFD          : 0x1A, so bytes of multi-byte sequences count as
//                        name characters
//   0xFE, 0xFF         : 0x01 (space)
// All remaining entries are 0x00.
const uint8_t g_FXCRT_XML_ByteTypes[256] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,
    0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,
    0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x01, 0x01,
};
     54 
     55 bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) {
     56   return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar);
     57 }
     58 
     59 bool g_FXCRT_XML_IsDigital(uint8_t ch) {
     60   return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital);
     61 }
     62 
     63 bool g_FXCRT_XML_IsNameIntro(uint8_t ch) {
     64   return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro);
     65 }
     66 
     67 bool g_FXCRT_XML_IsNameChar(uint8_t ch) {
     68   return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar);
     69 }
     70 
// IFX_BufferedReadStream implementation over a caller-supplied memory range.
// The whole range is handed out as one single block. The bytes are neither
// copied nor freed by this class, so the memory must outlive the object.
class CXML_DataBufAcc : public IFX_BufferedReadStream {
 public:
  // Gives pdfium::MakeRetain() access to the private constructor.
  template <typename T, typename... Args>
  friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);

  // IFX_BufferedReadStream
  bool IsEOF() override;
  FX_FILESIZE GetPosition() override;
  size_t ReadBlock(void* buffer, size_t size) override;
  bool ReadNextBlock(bool bRestart) override;
  const uint8_t* GetBlockBuffer() override;
  size_t GetBlockSize() override;
  FX_FILESIZE GetBlockOffset() override;

 private:
  CXML_DataBufAcc(const uint8_t* pBuffer, size_t size);
  ~CXML_DataBufAcc() override;

  const uint8_t* m_pBuffer;  // Borrowed input bytes.
  size_t m_dwSize;           // Number of bytes at |m_pBuffer|.
  size_t m_dwCurPos;         // 0 before the block is delivered, then m_dwSize.
};
     93 
// Records the borrowed buffer and its length; the read position starts at 0,
// meaning the single block has not been delivered yet.
CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size)
    : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {}
     96 
     97 CXML_DataBufAcc::~CXML_DataBufAcc() {}
     98 
     99 bool CXML_DataBufAcc::IsEOF() {
    100   return m_dwCurPos >= m_dwSize;
    101 }
    102 
// Returns the current read position: 0 until the block has been delivered by
// ReadNextBlock(), the buffer length afterwards.
FX_FILESIZE CXML_DataBufAcc::GetPosition() {
  return static_cast<FX_FILESIZE>(m_dwCurPos);
}
    106 
// Copying reads are not implemented for this accessor: nothing is written to
// |buffer| and 0 is always returned. Data is obtained through
// ReadNextBlock() / GetBlockBuffer() instead.
size_t CXML_DataBufAcc::ReadBlock(void* buffer, size_t size) {
  return 0;
}
    110 
    111 bool CXML_DataBufAcc::ReadNextBlock(bool bRestart) {
    112   if (bRestart)
    113     m_dwCurPos = 0;
    114 
    115   if (m_dwCurPos < m_dwSize) {
    116     m_dwCurPos = m_dwSize;
    117     return true;
    118   }
    119   return false;
    120 }
    121 
// The block is the borrowed buffer itself.
const uint8_t* CXML_DataBufAcc::GetBlockBuffer() {
  return m_pBuffer;
}
    125 
// The block always spans the complete buffer.
size_t CXML_DataBufAcc::GetBlockSize() {
  return m_dwSize;
}
    129 
// The single block starts at the beginning of the data.
FX_FILESIZE CXML_DataBufAcc::GetBlockOffset() {
  return 0;
}
    133 
// IFX_BufferedReadStream implementation that pulls data from an
// IFX_SeekableReadStream in chunks and exposes the most recently read chunk
// as the current block.
class CXML_DataStmAcc : public IFX_BufferedReadStream {
 public:
  // Gives pdfium::MakeRetain() access to the private constructor.
  template <typename T, typename... Args>
  friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);

  // IFX_BufferedReadStream
  bool IsEOF() override;
  FX_FILESIZE GetPosition() override;
  size_t ReadBlock(void* buffer, size_t size) override;
  bool ReadNextBlock(bool bRestart) override;
  const uint8_t* GetBlockBuffer() override;
  size_t GetBlockSize() override;
  FX_FILESIZE GetBlockOffset() override;

 private:
  explicit CXML_DataStmAcc(
      const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead);
  ~CXML_DataStmAcc() override;

  CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead;  // Source stream.
  uint8_t* m_pBuffer;    // Chunk buffer, allocated on first read, owned.
  FX_FILESIZE m_nStart;  // Offset of the current block in the source stream.
  size_t m_dwSize;       // Number of bytes in the current block.
};
    158 
// Wraps |pFileRead|, which must be non-null. No data is read and no buffer is
// allocated until ReadNextBlock() is called.
CXML_DataStmAcc::CXML_DataStmAcc(
    const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead)
    : m_pFileRead(pFileRead), m_pBuffer(nullptr), m_nStart(0), m_dwSize(0) {
  ASSERT(m_pFileRead);
}
    164 
// Releases the chunk buffer allocated by ReadNextBlock(); |m_pBuffer| is
// still null here if no block was ever read.
CXML_DataStmAcc::~CXML_DataStmAcc() {
  FX_Free(m_pBuffer);
}
    168 
    169 bool CXML_DataStmAcc::IsEOF() {
    170   return m_nStart + static_cast<FX_FILESIZE>(m_dwSize) >=
    171          m_pFileRead->GetSize();
    172 }
    173 
// Returns the stream offset just past the current block.
FX_FILESIZE CXML_DataStmAcc::GetPosition() {
  return m_nStart + static_cast<FX_FILESIZE>(m_dwSize);
}
    177 
// Copying reads are not implemented for this accessor: nothing is written to
// |buffer| and 0 is always returned. Data is obtained through
// ReadNextBlock() / GetBlockBuffer() instead.
size_t CXML_DataStmAcc::ReadBlock(void* buffer, size_t size) {
  return 0;
}
    181 
    182 bool CXML_DataStmAcc::ReadNextBlock(bool bRestart) {
    183   if (bRestart)
    184     m_nStart = 0;
    185 
    186   FX_FILESIZE nLength = m_pFileRead->GetSize();
    187   m_nStart += static_cast<FX_FILESIZE>(m_dwSize);
    188   if (m_nStart >= nLength)
    189     return false;
    190 
    191   static const FX_FILESIZE FX_XMLDATASTREAM_BufferSize = 32 * 1024;
    192   m_dwSize = static_cast<size_t>(
    193       std::min(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart));
    194   if (!m_pBuffer)
    195     m_pBuffer = FX_Alloc(uint8_t, m_dwSize);
    196 
    197   return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize);
    198 }
    199 
    200 const uint8_t* CXML_DataStmAcc::GetBlockBuffer() {
    201   return (const uint8_t*)m_pBuffer;
    202 }
    203 
// Number of bytes in the current block (0 before the first read).
size_t CXML_DataStmAcc::GetBlockSize() {
  return m_dwSize;
}
    207 
// Offset of the current block within the source stream.
FX_FILESIZE CXML_DataStmAcc::GetBlockOffset() {
  return m_nStart;
}
    211 
    212 }  // namespace
    213 
// Starts with an empty cursor: no current block and all offsets at zero.
// Init() attaches the data source and loads the first block.
CXML_Parser::CXML_Parser()
    : m_nOffset(0),
      m_pBuffer(nullptr),
      m_dwBufferSize(0),
      m_nBufferOffset(0),
      m_dwIndex(0) {}
    220 
    221 CXML_Parser::~CXML_Parser() {}
    222 
    223 bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) {
    224   m_pDataAcc = pdfium::MakeRetain<CXML_DataBufAcc>(pBuffer, size);
    225   m_nOffset = 0;
    226   return ReadNextBlock();
    227 }
    228 
    229 bool CXML_Parser::ReadNextBlock() {
    230   if (!m_pDataAcc->ReadNextBlock())
    231     return false;
    232 
    233   m_pBuffer = m_pDataAcc->GetBlockBuffer();
    234   m_dwBufferSize = m_pDataAcc->GetBlockSize();
    235   m_nBufferOffset = m_pDataAcc->GetBlockOffset();
    236   m_dwIndex = 0;
    237   return m_dwBufferSize > 0;
    238 }
    239 
    240 bool CXML_Parser::IsEOF() {
    241   return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize;
    242 }
    243 
    244 void CXML_Parser::SkipWhiteSpaces() {
    245   m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    246   if (IsEOF())
    247     return;
    248 
    249   do {
    250     while (m_dwIndex < m_dwBufferSize &&
    251            g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) {
    252       m_dwIndex++;
    253     }
    254     m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    255     if (m_dwIndex < m_dwBufferSize || IsEOF())
    256       break;
    257   } while (ReadNextBlock());
    258 }
    259 
    260 void CXML_Parser::GetName(CFX_ByteString* space, CFX_ByteString* name) {
    261   m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    262   if (IsEOF())
    263     return;
    264 
    265   CFX_ByteTextBuf buf;
    266   uint8_t ch;
    267   do {
    268     while (m_dwIndex < m_dwBufferSize) {
    269       ch = m_pBuffer[m_dwIndex];
    270       if (ch == ':') {
    271         *space = buf.AsStringC();
    272         buf.Clear();
    273       } else if (g_FXCRT_XML_IsNameChar(ch)) {
    274         buf.AppendChar(ch);
    275       } else {
    276         break;
    277       }
    278       m_dwIndex++;
    279     }
    280     m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    281     if (m_dwIndex < m_dwBufferSize || IsEOF())
    282       break;
    283   } while (ReadNextBlock());
    284   *name = buf.AsStringC();
    285 }
    286 
    287 void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) {
    288   m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    289   if (IsEOF()) {
    290     return;
    291   }
    292   int32_t i = 0, iLen = str.GetLength();
    293   do {
    294     while (m_dwIndex < m_dwBufferSize) {
    295       if (str.GetAt(i) != m_pBuffer[m_dwIndex++]) {
    296         i = 0;
    297         continue;
    298       }
    299       i++;
    300       if (i == iLen)
    301         break;
    302     }
    303     m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    304     if (i == iLen)
    305       return;
    306 
    307     if (m_dwIndex < m_dwBufferSize || IsEOF())
    308       break;
    309   } while (ReadNextBlock());
    310   while (!m_pDataAcc->IsEOF()) {
    311     ReadNextBlock();
    312     m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwBufferSize);
    313   }
    314   m_dwIndex = m_dwBufferSize;
    315 }
    316 
// Decodes an entity or character reference. The cursor is expected to sit on
// the first byte after '&' (the callers in this file consume the '&'
// themselves). Input is consumed up to and including the terminating ';', or
// up to the end of input when there is none.
//
// Returns the referenced code point. The named entities gt, lt, amp, apos and
// quot are recognised; any other name yields 0, as does a call at end of
// input.
//
// States of the scanner:
//    0 - first byte of the reference
//    1 - inside a named entity
//    2 - just after '#'
//    3 - inside a decimal character reference
//    4 - inside a hexadecimal character reference ("#x")
//   10 - terminating ';' consumed
uint32_t CXML_Parser::GetCharRef() {
  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
  if (IsEOF())
    return 0;

  uint8_t ch;
  int32_t iState = 0;
  CFX_ByteTextBuf buf;  // Collects the entity name in state 1.
  uint32_t code = 0;
  do {
    while (m_dwIndex < m_dwBufferSize) {
      ch = m_pBuffer[m_dwIndex];
      switch (iState) {
        case 0:
          if (ch == '#') {
            m_dwIndex++;
            iState = 2;
            break;
          }
          iState = 1;
          // Deliberate fall through: the current byte is already part of the
          // entity name.
        case 1:
          m_dwIndex++;
          if (ch == ';') {
            CFX_ByteStringC ref = buf.AsStringC();
            if (ref == "gt")
              code = '>';
            else if (ref == "lt")
              code = '<';
            else if (ref == "amp")
              code = '&';
            else if (ref == "apos")
              code = '\'';
            else if (ref == "quot")
              code = '"';
            iState = 10;
            break;
          }
          buf.AppendByte(ch);
          break;
        case 2:
          if (ch == 'x') {
            m_dwIndex++;
            iState = 4;
            break;
          }
          iState = 3;
          // Deliberate fall through: the current byte is the first byte of a
          // decimal reference.
        case 3:
          m_dwIndex++;
          if (ch == ';') {
            iState = 10;
            break;
          }
          // Bytes that are not decimal digits are skipped silently.
          if (g_FXCRT_XML_IsDigital(ch))
            code = code * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          break;
        case 4:
          m_dwIndex++;
          if (ch == ';') {
            iState = 10;
            break;
          }
          // Bytes that are not hex digits (nHex == 0) are skipped silently.
          uint8_t nHex =
              g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;
          if (nHex) {
            if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
              code =
                  (code << 4) + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
            } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
              // 'a' is 97, so subtracting 87 maps 'a'..'f' to 10..15.
              code = (code << 4) + ch - 87;
            } else {
              // 'A' is 65, so subtracting 55 maps 'A'..'F' to 10..15.
              code = (code << 4) + ch - 55;
            }
          }
          break;
      }
      if (iState == 10)
        break;
    }
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
      break;
    }
  } while (ReadNextBlock());
  return code;
}
    402 
// Reads a quoted attribute value at the cursor into |value|.
//
// The first byte must be a single or double quote; if it is not, or if the
// parser is already at end of input, the function returns without touching
// |value| and without consuming anything. Otherwise bytes are fed to a UTF-8
// decoder until the matching quote, which is consumed as well. A '&' starts a
// reference that is decoded by GetCharRef(). If the input ends before the
// closing quote, |value| receives what was decoded so far.
void CXML_Parser::GetAttrValue(CFX_WideString& value) {
  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
  if (IsEOF())
    return;

  CFX_UTF8Decoder decoder;
  // |mark| holds the opening quote character once it has been seen.
  uint8_t mark = 0, ch = 0;
  do {
    while (m_dwIndex < m_dwBufferSize) {
      ch = m_pBuffer[m_dwIndex];
      if (mark == 0) {
        if (ch != '\'' && ch != '"')
          return;

        mark = ch;
        m_dwIndex++;
        // Reset |ch| so that the opening quote is not mistaken for the
        // closing one by the "ch == mark" test after the loop.
        ch = 0;
        continue;
      }
      m_dwIndex++;
      if (ch == mark)
        break;

      if (ch == '&') {
        decoder.AppendChar(GetCharRef());
        if (IsEOF()) {
          value = decoder.GetResult();
          return;
        }
      } else {
        decoder.Input(ch);
      }
    }
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF())
      break;
  } while (ReadNextBlock());
  value = decoder.GetResult();
}
    442 
// Finds the next tag and reads its name.
//
// |bStartTag| - true when the '<' of the tag has already been consumed by the
//               caller; false to scan forward for the next '<' first.
// |bEndTag|   - set to true for a closing tag ("</name"), false otherwise.
// |space|, |name| - receive the prefix and local name through GetName().
//
// Constructs starting with "<?" are skipped up to "?>", constructs starting
// with "<!" are skipped up to "-->"; scanning then resumes. At end of input
// on entry the function returns without writing any of the outputs; if the
// input runs out while scanning, |space| and |name| are left untouched.
void CXML_Parser::GetTagName(bool bStartTag,
                             bool* bEndTag,
                             CFX_ByteString* space,
                             CFX_ByteString* name) {
  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
  if (IsEOF())
    return;

  *bEndTag = false;
  uint8_t ch;
  // State 0: looking for '<'. State 1: positioned right after '<'.
  int32_t iState = bStartTag ? 1 : 0;
  do {
    while (m_dwIndex < m_dwBufferSize) {
      ch = m_pBuffer[m_dwIndex];
      switch (iState) {
        case 0:
          m_dwIndex++;
          if (ch != '<')
            break;

          iState = 1;
          break;
        case 1:
          if (ch == '?') {
            m_dwIndex++;
            SkipLiterals("?>");
            iState = 0;
            break;
          }
          if (ch == '!') {
            // Every "<!" construct is treated like a comment here.
            m_dwIndex++;
            SkipLiterals("-->");
            iState = 0;
            break;
          }
          if (ch == '/') {
            m_dwIndex++;
            GetName(space, name);
            *bEndTag = true;
          } else {
            GetName(space, name);
            *bEndTag = false;
          }
          return;
      }
    }
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    if (m_dwIndex < m_dwBufferSize || IsEOF())
      break;
  } while (ReadNextBlock());
}
    494 
// Parses one element, including its attributes, text content and child
// elements (by recursion), starting at the cursor.
//
// |pParent|   - recorded as the parent of the new element; may be null.
// |bStartTag| - true when the '<' of the start tag has already been consumed.
//
// Returns the parsed element, or null when the parser is at end of input,
// when the next tag is a closing tag or has an empty name, or when the byte
// following the attributes is neither '/' nor '>'. If the input ends early,
// the element built so far is returned.
std::unique_ptr<CXML_Element> CXML_Parser::ParseElement(CXML_Element* pParent,
                                                        bool bStartTag) {
  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
  if (IsEOF())
    return nullptr;

  CFX_ByteString tag_name;
  CFX_ByteString tag_space;
  bool bEndTag;
  GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name);
  if (tag_name.IsEmpty() || bEndTag)
    return nullptr;

  auto pElement = pdfium::MakeUnique<CXML_Element>(
      pParent, tag_space.AsStringC(), tag_name.AsStringC());

  // Attribute list: a sequence of name = "value" items. The loop stops at the
  // first byte that cannot start a name or when an '=' is missing.
  do {
    CFX_ByteString attr_space;
    CFX_ByteString attr_name;
    while (m_dwIndex < m_dwBufferSize) {
      SkipWhiteSpaces();
      if (IsEOF())
        break;

      if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex]))
        break;

      GetName(&attr_space, &attr_name);
      SkipWhiteSpaces();
      if (IsEOF())
        break;

      if (m_pBuffer[m_dwIndex] != '=')
        break;

      m_dwIndex++;
      SkipWhiteSpaces();
      if (IsEOF())
        break;

      CFX_WideString attr_value;
      GetAttrValue(attr_value);
      pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value);
    }
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    if (m_dwIndex < m_dwBufferSize || IsEOF())
      break;
  } while (ReadNextBlock());
  SkipWhiteSpaces();
  if (IsEOF())
    return pElement;

  uint8_t ch = m_pBuffer[m_dwIndex++];
  if (ch == '/') {
    // Self-closing tag. The byte after '/' (presumably '>') is skipped
    // without being inspected.
    m_dwIndex++;
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    return pElement;
  }
  if (ch != '>') {
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    return nullptr;
  }
  SkipWhiteSpaces();
  if (IsEOF())
    return pElement;

  // Element content. States of the scanner:
  //    0 - character data
  //    1 - just after '<'
  //    2 - just after "<!"
  //   10 - the closing tag of this element has been consumed
  CFX_UTF8Decoder decoder;
  CFX_WideTextBuf content;
  bool bCDATA = false;  // Never set to true anywhere in this function.
  int32_t iState = 0;
  do {
    while (m_dwIndex < m_dwBufferSize) {
      ch = m_pBuffer[m_dwIndex++];
      switch (iState) {
        case 0:
          if (ch == '<') {
            iState = 1;
          } else if (ch == '&') {
            decoder.ClearStatus();
            decoder.AppendChar(GetCharRef());
          } else {
            decoder.Input(ch);
          }
          break;
        case 1:
          if (ch == '!') {
            iState = 2;
          } else if (ch == '?') {
            SkipLiterals("?>");
            SkipWhiteSpaces();
            iState = 0;
          } else if (ch == '/') {
            // Closing tag. Its name is read but not compared with the name of
            // the element being parsed.
            CFX_ByteString space;
            CFX_ByteString name;
            GetName(&space, &name);
            SkipWhiteSpaces();
            // Step over the byte that ends the closing tag (presumably '>').
            m_dwIndex++;
            iState = 10;
          } else {
            // Start of a child element: flush the text collected so far as a
            // content segment, then parse the child recursively.
            content << decoder.GetResult();
            CFX_WideString dataStr = content.MakeString();
            if (!bCDATA)
              dataStr.TrimRight(L" \t\r\n");

            InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get());
            content.Clear();
            decoder.Clear();
            bCDATA = false;
            iState = 0;
            // Put back the first byte of the tag name for the recursive call.
            m_dwIndex--;
            std::unique_ptr<CXML_Element> pSubElement(
                ParseElement(pElement.get(), true));
            // This break only leaves the switch; scanning continues in
            // state 0.
            if (!pSubElement)
              break;

            // The child record takes over ownership of the raw pointer; it is
            // deleted again in CXML_Element::RemoveChildren().
            pElement->m_Children.push_back(
                {CXML_Element::Element, pSubElement.release()});
            SkipWhiteSpaces();
          }
          break;
        case 2:
          if (ch == '[') {
            // "<![" ... "]]>" is skipped as a whole; its text is not added to
            // the element content.
            SkipLiterals("]]>");
          } else if (ch == '-') {
            m_dwIndex++;
            SkipLiterals("-->");
          } else {
            SkipLiterals(">");
          }
          decoder.Clear();
          SkipWhiteSpaces();
          iState = 0;
          break;
      }
      if (iState == 10) {
        break;
      }
    }
    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
    if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF())
      break;
  } while (ReadNextBlock());

  // Flush the text that followed the last child element, if any.
  content << decoder.GetResult();
  CFX_WideString dataStr = content.MakeString();
  dataStr.TrimRight(L" \t\r\n");

  InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get());
  content.Clear();
  decoder.Clear();
  bCDATA = false;
  return pElement;
}
    646 
    647 void CXML_Parser::InsertContentSegment(bool bCDATA,
    648                                        const CFX_WideStringC& content,
    649                                        CXML_Element* pElement) {
    650   if (content.IsEmpty())
    651     return;
    652 
    653   CXML_Content* pContent = new CXML_Content;
    654   pContent->Set(bCDATA, content);
    655   pElement->m_Children.push_back({CXML_Element::Content, pContent});
    656 }
    657 
    658 std::unique_ptr<CXML_Element> CXML_Element::Parse(const void* pBuffer,
    659                                                   size_t size) {
    660   CXML_Parser parser;
    661   if (!parser.Init(static_cast<const uint8_t*>(pBuffer), size))
    662     return nullptr;
    663   return parser.ParseElement(nullptr, false);
    664 }
    665 
// Creates an element without attributes or children. |pParent| is only
// recorded, and may be null for a root element; |qSpace| and |tagname|
// initialize the namespace prefix and the tag name.
CXML_Element::CXML_Element(const CXML_Element* pParent,
                           const CFX_ByteStringC& qSpace,
                           const CFX_ByteStringC& tagname)
    : m_pParent(pParent), m_QSpaceName(qSpace), m_TagName(tagname) {}
    670 
// Deletes all children (content segments and sub-elements) via Empty().
CXML_Element::~CXML_Element() {
  Empty();
}
    674 
// Removes and deletes all children. Attributes are not touched here.
void CXML_Element::Empty() {
  RemoveChildren();
}
    678 void CXML_Element::RemoveChildren() {
    679   for (const ChildRecord& record : m_Children) {
    680     if (record.type == Content) {
    681       delete static_cast<CXML_Content*>(record.child);
    682     } else if (record.type == Element) {
    683       CXML_Element* child = static_cast<CXML_Element*>(record.child);
    684       child->RemoveChildren();
    685       delete child;
    686     }
    687   }
    688   m_Children.clear();
    689 }
    690 CFX_ByteString CXML_Element::GetTagName(bool bQualified) const {
    691   if (!bQualified || m_QSpaceName.IsEmpty()) {
    692     return m_TagName;
    693   }
    694   CFX_ByteString bsTag = m_QSpaceName;
    695   bsTag += ":";
    696   bsTag += m_TagName;
    697   return bsTag;
    698 }
    699 
    700 CFX_ByteString CXML_Element::GetNamespace(bool bQualified) const {
    701   return bQualified ? m_QSpaceName : GetNamespaceURI(m_QSpaceName);
    702 }
    703 
    704 CFX_ByteString CXML_Element::GetNamespaceURI(
    705     const CFX_ByteString& qName) const {
    706   const CFX_WideString* pwsSpace;
    707   const CXML_Element* pElement = this;
    708   do {
    709     if (qName.IsEmpty())
    710       pwsSpace = pElement->m_AttrMap.Lookup("", "xmlns");
    711     else
    712       pwsSpace = pElement->m_AttrMap.Lookup("xmlns", qName);
    713     if (pwsSpace)
    714       break;
    715 
    716     pElement = pElement->GetParent();
    717   } while (pElement);
    718   return pwsSpace ? pwsSpace->UTF8Encode() : CFX_ByteString();
    719 }
    720 
    721 void CXML_Element::GetAttrByIndex(int index,
    722                                   CFX_ByteString& space,
    723                                   CFX_ByteString& name,
    724                                   CFX_WideString& value) const {
    725   if (index < 0 || index >= m_AttrMap.GetSize())
    726     return;
    727 
    728   CXML_AttrItem& item = m_AttrMap.GetAt(index);
    729   space = item.m_QSpaceName;
    730   name = item.m_AttrName;
    731   value = item.m_Value;
    732 }
    733 
    734 bool CXML_Element::HasAttr(const CFX_ByteStringC& name) const {
    735   CFX_ByteStringC bsSpace;
    736   CFX_ByteStringC bsName;
    737   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    738   return !!m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName));
    739 }
    740 
    741 bool CXML_Element::GetAttrValue(const CFX_ByteStringC& name,
    742                                 CFX_WideString& attribute) const {
    743   CFX_ByteStringC bsSpace;
    744   CFX_ByteStringC bsName;
    745   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    746   return GetAttrValue(bsSpace, bsName, attribute);
    747 }
    748 
    749 bool CXML_Element::GetAttrValue(const CFX_ByteStringC& space,
    750                                 const CFX_ByteStringC& name,
    751                                 CFX_WideString& attribute) const {
    752   const CFX_WideString* pValue =
    753       m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name));
    754   if (!pValue)
    755     return false;
    756 
    757   attribute = *pValue;
    758   return true;
    759 }
    760 
    761 bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& name,
    762                                   int& attribute) const {
    763   CFX_ByteStringC bsSpace;
    764   CFX_ByteStringC bsName;
    765   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    766   const CFX_WideString* pwsValue =
    767       m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName));
    768   if (!pwsValue)
    769     return false;
    770 
    771   attribute = pwsValue->GetInteger();
    772   return true;
    773 }
    774 
    775 bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& space,
    776                                   const CFX_ByteStringC& name,
    777                                   int& attribute) const {
    778   const CFX_WideString* pwsValue =
    779       m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name));
    780   if (!pwsValue)
    781     return false;
    782 
    783   attribute = pwsValue->GetInteger();
    784   return true;
    785 }
    786 
    787 bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& name,
    788                                 FX_FLOAT& attribute) const {
    789   CFX_ByteStringC bsSpace;
    790   CFX_ByteStringC bsName;
    791   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    792   return GetAttrFloat(bsSpace, bsName, attribute);
    793 }
    794 
    795 bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& space,
    796                                 const CFX_ByteStringC& name,
    797                                 FX_FLOAT& attribute) const {
    798   const CFX_WideString* pValue =
    799       m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name));
    800   if (!pValue)
    801     return false;
    802 
    803   attribute = pValue->GetFloat();
    804   return true;
    805 }
    806 
    807 CXML_Element::ChildType CXML_Element::GetChildType(uint32_t index) const {
    808   return index < m_Children.size() ? m_Children[index].type : Invalid;
    809 }
    810 
    811 CFX_WideString CXML_Element::GetContent(uint32_t index) const {
    812   if (index < m_Children.size() && m_Children[index].type == Content) {
    813     CXML_Content* pContent =
    814         static_cast<CXML_Content*>(m_Children[index].child);
    815     if (pContent)
    816       return pContent->m_Content;
    817   }
    818   return CFX_WideString();
    819 }
    820 
    821 CXML_Element* CXML_Element::GetElement(uint32_t index) const {
    822   if (index < m_Children.size() && m_Children[index].type == Element)
    823     return static_cast<CXML_Element*>(m_Children[index].child);
    824   return nullptr;
    825 }
    826 
    827 uint32_t CXML_Element::CountElements(const CFX_ByteStringC& space,
    828                                      const CFX_ByteStringC& tag) const {
    829   int count = 0;
    830   for (const ChildRecord& record : m_Children) {
    831     if (record.type != Element)
    832       continue;
    833 
    834     CXML_Element* pKid = static_cast<CXML_Element*>(record.child);
    835     if ((space.IsEmpty() || pKid->m_QSpaceName == space) &&
    836         pKid->m_TagName == tag) {
    837       count++;
    838     }
    839   }
    840   return count;
    841 }
    842 
    843 CXML_Element* CXML_Element::GetElement(const CFX_ByteStringC& space,
    844                                        const CFX_ByteStringC& tag,
    845                                        int index) const {
    846   if (index < 0)
    847     return nullptr;
    848 
    849   for (const ChildRecord& record : m_Children) {
    850     if (record.type != Element)
    851       continue;
    852 
    853     CXML_Element* pKid = static_cast<CXML_Element*>(record.child);
    854     if ((space.IsEmpty() || pKid->m_QSpaceName == space) &&
    855         pKid->m_TagName == tag) {
    856       if (index-- == 0)
    857         return pKid;
    858     }
    859   }
    860   return nullptr;
    861 }
    862 
    863 uint32_t CXML_Element::FindElement(CXML_Element* pChild) const {
    864   int index = 0;
    865   for (const ChildRecord& record : m_Children) {
    866     if (record.type == Element &&
    867         static_cast<CXML_Element*>(record.child) == pChild) {
    868       return index;
    869     }
    870     ++index;
    871   }
    872   return (uint32_t)-1;
    873 }
    874 
    875 bool CXML_AttrItem::Matches(const CFX_ByteString& space,
    876                             const CFX_ByteString& name) const {
    877   return (space.IsEmpty() || m_QSpaceName == space) && m_AttrName == name;
    878 }
    879 
    880 CXML_AttrMap::CXML_AttrMap() {}
    881 
    882 CXML_AttrMap::~CXML_AttrMap() {}
    883 
    884 const CFX_WideString* CXML_AttrMap::Lookup(const CFX_ByteString& space,
    885                                            const CFX_ByteString& name) const {
    886   if (!m_pMap)
    887     return nullptr;
    888 
    889   for (const auto& item : *m_pMap) {
    890     if (item.Matches(space, name))
    891       return &item.m_Value;
    892   }
    893   return nullptr;
    894 }
    895 
    896 void CXML_AttrMap::SetAt(const CFX_ByteString& space,
    897                          const CFX_ByteString& name,
    898                          const CFX_WideString& value) {
    899   if (!m_pMap)
    900     m_pMap = pdfium::MakeUnique<std::vector<CXML_AttrItem>>();
    901 
    902   for (CXML_AttrItem& item : *m_pMap) {
    903     if (item.Matches(space, name)) {
    904       item.m_Value = value;
    905       return;
    906     }
    907   }
    908 
    909   m_pMap->push_back({space, name, CFX_WideString(value)});
    910 }
    911 
    912 int CXML_AttrMap::GetSize() const {
    913   return m_pMap ? pdfium::CollectionSize<int>(*m_pMap) : 0;
    914 }
    915 
    916 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const {
    917   return (*m_pMap)[index];
    918 }
    919