Home | History | Annotate | Download | only in xml
      1 // Copyright 2017 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
      8 
      9 #include <algorithm>
     10 #include <cwctype>
     11 #include <iterator>
     12 
     13 #include "core/fxcrt/fx_extension.h"
     14 #include "core/fxcrt/fx_safe_types.h"
     15 
     16 namespace {
     17 
     18 const uint32_t kMaxCharRange = 0x10ffff;
     19 
     20 bool IsXMLWhiteSpace(wchar_t ch) {
     21   return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
     22 }
     23 
     24 struct FX_XMLNAMECHAR {
     25   uint16_t wStart;
     26   uint16_t wEnd;
     27   bool bStartChar;
     28 };
     29 
     30 const FX_XMLNAMECHAR g_XMLNameChars[] = {
     31     {L'-', L'.', false},    {L'0', L'9', false},     {L':', L':', false},
     32     {L'A', L'Z', true},     {L'_', L'_', true},      {L'a', L'z', true},
     33     {0xB7, 0xB7, false},    {0xC0, 0xD6, true},      {0xD8, 0xF6, true},
     34     {0xF8, 0x02FF, true},   {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
     35     {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true},  {0x203F, 0x2040, false},
     36     {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true},  {0x3001, 0xD7FF, true},
     37     {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
     38 };
     39 
     40 
     41 int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
     42                             FX_FILESIZE iSrcLen) {
     43   uint32_t unicode = 0;
     44   int32_t iDstNum = 0;
     45   const wchar_t* pSrc = src.data();
     46   while (iSrcLen-- > 0) {
     47     unicode = *pSrc++;
     48     int nbytes = 0;
     49     if ((uint32_t)unicode < 0x80) {
     50       nbytes = 1;
     51     } else if ((uint32_t)unicode < 0x800) {
     52       nbytes = 2;
     53     } else if ((uint32_t)unicode < 0x10000) {
     54       nbytes = 3;
     55     } else if ((uint32_t)unicode < 0x200000) {
     56       nbytes = 4;
     57     } else if ((uint32_t)unicode < 0x4000000) {
     58       nbytes = 5;
     59     } else {
     60       nbytes = 6;
     61     }
     62     iDstNum += nbytes;
     63   }
     64   return iDstNum;
     65 }
     66 
     67 }  // namespace
     68 
     69 // static
     70 bool CFX_XMLSyntaxParser::IsXMLNameChar(wchar_t ch, bool bFirstChar) {
     71   auto* it = std::lower_bound(
     72       std::begin(g_XMLNameChars), std::end(g_XMLNameChars), ch,
     73       [](const FX_XMLNAMECHAR& arg, wchar_t ch) { return arg.wEnd < ch; });
     74   return it != std::end(g_XMLNameChars) && ch >= it->wStart &&
     75          (!bFirstChar || it->bStartChar);
     76 }
     77 
     78 CFX_XMLSyntaxParser::CFX_XMLSyntaxParser(
     79     const RetainPtr<CFX_SeekableStreamProxy>& pStream)
     80     : m_pStream(pStream),
     81       m_iXMLPlaneSize(32 * 1024),
     82       m_iCurrentPos(0),
     83       m_iCurrentNodeNum(-1),
     84       m_iLastNodeNum(-1),
     85       m_iParsedBytes(0),
     86       m_ParsedChars(0),
     87       m_iBufferChars(0),
     88       m_bEOS(false),
     89       m_Start(0),
     90       m_End(0),
     91       m_iAllocStep(m_BlockBuffer.GetAllocStep()),
     92       m_pCurrentBlock(nullptr),
     93       m_iIndexInBlock(0),
     94       m_iTextDataLength(0),
     95       m_syntaxParserResult(FX_XmlSyntaxResult::None),
     96       m_syntaxParserState(FDE_XmlSyntaxState::Text),
     97       m_wQuotationMark(0),
     98       m_iEntityStart(-1) {
     99   ASSERT(pStream);
    100 
    101   m_CurNode.iNodeNum = -1;
    102   m_CurNode.eNodeType = FX_XMLNODE_Unknown;
    103 
    104   m_iXMLPlaneSize =
    105       std::min(m_iXMLPlaneSize,
    106                pdfium::base::checked_cast<size_t>(m_pStream->GetLength()));
    107   m_iCurrentPos = m_pStream->GetBOMLength();
    108 
    109   FX_SAFE_SIZE_T alloc_size_safe = m_iXMLPlaneSize;
    110   alloc_size_safe += 1;  // For NUL.
    111   if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
    112     m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    113     return;
    114   }
    115 
    116   m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
    117 
    118   m_BlockBuffer.InitBuffer();
    119   std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    120       m_BlockBuffer.GetAvailableBlock();
    121 }
    122 
    123 CFX_XMLSyntaxParser::~CFX_XMLSyntaxParser() {}
    124 
    125 FX_XmlSyntaxResult CFX_XMLSyntaxParser::DoSyntaxParse() {
    126   if (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
    127       m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) {
    128     return m_syntaxParserResult;
    129   }
    130 
    131   FX_FILESIZE iStreamLength = m_pStream->GetLength();
    132   FX_FILESIZE iPos;
    133 
    134   FX_XmlSyntaxResult syntaxParserResult = FX_XmlSyntaxResult::None;
    135   while (true) {
    136     if (m_Start >= m_End) {
    137       if (m_bEOS || m_iCurrentPos >= iStreamLength) {
    138         m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
    139         return m_syntaxParserResult;
    140       }
    141       m_ParsedChars += m_End;
    142       m_iParsedBytes = m_iCurrentPos;
    143       if (m_pStream->GetPosition() != m_iCurrentPos)
    144         m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos);
    145 
    146       m_iBufferChars =
    147           m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
    148       iPos = m_pStream->GetPosition();
    149       if (m_iBufferChars < 1) {
    150         m_iCurrentPos = iStreamLength;
    151         m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
    152         return m_syntaxParserResult;
    153       }
    154       m_iCurrentPos = iPos;
    155       m_Start = 0;
    156       m_End = m_iBufferChars;
    157     }
    158 
    159     while (m_Start < m_End) {
    160       wchar_t ch = m_Buffer[m_Start];
    161       switch (m_syntaxParserState) {
    162         case FDE_XmlSyntaxState::Text:
    163           if (ch == L'<') {
    164             if (!m_BlockBuffer.IsEmpty()) {
    165               m_iTextDataLength = m_BlockBuffer.GetDataLength();
    166               m_BlockBuffer.Reset(true);
    167               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    168                   m_BlockBuffer.GetAvailableBlock();
    169               m_iEntityStart = -1;
    170               syntaxParserResult = FX_XmlSyntaxResult::Text;
    171             } else {
    172               m_Start++;
    173               m_syntaxParserState = FDE_XmlSyntaxState::Node;
    174             }
    175           } else {
    176             ParseTextChar(ch);
    177           }
    178           break;
    179         case FDE_XmlSyntaxState::Node:
    180           if (ch == L'!') {
    181             m_Start++;
    182             m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
    183           } else if (ch == L'/') {
    184             m_Start++;
    185             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
    186           } else if (ch == L'?') {
    187             m_iLastNodeNum++;
    188             m_iCurrentNodeNum = m_iLastNodeNum;
    189             m_CurNode.iNodeNum = m_iLastNodeNum;
    190             m_CurNode.eNodeType = FX_XMLNODE_Instruction;
    191             m_XMLNodeStack.push(m_CurNode);
    192             m_Start++;
    193             m_syntaxParserState = FDE_XmlSyntaxState::Target;
    194             syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen;
    195           } else {
    196             m_iLastNodeNum++;
    197             m_iCurrentNodeNum = m_iLastNodeNum;
    198             m_CurNode.iNodeNum = m_iLastNodeNum;
    199             m_CurNode.eNodeType = FX_XMLNODE_Element;
    200             m_XMLNodeStack.push(m_CurNode);
    201             m_syntaxParserState = FDE_XmlSyntaxState::Tag;
    202             syntaxParserResult = FX_XmlSyntaxResult::ElementOpen;
    203           }
    204           break;
    205         case FDE_XmlSyntaxState::Target:
    206         case FDE_XmlSyntaxState::Tag:
    207           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
    208             if (m_BlockBuffer.IsEmpty()) {
    209               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    210               return m_syntaxParserResult;
    211             }
    212 
    213             m_iTextDataLength = m_BlockBuffer.GetDataLength();
    214             m_BlockBuffer.Reset(true);
    215             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    216                 m_BlockBuffer.GetAvailableBlock();
    217             if (m_syntaxParserState != FDE_XmlSyntaxState::Target)
    218               syntaxParserResult = FX_XmlSyntaxResult::TagName;
    219             else
    220               syntaxParserResult = FX_XmlSyntaxResult::TargetName;
    221 
    222             m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
    223           } else {
    224             if (m_iIndexInBlock == m_iAllocStep) {
    225               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    226                   m_BlockBuffer.GetAvailableBlock();
    227               if (!m_pCurrentBlock) {
    228                 return FX_XmlSyntaxResult::Error;
    229               }
    230             }
    231             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    232             m_BlockBuffer.IncrementDataLength();
    233             m_Start++;
    234           }
    235           break;
    236         case FDE_XmlSyntaxState::AttriName:
    237           if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) {
    238             m_Start++;
    239             break;
    240           }
    241           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
    242             if (m_BlockBuffer.IsEmpty()) {
    243               if (m_CurNode.eNodeType == FX_XMLNODE_Element) {
    244                 if (ch == L'>' || ch == L'/') {
    245                   m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
    246                   break;
    247                 }
    248               } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
    249                 if (ch == L'?') {
    250                   m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
    251                   m_Start++;
    252                 } else {
    253                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
    254                 }
    255                 break;
    256               }
    257               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    258               return m_syntaxParserResult;
    259             } else {
    260               if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
    261                 if (ch != '=' && !IsXMLWhiteSpace(ch)) {
    262                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
    263                   break;
    264                 }
    265               }
    266               m_iTextDataLength = m_BlockBuffer.GetDataLength();
    267               m_BlockBuffer.Reset(true);
    268               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    269                   m_BlockBuffer.GetAvailableBlock();
    270               m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
    271               syntaxParserResult = FX_XmlSyntaxResult::AttriName;
    272             }
    273           } else {
    274             if (m_iIndexInBlock == m_iAllocStep) {
    275               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    276                   m_BlockBuffer.GetAvailableBlock();
    277               if (!m_pCurrentBlock) {
    278                 return FX_XmlSyntaxResult::Error;
    279               }
    280             }
    281             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    282             m_BlockBuffer.IncrementDataLength();
    283             m_Start++;
    284           }
    285           break;
    286         case FDE_XmlSyntaxState::AttriEqualSign:
    287           if (IsXMLWhiteSpace(ch)) {
    288             m_Start++;
    289             break;
    290           }
    291           if (ch != L'=') {
    292             if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
    293               m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
    294               break;
    295             }
    296             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    297             return m_syntaxParserResult;
    298           } else {
    299             m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
    300             m_Start++;
    301           }
    302           break;
    303         case FDE_XmlSyntaxState::AttriQuotation:
    304           if (IsXMLWhiteSpace(ch)) {
    305             m_Start++;
    306             break;
    307           }
    308           if (ch != L'\"' && ch != L'\'') {
    309             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    310             return m_syntaxParserResult;
    311           } else {
    312             m_wQuotationMark = ch;
    313             m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
    314             m_Start++;
    315           }
    316           break;
    317         case FDE_XmlSyntaxState::AttriValue:
    318           if (ch == m_wQuotationMark) {
    319             if (m_iEntityStart > -1) {
    320               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    321               return m_syntaxParserResult;
    322             }
    323             m_iTextDataLength = m_BlockBuffer.GetDataLength();
    324             m_wQuotationMark = 0;
    325             m_BlockBuffer.Reset(true);
    326             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    327                 m_BlockBuffer.GetAvailableBlock();
    328             m_Start++;
    329             m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
    330             syntaxParserResult = FX_XmlSyntaxResult::AttriValue;
    331           } else {
    332             ParseTextChar(ch);
    333           }
    334           break;
    335         case FDE_XmlSyntaxState::CloseInstruction:
    336           if (ch != L'>') {
    337             if (m_iIndexInBlock == m_iAllocStep) {
    338               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    339                   m_BlockBuffer.GetAvailableBlock();
    340               if (!m_pCurrentBlock) {
    341                 return FX_XmlSyntaxResult::Error;
    342               }
    343             }
    344             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    345             m_BlockBuffer.IncrementDataLength();
    346             m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
    347           } else if (!m_BlockBuffer.IsEmpty()) {
    348             m_iTextDataLength = m_BlockBuffer.GetDataLength();
    349             m_BlockBuffer.Reset(true);
    350             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    351                 m_BlockBuffer.GetAvailableBlock();
    352             syntaxParserResult = FX_XmlSyntaxResult::TargetData;
    353           } else {
    354             m_Start++;
    355             if (m_XMLNodeStack.empty()) {
    356               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    357               return m_syntaxParserResult;
    358             }
    359             m_XMLNodeStack.pop();
    360             if (!m_XMLNodeStack.empty()) {
    361               m_CurNode = m_XMLNodeStack.top();
    362             } else {
    363               m_CurNode.iNodeNum = -1;
    364               m_CurNode.eNodeType = FX_XMLNODE_Unknown;
    365             }
    366             m_iCurrentNodeNum = m_CurNode.iNodeNum;
    367             m_BlockBuffer.Reset(true);
    368             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    369                 m_BlockBuffer.GetAvailableBlock();
    370             m_syntaxParserState = FDE_XmlSyntaxState::Text;
    371             syntaxParserResult = FX_XmlSyntaxResult::InstructionClose;
    372           }
    373           break;
    374         case FDE_XmlSyntaxState::BreakElement:
    375           if (ch == L'>') {
    376             m_syntaxParserState = FDE_XmlSyntaxState::Text;
    377             syntaxParserResult = FX_XmlSyntaxResult::ElementBreak;
    378           } else if (ch == L'/') {
    379             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
    380           } else {
    381             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    382             return m_syntaxParserResult;
    383           }
    384           m_Start++;
    385           break;
    386         case FDE_XmlSyntaxState::CloseElement:
    387           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
    388             if (ch == L'>') {
    389               if (m_XMLNodeStack.empty()) {
    390                 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    391                 return m_syntaxParserResult;
    392               }
    393               m_XMLNodeStack.pop();
    394               if (!m_XMLNodeStack.empty()) {
    395                 m_CurNode = m_XMLNodeStack.top();
    396               } else {
    397                 m_CurNode.iNodeNum = -1;
    398                 m_CurNode.eNodeType = FX_XMLNODE_Unknown;
    399               }
    400               m_iCurrentNodeNum = m_CurNode.iNodeNum;
    401               m_iTextDataLength = m_BlockBuffer.GetDataLength();
    402               m_BlockBuffer.Reset(true);
    403               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    404                   m_BlockBuffer.GetAvailableBlock();
    405               m_syntaxParserState = FDE_XmlSyntaxState::Text;
    406               syntaxParserResult = FX_XmlSyntaxResult::ElementClose;
    407             } else if (!IsXMLWhiteSpace(ch)) {
    408               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    409               return m_syntaxParserResult;
    410             }
    411           } else {
    412             if (m_iIndexInBlock == m_iAllocStep) {
    413               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    414                   m_BlockBuffer.GetAvailableBlock();
    415               if (!m_pCurrentBlock) {
    416                 return FX_XmlSyntaxResult::Error;
    417               }
    418             }
    419             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    420             m_BlockBuffer.IncrementDataLength();
    421           }
    422           m_Start++;
    423           break;
    424         case FDE_XmlSyntaxState::SkipCommentOrDecl:
    425           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) {
    426             m_Start += 2;
    427             m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
    428           } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) ==
    429                      0) {
    430             m_Start += 7;
    431             m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
    432           } else {
    433             m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
    434             m_SkipChar = L'>';
    435             m_SkipStack.push(L'>');
    436           }
    437           break;
    438         case FDE_XmlSyntaxState::SkipCData: {
    439           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) {
    440             m_Start += 3;
    441             syntaxParserResult = FX_XmlSyntaxResult::CData;
    442             m_iTextDataLength = m_BlockBuffer.GetDataLength();
    443             m_BlockBuffer.Reset(true);
    444             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    445                 m_BlockBuffer.GetAvailableBlock();
    446             m_syntaxParserState = FDE_XmlSyntaxState::Text;
    447           } else {
    448             if (m_iIndexInBlock == m_iAllocStep) {
    449               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    450                   m_BlockBuffer.GetAvailableBlock();
    451               if (!m_pCurrentBlock)
    452                 return FX_XmlSyntaxResult::Error;
    453             }
    454             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    455             m_BlockBuffer.IncrementDataLength();
    456             m_Start++;
    457           }
    458           break;
    459         }
    460         case FDE_XmlSyntaxState::SkipDeclNode:
    461           if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
    462             m_Start++;
    463             if (ch != m_SkipChar)
    464               break;
    465 
    466             m_SkipStack.pop();
    467             if (m_SkipStack.empty())
    468               m_syntaxParserState = FDE_XmlSyntaxState::Text;
    469             else
    470               m_SkipChar = m_SkipStack.top();
    471           } else {
    472             switch (ch) {
    473               case L'<':
    474                 m_SkipChar = L'>';
    475                 m_SkipStack.push(L'>');
    476                 break;
    477               case L'[':
    478                 m_SkipChar = L']';
    479                 m_SkipStack.push(L']');
    480                 break;
    481               case L'(':
    482                 m_SkipChar = L')';
    483                 m_SkipStack.push(L')');
    484                 break;
    485               case L'\'':
    486                 m_SkipChar = L'\'';
    487                 m_SkipStack.push(L'\'');
    488                 break;
    489               case L'\"':
    490                 m_SkipChar = L'\"';
    491                 m_SkipStack.push(L'\"');
    492                 break;
    493               default:
    494                 if (ch == m_SkipChar) {
    495                   m_SkipStack.pop();
    496                   if (m_SkipStack.empty()) {
    497                     if (m_BlockBuffer.GetDataLength() >= 9)
    498                       (void)m_BlockBuffer.GetTextData(0, 7);
    499 
    500                     m_iTextDataLength = m_BlockBuffer.GetDataLength();
    501                     m_BlockBuffer.Reset(true);
    502                     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    503                         m_BlockBuffer.GetAvailableBlock();
    504                     m_syntaxParserState = FDE_XmlSyntaxState::Text;
    505                   } else {
    506                     m_SkipChar = m_SkipStack.top();
    507                   }
    508                 }
    509                 break;
    510             }
    511             if (!m_SkipStack.empty()) {
    512               if (m_iIndexInBlock == m_iAllocStep) {
    513                 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    514                     m_BlockBuffer.GetAvailableBlock();
    515                 if (!m_pCurrentBlock) {
    516                   return FX_XmlSyntaxResult::Error;
    517                 }
    518               }
    519               m_pCurrentBlock[m_iIndexInBlock++] = ch;
    520               m_BlockBuffer.IncrementDataLength();
    521             }
    522             m_Start++;
    523           }
    524           break;
    525         case FDE_XmlSyntaxState::SkipComment:
    526           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) {
    527             m_Start += 2;
    528             m_syntaxParserState = FDE_XmlSyntaxState::Text;
    529           }
    530 
    531           m_Start++;
    532           break;
    533         case FDE_XmlSyntaxState::TargetData:
    534           if (IsXMLWhiteSpace(ch)) {
    535             if (m_BlockBuffer.IsEmpty()) {
    536               m_Start++;
    537               break;
    538             }
    539             if (m_wQuotationMark == 0) {
    540               m_iTextDataLength = m_BlockBuffer.GetDataLength();
    541               m_wQuotationMark = 0;
    542               m_BlockBuffer.Reset(true);
    543               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    544                   m_BlockBuffer.GetAvailableBlock();
    545               m_Start++;
    546               syntaxParserResult = FX_XmlSyntaxResult::TargetData;
    547               break;
    548             }
    549           }
    550           if (ch == '?') {
    551             m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
    552             m_Start++;
    553           } else if (ch == '\"') {
    554             if (m_wQuotationMark == 0) {
    555               m_wQuotationMark = ch;
    556               m_Start++;
    557             } else if (ch == m_wQuotationMark) {
    558               m_iTextDataLength = m_BlockBuffer.GetDataLength();
    559               m_wQuotationMark = 0;
    560               m_BlockBuffer.Reset(true);
    561               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    562                   m_BlockBuffer.GetAvailableBlock();
    563               m_Start++;
    564               syntaxParserResult = FX_XmlSyntaxResult::TargetData;
    565             } else {
    566               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
    567               return m_syntaxParserResult;
    568             }
    569           } else {
    570             if (m_iIndexInBlock == m_iAllocStep) {
    571               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    572                   m_BlockBuffer.GetAvailableBlock();
    573               if (!m_pCurrentBlock) {
    574                 return FX_XmlSyntaxResult::Error;
    575               }
    576             }
    577             m_pCurrentBlock[m_iIndexInBlock++] = ch;
    578             m_BlockBuffer.IncrementDataLength();
    579             m_Start++;
    580           }
    581           break;
    582         default:
    583           break;
    584       }
    585       if (syntaxParserResult != FX_XmlSyntaxResult::None)
    586         return syntaxParserResult;
    587     }
    588   }
    589   return FX_XmlSyntaxResult::Text;
    590 }
    591 
    592 int32_t CFX_XMLSyntaxParser::GetStatus() const {
    593   if (!m_pStream)
    594     return -1;
    595 
    596   int32_t iStreamLength = m_pStream->GetLength();
    597   if (iStreamLength < 1)
    598     return 100;
    599 
    600   if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
    601     return -1;
    602 
    603   if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
    604     return 100;
    605   return m_iParsedBytes * 100 / iStreamLength;
    606 }
    607 
    608 FX_FILESIZE CFX_XMLSyntaxParser::GetCurrentBinaryPos() const {
    609   if (!m_pStream)
    610     return 0;
    611 
    612   int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
    613   return m_iParsedBytes + nDstLen;
    614 }
    615 
    616 void CFX_XMLSyntaxParser::ParseTextChar(wchar_t character) {
    617   if (m_iIndexInBlock == m_iAllocStep) {
    618     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    619         m_BlockBuffer.GetAvailableBlock();
    620     if (!m_pCurrentBlock)
    621       return;
    622   }
    623 
    624   m_pCurrentBlock[m_iIndexInBlock++] = character;
    625   m_BlockBuffer.IncrementDataLength();
    626   if (m_iEntityStart > -1 && character == L';') {
    627     WideString csEntity = m_BlockBuffer.GetTextData(
    628         m_iEntityStart + 1,
    629         m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1);
    630     int32_t iLen = csEntity.GetLength();
    631     if (iLen > 0) {
    632       if (csEntity[0] == L'#') {
    633         uint32_t ch = 0;
    634         wchar_t w;
    635         if (iLen > 1 && csEntity[1] == L'x') {
    636           for (int32_t i = 2; i < iLen; i++) {
    637             w = csEntity[i];
    638             if (std::iswdigit(w))
    639               ch = (ch << 4) + w - L'0';
    640             else if (w >= L'A' && w <= L'F')
    641               ch = (ch << 4) + w - 55;
    642             else if (w >= L'a' && w <= L'f')
    643               ch = (ch << 4) + w - 87;
    644             else
    645               break;
    646           }
    647         } else {
    648           for (int32_t i = 1; i < iLen; i++) {
    649             w = csEntity[i];
    650             if (!std::iswdigit(w))
    651               break;
    652             ch = ch * 10 + w - L'0';
    653           }
    654         }
    655         if (ch > kMaxCharRange)
    656           ch = ' ';
    657 
    658         character = static_cast<wchar_t>(ch);
    659         if (character != 0) {
    660           m_BlockBuffer.SetTextChar(m_iEntityStart, character);
    661           m_iEntityStart++;
    662         }
    663       } else {
    664         if (csEntity.Compare(L"amp") == 0) {
    665           m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
    666           m_iEntityStart++;
    667         } else if (csEntity.Compare(L"lt") == 0) {
    668           m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
    669           m_iEntityStart++;
    670         } else if (csEntity.Compare(L"gt") == 0) {
    671           m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
    672           m_iEntityStart++;
    673         } else if (csEntity.Compare(L"apos") == 0) {
    674           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
    675           m_iEntityStart++;
    676         } else if (csEntity.Compare(L"quot") == 0) {
    677           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
    678           m_iEntityStart++;
    679         }
    680       }
    681     }
    682     if (m_iEntityStart >= 0 &&
    683         m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) {
    684       m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() -
    685                                     m_iEntityStart);
    686     }
    687     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
    688         m_BlockBuffer.GetAvailableBlock();
    689     m_iEntityStart = -1;
    690   } else if (m_iEntityStart < 0 && character == L'&') {
    691     m_iEntityStart = m_BlockBuffer.GetDataLength() - 1;
    692   }
    693   m_Start++;
    694 }
    695