Home | History | Annotate | Download | only in fxcrt
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "../../include/fxcrt/fx_xml.h"
      8 #include "xml_int.h"
      9 CXML_Parser::~CXML_Parser()
     10 {
     11     if (m_bOwnedStream) {
     12         m_pDataAcc->Release();
     13     }
     14 }
     15 FX_BOOL CXML_Parser::Init(FX_LPBYTE pBuffer, size_t size)
     16 {
     17     if (m_pAllocator) {
     18         m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataBufAcc(pBuffer, size, m_pAllocator);
     19     } else {
     20         m_pDataAcc = FX_NEW CXML_DataBufAcc(pBuffer, size, NULL);
     21     }
     22     if (!m_pDataAcc) {
     23         return FALSE;
     24     }
     25     return Init(TRUE);
     26 }
     27 FX_BOOL CXML_Parser::Init(IFX_FileRead *pFileRead)
     28 {
     29     if (m_pAllocator) {
     30         m_pDataAcc = FX_NewAtAllocator(m_pAllocator)CXML_DataStmAcc(pFileRead, m_pAllocator);
     31     } else {
     32         m_pDataAcc = FX_NEW CXML_DataStmAcc(pFileRead, NULL);
     33     }
     34     if (!m_pDataAcc) {
     35         return FALSE;
     36     }
     37     return Init(TRUE);
     38 }
     39 FX_BOOL CXML_Parser::Init(IFX_BufferRead *pBuffer)
     40 {
     41     if (!pBuffer) {
     42         return FALSE;
     43     }
     44     m_pDataAcc = pBuffer;
     45     return Init(FALSE);
     46 }
     47 FX_BOOL CXML_Parser::Init(FX_BOOL bOwndedStream)
     48 {
     49     m_bOwnedStream = bOwndedStream;
     50     m_nOffset = 0;
     51     return ReadNextBlock();
     52 }
     53 FX_BOOL CXML_Parser::ReadNextBlock()
     54 {
     55     if (!m_pDataAcc->ReadNextBlock()) {
     56         return FALSE;
     57     }
     58     m_pBuffer = m_pDataAcc->GetBlockBuffer();
     59     m_dwBufferSize = m_pDataAcc->GetBlockSize();
     60     m_nBufferOffset = m_pDataAcc->GetBlockOffset();
     61     m_dwIndex = 0;
     62     return m_dwBufferSize > 0;
     63 }
     64 FX_BOOL CXML_Parser::IsEOF()
     65 {
     66     if (!m_pDataAcc->IsEOF()) {
     67         return FALSE;
     68     }
     69     return m_dwIndex >= m_dwBufferSize;
     70 }
// Bit flags stored per byte value in g_FXCRT_XML_ByteTypes below.
// HexChar (0x60) is used as a mask by g_FXCRT_XML_IsHexChar() and
// GetCharRef(); after masking, the table yields 0x20 for '0'-'9',
// 0x40 for 'a'-'f' and 0x60 for 'A'-'F'. HexUpperLetter therefore shares
// its numeric value with the HexChar mask.
#define FXCRTM_XML_CHARTYPE_Normal			0x00
#define FXCRTM_XML_CHARTYPE_SpaceChar		0x01
#define FXCRTM_XML_CHARTYPE_Letter			0x02
#define FXCRTM_XML_CHARTYPE_Digital			0x04
#define FXCRTM_XML_CHARTYPE_NameIntro		0x08
#define FXCRTM_XML_CHARTYPE_NameChar		0x10
#define FXCRTM_XML_CHARTYPE_HexDigital		0x20
#define FXCRTM_XML_CHARTYPE_HexLowerLetter	0x40
#define FXCRTM_XML_CHARTYPE_HexUpperLetter	0x60
#define FXCRTM_XML_CHARTYPE_HexChar			0x60
// Classification table indexed by byte value (16 entries per row).
//   0x00-0x20 and 0xFE-0xFF : SpaceChar
//   '-' and '.'             : NameChar only
//   '0'-'9'                 : Digital | NameChar | HexDigital (0x34)
//   ':'                     : NameIntro only (not NameChar)
//   'A'-'F' / 'a'-'f'       : Letter | NameIntro | NameChar plus hex bits
//   other ASCII letters, '_' and 0x80-0xFD : name characters
//     ('_' is NameIntro | NameChar without the Letter bit)
//   everything else         : Normal (0x00)
FX_BYTE g_FXCRT_XML_ByteTypes[256] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,
    0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,
    0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x01, 0x01,
};
     99 FX_BOOL g_FXCRT_XML_IsWhiteSpace(FX_BYTE ch)
    100 {
    101     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar) != 0;
    102 }
    103 FX_BOOL g_FXCRT_XML_IsLetter(FX_BYTE ch)
    104 {
    105     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Letter) != 0;
    106 }
    107 FX_BOOL g_FXCRT_XML_IsDigital(FX_BYTE ch)
    108 {
    109     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital) != 0;
    110 }
    111 FX_BOOL g_FXCRT_XML_IsNameIntro(FX_BYTE ch)
    112 {
    113     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro) != 0;
    114 }
    115 FX_BOOL g_FXCRT_XML_IsNameChar(FX_BYTE ch)
    116 {
    117     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar) != 0;
    118 }
    119 FX_BOOL g_FXCRT_XML_IsHexChar(FX_BYTE ch)
    120 {
    121     return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar) != 0;
    122 }
    123 void CXML_Parser::SkipWhiteSpaces()
    124 {
    125     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    126     if (IsEOF()) {
    127         return;
    128     }
    129     do {
    130         while (m_dwIndex < m_dwBufferSize && g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) {
    131             m_dwIndex ++;
    132         }
    133         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    134         if (m_dwIndex < m_dwBufferSize || IsEOF()) {
    135             break;
    136         }
    137     } while (ReadNextBlock());
    138 }
    139 void CXML_Parser::GetName(CFX_ByteStringL &space, CFX_ByteStringL &name)
    140 {
    141     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    142     if (IsEOF()) {
    143         return;
    144     }
    145     CFX_ByteTextBuf buf(m_pAllocator);
    146     FX_BYTE ch;
    147     do {
    148         while (m_dwIndex < m_dwBufferSize) {
    149             ch = m_pBuffer[m_dwIndex];
    150             if (ch == ':') {
    151                 buf.GetByteStringL(space);
    152                 buf.Clear();
    153             } else if (g_FXCRT_XML_IsNameChar(ch)) {
    154                 buf.AppendChar(ch);
    155             } else {
    156                 break;
    157             }
    158             m_dwIndex ++;
    159         }
    160         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    161         if (m_dwIndex < m_dwBufferSize || IsEOF()) {
    162             break;
    163         }
    164     } while (ReadNextBlock());
    165     buf.GetByteStringL(name);
    166 }
    167 void CXML_Parser::SkipLiterals(FX_BSTR str)
    168 {
    169     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    170     if (IsEOF()) {
    171         return;
    172     }
    173     FX_INT32 i = 0, iLen = str.GetLength();
    174     do {
    175         while (m_dwIndex < m_dwBufferSize) {
    176             if (str.GetAt(i) != m_pBuffer[m_dwIndex ++]) {
    177                 i = 0;
    178             } else {
    179                 i ++;
    180                 if (i == iLen) {
    181                     break;
    182                 }
    183             }
    184         }
    185         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    186         if (i == iLen) {
    187             return;
    188         }
    189         if (m_dwIndex < m_dwBufferSize || IsEOF()) {
    190             break;
    191         }
    192     } while (ReadNextBlock());
    193     while (!m_pDataAcc->IsEOF()) {
    194         ReadNextBlock();
    195         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwBufferSize;
    196     }
    197     m_dwIndex = m_dwBufferSize;
    198 }
// Decodes the remainder of an entity or character reference and returns its
// code point. Callers in this file invoke it right after consuming the '&'.
// Consumes input up to and including the terminating ';' (or to the end of
// data if no ';' is found). Returns 0 at EOF and for entity names other than
// gt / lt / amp / apos / quot.
//
// States:
//   0  first byte: '#' selects a numeric reference, anything else a name
//   1  named entity: collect bytes until ';'
//   2  first byte after '#': 'x' selects hexadecimal, anything else decimal
//   3  decimal digits until ';' (non-digits are ignored)
//   4  hexadecimal digits until ';' (non-hex bytes are ignored)
//   10 finished
FX_DWORD CXML_Parser::GetCharRef()
{
    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    if (IsEOF()) {
        return 0;
    }
    FX_BYTE ch;
    FX_INT32 iState = 0;
    CFX_ByteTextBuf buf(m_pAllocator);
    FX_DWORD code = 0;
    do {
        while (m_dwIndex < m_dwBufferSize) {
            ch = m_pBuffer[m_dwIndex];
            switch (iState) {
                case 0:
                    if (ch == '#') {
                        m_dwIndex ++;
                        iState = 2;
                        break;
                    }
                    iState = 1;
                    // Intentional fall-through: this byte is the first byte
                    // of the entity name.
                case 1:
                    m_dwIndex ++;
                    if (ch == ';') {
                        CFX_ByteStringC ref = buf.GetByteString();
                        if (ref == FX_BSTRC("gt")) {
                            code = '>';
                        } else if (ref == FX_BSTRC("lt")) {
                            code = '<';
                        } else if (ref == FX_BSTRC("amp")) {
                            code = '&';
                        } else if (ref == FX_BSTRC("apos")) {
                            code = '\'';
                        } else if (ref == FX_BSTRC("quot")) {
                            code = '"';
                        }
                        iState = 10;
                        break;
                    }
                    buf.AppendByte(ch);
                    break;
                case 2:
                    if (ch == 'x') {
                        m_dwIndex ++;
                        iState = 4;
                        break;
                    }
                    iState = 3;
                    // Intentional fall-through: this byte is the first
                    // decimal digit.
                case 3:
                    m_dwIndex ++;
                    if (ch == ';') {
                        iState = 10;
                        break;
                    }
                    if (g_FXCRT_XML_IsDigital(ch)) {
                        code = code * 10 + ch - '0';
                    }
                    break;
                case 4:
                    m_dwIndex ++;
                    if (ch == ';') {
                        iState = 10;
                        break;
                    }
                    // Masked table entry: 0x20 for '0'-'9', 0x40 for 'a'-'f',
                    // 0x60 for 'A'-'F', 0 for anything else.
                    FX_BYTE nHex = g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;
                    if (nHex) {
                        if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
                            code = (code << 4) + ch - '0';
                        } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
                            // 87 == 'a' - 10
                            code = (code << 4) + ch - 87;
                        } else {
                            // 55 == 'A' - 10
                            code = (code << 4) + ch - 55;
                        }
                    }
                    break;
            }
            if (iState == 10) {
                break;
            }
        }
        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
        if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
            break;
        }
    } while (ReadNextBlock());
    return code;
}
    286 void CXML_Parser::GetAttrValue(CFX_WideStringL &value)
    287 {
    288     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    289     if (IsEOF()) {
    290         return;
    291     }
    292     CFX_UTF8Decoder decoder(m_pAllocator);
    293     FX_BYTE mark = 0, ch;
    294     do {
    295         while (m_dwIndex < m_dwBufferSize) {
    296             ch = m_pBuffer[m_dwIndex];
    297             if (mark == 0) {
    298                 if (ch != '\'' && ch != '"') {
    299                     return;
    300                 }
    301                 mark = ch;
    302                 m_dwIndex ++;
    303                 ch = 0;
    304                 continue;
    305             }
    306             m_dwIndex ++;
    307             if (ch == mark) {
    308                 break;
    309             }
    310             if (ch == '&') {
    311                 decoder.AppendChar(GetCharRef());
    312                 if (IsEOF()) {
    313                     decoder.GetResult(value);
    314                     return;
    315                 }
    316             } else {
    317                 decoder.Input(ch);
    318             }
    319         }
    320         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    321         if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) {
    322             break;
    323         }
    324     } while (ReadNextBlock());
    325     decoder.GetResult(value);
    326 }
// Locates the next tag and reads its name into `space` / `name`.
//
//   bStartTag  TRUE when the caller has already consumed the '<' (parsing
//              starts in state 1); FALSE to first scan forward for a '<'.
//   bEndTag    set to TRUE when the tag begins with "</", FALSE otherwise.
//              Left untouched if the parser is already at EOF on entry.
//
// "<? ... ?>" and "<! ... -->" constructs are skipped and scanning resumes.
// If no tag is found before the data ends, `space` and `name` are left as
// they were.
void CXML_Parser::GetTagName(CFX_ByteStringL &space, CFX_ByteStringL &name, FX_BOOL &bEndTag, FX_BOOL bStartTag)
{
    m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    if (IsEOF()) {
        return;
    }
    bEndTag = FALSE;
    FX_BYTE ch;
    // State 0: looking for '<'. State 1: positioned on the byte after '<'.
    FX_INT32 iState = bStartTag ? 1 : 0;
    do {
        while (m_dwIndex < m_dwBufferSize) {
            ch = m_pBuffer[m_dwIndex];
            switch (iState) {
                case 0:
                    m_dwIndex ++;
                    if (ch != '<') {
                        break;
                    }
                    iState = 1;
                    break;
                case 1:
                    if (ch == '?') {
                        // Processing instruction / XML declaration.
                        m_dwIndex ++;
                        SkipLiterals(FX_BSTRC("?>"));
                        iState = 0;
                        break;
                    } else if (ch == '!') {
                        // NOTE(review): every "<!" construct is skipped up to
                        // "-->" here, whereas ParseElement distinguishes
                        // '[', '-' and other markup. Confirm this is intended
                        // for declarations that do not end in "-->".
                        m_dwIndex ++;
                        SkipLiterals(FX_BSTRC("-->"));
                        iState = 0;
                        break;
                    }
                    if (ch == '/') {
                        m_dwIndex ++;
                        GetName(space, name);
                        bEndTag = TRUE;
                    } else {
                        GetName(space, name);
                        bEndTag = FALSE;
                    }
                    return;
            }
        }
        m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
        if (m_dwIndex < m_dwBufferSize || IsEOF()) {
            break;
        }
    } while (ReadNextBlock());
}
    376 CXML_Element* CXML_Parser::ParseElement(CXML_Element* pParent, FX_BOOL bStartTag)
    377 {
    378     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    379     if (IsEOF()) {
    380         return NULL;
    381     }
    382     CFX_ByteStringL tag_name, tag_space;
    383     FX_BOOL bEndTag;
    384     GetTagName(tag_space, tag_name, bEndTag, bStartTag);
    385     if (tag_name.IsEmpty() || bEndTag) {
    386         tag_space.Empty(m_pAllocator);
    387         return NULL;
    388     }
    389     CXML_Element* pElement;
    390     if (m_pAllocator) {
    391         pElement = FX_NewAtAllocator(m_pAllocator)CXML_Element(m_pAllocator);
    392     } else {
    393         pElement = FX_NEW CXML_Element;
    394     }
    395     if (pElement) {
    396         pElement->m_pParent = pParent;
    397         pElement->SetTag(tag_space, tag_name);
    398     }
    399     tag_space.Empty(m_pAllocator);
    400     tag_name.Empty(m_pAllocator);
    401     if (!pElement) {
    402         return NULL;
    403     }
    404     do {
    405         CFX_ByteStringL attr_space, attr_name;
    406         while (m_dwIndex < m_dwBufferSize) {
    407             SkipWhiteSpaces();
    408             if (IsEOF()) {
    409                 break;
    410             }
    411             if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) {
    412                 break;
    413             }
    414             attr_space.Empty(m_pAllocator);
    415             attr_name.Empty(m_pAllocator);
    416             GetName(attr_space, attr_name);
    417             SkipWhiteSpaces();
    418             if (IsEOF()) {
    419                 break;
    420             }
    421             if (m_pBuffer[m_dwIndex] != '=') {
    422                 break;
    423             }
    424             m_dwIndex ++;
    425             SkipWhiteSpaces();
    426             if (IsEOF()) {
    427                 break;
    428             }
    429             CFX_WideStringL attr_value;
    430             GetAttrValue(attr_value);
    431             pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value, m_pAllocator);
    432             attr_value.Empty(m_pAllocator);
    433         }
    434         attr_space.Empty(m_pAllocator);
    435         attr_name.Empty(m_pAllocator);
    436         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    437         if (m_dwIndex < m_dwBufferSize || IsEOF()) {
    438             break;
    439         }
    440     } while (ReadNextBlock());
    441     SkipWhiteSpaces();
    442     if (IsEOF()) {
    443         return pElement;
    444     }
    445     FX_BYTE ch = m_pBuffer[m_dwIndex ++];
    446     if (ch == '/') {
    447         m_dwIndex ++;
    448         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    449         return pElement;
    450     }
    451     if (ch != '>') {
    452         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    453         if (m_pAllocator) {
    454             FX_DeleteAtAllocator(pElement, m_pAllocator, CXML_Element);
    455         } else {
    456             delete pElement;
    457         }
    458         return NULL;
    459     }
    460     SkipWhiteSpaces();
    461     if (IsEOF()) {
    462         return pElement;
    463     }
    464     CFX_UTF8Decoder decoder(m_pAllocator);
    465     CFX_WideTextBuf content(m_pAllocator);
    466     FX_BOOL bCDATA = FALSE;
    467     FX_INT32 iState = 0;
    468     do {
    469         while (m_dwIndex < m_dwBufferSize) {
    470             ch = m_pBuffer[m_dwIndex ++];
    471             switch (iState) {
    472                 case 0:
    473                     if (ch == '<') {
    474                         iState = 1;
    475                     } else if (ch == '&') {
    476                         decoder.ClearStatus();
    477                         decoder.AppendChar(GetCharRef());
    478                     } else {
    479                         decoder.Input(ch);
    480                     }
    481                     break;
    482                 case 1:
    483                     if (ch == '!') {
    484                         iState = 2;
    485                     } else if (ch == '?') {
    486                         SkipLiterals(FX_BSTRC("?>"));
    487                         SkipWhiteSpaces();
    488                         iState = 0;
    489                     } else if (ch == '/') {
    490                         CFX_ByteStringL space, name;
    491                         GetName(space, name);
    492                         space.Empty(m_pAllocator);
    493                         name.Empty(m_pAllocator);
    494                         SkipWhiteSpaces();
    495                         m_dwIndex ++;
    496                         iState = 10;
    497                     } else {
    498                         content << decoder.GetResult();
    499                         CFX_WideStringL dataStr;
    500                         content.GetWideStringL(dataStr);
    501                         if (!bCDATA && !m_bSaveSpaceChars) {
    502                             dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n");
    503                         }
    504                         InsertContentSegment(bCDATA, dataStr, pElement);
    505                         dataStr.Empty(m_pAllocator);
    506                         content.Clear();
    507                         decoder.Clear();
    508                         bCDATA = FALSE;
    509                         iState = 0;
    510                         m_dwIndex --;
    511                         CXML_Element* pSubElement = ParseElement(pElement, TRUE);
    512                         if (pSubElement == NULL) {
    513                             break;
    514                         }
    515                         pSubElement->m_pParent = pElement;
    516                         pElement->m_Children.Add((FX_LPVOID)CXML_Element::Element);
    517                         pElement->m_Children.Add(pSubElement);
    518                         SkipWhiteSpaces();
    519                     }
    520                     break;
    521                 case 2:
    522                     if (ch == '[') {
    523                         SkipLiterals(FX_BSTRC("]]>"));
    524                     } else if (ch == '-') {
    525                         m_dwIndex ++;
    526                         SkipLiterals(FX_BSTRC("-->"));
    527                     } else {
    528                         SkipLiterals(FX_BSTRC(">"));
    529                     }
    530                     decoder.Clear();
    531                     SkipWhiteSpaces();
    532                     iState = 0;
    533                     break;
    534             }
    535             if (iState == 10) {
    536                 break;
    537             }
    538         }
    539         m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
    540         if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
    541             break;
    542         }
    543     } while (ReadNextBlock());
    544     content << decoder.GetResult();
    545     CFX_WideStringL dataStr;
    546     content.GetWideStringL(dataStr);
    547     if (!m_bSaveSpaceChars) {
    548         dataStr.TrimRight((FX_LPCWSTR)L" \t\r\n");
    549     }
    550     InsertContentSegment(bCDATA, dataStr, pElement);
    551     dataStr.Empty(m_pAllocator);
    552     content.Clear();
    553     decoder.Clear();
    554     bCDATA = FALSE;
    555     return pElement;
    556 }
    557 void CXML_Parser::InsertContentSegment(FX_BOOL bCDATA, FX_WSTR content, CXML_Element* pElement)
    558 {
    559     if (content.IsEmpty()) {
    560         return;
    561     }
    562     CXML_Content* pContent;
    563     if (m_pAllocator) {
    564         pContent = FX_NewAtAllocator(m_pAllocator)CXML_Content;
    565     } else {
    566         pContent = FX_NEW CXML_Content;
    567     }
    568     if (!pContent) {
    569         return;
    570     }
    571     pContent->Set(bCDATA, content, m_pAllocator);
    572     pElement->m_Children.Add((FX_LPVOID)CXML_Element::Content);
    573     pElement->m_Children.Add(pContent);
    574 }
    575 static CXML_Element* XML_ContinueParse(CXML_Parser &parser, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize)
    576 {
    577     parser.m_bSaveSpaceChars = bSaveSpaceChars;
    578     CXML_Element* pElement = parser.ParseElement(NULL, FALSE);
    579     if (pParsedSize) {
    580         *pParsedSize = parser.m_nOffset;
    581     }
    582     return pElement;
    583 }
    584 CXML_Element* CXML_Element::Parse(const void* pBuffer, size_t size, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
    585 {
    586     CXML_Parser parser(pAllocator);
    587     if (!parser.Init((FX_LPBYTE)pBuffer, size)) {
    588         return NULL;
    589     }
    590     return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
    591 }
    592 CXML_Element* CXML_Element::Parse(IFX_FileRead *pFile, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
    593 {
    594     CXML_Parser parser(pAllocator);
    595     if (!parser.Init(pFile)) {
    596         return NULL;
    597     }
    598     return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
    599 }
    600 CXML_Element* CXML_Element::Parse(IFX_BufferRead *pBuffer, FX_BOOL bSaveSpaceChars, FX_FILESIZE* pParsedSize, IFX_Allocator* pAllocator)
    601 {
    602     CXML_Parser parser(pAllocator);
    603     if (!parser.Init(pBuffer)) {
    604         return NULL;
    605     }
    606     return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
    607 }
    608 CXML_Element::CXML_Element(IFX_Allocator* pAllocator)
    609     : m_pParent(NULL)
    610     , m_QSpaceName()
    611     , m_TagName()
    612     , m_AttrMap()
    613     , m_Children(pAllocator)
    614 {
    615 }
    616 CXML_Element::CXML_Element(FX_BSTR qSpace, FX_BSTR tagName, IFX_Allocator* pAllocator)
    617     : m_pParent(NULL)
    618     , m_QSpaceName()
    619     , m_TagName()
    620     , m_AttrMap()
    621     , m_Children(pAllocator)
    622 {
    623     m_QSpaceName.Set(qSpace, pAllocator);
    624     m_TagName.Set(tagName, pAllocator);
    625 }
    626 CXML_Element::CXML_Element(FX_BSTR qTagName, IFX_Allocator* pAllocator)
    627     : m_pParent(NULL)
    628     , m_QSpaceName()
    629     , m_TagName()
    630     , m_AttrMap()
    631     , m_Children(pAllocator)
    632 {
    633     SetTag(qTagName);
    634 }
    635 CXML_Element::~CXML_Element()
    636 {
    637     Empty();
    638 }
    639 void CXML_Element::Empty()
    640 {
    641     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    642     m_QSpaceName.Empty(pAllocator);
    643     m_TagName.Empty(pAllocator);
    644     m_AttrMap.RemoveAll(pAllocator);
    645     RemoveChildren();
    646 }
    647 void CXML_Element::RemoveChildren()
    648 {
    649     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    650     for (int i = 0; i < m_Children.GetSize(); i += 2) {
    651         ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
    652         if (type == Content) {
    653             CXML_Content* content = (CXML_Content*)m_Children.GetAt(i + 1);
    654             if (pAllocator) {
    655                 FX_DeleteAtAllocator(content, pAllocator, CXML_Content);
    656             } else {
    657                 delete content;
    658             }
    659         } else if (type == Element) {
    660             CXML_Element* child = (CXML_Element*)m_Children.GetAt(i + 1);
    661             child->RemoveChildren();
    662             if (pAllocator) {
    663                 FX_DeleteAtAllocator(child, pAllocator, CXML_Element);
    664             } else {
    665                 delete child;
    666             }
    667         }
    668     }
    669     m_Children.RemoveAll();
    670 }
    671 CFX_ByteString CXML_Element::GetTagName(FX_BOOL bQualified) const
    672 {
    673     if (!bQualified || m_QSpaceName.IsEmpty()) {
    674         return m_TagName;
    675     }
    676     CFX_ByteString bsTag = m_QSpaceName;
    677     bsTag += ":";
    678     bsTag += m_TagName;
    679     return bsTag;
    680 }
    681 void CXML_Element::GetTagName(CFX_ByteStringL &tagName, FX_BOOL bQualified) const
    682 {
    683     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    684     if (!bQualified || m_QSpaceName.IsEmpty()) {
    685         tagName.Set(m_TagName, pAllocator);
    686         return;
    687     }
    688     FX_LPSTR str = tagName.AllocBuffer(m_QSpaceName.GetLength() + m_TagName.GetLength() + 2, pAllocator);
    689     if (!str) {
    690         return;
    691     }
    692     FXSYS_memcpy32(str, m_QSpaceName.GetCStr(), m_QSpaceName.GetLength());
    693     str += m_QSpaceName.GetLength();
    694     *str = ':';
    695     str ++;
    696     FXSYS_memcpy32(str, m_TagName.GetCStr(), m_TagName.GetLength());
    697     str += m_TagName.GetLength();
    698     *str = '\0';
    699 }
    700 CFX_ByteString CXML_Element::GetNamespace(FX_BOOL bQualified) const
    701 {
    702     if (bQualified) {
    703         return m_QSpaceName;
    704     }
    705     return GetNamespaceURI(m_QSpaceName);
    706 }
    707 void CXML_Element::GetNamespace(CFX_ByteStringL &nameSpace, FX_BOOL bQualified) const
    708 {
    709     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    710     if (bQualified) {
    711         nameSpace.Set(m_QSpaceName, pAllocator);
    712         return;
    713     }
    714     GetNamespaceURI(m_QSpaceName, nameSpace);
    715 }
    716 CFX_ByteString CXML_Element::GetNamespaceURI(FX_BSTR qName) const
    717 {
    718     const CFX_WideStringL* pwsSpace;
    719     const CXML_Element *pElement = this;
    720     do {
    721         if (qName.IsEmpty()) {
    722             pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns"));
    723         } else {
    724             pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName);
    725         }
    726         if (pwsSpace) {
    727             break;
    728         }
    729         pElement = pElement->GetParent();
    730     } while(pElement);
    731     return pwsSpace ? FX_UTF8Encode(*pwsSpace) : CFX_ByteString();
    732 }
    733 void CXML_Element::GetNamespaceURI(FX_BSTR qName, CFX_ByteStringL &uri) const
    734 {
    735     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    736     const CFX_WideStringL* pwsSpace;
    737     const CXML_Element *pElement = this;
    738     do {
    739         if (qName.IsEmpty()) {
    740             pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC(""), FX_BSTRC("xmlns"));
    741         } else {
    742             pwsSpace = pElement->m_AttrMap.Lookup(FX_BSTRC("xmlns"), qName);
    743         }
    744         if (pwsSpace) {
    745             break;
    746         }
    747         pElement = pElement->GetParent();
    748     } while(pElement);
    749     if (pwsSpace) {
    750         FX_UTF8Encode(pwsSpace->GetPtr(), pwsSpace->GetLength(), uri, pAllocator);
    751     }
    752 }
    753 void CXML_Element::GetAttrByIndex(int index, CFX_ByteString& space, CFX_ByteString& name, CFX_WideString& value) const
    754 {
    755     if (index < 0 || index >= m_AttrMap.GetSize()) {
    756         return;
    757     }
    758     CXML_AttrItem& item = m_AttrMap.GetAt(index);
    759     space = item.m_QSpaceName;
    760     name = item.m_AttrName;
    761     value = item.m_Value;
    762 }
    763 void CXML_Element::GetAttrByIndex(int index, CFX_ByteStringL &space, CFX_ByteStringL &name, CFX_WideStringL &value) const
    764 {
    765     if (index < 0 || index >= m_AttrMap.GetSize()) {
    766         return;
    767     }
    768     IFX_Allocator* pAllocator = m_Children.m_pAllocator;
    769     CXML_AttrItem& item = m_AttrMap.GetAt(index);
    770     space.Set(item.m_QSpaceName, pAllocator);
    771     name.Set(item.m_AttrName, pAllocator);
    772     value.Set(item.m_Value, pAllocator);
    773 }
    774 FX_BOOL CXML_Element::HasAttr(FX_BSTR name) const
    775 {
    776     CFX_ByteStringC bsSpace, bsName;
    777     FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    778     return m_AttrMap.Lookup(bsSpace, bsName) != NULL;
    779 }
    780 FX_BOOL CXML_Element::GetAttrValue(FX_BSTR name, CFX_WideString& attribute) const
    781 {
    782     CFX_ByteStringC bsSpace, bsName;
    783     FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    784     const CFX_WideStringL* pValue = m_AttrMap.Lookup(bsSpace, bsName);
    785     if (pValue) {
    786         attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength());
    787         return TRUE;
    788     }
    789     return FALSE;
    790 }
    791 const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR name) const
    792 {
    793     CFX_ByteStringC bsSpace, bsName;
    794     FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    795     return m_AttrMap.Lookup(bsSpace, bsName);
    796 }
    797 FX_BOOL CXML_Element::GetAttrValue(FX_BSTR space, FX_BSTR name, CFX_WideString& attribute) const
    798 {
    799     const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name);
    800     if (pValue) {
    801         attribute = CFX_WideString(pValue->GetPtr(), pValue->GetLength());
    802         return TRUE;
    803     }
    804     return FALSE;
    805 }
    806 const CFX_WideStringL* CXML_Element::GetAttrValuePtr(FX_BSTR space, FX_BSTR name) const
    807 {
    808     return m_AttrMap.Lookup(space, name);
    809 }
    810 FX_BOOL CXML_Element::GetAttrInteger(FX_BSTR name, int& attribute) const
    811 {
    812     CFX_ByteStringC bsSpace, bsName;
    813     FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    814     const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(bsSpace, bsName);
    815     if (pwsValue) {
    816         attribute = pwsValue->GetInteger();
    817         return TRUE;
    818     }
    819     return FALSE;
    820 }
    821 FX_BOOL	CXML_Element::GetAttrInteger(FX_BSTR space, FX_BSTR name, int& attribute) const
    822 {
    823     const CFX_WideStringL* pwsValue = m_AttrMap.Lookup(space, name);
    824     if (pwsValue) {
    825         attribute = pwsValue->GetInteger();
    826         return TRUE;
    827     }
    828     return FALSE;
    829 }
    830 FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR name, FX_FLOAT& attribute) const
    831 {
    832     CFX_ByteStringC bsSpace, bsName;
    833     FX_XML_SplitQualifiedName(name, bsSpace, bsName);
    834     return GetAttrFloat(bsSpace, bsName, attribute);
    835 }
    836 FX_BOOL CXML_Element::GetAttrFloat(FX_BSTR space, FX_BSTR name, FX_FLOAT& attribute) const
    837 {
    838     CFX_WideString value;
    839     const CFX_WideStringL* pValue = m_AttrMap.Lookup(space, name);
    840     if (pValue) {
    841         attribute = pValue->GetFloat();
    842         return TRUE;
    843     }
    844     return FALSE;
    845 }
    846 FX_DWORD CXML_Element::CountChildren() const
    847 {
    848     return m_Children.GetSize() / 2;
    849 }
    850 CXML_Element::ChildType CXML_Element::GetChildType(FX_DWORD index) const
    851 {
    852     index <<= 1;
    853     if (index >= (FX_DWORD)m_Children.GetSize()) {
    854         return Invalid;
    855     }
    856     return (ChildType)(FX_UINTPTR)m_Children.GetAt(index);
    857 }
    858 CFX_WideString CXML_Element::GetContent(FX_DWORD index) const
    859 {
    860     index <<= 1;
    861     if (index >= (FX_DWORD)m_Children.GetSize() ||
    862             (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) {
    863         return CFX_WideString();
    864     }
    865     CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1);
    866     if (pContent) {
    867         return pContent->m_Content;
    868     }
    869     return CFX_WideString();
    870 }
    871 const CFX_WideStringL* CXML_Element::GetContentPtr(FX_DWORD index) const
    872 {
    873     index <<= 1;
    874     if (index >= (FX_DWORD)m_Children.GetSize() ||
    875             (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Content) {
    876         return NULL;
    877     }
    878     CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1);
    879     if (pContent) {
    880         return &pContent->m_Content;
    881     }
    882     return NULL;
    883 }
    884 CXML_Element* CXML_Element::GetElement(FX_DWORD index) const
    885 {
    886     index <<= 1;
    887     if (index >= (FX_DWORD)m_Children.GetSize() ||
    888             (ChildType)(FX_UINTPTR)m_Children.GetAt(index) != Element) {
    889         return NULL;
    890     }
    891     return (CXML_Element*)m_Children.GetAt(index + 1);
    892 }
    893 FX_DWORD CXML_Element::CountElements(FX_BSTR space, FX_BSTR tag) const
    894 {
    895     int count = 0;
    896     for (int i = 0; i < m_Children.GetSize(); i += 2) {
    897         ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
    898         if (type != Element) {
    899             continue;
    900         }
    901         CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
    902         if ((space.IsEmpty() || pKid->m_QSpaceName == space) && pKid->m_TagName == tag) {
    903             count ++;
    904         }
    905     }
    906     return count;
    907 }
    908 CXML_Element* CXML_Element::GetElement(FX_BSTR space, FX_BSTR tag, int index) const
    909 {
    910     if (index < 0) {
    911         return NULL;
    912     }
    913     for (int i = 0; i < m_Children.GetSize(); i += 2) {
    914         ChildType type = (ChildType)(FX_UINTPTR)m_Children.GetAt(i);
    915         if (type != Element) {
    916             continue;
    917         }
    918         CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
    919         if ((!space.IsEmpty() && pKid->m_QSpaceName != space) || pKid->m_TagName != tag) {
    920             continue;
    921         }
    922         if (index -- == 0) {
    923             return pKid;
    924         }
    925     }
    926     return NULL;
    927 }
    928 FX_DWORD CXML_Element::FindElement(CXML_Element *pChild) const
    929 {
    930     for (int i = 0; i < m_Children.GetSize(); i += 2) {
    931         if ((ChildType)(FX_UINTPTR)m_Children.GetAt(i) == Element &&
    932                 (CXML_Element*)m_Children.GetAt(i + 1) == pChild) {
    933             return (FX_DWORD)(i >> 1);
    934         }
    935     }
    936     return (FX_DWORD) - 1;
    937 }
    938 const CFX_WideStringL* CXML_AttrMap::Lookup(FX_BSTR space, FX_BSTR name) const
    939 {
    940     if (m_pMap == NULL) {
    941         return NULL;
    942     }
    943     for (int i = 0; i < m_pMap->GetSize(); i ++) {
    944         CXML_AttrItem& item = GetAt(i);
    945         if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
    946             return &item.m_Value;
    947         }
    948     }
    949     return NULL;
    950 }
    951 void CXML_AttrMap::SetAt(FX_BSTR space, FX_BSTR name, FX_WSTR value, IFX_Allocator* pAllocator)
    952 {
    953     for (int i = 0; i < GetSize(); i ++) {
    954         CXML_AttrItem& item = GetAt(i);
    955         if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
    956             item.m_Value.Set(value, pAllocator);
    957             return;
    958         }
    959     }
    960     if (!m_pMap) {
    961         if (pAllocator) {
    962             m_pMap = FX_NewAtAllocator(pAllocator)CFX_ObjectArray<CXML_AttrItem>(pAllocator);
    963         } else {
    964             m_pMap = FX_NEW CFX_ObjectArray<CXML_AttrItem>;
    965         }
    966     }
    967     if (!m_pMap) {
    968         return;
    969     }
    970     CXML_AttrItem* pItem = (CXML_AttrItem*)m_pMap->AddSpace();
    971     if (!pItem) {
    972         return;
    973     }
    974     pItem->m_QSpaceName.Set(space, pAllocator);
    975     pItem->m_AttrName.Set(name, pAllocator);
    976     pItem->m_Value.Set(value, pAllocator);
    977 }
    978 void CXML_AttrMap::RemoveAt(FX_BSTR space, FX_BSTR name, IFX_Allocator* pAllocator)
    979 {
    980     if (m_pMap == NULL) {
    981         return;
    982     }
    983     for (int i = 0; i < m_pMap->GetSize(); i ++) {
    984         CXML_AttrItem& item = GetAt(i);
    985         if ((space.IsEmpty() || item.m_QSpaceName == space) && item.m_AttrName == name) {
    986             item.Empty(pAllocator);
    987             m_pMap->RemoveAt(i);
    988             return;
    989         }
    990     }
    991 }
    992 int CXML_AttrMap::GetSize() const
    993 {
    994     return m_pMap == NULL ? 0 : m_pMap->GetSize();
    995 }
    996 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const
    997 {
    998     ASSERT(m_pMap != NULL);
    999     return (*m_pMap)[index];
   1000 }
   1001 void CXML_AttrMap::RemoveAll(IFX_Allocator* pAllocator)
   1002 {
   1003     if (!m_pMap) {
   1004         return;
   1005     }
   1006     for (int i = 0; i < m_pMap->GetSize(); i ++) {
   1007         CXML_AttrItem& item = (*m_pMap)[i];
   1008         item.Empty(pAllocator);
   1009     }
   1010     m_pMap->RemoveAll();
   1011     if (pAllocator) {
   1012         FX_DeleteAtAllocator(m_pMap, pAllocator, CFX_ObjectArray<CXML_AttrItem>);
   1013     } else {
   1014         delete m_pMap;
   1015     }
   1016     m_pMap = NULL;
   1017 }
   1018