Home | History | Annotate | Download | only in xml
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
      6 
      7 #include <memory>
      8 
      9 #include "core/fxcrt/cfx_seekablestreamproxy.h"
     10 #include "core/fxcrt/fx_codepage.h"
     11 #include "testing/gtest/include/gtest/gtest.h"
     12 #include "testing/test_support.h"
     13 
     14 TEST(CFX_XMLSyntaxParserTest, CData) {
     15   const char* input =
     16       "<script contentType=\"application/x-javascript\">\n"
     17       "  <![CDATA[\n"
     18       "    if (a[1] < 3)\n"
     19       "      app.alert(\"Tclams\");\n"
     20       "  ]]>\n"
     21       "</script>";
     22 
     23   const wchar_t* cdata =
     24       L"\n"
     25       L"    if (a[1] < 3)\n"
     26       L"      app.alert(\"Tclams\");\n"
     27       L"  ";
     28 
     29   RetainPtr<CFX_SeekableStreamProxy> stream =
     30       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
     31           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
     32   stream->SetCodePage(FX_CODEPAGE_UTF8);
     33 
     34   CFX_XMLSyntaxParser parser(stream);
     35   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
     36   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
     37   ASSERT_EQ(L"script", parser.GetTagName());
     38 
     39   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
     40   ASSERT_EQ(L"contentType", parser.GetAttributeName());
     41   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
     42   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
     43 
     44   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
     45   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
     46   ASSERT_EQ(L"\n  ", parser.GetTextData());
     47 
     48   ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
     49   ASSERT_EQ(cdata, parser.GetTextData());
     50 
     51   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
     52   ASSERT_EQ(L"\n", parser.GetTextData());
     53 
     54   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
     55   ASSERT_EQ(L"script", parser.GetTagName());
     56 
     57   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
     58 }
     59 
     60 TEST(CFX_XMLSyntaxParserTest, CDataWithInnerScript) {
     61   const char* input =
     62       "<script contentType=\"application/x-javascript\">\n"
     63       "  <![CDATA[\n"
     64       "    if (a[1] < 3)\n"
     65       "      app.alert(\"Tclams\");\n"
     66       "    </script>\n"
     67       "  ]]>\n"
     68       "</script>";
     69 
     70   const wchar_t* cdata =
     71       L"\n"
     72       L"    if (a[1] < 3)\n"
     73       L"      app.alert(\"Tclams\");\n"
     74       L"    </script>\n"
     75       L"  ";
     76 
     77   RetainPtr<CFX_SeekableStreamProxy> stream =
     78       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
     79           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
     80   stream->SetCodePage(FX_CODEPAGE_UTF8);
     81 
     82   CFX_XMLSyntaxParser parser(stream);
     83   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
     84   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
     85   ASSERT_EQ(L"script", parser.GetTagName());
     86 
     87   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
     88   ASSERT_EQ(L"contentType", parser.GetAttributeName());
     89   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
     90   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
     91 
     92   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
     93   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
     94   ASSERT_EQ(L"\n  ", parser.GetTextData());
     95 
     96   ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
     97   ASSERT_EQ(cdata, parser.GetTextData());
     98 
     99   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    100   ASSERT_EQ(L"\n", parser.GetTextData());
    101 
    102   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    103   ASSERT_EQ(L"script", parser.GetTagName());
    104 
    105   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    106 }
    107 
    108 TEST(CFX_XMLSyntaxParserTest, ArrowBangArrow) {
    109   const char* input =
    110       "<script contentType=\"application/x-javascript\">\n"
    111       "  <!>\n"
    112       "</script>";
    113 
    114   RetainPtr<CFX_SeekableStreamProxy> stream =
    115       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    116           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    117   stream->SetCodePage(FX_CODEPAGE_UTF8);
    118 
    119   CFX_XMLSyntaxParser parser(stream);
    120   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    121   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    122 
    123   ASSERT_EQ(L"script", parser.GetTagName());
    124 
    125   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    126   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    127   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    128   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    129 
    130   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    131   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    132   ASSERT_EQ(L"\n  ", parser.GetTextData());
    133 
    134   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    135   ASSERT_EQ(L"\n", parser.GetTextData());
    136 
    137   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    138   ASSERT_EQ(L"script", parser.GetTagName());
    139 
    140   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    141 }
    142 
    143 TEST(CFX_XMLSyntaxParserTest, ArrowBangBracketArrow) {
    144   const char* input =
    145       "<script contentType=\"application/x-javascript\">\n"
    146       "  <![>\n"
    147       "</script>";
    148 
    149   RetainPtr<CFX_SeekableStreamProxy> stream =
    150       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    151           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    152   stream->SetCodePage(FX_CODEPAGE_UTF8);
    153 
    154   CFX_XMLSyntaxParser parser(stream);
    155   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    156   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    157   ASSERT_EQ(L"script", parser.GetTagName());
    158 
    159   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    160   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    161   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    162   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    163 
    164   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    165   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    166   ASSERT_EQ(L"\n  ", parser.GetTextData());
    167 
    168   // Parser walks to end of input.
    169 
    170   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    171 }
    172 
    173 TEST(CFX_XMLSyntaxParserTest, IncompleteCData) {
    174   const char* input =
    175       "<script contentType=\"application/x-javascript\">\n"
    176       "  <![CDATA>\n"
    177       "</script>";
    178 
    179   RetainPtr<CFX_SeekableStreamProxy> stream =
    180       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    181           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    182   stream->SetCodePage(FX_CODEPAGE_UTF8);
    183 
    184   CFX_XMLSyntaxParser parser(stream);
    185   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    186   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    187   ASSERT_EQ(L"script", parser.GetTagName());
    188 
    189   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    190   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    191   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    192   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    193 
    194   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    195   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    196   ASSERT_EQ(L"\n  ", parser.GetTextData());
    197 
    198   // Parser walks to end of input.
    199 
    200   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    201 }
    202 
    203 TEST(CFX_XMLSyntaxParserTest, UnClosedCData) {
    204   const char* input =
    205       "<script contentType=\"application/x-javascript\">\n"
    206       "  <![CDATA[\n"
    207       "</script>";
    208 
    209   RetainPtr<CFX_SeekableStreamProxy> stream =
    210       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    211           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    212   stream->SetCodePage(FX_CODEPAGE_UTF8);
    213 
    214   CFX_XMLSyntaxParser parser(stream);
    215   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    216   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    217   ASSERT_EQ(L"script", parser.GetTagName());
    218 
    219   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    220   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    221   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    222   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    223 
    224   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    225   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    226   ASSERT_EQ(L"\n  ", parser.GetTextData());
    227 
    228   // Parser walks to end of input.
    229 
    230   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    231 }
    232 
    233 TEST(CFX_XMLSyntaxParserTest, EmptyCData) {
    234   const char* input =
    235       "<script contentType=\"application/x-javascript\">\n"
    236       "  <![CDATA[]]>\n"
    237       "</script>";
    238 
    239   RetainPtr<CFX_SeekableStreamProxy> stream =
    240       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    241           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    242   stream->SetCodePage(FX_CODEPAGE_UTF8);
    243 
    244   CFX_XMLSyntaxParser parser(stream);
    245   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    246   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    247   ASSERT_EQ(L"script", parser.GetTagName());
    248 
    249   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    250   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    251   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    252   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    253 
    254   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    255   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    256   ASSERT_EQ(L"\n  ", parser.GetTextData());
    257 
    258   ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
    259   ASSERT_EQ(L"", parser.GetTextData());
    260 
    261   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    262   ASSERT_EQ(L"\n", parser.GetTextData());
    263 
    264   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    265   ASSERT_EQ(L"script", parser.GetTagName());
    266 
    267   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    268 }
    269 
    270 TEST(CFX_XMLSyntaxParserTest, Comment) {
    271   const char* input =
    272       "<script contentType=\"application/x-javascript\">\n"
    273       "  <!-- A Comment -->\n"
    274       "</script>";
    275 
    276   RetainPtr<CFX_SeekableStreamProxy> stream =
    277       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    278           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    279   stream->SetCodePage(FX_CODEPAGE_UTF8);
    280 
    281   CFX_XMLSyntaxParser parser(stream);
    282   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    283   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    284   ASSERT_EQ(L"script", parser.GetTagName());
    285 
    286   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    287   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    288   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    289   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    290 
    291   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    292   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    293   ASSERT_EQ(L"\n  ", parser.GetTextData());
    294 
    295   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    296   ASSERT_EQ(L"\n", parser.GetTextData());
    297 
    298   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    299   ASSERT_EQ(L"script", parser.GetTagName());
    300 
    301   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    302 }
    303 
    304 TEST(CFX_XMLSyntaxParserTest, IncorrectCommentStart) {
    305   const char* input =
    306       "<script contentType=\"application/x-javascript\">\n"
    307       "  <!- A Comment -->\n"
    308       "</script>";
    309 
    310   RetainPtr<CFX_SeekableStreamProxy> stream =
    311       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    312           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    313   stream->SetCodePage(FX_CODEPAGE_UTF8);
    314 
    315   CFX_XMLSyntaxParser parser(stream);
    316   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    317   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    318   ASSERT_EQ(L"script", parser.GetTagName());
    319 
    320   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    321   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    322   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    323   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    324 
    325   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    326   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    327   ASSERT_EQ(L"\n  ", parser.GetTextData());
    328 
    329   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    330   ASSERT_EQ(L"\n", parser.GetTextData());
    331 
    332   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    333   ASSERT_EQ(L"script", parser.GetTagName());
    334 
    335   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    336 }
    337 
    338 TEST(CFX_XMLSyntaxParserTest, CommentEmpty) {
    339   const char* input =
    340       "<script contentType=\"application/x-javascript\">\n"
    341       "  <!---->\n"
    342       "</script>";
    343 
    344   RetainPtr<CFX_SeekableStreamProxy> stream =
    345       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    346           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    347   stream->SetCodePage(FX_CODEPAGE_UTF8);
    348 
    349   CFX_XMLSyntaxParser parser(stream);
    350   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    351   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    352   ASSERT_EQ(L"script", parser.GetTagName());
    353 
    354   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    355   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    356   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    357   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    358 
    359   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    360   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    361   ASSERT_EQ(L"\n  ", parser.GetTextData());
    362 
    363   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    364   ASSERT_EQ(L"\n", parser.GetTextData());
    365 
    366   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    367   ASSERT_EQ(L"script", parser.GetTagName());
    368 
    369   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    370 }
    371 
    372 TEST(CFX_XMLSyntaxParserTest, CommentThreeDash) {
    373   const char* input =
    374       "<script contentType=\"application/x-javascript\">\n"
    375       "  <!--->\n"
    376       "</script>";
    377 
    378   RetainPtr<CFX_SeekableStreamProxy> stream =
    379       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    380           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    381   stream->SetCodePage(FX_CODEPAGE_UTF8);
    382 
    383   CFX_XMLSyntaxParser parser(stream);
    384   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    385   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    386   ASSERT_EQ(L"script", parser.GetTagName());
    387 
    388   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    389   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    390   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    391   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    392 
    393   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    394   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    395   ASSERT_EQ(L"\n  ", parser.GetTextData());
    396 
    397   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    398 }
    399 
    400 TEST(CFX_XMLSyntaxParserTest, CommentTwoDash) {
    401   const char* input =
    402       "<script contentType=\"application/x-javascript\">\n"
    403       "  <!-->\n"
    404       "</script>";
    405 
    406   RetainPtr<CFX_SeekableStreamProxy> stream =
    407       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    408           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    409   stream->SetCodePage(FX_CODEPAGE_UTF8);
    410 
    411   CFX_XMLSyntaxParser parser(stream);
    412   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    413   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    414   ASSERT_EQ(L"script", parser.GetTagName());
    415 
    416   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    417   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    418   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    419   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    420 
    421   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    422   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    423   ASSERT_EQ(L"\n  ", parser.GetTextData());
    424 
    425   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    426 }
    427 
    428 TEST(CFX_XMLSyntaxParserTest, Entities) {
    429   const char* input =
    430       "<script contentType=\"application/x-javascript\">"
    431       "&#66;"
    432       "&#x54;"
    433       "&#x00000000000000000048;"
    434       "&#x0000000000000000AB48;"
    435       "&#x0000000000000000000;"
    436       "</script>";
    437 
    438   RetainPtr<CFX_SeekableStreamProxy> stream =
    439       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    440           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    441   stream->SetCodePage(FX_CODEPAGE_UTF8);
    442 
    443   CFX_XMLSyntaxParser parser(stream);
    444   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    445   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    446   ASSERT_EQ(L"script", parser.GetTagName());
    447 
    448   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    449   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    450   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    451   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    452 
    453   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    454   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    455   ASSERT_EQ(L"BTH\xab48", parser.GetTextData());
    456 
    457   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    458   ASSERT_EQ(L"script", parser.GetTagName());
    459 
    460   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    461 }
    462 
    463 TEST(CFX_XMLSyntaxParserTest, EntityOverflowHex) {
    464   const char* input =
    465       "<script contentType=\"application/x-javascript\">"
    466       "&#xaDBDFFFFF;"
    467       "&#xafffffffffffffffffffffffffffffffff;"
    468       "</script>";
    469 
    470   RetainPtr<CFX_SeekableStreamProxy> stream =
    471       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    472           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    473   stream->SetCodePage(FX_CODEPAGE_UTF8);
    474 
    475   CFX_XMLSyntaxParser parser(stream);
    476   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    477   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    478   ASSERT_EQ(L"script", parser.GetTagName());
    479 
    480   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    481   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    482   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    483   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    484 
    485   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    486   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    487   ASSERT_EQ(L"  ", parser.GetTextData());
    488 
    489   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    490   ASSERT_EQ(L"script", parser.GetTagName());
    491 
    492   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    493 }
    494 
    495 TEST(CFX_XMLSyntaxParserTest, EntityOverflowDecimal) {
    496   const char* input =
    497       "<script contentType=\"application/x-javascript\">"
    498       "&#2914910205;"
    499       "&#29149102052342342134521341234512351234213452315;"
    500       "</script>";
    501 
    502   RetainPtr<CFX_SeekableStreamProxy> stream =
    503       pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    504           reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input));
    505   stream->SetCodePage(FX_CODEPAGE_UTF8);
    506 
    507   CFX_XMLSyntaxParser parser(stream);
    508   ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
    509   ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
    510   ASSERT_EQ(L"script", parser.GetTagName());
    511 
    512   ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
    513   ASSERT_EQ(L"contentType", parser.GetAttributeName());
    514   ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
    515   ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());
    516 
    517   ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
    518   ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
    519   ASSERT_EQ(L"  ", parser.GetTextData());
    520 
    521   ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
    522   ASSERT_EQ(L"script", parser.GetTagName());
    523 
    524   ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
    525 }
    526 
    527 TEST(CFX_XMLSyntaxParserTest, IsXMLNameChar) {
    528   EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', true));
    529   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', false));
    530 
    531   EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2069, true));
    532   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2070, true));
    533   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2073, true));
    534   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x218F, true));
    535   EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2190, true));
    536 
    537   EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDEF, true));
    538   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF0, true));
    539   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF1, true));
    540   EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFD, true));
    541   EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFE, true));
    542 }
    543