1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h" 6 7 #include <memory> 8 9 #include "core/fxcrt/cfx_seekablestreamproxy.h" 10 #include "core/fxcrt/fx_codepage.h" 11 #include "testing/gtest/include/gtest/gtest.h" 12 #include "testing/test_support.h" 13 14 TEST(CFX_XMLSyntaxParserTest, CData) { 15 const char* input = 16 "<script contentType=\"application/x-javascript\">\n" 17 " <![CDATA[\n" 18 " if (a[1] < 3)\n" 19 " app.alert(\"Tclams\");\n" 20 " ]]>\n" 21 "</script>"; 22 23 const wchar_t* cdata = 24 L"\n" 25 L" if (a[1] < 3)\n" 26 L" app.alert(\"Tclams\");\n" 27 L" "; 28 29 RetainPtr<CFX_SeekableStreamProxy> stream = 30 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 31 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 32 stream->SetCodePage(FX_CODEPAGE_UTF8); 33 34 CFX_XMLSyntaxParser parser(stream); 35 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 36 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 37 ASSERT_EQ(L"script", parser.GetTagName()); 38 39 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 40 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 41 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 42 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 43 44 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 45 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 46 ASSERT_EQ(L"\n ", parser.GetTextData()); 47 48 ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); 49 ASSERT_EQ(cdata, parser.GetTextData()); 50 51 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 52 ASSERT_EQ(L"\n", parser.GetTextData()); 53 54 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 55 ASSERT_EQ(L"script", parser.GetTagName()); 56 57 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 58 } 59 60 TEST(CFX_XMLSyntaxParserTest, CDataWithInnerScript) { 61 const char* input = 62 "<script contentType=\"application/x-javascript\">\n" 63 " <![CDATA[\n" 64 " if (a[1] < 3)\n" 65 " app.alert(\"Tclams\");\n" 66 " </script>\n" 67 " ]]>\n" 68 "</script>"; 69 70 const wchar_t* cdata = 71 L"\n" 72 L" if (a[1] < 3)\n" 73 L" app.alert(\"Tclams\");\n" 74 L" </script>\n" 75 L" "; 76 77 RetainPtr<CFX_SeekableStreamProxy> stream = 78 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 79 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 80 stream->SetCodePage(FX_CODEPAGE_UTF8); 81 82 CFX_XMLSyntaxParser parser(stream); 83 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 84 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 85 ASSERT_EQ(L"script", parser.GetTagName()); 86 87 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 88 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 89 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 90 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 91 92 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 93 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 94 ASSERT_EQ(L"\n ", parser.GetTextData()); 95 96 ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); 97 ASSERT_EQ(cdata, parser.GetTextData()); 98 99 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 100 ASSERT_EQ(L"\n", parser.GetTextData()); 101 102 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 103 ASSERT_EQ(L"script", parser.GetTagName()); 104 105 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 106 } 107 108 TEST(CFX_XMLSyntaxParserTest, ArrowBangArrow) { 109 const char* input = 110 "<script contentType=\"application/x-javascript\">\n" 111 " <!>\n" 112 "</script>"; 113 114 RetainPtr<CFX_SeekableStreamProxy> stream = 115 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 116 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 117 stream->SetCodePage(FX_CODEPAGE_UTF8); 118 119 CFX_XMLSyntaxParser parser(stream); 120 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 121 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 122 123 ASSERT_EQ(L"script", parser.GetTagName()); 124 125 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 126 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 127 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 128 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 129 130 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 131 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 132 ASSERT_EQ(L"\n ", parser.GetTextData()); 133 134 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 135 ASSERT_EQ(L"\n", parser.GetTextData()); 136 137 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 138 ASSERT_EQ(L"script", parser.GetTagName()); 139 140 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 141 } 142 143 TEST(CFX_XMLSyntaxParserTest, ArrowBangBracketArrow) { 144 const char* input = 145 "<script contentType=\"application/x-javascript\">\n" 146 " <![>\n" 147 "</script>"; 148 149 RetainPtr<CFX_SeekableStreamProxy> stream = 150 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 151 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 152 stream->SetCodePage(FX_CODEPAGE_UTF8); 153 154 CFX_XMLSyntaxParser parser(stream); 155 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 156 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 157 ASSERT_EQ(L"script", parser.GetTagName()); 158 159 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 160 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 161 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 162 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 163 164 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 165 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 166 ASSERT_EQ(L"\n ", parser.GetTextData()); 167 168 // Parser walks to end of input. 169 170 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 171 } 172 173 TEST(CFX_XMLSyntaxParserTest, IncompleteCData) { 174 const char* input = 175 "<script contentType=\"application/x-javascript\">\n" 176 " <![CDATA>\n" 177 "</script>"; 178 179 RetainPtr<CFX_SeekableStreamProxy> stream = 180 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 181 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 182 stream->SetCodePage(FX_CODEPAGE_UTF8); 183 184 CFX_XMLSyntaxParser parser(stream); 185 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 186 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 187 ASSERT_EQ(L"script", parser.GetTagName()); 188 189 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 190 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 191 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 192 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 193 194 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 195 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 196 ASSERT_EQ(L"\n ", parser.GetTextData()); 197 198 // Parser walks to end of input. 199 200 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 201 } 202 203 TEST(CFX_XMLSyntaxParserTest, UnClosedCData) { 204 const char* input = 205 "<script contentType=\"application/x-javascript\">\n" 206 " <![CDATA[\n" 207 "</script>"; 208 209 RetainPtr<CFX_SeekableStreamProxy> stream = 210 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 211 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 212 stream->SetCodePage(FX_CODEPAGE_UTF8); 213 214 CFX_XMLSyntaxParser parser(stream); 215 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 216 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 217 ASSERT_EQ(L"script", parser.GetTagName()); 218 219 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 220 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 221 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 222 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 223 224 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 225 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 226 ASSERT_EQ(L"\n ", parser.GetTextData()); 227 228 // Parser walks to end of input. 229 230 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 231 } 232 233 TEST(CFX_XMLSyntaxParserTest, EmptyCData) { 234 const char* input = 235 "<script contentType=\"application/x-javascript\">\n" 236 " <![CDATA[]]>\n" 237 "</script>"; 238 239 RetainPtr<CFX_SeekableStreamProxy> stream = 240 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 241 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 242 stream->SetCodePage(FX_CODEPAGE_UTF8); 243 244 CFX_XMLSyntaxParser parser(stream); 245 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 246 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 247 ASSERT_EQ(L"script", parser.GetTagName()); 248 249 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 250 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 251 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 252 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 253 254 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 255 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 256 ASSERT_EQ(L"\n ", parser.GetTextData()); 257 258 ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); 259 ASSERT_EQ(L"", parser.GetTextData()); 260 261 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 262 ASSERT_EQ(L"\n", parser.GetTextData()); 263 264 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 265 ASSERT_EQ(L"script", parser.GetTagName()); 266 267 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 268 } 269 270 TEST(CFX_XMLSyntaxParserTest, Comment) { 271 const char* input = 272 "<script contentType=\"application/x-javascript\">\n" 273 " <!-- A Comment -->\n" 274 "</script>"; 275 276 RetainPtr<CFX_SeekableStreamProxy> stream = 277 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 278 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 279 stream->SetCodePage(FX_CODEPAGE_UTF8); 280 281 CFX_XMLSyntaxParser parser(stream); 282 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 283 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 284 ASSERT_EQ(L"script", parser.GetTagName()); 285 286 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 287 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 288 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 289 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 290 291 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 292 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 293 ASSERT_EQ(L"\n ", parser.GetTextData()); 294 295 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 296 ASSERT_EQ(L"\n", parser.GetTextData()); 297 298 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 299 ASSERT_EQ(L"script", parser.GetTagName()); 300 301 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 302 } 303 304 TEST(CFX_XMLSyntaxParserTest, IncorrectCommentStart) { 305 const char* input = 306 "<script contentType=\"application/x-javascript\">\n" 307 " <!- A Comment -->\n" 308 "</script>"; 309 310 RetainPtr<CFX_SeekableStreamProxy> stream = 311 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 312 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 313 stream->SetCodePage(FX_CODEPAGE_UTF8); 314 315 CFX_XMLSyntaxParser parser(stream); 316 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 317 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 318 ASSERT_EQ(L"script", parser.GetTagName()); 319 320 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 321 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 322 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 323 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 324 325 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 326 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 327 ASSERT_EQ(L"\n ", parser.GetTextData()); 328 329 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 330 ASSERT_EQ(L"\n", parser.GetTextData()); 331 332 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 333 ASSERT_EQ(L"script", parser.GetTagName()); 334 335 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 336 } 337 338 TEST(CFX_XMLSyntaxParserTest, CommentEmpty) { 339 const char* input = 340 "<script contentType=\"application/x-javascript\">\n" 341 " <!---->\n" 342 "</script>"; 343 344 RetainPtr<CFX_SeekableStreamProxy> stream = 345 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 346 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 347 stream->SetCodePage(FX_CODEPAGE_UTF8); 348 349 CFX_XMLSyntaxParser parser(stream); 350 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 351 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 352 ASSERT_EQ(L"script", parser.GetTagName()); 353 354 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 355 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 356 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 357 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 358 359 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 360 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 361 ASSERT_EQ(L"\n ", parser.GetTextData()); 362 363 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 364 ASSERT_EQ(L"\n", parser.GetTextData()); 365 366 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 367 ASSERT_EQ(L"script", parser.GetTagName()); 368 369 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 370 } 371 372 TEST(CFX_XMLSyntaxParserTest, CommentThreeDash) { 373 const char* input = 374 "<script contentType=\"application/x-javascript\">\n" 375 " <!--->\n" 376 "</script>"; 377 378 RetainPtr<CFX_SeekableStreamProxy> stream = 379 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 380 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 381 stream->SetCodePage(FX_CODEPAGE_UTF8); 382 383 CFX_XMLSyntaxParser parser(stream); 384 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 385 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 386 ASSERT_EQ(L"script", parser.GetTagName()); 387 388 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 389 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 390 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 391 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 392 393 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 394 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 395 ASSERT_EQ(L"\n ", parser.GetTextData()); 396 397 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 398 } 399 400 TEST(CFX_XMLSyntaxParserTest, CommentTwoDash) { 401 const char* input = 402 "<script contentType=\"application/x-javascript\">\n" 403 " <!-->\n" 404 "</script>"; 405 406 RetainPtr<CFX_SeekableStreamProxy> stream = 407 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 408 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 409 stream->SetCodePage(FX_CODEPAGE_UTF8); 410 411 CFX_XMLSyntaxParser parser(stream); 412 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 413 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 414 ASSERT_EQ(L"script", parser.GetTagName()); 415 416 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 417 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 418 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 419 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 420 421 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 422 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 423 ASSERT_EQ(L"\n ", parser.GetTextData()); 424 425 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 426 } 427 428 TEST(CFX_XMLSyntaxParserTest, Entities) { 429 const char* input = 430 "<script contentType=\"application/x-javascript\">" 431 "B" 432 "T" 433 "H" 434 "ꭈ" 435 "�" 436 "</script>"; 437 438 RetainPtr<CFX_SeekableStreamProxy> stream = 439 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 440 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 441 stream->SetCodePage(FX_CODEPAGE_UTF8); 442 443 CFX_XMLSyntaxParser parser(stream); 444 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 445 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 446 ASSERT_EQ(L"script", parser.GetTagName()); 447 448 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 449 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 450 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 451 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 452 453 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 454 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 455 ASSERT_EQ(L"BTH\xab48", parser.GetTextData()); 456 457 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 458 ASSERT_EQ(L"script", parser.GetTagName()); 459 460 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 461 } 462 463 TEST(CFX_XMLSyntaxParserTest, EntityOverflowHex) { 464 const char* input = 465 "<script contentType=\"application/x-javascript\">" 466 "�" 467 "�" 468 "</script>"; 469 470 RetainPtr<CFX_SeekableStreamProxy> stream = 471 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 472 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 473 stream->SetCodePage(FX_CODEPAGE_UTF8); 474 475 CFX_XMLSyntaxParser parser(stream); 476 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 477 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 478 ASSERT_EQ(L"script", parser.GetTagName()); 479 480 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 481 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 482 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 483 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 484 485 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 486 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 487 ASSERT_EQ(L" ", parser.GetTextData()); 488 489 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 490 ASSERT_EQ(L"script", parser.GetTagName()); 491 492 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 493 } 494 495 TEST(CFX_XMLSyntaxParserTest, EntityOverflowDecimal) { 496 const char* input = 497 "<script contentType=\"application/x-javascript\">" 498 "�" 499 "�" 500 "</script>"; 501 502 RetainPtr<CFX_SeekableStreamProxy> stream = 503 pdfium::MakeRetain<CFX_SeekableStreamProxy>( 504 reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); 505 stream->SetCodePage(FX_CODEPAGE_UTF8); 506 507 CFX_XMLSyntaxParser parser(stream); 508 ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); 509 ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); 510 ASSERT_EQ(L"script", parser.GetTagName()); 511 512 ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); 513 ASSERT_EQ(L"contentType", parser.GetAttributeName()); 514 ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); 515 ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); 516 517 ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); 518 ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); 519 ASSERT_EQ(L" ", parser.GetTextData()); 520 521 ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); 522 ASSERT_EQ(L"script", parser.GetTagName()); 523 524 ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); 525 } 526 527 TEST(CFX_XMLSyntaxParserTest, IsXMLNameChar) { 528 EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', true)); 529 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(L'-', false)); 530 531 EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2069, true)); 532 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2070, true)); 533 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2073, true)); 534 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0x218F, true)); 535 EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0x2190, true)); 536 537 EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDEF, true)); 538 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF0, true)); 539 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFDF1, true)); 540 EXPECT_TRUE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFD, true)); 541 EXPECT_FALSE(CFX_XMLSyntaxParser::IsXMLNameChar(0xFFFE, true)); 542 } 543