1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/fxfa/fm2js/xfa_lexer.h" 8 9 #include "core/fxcrt/fx_ext.h" 10 11 namespace { 12 13 struct XFA_FMDChar { 14 static const FX_WCHAR* inc(const FX_WCHAR*& p) { 15 ++p; 16 return p; 17 } 18 static const FX_WCHAR* dec(const FX_WCHAR*& p) { 19 --p; 20 return p; 21 } 22 static uint16_t get(const FX_WCHAR* p) { return *p; } 23 static bool isWhiteSpace(const FX_WCHAR* p) { 24 return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20; 25 } 26 static bool isLineTerminator(const FX_WCHAR* p) { 27 return *p == 0x0A || *p == 0x0D; 28 } 29 static bool isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; } 30 static bool isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; } 31 static bool isDigital(const FX_WCHAR* p) { 32 return (*p) >= '0' && (*p) <= '9'; 33 } 34 static bool isHex(const FX_WCHAR* p) { 35 return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') || 36 ((*p) >= 'A' && (*p) <= 'F'); 37 } 38 static bool isAlpha(const FX_WCHAR* p) { 39 return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A'); 40 } 41 static bool isAvalid(const FX_WCHAR* p, bool flag = 0); 42 static bool string2number(const FX_WCHAR* s, 43 FX_DOUBLE* pValue, 44 const FX_WCHAR*& pEnd); 45 static bool isUnicodeAlpha(uint16_t ch); 46 }; 47 48 inline bool XFA_FMDChar::isAvalid(const FX_WCHAR* p, bool flag) { 49 if (*p == 0) { 50 return 1; 51 } 52 if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D || 53 (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) { 54 return 1; 55 } 56 if (!flag) { 57 if (*p == 0x0B || *p == 0x0C) { 58 return 1; 59 } 60 } 61 return 0; 62 } 63 64 inline bool XFA_FMDChar::string2number(const FX_WCHAR* s, 65 FX_DOUBLE* pValue, 66 const FX_WCHAR*& pEnd) { 67 if (s) { 68 *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd); 69 } 70 return 0; 71 } 72 73 inline bool XFA_FMDChar::isUnicodeAlpha(uint16_t ch) { 74 if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || 75 ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || 76 ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || 77 ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || 78 ch == '+' || ch == '-' || ch == '*' || ch == '/') { 79 return false; 80 } 81 return true; 82 } 83 84 const XFA_FMKeyword keyWords[] = { 85 {TOKand, 0x00000026, L"&"}, 86 {TOKlparen, 0x00000028, L"("}, 87 {TOKrparen, 0x00000029, L")"}, 88 {TOKmul, 0x0000002a, L"*"}, 89 {TOKplus, 0x0000002b, L"+"}, 90 {TOKcomma, 0x0000002c, L","}, 91 {TOKminus, 0x0000002d, L"-"}, 92 {TOKdot, 0x0000002e, L"."}, 93 {TOKdiv, 0x0000002f, L"/"}, 94 {TOKlt, 0x0000003c, L"<"}, 95 {TOKassign, 0x0000003d, L"="}, 96 {TOKgt, 0x0000003e, L">"}, 97 {TOKlbracket, 0x0000005b, L"["}, 98 {TOKrbracket, 0x0000005d, L"]"}, 99 {TOKor, 0x0000007c, L"|"}, 100 {TOKdotscream, 0x0000ec11, L".#"}, 101 {TOKdotstar, 0x0000ec18, L".*"}, 102 {TOKdotdot, 0x0000ec1c, L".."}, 103 {TOKle, 0x000133f9, L"<="}, 104 {TOKne, 0x000133fa, L"<>"}, 105 {TOKeq, 0x0001391a, L"=="}, 106 {TOKge, 0x00013e3b, L">="}, 107 {TOKdo, 0x00020153, L"do"}, 108 {TOKkseq, 0x00020676, L"eq"}, 109 {TOKksge, 0x000210ac, L"ge"}, 110 {TOKksgt, 0x000210bb, L"gt"}, 111 {TOKif, 0x00021aef, L"if"}, 112 {TOKin, 0x00021af7, L"in"}, 113 {TOKksle, 0x00022a51, L"le"}, 114 {TOKkslt, 0x00022a60, L"lt"}, 115 {TOKksne, 0x00023493, L"ne"}, 116 {TOKksor, 0x000239c1, L"or"}, 117 {TOKnull, 0x052931bb, L"null"}, 118 {TOKbreak, 0x05518c25, L"break"}, 119 {TOKksand, 0x09f9db33, L"and"}, 120 {TOKend, 0x0a631437, L"end"}, 121 {TOKeof, 0x0a63195a, L"eof"}, 122 {TOKfor, 0x0a7d67a7, L"for"}, 123 {TOKnan, 0x0b4f91dd, L"nan"}, 124 {TOKksnot, 0x0b4fd9b1, L"not"}, 125 {TOKvar, 0x0c2203e9, L"var"}, 126 {TOKthen, 0x2d5738cf, L"then"}, 127 {TOKelse, 0x45f65ee9, L"else"}, 128 {TOKexit, 0x4731d6ba, L"exit"}, 129 {TOKdownto, 0x4caadc3b, L"downto"}, 130 {TOKreturn, 0x4db8bd60, L"return"}, 131 {TOKinfinity, 0x5c0a010a, L"infinity"}, 132 {TOKendwhile, 0x5c64bff0, L"endwhile"}, 133 {TOKforeach, 0x67e31f38, L"foreach"}, 134 {TOKendfunc, 0x68f984a3, L"endfunc"}, 135 {TOKelseif, 0x78253218, L"elseif"}, 136 {TOKwhile, 0x84229259, L"while"}, 137 {TOKendfor, 0x8ab49d7e, L"endfor"}, 138 {TOKthrow, 0x8db05c94, L"throw"}, 139 {TOKstep, 0xa7a7887c, L"step"}, 140 {TOKupto, 0xb5155328, L"upto"}, 141 {TOKcontinue, 0xc0340685, L"continue"}, 142 {TOKfunc, 0xcdce60ec, L"func"}, 143 {TOKendif, 0xe0e8fee6, L"endif"}, 144 }; 145 146 const XFA_FM_TOKEN KEYWORD_START = TOKdo; 147 const XFA_FM_TOKEN KEYWORD_END = TOKendif; 148 149 } // namespace 150 151 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { 152 if (op < KEYWORD_START || op > KEYWORD_END) 153 return L""; 154 return keyWords[op].m_keyword; 155 } 156 157 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} 158 159 CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum) 160 : m_type(TOKreserver), m_uLinenum(uLineNum) {} 161 162 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, 163 CXFA_FMErrorInfo* pErrorInfo) 164 : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {} 165 166 CXFA_FMLexer::~CXFA_FMLexer() {} 167 168 CXFA_FMToken* CXFA_FMLexer::NextToken() { 169 m_pToken.reset(Scan()); 170 return m_pToken.get(); 171 } 172 173 CXFA_FMToken* CXFA_FMLexer::Scan() { 174 uint16_t ch = 0; 175 CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine); 176 if (!XFA_FMDChar::isAvalid(m_ptr)) { 177 ch = XFA_FMDChar::get(m_ptr); 178 Error(kFMErrUnsupportedChar, ch); 179 return p; 180 } 181 int iRet = 0; 182 while (1) { 183 if (!XFA_FMDChar::isAvalid(m_ptr)) { 184 ch = XFA_FMDChar::get(m_ptr); 185 Error(kFMErrUnsupportedChar, ch); 186 return p; 187 } 188 ch = XFA_FMDChar::get(m_ptr); 189 switch (ch) { 190 case 0: 191 p->m_type = TOKeof; 192 return p; 193 case 0x0A: 194 ++m_uCurrentLine; 195 p->m_uLinenum = m_uCurrentLine; 196 XFA_FMDChar::inc(m_ptr); 197 break; 198 case 0x0D: 199 XFA_FMDChar::inc(m_ptr); 200 break; 201 case ';': { 202 const FX_WCHAR* pTemp = 0; 203 Comment(m_ptr, pTemp); 204 m_ptr = pTemp; 205 } break; 206 case '"': { 207 const FX_WCHAR* pTemp = 0; 208 p->m_type = TOKstring; 209 iRet = String(p, m_ptr, pTemp); 210 m_ptr = pTemp; 211 } 212 return p; 213 case '0': 214 case '1': 215 case '2': 216 case '3': 217 case '4': 218 case '5': 219 case '6': 220 case '7': 221 case '8': 222 case '9': { 223 p->m_type = TOKnumber; 224 const FX_WCHAR* pTemp = 0; 225 iRet = Number(p, m_ptr, pTemp); 226 m_ptr = pTemp; 227 if (iRet) { 228 Error(kFMErrBadSuffixNumber); 229 return p; 230 } 231 } 232 return p; 233 case '=': 234 XFA_FMDChar::inc(m_ptr); 235 if (XFA_FMDChar::isAvalid(m_ptr)) { 236 ch = XFA_FMDChar::get(m_ptr); 237 if (ch == '=') { 238 p->m_type = TOKeq; 239 XFA_FMDChar::inc(m_ptr); 240 return p; 241 } else { 242 p->m_type = TOKassign; 243 return p; 244 } 245 } else { 246 ch = XFA_FMDChar::get(m_ptr); 247 Error(kFMErrUnsupportedChar, ch); 248 return p; 249 } 250 break; 251 case '<': 252 XFA_FMDChar::inc(m_ptr); 253 if (XFA_FMDChar::isAvalid(m_ptr)) { 254 ch = XFA_FMDChar::get(m_ptr); 255 if (ch == '=') { 256 p->m_type = TOKle; 257 XFA_FMDChar::inc(m_ptr); 258 return p; 259 } else if (ch == '>') { 260 p->m_type = TOKne; 261 XFA_FMDChar::inc(m_ptr); 262 return p; 263 } else { 264 p->m_type = TOKlt; 265 return p; 266 } 267 } else { 268 ch = XFA_FMDChar::get(m_ptr); 269 Error(kFMErrUnsupportedChar, ch); 270 return p; 271 } 272 break; 273 case '>': 274 XFA_FMDChar::inc(m_ptr); 275 if (XFA_FMDChar::isAvalid(m_ptr)) { 276 ch = XFA_FMDChar::get(m_ptr); 277 if (ch == '=') { 278 p->m_type = TOKge; 279 XFA_FMDChar::inc(m_ptr); 280 return p; 281 } else { 282 p->m_type = TOKgt; 283 return p; 284 } 285 } else { 286 ch = XFA_FMDChar::get(m_ptr); 287 Error(kFMErrUnsupportedChar, ch); 288 return p; 289 } 290 break; 291 case ',': 292 p->m_type = TOKcomma; 293 XFA_FMDChar::inc(m_ptr); 294 return p; 295 case '(': 296 p->m_type = TOKlparen; 297 XFA_FMDChar::inc(m_ptr); 298 return p; 299 case ')': 300 p->m_type = TOKrparen; 301 XFA_FMDChar::inc(m_ptr); 302 return p; 303 case '[': 304 p->m_type = TOKlbracket; 305 XFA_FMDChar::inc(m_ptr); 306 return p; 307 case ']': 308 p->m_type = TOKrbracket; 309 XFA_FMDChar::inc(m_ptr); 310 return p; 311 case '&': 312 XFA_FMDChar::inc(m_ptr); 313 p->m_type = TOKand; 314 return p; 315 case '|': 316 XFA_FMDChar::inc(m_ptr); 317 p->m_type = TOKor; 318 return p; 319 case '+': 320 XFA_FMDChar::inc(m_ptr); 321 p->m_type = TOKplus; 322 return p; 323 case '-': 324 XFA_FMDChar::inc(m_ptr); 325 p->m_type = TOKminus; 326 return p; 327 case '*': 328 XFA_FMDChar::inc(m_ptr); 329 p->m_type = TOKmul; 330 return p; 331 case '/': 332 XFA_FMDChar::inc(m_ptr); 333 if (XFA_FMDChar::isAvalid(m_ptr)) { 334 ch = XFA_FMDChar::get(m_ptr); 335 if (ch == '/') { 336 const FX_WCHAR* pTemp = 0; 337 Comment(m_ptr, pTemp); 338 m_ptr = pTemp; 339 break; 340 } else { 341 p->m_type = TOKdiv; 342 return p; 343 } 344 } else { 345 ch = XFA_FMDChar::get(m_ptr); 346 Error(kFMErrUnsupportedChar, ch); 347 return p; 348 } 349 break; 350 case '.': 351 XFA_FMDChar::inc(m_ptr); 352 if (XFA_FMDChar::isAvalid(m_ptr)) { 353 ch = XFA_FMDChar::get(m_ptr); 354 if (ch == '.') { 355 p->m_type = TOKdotdot; 356 XFA_FMDChar::inc(m_ptr); 357 return p; 358 } else if (ch == '*') { 359 p->m_type = TOKdotstar; 360 XFA_FMDChar::inc(m_ptr); 361 return p; 362 } else if (ch == '#') { 363 p->m_type = TOKdotscream; 364 XFA_FMDChar::inc(m_ptr); 365 return p; 366 } else if (ch <= '9' && ch >= '0') { 367 p->m_type = TOKnumber; 368 const FX_WCHAR* pTemp = 0; 369 XFA_FMDChar::dec(m_ptr); 370 iRet = Number(p, m_ptr, pTemp); 371 m_ptr = pTemp; 372 if (iRet) { 373 Error(kFMErrBadSuffixNumber); 374 } 375 return p; 376 } else { 377 p->m_type = TOKdot; 378 return p; 379 } 380 } else { 381 ch = XFA_FMDChar::get(m_ptr); 382 Error(kFMErrUnsupportedChar, ch); 383 return p; 384 } 385 case 0x09: 386 case 0x0B: 387 case 0x0C: 388 case 0x20: 389 XFA_FMDChar::inc(m_ptr); 390 break; 391 default: { 392 const FX_WCHAR* pTemp = 0; 393 iRet = Identifiers(p, m_ptr, pTemp); 394 m_ptr = pTemp; 395 if (iRet) { 396 return p; 397 } 398 p->m_type = IsKeyword(p->m_wstring); 399 } 400 return p; 401 } 402 } 403 } 404 405 uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t, 406 const FX_WCHAR* p, 407 const FX_WCHAR*& pEnd) { 408 FX_DOUBLE number = 0; 409 if (XFA_FMDChar::string2number(p, &number, pEnd)) { 410 return 1; 411 } 412 if (pEnd && XFA_FMDChar::isAlpha(pEnd)) { 413 return 1; 414 } 415 t->m_wstring = CFX_WideStringC(p, (pEnd - p)); 416 return 0; 417 } 418 419 uint32_t CXFA_FMLexer::String(CXFA_FMToken* t, 420 const FX_WCHAR* p, 421 const FX_WCHAR*& pEnd) { 422 const FX_WCHAR* pStart = p; 423 uint16_t ch = 0; 424 XFA_FMDChar::inc(p); 425 ch = XFA_FMDChar::get(p); 426 while (ch) { 427 if (!XFA_FMDChar::isAvalid(p)) { 428 ch = XFA_FMDChar::get(p); 429 pEnd = p; 430 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 431 Error(kFMErrUnsupportedChar, ch); 432 return 1; 433 } 434 if (ch == '"') { 435 XFA_FMDChar::inc(p); 436 if (!XFA_FMDChar::isAvalid(p)) { 437 ch = XFA_FMDChar::get(p); 438 pEnd = p; 439 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 440 Error(kFMErrUnsupportedChar, ch); 441 return 1; 442 } 443 ch = XFA_FMDChar::get(p); 444 if (ch == '"') { 445 goto NEXT; 446 } else { 447 break; 448 } 449 } 450 NEXT: 451 XFA_FMDChar::inc(p); 452 ch = XFA_FMDChar::get(p); 453 } 454 pEnd = p; 455 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 456 return 0; 457 } 458 459 uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t, 460 const FX_WCHAR* p, 461 const FX_WCHAR*& pEnd) { 462 const FX_WCHAR* pStart = p; 463 uint16_t ch = 0; 464 ch = XFA_FMDChar::get(p); 465 XFA_FMDChar::inc(p); 466 if (!XFA_FMDChar::isAvalid(p)) { 467 pEnd = p; 468 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 469 Error(kFMErrUnsupportedChar, ch); 470 return 1; 471 } 472 ch = XFA_FMDChar::get(p); 473 while (ch) { 474 if (!XFA_FMDChar::isAvalid(p)) { 475 pEnd = p; 476 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 477 Error(kFMErrUnsupportedChar, ch); 478 return 1; 479 } 480 ch = XFA_FMDChar::get(p); 481 if (XFA_FMDChar::isUnicodeAlpha(ch)) { 482 XFA_FMDChar::inc(p); 483 } else { 484 pEnd = p; 485 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 486 return 0; 487 } 488 } 489 pEnd = p; 490 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); 491 return 0; 492 } 493 494 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) { 495 unsigned ch = 0; 496 XFA_FMDChar::inc(p); 497 ch = XFA_FMDChar::get(p); 498 while (ch) { 499 if (ch == 0x0D) { 500 XFA_FMDChar::inc(p); 501 pEnd = p; 502 return; 503 } 504 if (ch == 0x0A) { 505 ++m_uCurrentLine; 506 XFA_FMDChar::inc(p); 507 pEnd = p; 508 return; 509 } 510 XFA_FMDChar::inc(p); 511 ch = XFA_FMDChar::get(p); 512 } 513 pEnd = p; 514 } 515 516 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { 517 uint32_t uHash = FX_HashCode_GetW(str, true); 518 int32_t iStart = KEYWORD_START; 519 int32_t iEnd = KEYWORD_END; 520 do { 521 int32_t iMid = (iStart + iEnd) / 2; 522 XFA_FMKeyword keyword = keyWords[iMid]; 523 if (uHash == keyword.m_uHash) 524 return keyword.m_type; 525 if (uHash < keyword.m_uHash) 526 iEnd = iMid - 1; 527 else 528 iStart = iMid + 1; 529 } while (iStart <= iEnd); 530 return TOKidentifier; 531 } 532 533 void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) { 534 m_pErrorInfo->linenum = m_uCurrentLine; 535 va_list ap; 536 va_start(ap, msg); 537 m_pErrorInfo->message.FormatV(msg, ap); 538 va_end(ap); 539 } 540 541 bool CXFA_FMLexer::HasError() const { 542 if (m_pErrorInfo->message.IsEmpty()) { 543 return false; 544 } 545 return true; 546 } 547