Home | History | Annotate | Download | only in fm2js
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "xfa/fxfa/fm2js/xfa_lexer.h"
      8 
      9 #include "core/fxcrt/fx_ext.h"
     10 
     11 namespace {
     12 
     13 struct XFA_FMDChar {
     14   static const FX_WCHAR* inc(const FX_WCHAR*& p) {
     15     ++p;
     16     return p;
     17   }
     18   static const FX_WCHAR* dec(const FX_WCHAR*& p) {
     19     --p;
     20     return p;
     21   }
     22   static uint16_t get(const FX_WCHAR* p) { return *p; }
     23   static bool isWhiteSpace(const FX_WCHAR* p) {
     24     return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
     25   }
     26   static bool isLineTerminator(const FX_WCHAR* p) {
     27     return *p == 0x0A || *p == 0x0D;
     28   }
     29   static bool isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; }
     30   static bool isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; }
     31   static bool isDigital(const FX_WCHAR* p) {
     32     return (*p) >= '0' && (*p) <= '9';
     33   }
     34   static bool isHex(const FX_WCHAR* p) {
     35     return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
     36            ((*p) >= 'A' && (*p) <= 'F');
     37   }
     38   static bool isAlpha(const FX_WCHAR* p) {
     39     return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
     40   }
     41   static bool isAvalid(const FX_WCHAR* p, bool flag = 0);
     42   static bool string2number(const FX_WCHAR* s,
     43                             FX_DOUBLE* pValue,
     44                             const FX_WCHAR*& pEnd);
     45   static bool isUnicodeAlpha(uint16_t ch);
     46 };
     47 
     48 inline bool XFA_FMDChar::isAvalid(const FX_WCHAR* p, bool flag) {
     49   if (*p == 0) {
     50     return 1;
     51   }
     52   if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
     53       (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
     54     return 1;
     55   }
     56   if (!flag) {
     57     if (*p == 0x0B || *p == 0x0C) {
     58       return 1;
     59     }
     60   }
     61   return 0;
     62 }
     63 
     64 inline bool XFA_FMDChar::string2number(const FX_WCHAR* s,
     65                                        FX_DOUBLE* pValue,
     66                                        const FX_WCHAR*& pEnd) {
     67   if (s) {
     68     *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
     69   }
     70   return 0;
     71 }
     72 
     73 inline bool XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
     74   if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
     75       ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
     76       ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
     77       ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
     78       ch == '+' || ch == '-' || ch == '*' || ch == '/') {
     79     return false;
     80   }
     81   return true;
     82 }
     83 
     84 const XFA_FMKeyword keyWords[] = {
     85     {TOKand, 0x00000026, L"&"},
     86     {TOKlparen, 0x00000028, L"("},
     87     {TOKrparen, 0x00000029, L")"},
     88     {TOKmul, 0x0000002a, L"*"},
     89     {TOKplus, 0x0000002b, L"+"},
     90     {TOKcomma, 0x0000002c, L","},
     91     {TOKminus, 0x0000002d, L"-"},
     92     {TOKdot, 0x0000002e, L"."},
     93     {TOKdiv, 0x0000002f, L"/"},
     94     {TOKlt, 0x0000003c, L"<"},
     95     {TOKassign, 0x0000003d, L"="},
     96     {TOKgt, 0x0000003e, L">"},
     97     {TOKlbracket, 0x0000005b, L"["},
     98     {TOKrbracket, 0x0000005d, L"]"},
     99     {TOKor, 0x0000007c, L"|"},
    100     {TOKdotscream, 0x0000ec11, L".#"},
    101     {TOKdotstar, 0x0000ec18, L".*"},
    102     {TOKdotdot, 0x0000ec1c, L".."},
    103     {TOKle, 0x000133f9, L"<="},
    104     {TOKne, 0x000133fa, L"<>"},
    105     {TOKeq, 0x0001391a, L"=="},
    106     {TOKge, 0x00013e3b, L">="},
    107     {TOKdo, 0x00020153, L"do"},
    108     {TOKkseq, 0x00020676, L"eq"},
    109     {TOKksge, 0x000210ac, L"ge"},
    110     {TOKksgt, 0x000210bb, L"gt"},
    111     {TOKif, 0x00021aef, L"if"},
    112     {TOKin, 0x00021af7, L"in"},
    113     {TOKksle, 0x00022a51, L"le"},
    114     {TOKkslt, 0x00022a60, L"lt"},
    115     {TOKksne, 0x00023493, L"ne"},
    116     {TOKksor, 0x000239c1, L"or"},
    117     {TOKnull, 0x052931bb, L"null"},
    118     {TOKbreak, 0x05518c25, L"break"},
    119     {TOKksand, 0x09f9db33, L"and"},
    120     {TOKend, 0x0a631437, L"end"},
    121     {TOKeof, 0x0a63195a, L"eof"},
    122     {TOKfor, 0x0a7d67a7, L"for"},
    123     {TOKnan, 0x0b4f91dd, L"nan"},
    124     {TOKksnot, 0x0b4fd9b1, L"not"},
    125     {TOKvar, 0x0c2203e9, L"var"},
    126     {TOKthen, 0x2d5738cf, L"then"},
    127     {TOKelse, 0x45f65ee9, L"else"},
    128     {TOKexit, 0x4731d6ba, L"exit"},
    129     {TOKdownto, 0x4caadc3b, L"downto"},
    130     {TOKreturn, 0x4db8bd60, L"return"},
    131     {TOKinfinity, 0x5c0a010a, L"infinity"},
    132     {TOKendwhile, 0x5c64bff0, L"endwhile"},
    133     {TOKforeach, 0x67e31f38, L"foreach"},
    134     {TOKendfunc, 0x68f984a3, L"endfunc"},
    135     {TOKelseif, 0x78253218, L"elseif"},
    136     {TOKwhile, 0x84229259, L"while"},
    137     {TOKendfor, 0x8ab49d7e, L"endfor"},
    138     {TOKthrow, 0x8db05c94, L"throw"},
    139     {TOKstep, 0xa7a7887c, L"step"},
    140     {TOKupto, 0xb5155328, L"upto"},
    141     {TOKcontinue, 0xc0340685, L"continue"},
    142     {TOKfunc, 0xcdce60ec, L"func"},
    143     {TOKendif, 0xe0e8fee6, L"endif"},
    144 };
    145 
    146 const XFA_FM_TOKEN KEYWORD_START = TOKdo;
    147 const XFA_FM_TOKEN KEYWORD_END = TOKendif;
    148 
    149 }  // namespace
    150 
    151 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
    152   if (op < KEYWORD_START || op > KEYWORD_END)
    153     return L"";
    154   return keyWords[op].m_keyword;
    155 }
    156 
    157 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {}
    158 
    159 CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
    160     : m_type(TOKreserver), m_uLinenum(uLineNum) {}
    161 
    162 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
    163                            CXFA_FMErrorInfo* pErrorInfo)
    164     : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {}
    165 
    166 CXFA_FMLexer::~CXFA_FMLexer() {}
    167 
    168 CXFA_FMToken* CXFA_FMLexer::NextToken() {
    169   m_pToken.reset(Scan());
    170   return m_pToken.get();
    171 }
    172 
    173 CXFA_FMToken* CXFA_FMLexer::Scan() {
    174   uint16_t ch = 0;
    175   CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
    176   if (!XFA_FMDChar::isAvalid(m_ptr)) {
    177     ch = XFA_FMDChar::get(m_ptr);
    178     Error(kFMErrUnsupportedChar, ch);
    179     return p;
    180   }
    181   int iRet = 0;
    182   while (1) {
    183     if (!XFA_FMDChar::isAvalid(m_ptr)) {
    184       ch = XFA_FMDChar::get(m_ptr);
    185       Error(kFMErrUnsupportedChar, ch);
    186       return p;
    187     }
    188     ch = XFA_FMDChar::get(m_ptr);
    189     switch (ch) {
    190       case 0:
    191         p->m_type = TOKeof;
    192         return p;
    193       case 0x0A:
    194         ++m_uCurrentLine;
    195         p->m_uLinenum = m_uCurrentLine;
    196         XFA_FMDChar::inc(m_ptr);
    197         break;
    198       case 0x0D:
    199         XFA_FMDChar::inc(m_ptr);
    200         break;
    201       case ';': {
    202         const FX_WCHAR* pTemp = 0;
    203         Comment(m_ptr, pTemp);
    204         m_ptr = pTemp;
    205       } break;
    206       case '"': {
    207         const FX_WCHAR* pTemp = 0;
    208         p->m_type = TOKstring;
    209         iRet = String(p, m_ptr, pTemp);
    210         m_ptr = pTemp;
    211       }
    212         return p;
    213       case '0':
    214       case '1':
    215       case '2':
    216       case '3':
    217       case '4':
    218       case '5':
    219       case '6':
    220       case '7':
    221       case '8':
    222       case '9': {
    223         p->m_type = TOKnumber;
    224         const FX_WCHAR* pTemp = 0;
    225         iRet = Number(p, m_ptr, pTemp);
    226         m_ptr = pTemp;
    227         if (iRet) {
    228           Error(kFMErrBadSuffixNumber);
    229           return p;
    230         }
    231       }
    232         return p;
    233       case '=':
    234         XFA_FMDChar::inc(m_ptr);
    235         if (XFA_FMDChar::isAvalid(m_ptr)) {
    236           ch = XFA_FMDChar::get(m_ptr);
    237           if (ch == '=') {
    238             p->m_type = TOKeq;
    239             XFA_FMDChar::inc(m_ptr);
    240             return p;
    241           } else {
    242             p->m_type = TOKassign;
    243             return p;
    244           }
    245         } else {
    246           ch = XFA_FMDChar::get(m_ptr);
    247           Error(kFMErrUnsupportedChar, ch);
    248           return p;
    249         }
    250         break;
    251       case '<':
    252         XFA_FMDChar::inc(m_ptr);
    253         if (XFA_FMDChar::isAvalid(m_ptr)) {
    254           ch = XFA_FMDChar::get(m_ptr);
    255           if (ch == '=') {
    256             p->m_type = TOKle;
    257             XFA_FMDChar::inc(m_ptr);
    258             return p;
    259           } else if (ch == '>') {
    260             p->m_type = TOKne;
    261             XFA_FMDChar::inc(m_ptr);
    262             return p;
    263           } else {
    264             p->m_type = TOKlt;
    265             return p;
    266           }
    267         } else {
    268           ch = XFA_FMDChar::get(m_ptr);
    269           Error(kFMErrUnsupportedChar, ch);
    270           return p;
    271         }
    272         break;
    273       case '>':
    274         XFA_FMDChar::inc(m_ptr);
    275         if (XFA_FMDChar::isAvalid(m_ptr)) {
    276           ch = XFA_FMDChar::get(m_ptr);
    277           if (ch == '=') {
    278             p->m_type = TOKge;
    279             XFA_FMDChar::inc(m_ptr);
    280             return p;
    281           } else {
    282             p->m_type = TOKgt;
    283             return p;
    284           }
    285         } else {
    286           ch = XFA_FMDChar::get(m_ptr);
    287           Error(kFMErrUnsupportedChar, ch);
    288           return p;
    289         }
    290         break;
    291       case ',':
    292         p->m_type = TOKcomma;
    293         XFA_FMDChar::inc(m_ptr);
    294         return p;
    295       case '(':
    296         p->m_type = TOKlparen;
    297         XFA_FMDChar::inc(m_ptr);
    298         return p;
    299       case ')':
    300         p->m_type = TOKrparen;
    301         XFA_FMDChar::inc(m_ptr);
    302         return p;
    303       case '[':
    304         p->m_type = TOKlbracket;
    305         XFA_FMDChar::inc(m_ptr);
    306         return p;
    307       case ']':
    308         p->m_type = TOKrbracket;
    309         XFA_FMDChar::inc(m_ptr);
    310         return p;
    311       case '&':
    312         XFA_FMDChar::inc(m_ptr);
    313         p->m_type = TOKand;
    314         return p;
    315       case '|':
    316         XFA_FMDChar::inc(m_ptr);
    317         p->m_type = TOKor;
    318         return p;
    319       case '+':
    320         XFA_FMDChar::inc(m_ptr);
    321         p->m_type = TOKplus;
    322         return p;
    323       case '-':
    324         XFA_FMDChar::inc(m_ptr);
    325         p->m_type = TOKminus;
    326         return p;
    327       case '*':
    328         XFA_FMDChar::inc(m_ptr);
    329         p->m_type = TOKmul;
    330         return p;
    331       case '/':
    332         XFA_FMDChar::inc(m_ptr);
    333         if (XFA_FMDChar::isAvalid(m_ptr)) {
    334           ch = XFA_FMDChar::get(m_ptr);
    335           if (ch == '/') {
    336             const FX_WCHAR* pTemp = 0;
    337             Comment(m_ptr, pTemp);
    338             m_ptr = pTemp;
    339             break;
    340           } else {
    341             p->m_type = TOKdiv;
    342             return p;
    343           }
    344         } else {
    345           ch = XFA_FMDChar::get(m_ptr);
    346           Error(kFMErrUnsupportedChar, ch);
    347           return p;
    348         }
    349         break;
    350       case '.':
    351         XFA_FMDChar::inc(m_ptr);
    352         if (XFA_FMDChar::isAvalid(m_ptr)) {
    353           ch = XFA_FMDChar::get(m_ptr);
    354           if (ch == '.') {
    355             p->m_type = TOKdotdot;
    356             XFA_FMDChar::inc(m_ptr);
    357             return p;
    358           } else if (ch == '*') {
    359             p->m_type = TOKdotstar;
    360             XFA_FMDChar::inc(m_ptr);
    361             return p;
    362           } else if (ch == '#') {
    363             p->m_type = TOKdotscream;
    364             XFA_FMDChar::inc(m_ptr);
    365             return p;
    366           } else if (ch <= '9' && ch >= '0') {
    367             p->m_type = TOKnumber;
    368             const FX_WCHAR* pTemp = 0;
    369             XFA_FMDChar::dec(m_ptr);
    370             iRet = Number(p, m_ptr, pTemp);
    371             m_ptr = pTemp;
    372             if (iRet) {
    373               Error(kFMErrBadSuffixNumber);
    374             }
    375             return p;
    376           } else {
    377             p->m_type = TOKdot;
    378             return p;
    379           }
    380         } else {
    381           ch = XFA_FMDChar::get(m_ptr);
    382           Error(kFMErrUnsupportedChar, ch);
    383           return p;
    384         }
    385       case 0x09:
    386       case 0x0B:
    387       case 0x0C:
    388       case 0x20:
    389         XFA_FMDChar::inc(m_ptr);
    390         break;
    391       default: {
    392         const FX_WCHAR* pTemp = 0;
    393         iRet = Identifiers(p, m_ptr, pTemp);
    394         m_ptr = pTemp;
    395         if (iRet) {
    396           return p;
    397         }
    398         p->m_type = IsKeyword(p->m_wstring);
    399       }
    400         return p;
    401     }
    402   }
    403 }
    404 
    405 uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t,
    406                               const FX_WCHAR* p,
    407                               const FX_WCHAR*& pEnd) {
    408   FX_DOUBLE number = 0;
    409   if (XFA_FMDChar::string2number(p, &number, pEnd)) {
    410     return 1;
    411   }
    412   if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
    413     return 1;
    414   }
    415   t->m_wstring = CFX_WideStringC(p, (pEnd - p));
    416   return 0;
    417 }
    418 
    419 uint32_t CXFA_FMLexer::String(CXFA_FMToken* t,
    420                               const FX_WCHAR* p,
    421                               const FX_WCHAR*& pEnd) {
    422   const FX_WCHAR* pStart = p;
    423   uint16_t ch = 0;
    424   XFA_FMDChar::inc(p);
    425   ch = XFA_FMDChar::get(p);
    426   while (ch) {
    427     if (!XFA_FMDChar::isAvalid(p)) {
    428       ch = XFA_FMDChar::get(p);
    429       pEnd = p;
    430       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    431       Error(kFMErrUnsupportedChar, ch);
    432       return 1;
    433     }
    434     if (ch == '"') {
    435       XFA_FMDChar::inc(p);
    436       if (!XFA_FMDChar::isAvalid(p)) {
    437         ch = XFA_FMDChar::get(p);
    438         pEnd = p;
    439         t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    440         Error(kFMErrUnsupportedChar, ch);
    441         return 1;
    442       }
    443       ch = XFA_FMDChar::get(p);
    444       if (ch == '"') {
    445         goto NEXT;
    446       } else {
    447         break;
    448       }
    449     }
    450   NEXT:
    451     XFA_FMDChar::inc(p);
    452     ch = XFA_FMDChar::get(p);
    453   }
    454   pEnd = p;
    455   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    456   return 0;
    457 }
    458 
    459 uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
    460                                    const FX_WCHAR* p,
    461                                    const FX_WCHAR*& pEnd) {
    462   const FX_WCHAR* pStart = p;
    463   uint16_t ch = 0;
    464   ch = XFA_FMDChar::get(p);
    465   XFA_FMDChar::inc(p);
    466   if (!XFA_FMDChar::isAvalid(p)) {
    467     pEnd = p;
    468     t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    469     Error(kFMErrUnsupportedChar, ch);
    470     return 1;
    471   }
    472   ch = XFA_FMDChar::get(p);
    473   while (ch) {
    474     if (!XFA_FMDChar::isAvalid(p)) {
    475       pEnd = p;
    476       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    477       Error(kFMErrUnsupportedChar, ch);
    478       return 1;
    479     }
    480     ch = XFA_FMDChar::get(p);
    481     if (XFA_FMDChar::isUnicodeAlpha(ch)) {
    482       XFA_FMDChar::inc(p);
    483     } else {
    484       pEnd = p;
    485       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    486       return 0;
    487     }
    488   }
    489   pEnd = p;
    490   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
    491   return 0;
    492 }
    493 
    494 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
    495   unsigned ch = 0;
    496   XFA_FMDChar::inc(p);
    497   ch = XFA_FMDChar::get(p);
    498   while (ch) {
    499     if (ch == 0x0D) {
    500       XFA_FMDChar::inc(p);
    501       pEnd = p;
    502       return;
    503     }
    504     if (ch == 0x0A) {
    505       ++m_uCurrentLine;
    506       XFA_FMDChar::inc(p);
    507       pEnd = p;
    508       return;
    509     }
    510     XFA_FMDChar::inc(p);
    511     ch = XFA_FMDChar::get(p);
    512   }
    513   pEnd = p;
    514 }
    515 
    516 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
    517   uint32_t uHash = FX_HashCode_GetW(str, true);
    518   int32_t iStart = KEYWORD_START;
    519   int32_t iEnd = KEYWORD_END;
    520   do {
    521     int32_t iMid = (iStart + iEnd) / 2;
    522     XFA_FMKeyword keyword = keyWords[iMid];
    523     if (uHash == keyword.m_uHash)
    524       return keyword.m_type;
    525     if (uHash < keyword.m_uHash)
    526       iEnd = iMid - 1;
    527     else
    528       iStart = iMid + 1;
    529   } while (iStart <= iEnd);
    530   return TOKidentifier;
    531 }
    532 
    533 void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) {
    534   m_pErrorInfo->linenum = m_uCurrentLine;
    535   va_list ap;
    536   va_start(ap, msg);
    537   m_pErrorInfo->message.FormatV(msg, ap);
    538   va_end(ap);
    539 }
    540 
    541 bool CXFA_FMLexer::HasError() const {
    542   if (m_pErrorInfo->message.IsEmpty()) {
    543     return false;
    544   }
    545   return true;
    546 }
    547