Home | History | Annotate | Download | only in src
      1 /*
      2 ** 2001 September 15
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 ** A tokenizer for SQL
     13 **
     14 ** This file contains C code that splits an SQL input string up into
     15 ** individual tokens and sends those tokens one-by-one over to the
     16 ** parser for analysis.
     17 */
     18 #include "sqliteInt.h"
     19 #include <stdlib.h>
     20 
     21 /*
     22 ** The charMap() macro maps alphabetic characters into their
     23 ** lower-case ASCII equivalent.  On ASCII machines, this is just
     24 ** an upper-to-lower case map.  On EBCDIC machines we also need
     25 ** to adjust the encoding.  Only alphabetic characters and underscores
     26 ** need to be translated.
     27 */
     28 #ifdef SQLITE_ASCII
     29 # define charMap(X) sqlite3UpperToLower[(unsigned char)X]
     30 #endif
     31 #ifdef SQLITE_EBCDIC
     32 # define charMap(X) ebcdicToAscii[(unsigned char)X]
     33 const unsigned char ebcdicToAscii[] = {
     34 /* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
     35    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
     36    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
     37    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
     38    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
     39    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
     40    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
     41    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
     42    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
     43    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
     44    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
     45    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
     46    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
     47    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
     48    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
     49    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
     50    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
     51 };
     52 #endif
     53 
     54 /*
     55 ** The sqlite3KeywordCode function looks up an identifier to determine if
     56 ** it is a keyword.  If it is a keyword, the token code of that keyword is
     57 ** returned.  If the input is not a keyword, TK_ID is returned.
     58 **
     59 ** The implementation of this routine was generated by a program,
     60 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
     61 ** The output of the mkkeywordhash.c program is written into a file
     62 ** named keywordhash.h and then included into this source file by
     63 ** the #include below.
     64 */
     65 #include "keywordhash.h"
     66 
     67 
     68 /*
     69 ** If X is a character that can be used in an identifier then
     70 ** IdChar(X) will be true.  Otherwise it is false.
     71 **
     72 ** For ASCII, any character with the high-order bit set is
     73 ** allowed in an identifier.  For 7-bit characters,
     74 ** sqlite3IsIdChar[X] must be 1.
     75 **
     76 ** For EBCDIC, the rules are more complex but have the same
     77 ** end result.
     78 **
     79 ** Ticket #1066.  the SQL standard does not allow '$' in the
     80 ** middle of identfiers.  But many SQL implementations do.
     81 ** SQLite will allow '$' in identifiers for compatibility.
     82 ** But the feature is undocumented.
     83 */
     84 #ifdef SQLITE_ASCII
     85 #define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
     86 #endif
     87 #ifdef SQLITE_EBCDIC
     88 const char sqlite3IsEbcdicIdChar[] = {
     89 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
     90     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
     91     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
     92     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
     93     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
     94     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
     95     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
     96     1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
     97     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
     98     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
     99     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    100     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    101     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
    102 };
    103 #define IdChar(C)  (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
    104 #endif
    105 
    106 
    107 /*
    108 ** Return the length of the token that begins at z[0].
    109 ** Store the token type in *tokenType before returning.
    110 */
    111 int sqlite3GetToken(const unsigned char *z, int *tokenType){
    112   int i, c;
    113   switch( *z ){
    114     case ' ': case '\t': case '\n': case '\f': case '\r': {
    115       testcase( z[0]==' ' );
    116       testcase( z[0]=='\t' );
    117       testcase( z[0]=='\n' );
    118       testcase( z[0]=='\f' );
    119       testcase( z[0]=='\r' );
    120       for(i=1; sqlite3Isspace(z[i]); i++){}
    121       *tokenType = TK_SPACE;
    122       return i;
    123     }
    124     case '-': {
    125       if( z[1]=='-' ){
    126         /* IMP: R-15891-05542 -- syntax diagram for comments */
    127         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
    128         *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
    129         return i;
    130       }
    131       *tokenType = TK_MINUS;
    132       return 1;
    133     }
    134     case '(': {
    135       *tokenType = TK_LP;
    136       return 1;
    137     }
    138     case ')': {
    139       *tokenType = TK_RP;
    140       return 1;
    141     }
    142     case ';': {
    143       *tokenType = TK_SEMI;
    144       return 1;
    145     }
    146     case '+': {
    147       *tokenType = TK_PLUS;
    148       return 1;
    149     }
    150     case '*': {
    151       *tokenType = TK_STAR;
    152       return 1;
    153     }
    154     case '/': {
    155       if( z[1]!='*' || z[2]==0 ){
    156         *tokenType = TK_SLASH;
    157         return 1;
    158       }
    159       /* IMP: R-15891-05542 -- syntax diagram for comments */
    160       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
    161       if( c ) i++;
    162       *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
    163       return i;
    164     }
    165     case '%': {
    166       *tokenType = TK_REM;
    167       return 1;
    168     }
    169     case '=': {
    170       *tokenType = TK_EQ;
    171       return 1 + (z[1]=='=');
    172     }
    173     case '<': {
    174       if( (c=z[1])=='=' ){
    175         *tokenType = TK_LE;
    176         return 2;
    177       }else if( c=='>' ){
    178         *tokenType = TK_NE;
    179         return 2;
    180       }else if( c=='<' ){
    181         *tokenType = TK_LSHIFT;
    182         return 2;
    183       }else{
    184         *tokenType = TK_LT;
    185         return 1;
    186       }
    187     }
    188     case '>': {
    189       if( (c=z[1])=='=' ){
    190         *tokenType = TK_GE;
    191         return 2;
    192       }else if( c=='>' ){
    193         *tokenType = TK_RSHIFT;
    194         return 2;
    195       }else{
    196         *tokenType = TK_GT;
    197         return 1;
    198       }
    199     }
    200     case '!': {
    201       if( z[1]!='=' ){
    202         *tokenType = TK_ILLEGAL;
    203         return 2;
    204       }else{
    205         *tokenType = TK_NE;
    206         return 2;
    207       }
    208     }
    209     case '|': {
    210       if( z[1]!='|' ){
    211         *tokenType = TK_BITOR;
    212         return 1;
    213       }else{
    214         *tokenType = TK_CONCAT;
    215         return 2;
    216       }
    217     }
    218     case ',': {
    219       *tokenType = TK_COMMA;
    220       return 1;
    221     }
    222     case '&': {
    223       *tokenType = TK_BITAND;
    224       return 1;
    225     }
    226     case '~': {
    227       *tokenType = TK_BITNOT;
    228       return 1;
    229     }
    230     case '`':
    231     case '\'':
    232     case '"': {
    233       int delim = z[0];
    234       testcase( delim=='`' );
    235       testcase( delim=='\'' );
    236       testcase( delim=='"' );
    237       for(i=1; (c=z[i])!=0; i++){
    238         if( c==delim ){
    239           if( z[i+1]==delim ){
    240             i++;
    241           }else{
    242             break;
    243           }
    244         }
    245       }
    246       if( c=='\'' ){
    247         *tokenType = TK_STRING;
    248         return i+1;
    249       }else if( c!=0 ){
    250         *tokenType = TK_ID;
    251         return i+1;
    252       }else{
    253         *tokenType = TK_ILLEGAL;
    254         return i;
    255       }
    256     }
    257     case '.': {
    258 #ifndef SQLITE_OMIT_FLOATING_POINT
    259       if( !sqlite3Isdigit(z[1]) )
    260 #endif
    261       {
    262         *tokenType = TK_DOT;
    263         return 1;
    264       }
    265       /* If the next character is a digit, this is a floating point
    266       ** number that begins with ".".  Fall thru into the next case */
    267     }
    268     case '0': case '1': case '2': case '3': case '4':
    269     case '5': case '6': case '7': case '8': case '9': {
    270       testcase( z[0]=='0' );  testcase( z[0]=='1' );  testcase( z[0]=='2' );
    271       testcase( z[0]=='3' );  testcase( z[0]=='4' );  testcase( z[0]=='5' );
    272       testcase( z[0]=='6' );  testcase( z[0]=='7' );  testcase( z[0]=='8' );
    273       testcase( z[0]=='9' );
    274       *tokenType = TK_INTEGER;
    275       for(i=0; sqlite3Isdigit(z[i]); i++){}
    276 #ifndef SQLITE_OMIT_FLOATING_POINT
    277       if( z[i]=='.' ){
    278         i++;
    279         while( sqlite3Isdigit(z[i]) ){ i++; }
    280         *tokenType = TK_FLOAT;
    281       }
    282       if( (z[i]=='e' || z[i]=='E') &&
    283            ( sqlite3Isdigit(z[i+1])
    284             || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
    285            )
    286       ){
    287         i += 2;
    288         while( sqlite3Isdigit(z[i]) ){ i++; }
    289         *tokenType = TK_FLOAT;
    290       }
    291 #endif
    292       while( IdChar(z[i]) ){
    293         *tokenType = TK_ILLEGAL;
    294         i++;
    295       }
    296       return i;
    297     }
    298     case '[': {
    299       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
    300       *tokenType = c==']' ? TK_ID : TK_ILLEGAL;
    301       return i;
    302     }
    303     case '?': {
    304       *tokenType = TK_VARIABLE;
    305       for(i=1; sqlite3Isdigit(z[i]); i++){}
    306       return i;
    307     }
    308     case '#': {
    309       for(i=1; sqlite3Isdigit(z[i]); i++){}
    310       if( i>1 ){
    311         /* Parameters of the form #NNN (where NNN is a number) are used
    312         ** internally by sqlite3NestedParse.  */
    313         *tokenType = TK_REGISTER;
    314         return i;
    315       }
    316       /* Fall through into the next case if the '#' is not followed by
    317       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
    318     }
    319 #ifndef SQLITE_OMIT_TCL_VARIABLE
    320     case '$':
    321 #endif
    322     case '@':  /* For compatibility with MS SQL Server */
    323     case ':': {
    324       int n = 0;
    325       testcase( z[0]=='$' );  testcase( z[0]=='@' );  testcase( z[0]==':' );
    326       *tokenType = TK_VARIABLE;
    327       for(i=1; (c=z[i])!=0; i++){
    328         if( IdChar(c) ){
    329           n++;
    330 #ifndef SQLITE_OMIT_TCL_VARIABLE
    331         }else if( c=='(' && n>0 ){
    332           do{
    333             i++;
    334           }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
    335           if( c==')' ){
    336             i++;
    337           }else{
    338             *tokenType = TK_ILLEGAL;
    339           }
    340           break;
    341         }else if( c==':' && z[i+1]==':' ){
    342           i++;
    343 #endif
    344         }else{
    345           break;
    346         }
    347       }
    348       if( n==0 ) *tokenType = TK_ILLEGAL;
    349       return i;
    350     }
    351 #ifndef SQLITE_OMIT_BLOB_LITERAL
    352     case 'x': case 'X': {
    353       testcase( z[0]=='x' ); testcase( z[0]=='X' );
    354       if( z[1]=='\'' ){
    355         *tokenType = TK_BLOB;
    356         for(i=2; (c=z[i])!=0 && c!='\''; i++){
    357           if( !sqlite3Isxdigit(c) ){
    358             *tokenType = TK_ILLEGAL;
    359           }
    360         }
    361         if( i%2 || !c ) *tokenType = TK_ILLEGAL;
    362         if( c ) i++;
    363         return i;
    364       }
    365       /* Otherwise fall through to the next case */
    366     }
    367 #endif
    368     default: {
    369       if( !IdChar(*z) ){
    370         break;
    371       }
    372       for(i=1; IdChar(z[i]); i++){}
    373       *tokenType = keywordCode((char*)z, i);
    374       return i;
    375     }
    376   }
    377   *tokenType = TK_ILLEGAL;
    378   return 1;
    379 }
    380 
    381 /*
    382 ** Run the parser on the given SQL string.  The parser structure is
    383 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
    384 ** then an and attempt is made to write an error message into
    385 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
    386 ** error message.
    387 */
    388 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
    389   int nErr = 0;                   /* Number of errors encountered */
    390   int i;                          /* Loop counter */
    391   void *pEngine;                  /* The LEMON-generated LALR(1) parser */
    392   int tokenType;                  /* type of the next token */
    393   int lastTokenParsed = -1;       /* type of the previous token */
    394   u8 enableLookaside;             /* Saved value of db->lookaside.bEnabled */
    395   sqlite3 *db = pParse->db;       /* The database connection */
    396   int mxSqlLen;                   /* Max length of an SQL string */
    397 
    398 
    399   mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
    400   if( db->activeVdbeCnt==0 ){
    401     db->u1.isInterrupted = 0;
    402   }
    403   pParse->rc = SQLITE_OK;
    404   pParse->zTail = zSql;
    405   i = 0;
    406   assert( pzErrMsg!=0 );
    407   pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc);
    408   if( pEngine==0 ){
    409     db->mallocFailed = 1;
    410     return SQLITE_NOMEM;
    411   }
    412   assert( pParse->pNewTable==0 );
    413   assert( pParse->pNewTrigger==0 );
    414   assert( pParse->nVar==0 );
    415   assert( pParse->nVarExpr==0 );
    416   assert( pParse->nVarExprAlloc==0 );
    417   assert( pParse->apVarExpr==0 );
    418   enableLookaside = db->lookaside.bEnabled;
    419   if( db->lookaside.pStart ) db->lookaside.bEnabled = 1;
    420   while( !db->mallocFailed && zSql[i]!=0 ){
    421     assert( i>=0 );
    422     pParse->sLastToken.z = &zSql[i];
    423     pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType);
    424     i += pParse->sLastToken.n;
    425     if( i>mxSqlLen ){
    426       pParse->rc = SQLITE_TOOBIG;
    427       break;
    428     }
    429     switch( tokenType ){
    430       case TK_SPACE: {
    431         if( db->u1.isInterrupted ){
    432           sqlite3ErrorMsg(pParse, "interrupt");
    433           pParse->rc = SQLITE_INTERRUPT;
    434           goto abort_parse;
    435         }
    436         break;
    437       }
    438       case TK_ILLEGAL: {
    439         sqlite3DbFree(db, *pzErrMsg);
    440         *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
    441                         &pParse->sLastToken);
    442         nErr++;
    443         goto abort_parse;
    444       }
    445       case TK_SEMI: {
    446         pParse->zTail = &zSql[i];
    447         /* Fall thru into the default case */
    448       }
    449       default: {
    450         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
    451         lastTokenParsed = tokenType;
    452         if( pParse->rc!=SQLITE_OK ){
    453           goto abort_parse;
    454         }
    455         break;
    456       }
    457     }
    458   }
    459 abort_parse:
    460   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
    461     if( lastTokenParsed!=TK_SEMI ){
    462       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
    463       pParse->zTail = &zSql[i];
    464     }
    465     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
    466   }
    467 #ifdef YYTRACKMAXSTACKDEPTH
    468   sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK,
    469       sqlite3ParserStackPeak(pEngine)
    470   );
    471 #endif /* YYDEBUG */
    472   sqlite3ParserFree(pEngine, sqlite3_free);
    473   db->lookaside.bEnabled = enableLookaside;
    474   if( db->mallocFailed ){
    475     pParse->rc = SQLITE_NOMEM;
    476   }
    477   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
    478     sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc));
    479   }
    480   assert( pzErrMsg!=0 );
    481   if( pParse->zErrMsg ){
    482     *pzErrMsg = pParse->zErrMsg;
    483     sqlite3_log(pParse->rc, "%s", *pzErrMsg);
    484     pParse->zErrMsg = 0;
    485     nErr++;
    486   }
    487   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
    488     sqlite3VdbeDelete(pParse->pVdbe);
    489     pParse->pVdbe = 0;
    490   }
    491 #ifndef SQLITE_OMIT_SHARED_CACHE
    492   if( pParse->nested==0 ){
    493     sqlite3DbFree(db, pParse->aTableLock);
    494     pParse->aTableLock = 0;
    495     pParse->nTableLock = 0;
    496   }
    497 #endif
    498 #ifndef SQLITE_OMIT_VIRTUALTABLE
    499   sqlite3_free(pParse->apVtabLock);
    500 #endif
    501 
    502   if( !IN_DECLARE_VTAB ){
    503     /* If the pParse->declareVtab flag is set, do not delete any table
    504     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
    505     ** will take responsibility for freeing the Table structure.
    506     */
    507     sqlite3DeleteTable(db, pParse->pNewTable);
    508   }
    509 
    510   sqlite3DeleteTrigger(db, pParse->pNewTrigger);
    511   sqlite3DbFree(db, pParse->apVarExpr);
    512   sqlite3DbFree(db, pParse->aAlias);
    513   while( pParse->pAinc ){
    514     AutoincInfo *p = pParse->pAinc;
    515     pParse->pAinc = p->pNext;
    516     sqlite3DbFree(db, p);
    517   }
    518   while( pParse->pZombieTab ){
    519     Table *p = pParse->pZombieTab;
    520     pParse->pZombieTab = p->pNextZombie;
    521     sqlite3DeleteTable(db, p);
    522   }
    523   if( nErr>0 && pParse->rc==SQLITE_OK ){
    524     pParse->rc = SQLITE_ERROR;
    525   }
    526   return nErr;
    527 }
    528