Home | History | Annotate | Download | only in src
      1 /*
      2 ** 2001 September 15
      3 **
      4 ** The author disclaims copyright to this source code.  In place of
      5 ** a legal notice, here is a blessing:
      6 **
      7 **    May you do good and not evil.
      8 **    May you find forgiveness for yourself and forgive others.
      9 **    May you share freely, never taking more than you give.
     10 **
     11 *************************************************************************
     12 ** A tokenizer for SQL
     13 **
     14 ** This file contains C code that implements the sqlite3_complete() API.
     15 ** This code used to be part of the tokenizer.c source file.  But by
     16 ** separating it out, the code will be automatically omitted from
     17 ** static links that do not use it.
     18 */
     19 #include "sqliteInt.h"
     20 #ifndef SQLITE_OMIT_COMPLETE
     21 
     22 /*
     23 ** This is defined in tokenize.c.  We just have to import the definition.
     24 */
     25 #ifndef SQLITE_AMALGAMATION
     26 #ifdef SQLITE_ASCII
     27 #define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
     28 #endif
     29 #ifdef SQLITE_EBCDIC
     30 extern const char sqlite3IsEbcdicIdChar[];
     31 #define IdChar(C)  (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
     32 #endif
     33 #endif /* SQLITE_AMALGAMATION */
     34 
     35 
     36 /*
     37 ** Token types used by the sqlite3_complete() routine.  See the header
     38 ** comments on that procedure for additional information.
     39 */
     40 #define tkSEMI    0
     41 #define tkWS      1
     42 #define tkOTHER   2
     43 #ifndef SQLITE_OMIT_TRIGGER
     44 #define tkEXPLAIN 3
     45 #define tkCREATE  4
     46 #define tkTEMP    5
     47 #define tkTRIGGER 6
     48 #define tkEND     7
     49 #endif
     50 
     51 /*
     52 ** Return TRUE if the given SQL string ends in a semicolon.
     53 **
     54 ** Special handling is required for CREATE TRIGGER statements.
     55 ** Whenever the CREATE TRIGGER keywords are seen, the statement
     56 ** must end with ";END;".
     57 **
     58 ** This implementation uses a state machine with 8 states:
     59 **
     60 **   (0) INVALID   We have not yet seen a non-whitespace character.
     61 **
     62 **   (1) START     At the beginning or end of an SQL statement.  This routine
     63 **                 returns 1 if it ends in the START state and 0 if it ends
     64 **                 in any other state.
     65 **
     66 **   (2) NORMAL    We are in the middle of a statement which ends with a single
     67 **                 semicolon.
     68 **
     69 **   (3) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of
     70 **                 a statement.
     71 **
     72 **   (4) CREATE    The keyword CREATE has been seen at the beginning of a
     73 **                 statement, possibly preceded by EXPLAIN and/or followed by
     74 **                 TEMP or TEMPORARY
     75 **
     76 **   (5) TRIGGER   We are in the middle of a trigger definition that must be
     77 **                 ended by a semicolon, the keyword END, and another semicolon.
     78 **
     79 **   (6) SEMI      We've seen the first semicolon in the ";END;" that occurs at
     80 **                 the end of a trigger definition.
     81 **
     82 **   (7) END       We've seen the ";END" of the ";END;" that occurs at the end
     83 **                 of a trigger definition.
     84 **
     85 ** Transitions between states above are determined by tokens extracted
     86 ** from the input.  The following tokens are significant:
     87 **
     88 **   (0) tkSEMI      A semicolon.
     89 **   (1) tkWS        Whitespace.
     90 **   (2) tkOTHER     Any other SQL token.
     91 **   (3) tkEXPLAIN   The "explain" keyword.
     92 **   (4) tkCREATE    The "create" keyword.
     93 **   (5) tkTEMP      The "temp" or "temporary" keyword.
     94 **   (6) tkTRIGGER   The "trigger" keyword.
     95 **   (7) tkEND       The "end" keyword.
     96 **
     97 ** Whitespace never causes a state transition and is always ignored.
     98 ** This means that a SQL string of all whitespace is invalid.
     99 **
    100 ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed
    101 ** to recognize the end of a trigger can be omitted.  All we have to do
    102 ** is look for a semicolon that is not part of a string or comment.
    103 */
    104 int sqlite3_complete(const char *zSql){
    105   u8 state = 0;   /* Current state, using numbers defined in header comment */
    106   u8 token;       /* Value of the next token */
    107 
    108 #ifndef SQLITE_OMIT_TRIGGER
    109   /* A complex statement machine used to detect the end of a CREATE TRIGGER
    110   ** statement.  This is the normal case.
    111   */
    112   static const u8 trans[8][8] = {
    113                      /* Token:                                                */
    114      /* State:       **  SEMI  WS  OTHER  EXPLAIN  CREATE  TEMP  TRIGGER  END */
    115      /* 0 INVALID: */ {    1,  0,     2,       3,      4,    2,       2,   2, },
    116      /* 1   START: */ {    1,  1,     2,       3,      4,    2,       2,   2, },
    117      /* 2  NORMAL: */ {    1,  2,     2,       2,      2,    2,       2,   2, },
    118      /* 3 EXPLAIN: */ {    1,  3,     3,       2,      4,    2,       2,   2, },
    119      /* 4  CREATE: */ {    1,  4,     2,       2,      2,    4,       5,   2, },
    120      /* 5 TRIGGER: */ {    6,  5,     5,       5,      5,    5,       5,   5, },
    121      /* 6    SEMI: */ {    6,  6,     5,       5,      5,    5,       5,   7, },
    122      /* 7     END: */ {    1,  7,     5,       5,      5,    5,       5,   5, },
    123   };
    124 #else
    125   /* If triggers are not supported by this compile then the statement machine
    126   ** used to detect the end of a statement is much simplier
    127   */
    128   static const u8 trans[3][3] = {
    129                      /* Token:           */
    130      /* State:       **  SEMI  WS  OTHER */
    131      /* 0 INVALID: */ {    1,  0,     2, },
    132      /* 1   START: */ {    1,  1,     2, },
    133      /* 2  NORMAL: */ {    1,  2,     2, },
    134   };
    135 #endif /* SQLITE_OMIT_TRIGGER */
    136 
    137   while( *zSql ){
    138     switch( *zSql ){
    139       case ';': {  /* A semicolon */
    140         token = tkSEMI;
    141         break;
    142       }
    143       case ' ':
    144       case '\r':
    145       case '\t':
    146       case '\n':
    147       case '\f': {  /* White space is ignored */
    148         token = tkWS;
    149         break;
    150       }
    151       case '/': {   /* C-style comments */
    152         if( zSql[1]!='*' ){
    153           token = tkOTHER;
    154           break;
    155         }
    156         zSql += 2;
    157         while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
    158         if( zSql[0]==0 ) return 0;
    159         zSql++;
    160         token = tkWS;
    161         break;
    162       }
    163       case '-': {   /* SQL-style comments from "--" to end of line */
    164         if( zSql[1]!='-' ){
    165           token = tkOTHER;
    166           break;
    167         }
    168         while( *zSql && *zSql!='\n' ){ zSql++; }
    169         if( *zSql==0 ) return state==1;
    170         token = tkWS;
    171         break;
    172       }
    173       case '[': {   /* Microsoft-style identifiers in [...] */
    174         zSql++;
    175         while( *zSql && *zSql!=']' ){ zSql++; }
    176         if( *zSql==0 ) return 0;
    177         token = tkOTHER;
    178         break;
    179       }
    180       case '`':     /* Grave-accent quoted symbols used by MySQL */
    181       case '"':     /* single- and double-quoted strings */
    182       case '\'': {
    183         int c = *zSql;
    184         zSql++;
    185         while( *zSql && *zSql!=c ){ zSql++; }
    186         if( *zSql==0 ) return 0;
    187         token = tkOTHER;
    188         break;
    189       }
    190       default: {
    191 #ifdef SQLITE_EBCDIC
    192         unsigned char c;
    193 #endif
    194         if( IdChar((u8)*zSql) ){
    195           /* Keywords and unquoted identifiers */
    196           int nId;
    197           for(nId=1; IdChar(zSql[nId]); nId++){}
    198 #ifdef SQLITE_OMIT_TRIGGER
    199           token = tkOTHER;
    200 #else
    201           switch( *zSql ){
    202             case 'c': case 'C': {
    203               if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){
    204                 token = tkCREATE;
    205               }else{
    206                 token = tkOTHER;
    207               }
    208               break;
    209             }
    210             case 't': case 'T': {
    211               if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){
    212                 token = tkTRIGGER;
    213               }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){
    214                 token = tkTEMP;
    215               }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){
    216                 token = tkTEMP;
    217               }else{
    218                 token = tkOTHER;
    219               }
    220               break;
    221             }
    222             case 'e':  case 'E': {
    223               if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){
    224                 token = tkEND;
    225               }else
    226 #ifndef SQLITE_OMIT_EXPLAIN
    227               if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){
    228                 token = tkEXPLAIN;
    229               }else
    230 #endif
    231               {
    232                 token = tkOTHER;
    233               }
    234               break;
    235             }
    236             default: {
    237               token = tkOTHER;
    238               break;
    239             }
    240           }
    241 #endif /* SQLITE_OMIT_TRIGGER */
    242           zSql += nId-1;
    243         }else{
    244           /* Operators and special symbols */
    245           token = tkOTHER;
    246         }
    247         break;
    248       }
    249     }
    250     state = trans[state][token];
    251     zSql++;
    252   }
    253   return state==1;
    254 }
    255 
    256 #ifndef SQLITE_OMIT_UTF16
    257 /*
    258 ** This routine is the same as the sqlite3_complete() routine described
    259 ** above, except that the parameter is required to be UTF-16 encoded, not
    260 ** UTF-8.
    261 */
    262 int sqlite3_complete16(const void *zSql){
    263   sqlite3_value *pVal;
    264   char const *zSql8;
    265   int rc = SQLITE_NOMEM;
    266 
    267 #ifndef SQLITE_OMIT_AUTOINIT
    268   rc = sqlite3_initialize();
    269   if( rc ) return rc;
    270 #endif
    271   pVal = sqlite3ValueNew(0);
    272   sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
    273   zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
    274   if( zSql8 ){
    275     rc = sqlite3_complete(zSql8);
    276   }else{
    277     rc = SQLITE_NOMEM;
    278   }
    279   sqlite3ValueFree(pVal);
    280   return sqlite3ApiExit(0, rc);
    281 }
    282 #endif /* SQLITE_OMIT_UTF16 */
    283 #endif /* SQLITE_OMIT_COMPLETE */
    284