Home | History | Annotate | Download | only in src
      1 /*---------------------------------------------------------------------------*
      2  *  LexicalAnalyzer.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include "SR_LexicalAnalyzer.h"
     21 #include "plog.h"
     22 #include "pmemory.h"
     23 
     24 
/* Allocation tag handed to NEW() in LA_Init(); it is this source file's name. */
static const char* MTAG = __FILE__;

/* Forward declaration: defined later in this file and used by LA_nextToken(). */
ESR_BOOL isIdentifierChar(LCHAR p);
     28 
     29 ESR_ReturnCode LA_Init(LexicalAnalyzer** self)
     30 {
     31   LexicalAnalyzer* Interface;
     32 
     33   if (self == NULL)
     34   {
     35     PLogError(L("ESR_INVALID_ARGUMENT"));
     36     return ESR_INVALID_ARGUMENT;
     37   }
     38 
     39   Interface = NEW(LexicalAnalyzer, MTAG);
     40   if (Interface == NULL)
     41   {
     42     PLogError(L("ESR_OUT_OF_MEMORY"));
     43     return ESR_OUT_OF_MEMORY;
     44   }
     45   *self = Interface;
     46   return ESR_SUCCESS;
     47 }
     48 
     49 ESR_ReturnCode LA_Analyze(LexicalAnalyzer *lex, LCHAR *script)
     50 {
     51   if (lex == NULL || script == NULL)
     52   {
     53     PLogError(L("ESR_INVALID_ARGUMENT"));
     54     return ESR_INVALID_ARGUMENT;
     55   }
     56 
     57   /* point to the first char */
     58   lex->nextToken = lex->script = script;
     59   return ESR_SUCCESS;
     60 }
     61 
     62 ESR_ReturnCode LA_Free(LexicalAnalyzer *lex)
     63 {
     64   if (lex == NULL)
     65   {
     66     PLogError(L("ESR_INVALID_ARGUMENT"));
     67     return ESR_INVALID_ARGUMENT;
     68   }
     69   FREE(lex);
     70   return ESR_SUCCESS;
     71 }
     72 
     73 
     74 ESR_ReturnCode LA_nextToken(LexicalAnalyzer *lex, LCHAR *tokenBuf, size_t* tokenLen)
     75 {
     76   LCHAR *p;
     77   LCHAR *q;
     78 
     79   while (LISSPACE(*lex->nextToken))
     80     ++lex->nextToken;
     81 
     82   switch (*lex->nextToken)
     83   {
     84     case OP_ASSIGN:
     85     case OP_CONCAT:
     86     case LBRACKET:
     87     case PARAM_DELIM:
     88     case RBRACKET:
     89     case OP_CONDITION_IFTRUE:
     90     case OP_CONDITION_ELSE:
     91     case EO_STATEMENT:
     92       tokenBuf[0] = *lex->nextToken;
     93       tokenBuf[1] = EO_STRING;
     94       *tokenLen = 1;
     95       break;
     96     case STRING_DELIM:
     97       p = lex->nextToken;
     98       q = tokenBuf;
     99       *q++ = *p++;
    100 /* finds the end of the constant string also protects against going past end of string
    101  * The parser above will handle the incomplete string. SteveR
    102  */
    103       while ( ( *p != STRING_DELIM ) && ( *p != '\0' ) )
    104       {
    105         if (*p == ESC_CHAR)
    106           *q++ = *p++;
    107         *q++ = *p++;
    108       }
    109 
    110       *q++ = *p++;
    111       *tokenLen = q - tokenBuf;
    112       tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */
    113       break;
    114     default:
    115       p = lex->nextToken;
    116       while (isIdentifierChar(*p))  /* finds the end of the name of this identifier */
    117         ++p;
    118       *tokenLen = p - lex->nextToken;
    119       LSTRNCPY(tokenBuf, lex->nextToken, *tokenLen);
    120       tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */
    121   }
    122   lex->nextToken += *tokenLen;
    123   return ESR_SUCCESS;
    124 }
    125 
    126 /**
    127  * Indicates if character is in range [a-z] or [A-Z] or [0-9] or ['.'].
    128  **/
    129 ESR_BOOL isIdentifierChar(LCHAR p)
    130 {
    131   return (p == DOT ||                     /* the dot */
    132          p == USCORE ||                  /* the underscore */
    133          (p <= L('z') && p >= L('a')) || /* lowercase alpha */
    134          (p <= L('Z') && p >= L('A')) || /* uppercase alpha */
    135          (p <= L('9') && p >= L('0'))) ? ESR_TRUE : ESR_FALSE;   /* numbers */
    136 }
    137