1 /*---------------------------------------------------------------------------* 2 * LexicalAnalyzer.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include "SR_LexicalAnalyzer.h" 21 #include "plog.h" 22 #include "pmemory.h" 23 24 25 static const char* MTAG = __FILE__; 26 27 ESR_BOOL isIdentifierChar(LCHAR p); 28 29 ESR_ReturnCode LA_Init(LexicalAnalyzer** self) 30 { 31 LexicalAnalyzer* Interface; 32 33 if (self == NULL) 34 { 35 PLogError(L("ESR_INVALID_ARGUMENT")); 36 return ESR_INVALID_ARGUMENT; 37 } 38 39 Interface = NEW(LexicalAnalyzer, MTAG); 40 if (Interface == NULL) 41 { 42 PLogError(L("ESR_OUT_OF_MEMORY")); 43 return ESR_OUT_OF_MEMORY; 44 } 45 *self = Interface; 46 return ESR_SUCCESS; 47 } 48 49 ESR_ReturnCode LA_Analyze(LexicalAnalyzer *lex, LCHAR *script) 50 { 51 if (lex == NULL || script == NULL) 52 { 53 PLogError(L("ESR_INVALID_ARGUMENT")); 54 return ESR_INVALID_ARGUMENT; 55 } 56 57 /* point to the first char */ 58 lex->nextToken = lex->script = script; 59 return ESR_SUCCESS; 60 } 61 62 ESR_ReturnCode LA_Free(LexicalAnalyzer *lex) 63 { 64 if (lex == NULL) 65 { 66 PLogError(L("ESR_INVALID_ARGUMENT")); 67 return ESR_INVALID_ARGUMENT; 68 } 69 FREE(lex); 70 return ESR_SUCCESS; 71 } 72 73 74 ESR_ReturnCode LA_nextToken(LexicalAnalyzer *lex, LCHAR *tokenBuf, size_t* tokenLen) 75 { 76 LCHAR *p; 77 LCHAR *q; 78 79 while (LISSPACE(*lex->nextToken)) 80 ++lex->nextToken; 81 82 switch (*lex->nextToken) 83 { 84 case OP_ASSIGN: 85 case OP_CONCAT: 86 case LBRACKET: 87 case PARAM_DELIM: 88 case RBRACKET: 89 case OP_CONDITION_IFTRUE: 90 case OP_CONDITION_ELSE: 91 case EO_STATEMENT: 92 tokenBuf[0] = *lex->nextToken; 93 tokenBuf[1] = EO_STRING; 94 *tokenLen = 1; 95 break; 96 case STRING_DELIM: 97 p = lex->nextToken; 98 q = tokenBuf; 99 *q++ = *p++; 100 /* finds the end of the constant string also protects against going past end of string 101 * The parser above will handle the incomplete string. SteveR 102 */ 103 while ( ( *p != STRING_DELIM ) && ( *p != '\0' ) ) 104 { 105 if (*p == ESC_CHAR) 106 *q++ = *p++; 107 *q++ = *p++; 108 } 109 110 *q++ = *p++; 111 *tokenLen = q - tokenBuf; 112 tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */ 113 break; 114 default: 115 p = lex->nextToken; 116 while (isIdentifierChar(*p)) /* finds the end of the name of this identifier */ 117 ++p; 118 *tokenLen = p - lex->nextToken; 119 LSTRNCPY(tokenBuf, lex->nextToken, *tokenLen); 120 tokenBuf[*tokenLen] = EO_STRING; /* make sure its there */ 121 } 122 lex->nextToken += *tokenLen; 123 return ESR_SUCCESS; 124 } 125 126 /** 127 * Indicates if character is in range [a-z] or [A-Z] or [0-9] or ['.']. 128 **/ 129 ESR_BOOL isIdentifierChar(LCHAR p) 130 { 131 return (p == DOT || /* the dot */ 132 p == USCORE || /* the underscore */ 133 (p <= L('z') && p >= L('a')) || /* lowercase alpha */ 134 (p <= L('Z') && p >= L('A')) || /* uppercase alpha */ 135 (p <= L('9') && p >= L('0'))) ? ESR_TRUE : ESR_FALSE; /* numbers */ 136 } 137