1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // 4 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class 5 // 6 /* 7 *************************************************************************** 8 * Copyright (C) 2002-2014 International Business Machines Corporation 9 * and others. All rights reserved. 10 *************************************************************************** 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_BREAK_ITERATION 16 17 #include "unicode/unistr.h" 18 #include "unicode/uniset.h" 19 #include "unicode/uchar.h" 20 #include "unicode/parsepos.h" 21 22 #include "cstr.h" 23 #include "rbbinode.h" 24 #include "rbbirb.h" 25 #include "umutex.h" 26 27 28 // 29 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents 30 // when the hash table is deleted. 31 // 32 U_CDECL_BEGIN 33 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { 34 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; 35 delete px; 36 } 37 U_CDECL_END 38 39 40 41 U_NAMESPACE_BEGIN 42 43 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) 44 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) 45 { 46 fHashTable = NULL; 47 fCachedSetLookup = NULL; 48 49 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); 50 // uhash_open checks status 51 if (U_FAILURE(status)) { 52 return; 53 } 54 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); 55 } 56 57 58 59 RBBISymbolTable::~RBBISymbolTable() 60 { 61 uhash_close(fHashTable); 62 } 63 64 65 // 66 // RBBISymbolTable::lookup This function from the abstract symbol table inteface 67 // looks up a variable name and returns a UnicodeString 68 // containing the substitution text. 69 // 70 // The variable name does NOT include the leading $. 71 // 72 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const 73 { 74 RBBISymbolTableEntry *el; 75 RBBINode *varRefNode; 76 RBBINode *exprNode; 77 RBBINode *usetNode; 78 const UnicodeString *retString; 79 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 80 81 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); 82 if (el == NULL) { 83 return NULL; 84 } 85 86 varRefNode = el->val; 87 exprNode = varRefNode->fLeftChild; // Root node of expression for variable 88 if (exprNode->fType == RBBINode::setRef) { 89 // The $variable refers to a single UnicodeSet 90 // return the ffffString, which will subsequently be interpreted as a 91 // stand-in character for the set by RBBISymbolTable::lookupMatcher() 92 usetNode = exprNode->fLeftChild; 93 This->fCachedSetLookup = usetNode->fInputSet; 94 retString = &ffffString; 95 } 96 else 97 { 98 // The variable refers to something other than just a set. 99 // return the original source string for the expression 100 retString = &exprNode->fText; 101 This->fCachedSetLookup = NULL; 102 } 103 return retString; 104 } 105 106 107 108 // 109 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table 110 // interface maps a single stand-in character to a 111 // pointer to a Unicode Set. The Unicode Set code uses this 112 // mechanism to get all references to the same $variable 113 // name to refer to a single common Unicode Set instance. 114 // 115 // This implementation cheats a little, and does not maintain a map of stand-in chars 116 // to sets. Instead, it takes advantage of the fact that the UnicodeSet 117 // constructor will always call this function right after calling lookup(), 118 // and we just need to remember what set to return between these two calls. 119 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const 120 { 121 UnicodeSet *retVal = NULL; 122 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 123 if (ch == 0xffff) { 124 retVal = fCachedSetLookup; 125 This->fCachedSetLookup = 0; 126 } 127 return retVal; 128 } 129 130 // 131 // RBBISymbolTable::parseReference This function from the abstract symbol table interface 132 // looks for a $variable name in the source text. 133 // It does not look it up, only scans for it. 134 // It is used by the UnicodeSet parser. 135 // 136 // This implementation is lifted pretty much verbatim 137 // from the rules based transliterator implementation. 138 // I didn't see an obvious way of sharing it. 139 // 140 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, 141 ParsePosition& pos, int32_t limit) const 142 { 143 int32_t start = pos.getIndex(); 144 int32_t i = start; 145 UnicodeString result; 146 while (i < limit) { 147 UChar c = text.charAt(i); 148 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { 149 break; 150 } 151 ++i; 152 } 153 if (i == start) { // No valid name chars 154 return result; // Indicate failure with empty string 155 } 156 pos.setIndex(i); 157 text.extractBetween(start, i, result); 158 return result; 159 } 160 161 162 163 // 164 // RBBISymbolTable::lookupNode Given a key (a variable name), return the 165 // corresponding RBBI Node. If there is no entry 166 // in the table for this name, return NULL. 167 // 168 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ 169 170 RBBINode *retNode = NULL; 171 RBBISymbolTableEntry *el; 172 173 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 174 if (el != NULL) { 175 retNode = el->val; 176 } 177 return retNode; 178 } 179 180 181 // 182 // RBBISymbolTable::addEntry Add a new entry to the symbol table. 183 // Indicate an error if the name already exists - 184 // this will only occur in the case of duplicate 185 // variable assignments. 186 // 187 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { 188 RBBISymbolTableEntry *e; 189 /* test for buffer overflows */ 190 if (U_FAILURE(err)) { 191 return; 192 } 193 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 194 if (e != NULL) { 195 err = U_BRK_VARIABLE_REDFINITION; 196 return; 197 } 198 199 e = new RBBISymbolTableEntry; 200 if (e == NULL) { 201 err = U_MEMORY_ALLOCATION_ERROR; 202 return; 203 } 204 e->key = key; 205 e->val = val; 206 uhash_put( fHashTable, &e->key, e, &err); 207 } 208 209 210 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} 211 212 RBBISymbolTableEntry::~RBBISymbolTableEntry() { 213 // The "val" of a symbol table entry is a variable reference node. 214 // The l. child of the val is the rhs expression from the assignment. 215 // Unlike other node types, children of variable reference nodes are not 216 // automatically recursively deleted. We do it manually here. 217 delete val->fLeftChild; 218 val->fLeftChild = NULL; 219 220 delete val; 221 222 // Note: the key UnicodeString is destructed by virtue of being in the object by value. 223 } 224 225 226 // 227 // RBBISymbolTable::print Debugging function, dump out the symbol table contents. 228 // 229 #ifdef RBBI_DEBUG 230 void RBBISymbolTable::rbbiSymtablePrint() const { 231 RBBIDebugPrintf("Variable Definitions Symbol Table\n" 232 "Name Node serial String Val\n" 233 "-------------------------------------------------------------------\n"); 234 235 int32_t pos = UHASH_FIRST; 236 const UHashElement *e = NULL; 237 for (;;) { 238 e = uhash_nextElement(fHashTable, &pos); 239 if (e == NULL ) { 240 break; 241 } 242 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 243 244 RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); 245 RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); 246 } 247 248 RBBIDebugPrintf("\nParsed Variable Definitions\n"); 249 pos = -1; 250 for (;;) { 251 e = uhash_nextElement(fHashTable, &pos); 252 if (e == NULL ) { 253 break; 254 } 255 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 256 RBBIDebugPrintf("%s\n", CStr(s->key)()); 257 RBBINode::printTree(s->val, TRUE); 258 RBBINode::printTree(s->val->fLeftChild, FALSE); 259 RBBIDebugPrintf("\n"); 260 } 261 } 262 #endif 263 264 265 266 267 268 U_NAMESPACE_END 269 270 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 271