1 // 2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class 3 // 4 /* 5 *************************************************************************** 6 * Copyright (C) 2002-2006 International Business Machines Corporation * 7 * and others. All rights reserved. * 8 *************************************************************************** 9 */ 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_BREAK_ITERATION 14 15 #include "unicode/unistr.h" 16 #include "unicode/uniset.h" 17 #include "unicode/uchar.h" 18 #include "unicode/parsepos.h" 19 20 #include "umutex.h" 21 22 #include "rbbirb.h" 23 #include "rbbinode.h" 24 25 26 // 27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents 28 // when the hash table is deleted. 29 // 30 U_CDECL_BEGIN 31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { 32 U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *px = (U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *)p; 33 delete px; 34 } 35 U_CDECL_END 36 37 38 39 U_NAMESPACE_BEGIN 40 41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) 42 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) 43 { 44 fHashTable = NULL; 45 fCachedSetLookup = NULL; 46 47 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); 48 // uhash_open checks status 49 if (U_FAILURE(status)) { 50 return; 51 } 52 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); 53 } 54 55 56 57 RBBISymbolTable::~RBBISymbolTable() 58 { 59 uhash_close(fHashTable); 60 } 61 62 63 // 64 // RBBISymbolTable::lookup This function from the abstract symbol table inteface 65 // looks up a variable name and returns a UnicodeString 66 // containing the substitution text. 67 // 68 // The variable name does NOT include the leading $. 69 // 70 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const 71 { 72 RBBISymbolTableEntry *el; 73 RBBINode *varRefNode; 74 RBBINode *exprNode; 75 RBBINode *usetNode; 76 const UnicodeString *retString; 77 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 78 79 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); 80 if (el == NULL) { 81 return NULL; 82 } 83 84 varRefNode = el->val; 85 exprNode = varRefNode->fLeftChild; // Root node of expression for variable 86 if (exprNode->fType == RBBINode::setRef) { 87 // The $variable refers to a single UnicodeSet 88 // return the ffffString, which will subsequently be interpreted as a 89 // stand-in character for the set by RBBISymbolTable::lookupMatcher() 90 usetNode = exprNode->fLeftChild; 91 This->fCachedSetLookup = usetNode->fInputSet; 92 retString = &ffffString; 93 } 94 else 95 { 96 // The variable refers to something other than just a set. 97 // return the original source string for the expression 98 retString = &exprNode->fText; 99 This->fCachedSetLookup = NULL; 100 } 101 return retString; 102 } 103 104 105 106 // 107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table 108 // interface maps a single stand-in character to a 109 // pointer to a Unicode Set. The Unicode Set code uses this 110 // mechanism to get all references to the same $variable 111 // name to refer to a single common Unicode Set instance. 112 // 113 // This implementation cheats a little, and does not maintain a map of stand-in chars 114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet 115 // constructor will always call this function right after calling lookup(), 116 // and we just need to remember what set to return between these two calls. 117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const 118 { 119 UnicodeSet *retVal = NULL; 120 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 121 if (ch == 0xffff) { 122 retVal = fCachedSetLookup; 123 This->fCachedSetLookup = 0; 124 } 125 return retVal; 126 } 127 128 // 129 // RBBISymbolTable::parseReference This function from the abstract symbol table interface 130 // looks for a $variable name in the source text. 131 // It does not look it up, only scans for it. 132 // It is used by the UnicodeSet parser. 133 // 134 // This implementation is lifted pretty much verbatim 135 // from the rules based transliterator implementation. 136 // I didn't see an obvious way of sharing it. 137 // 138 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, 139 ParsePosition& pos, int32_t limit) const 140 { 141 int32_t start = pos.getIndex(); 142 int32_t i = start; 143 UnicodeString result; 144 while (i < limit) { 145 UChar c = text.charAt(i); 146 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { 147 break; 148 } 149 ++i; 150 } 151 if (i == start) { // No valid name chars 152 return result; // Indicate failure with empty string 153 } 154 pos.setIndex(i); 155 text.extractBetween(start, i, result); 156 return result; 157 } 158 159 160 161 // 162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the 163 // corresponding RBBI Node. If there is no entry 164 // in the table for this name, return NULL. 165 // 166 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ 167 168 RBBINode *retNode = NULL; 169 RBBISymbolTableEntry *el; 170 171 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 172 if (el != NULL) { 173 retNode = el->val; 174 } 175 return retNode; 176 } 177 178 179 // 180 // RBBISymbolTable::addEntry Add a new entry to the symbol table. 181 // Indicate an error if the name already exists - 182 // this will only occur in the case of duplicate 183 // variable assignments. 184 // 185 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { 186 RBBISymbolTableEntry *e; 187 /* test for buffer overflows */ 188 if (U_FAILURE(err)) { 189 return; 190 } 191 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 192 if (e != NULL) { 193 err = U_BRK_VARIABLE_REDFINITION; 194 return; 195 } 196 197 e = new RBBISymbolTableEntry; 198 if (e == NULL) { 199 err = U_MEMORY_ALLOCATION_ERROR; 200 return; 201 } 202 e->key = key; 203 e->val = val; 204 uhash_put( fHashTable, &e->key, e, &err); 205 } 206 207 208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} 209 210 RBBISymbolTableEntry::~RBBISymbolTableEntry() { 211 // The "val" of a symbol table entry is a variable reference node. 212 // The l. child of the val is the rhs expression from the assignment. 213 // Unlike other node types, children of variable reference nodes are not 214 // automatically recursively deleted. We do it manually here. 215 delete val->fLeftChild; 216 val->fLeftChild = NULL; 217 218 delete val; 219 220 // Note: the key UnicodeString is destructed by virtue of being in the object by value. 221 } 222 223 224 // 225 // RBBISymbolTable::print Debugging function, dump out the symbol table contents. 226 // 227 #ifdef RBBI_DEBUG 228 void RBBISymbolTable::rbbiSymtablePrint() const { 229 RBBIDebugPrintf("Variable Definitions\n" 230 "Name Node Val String Val\n" 231 "----------------------------------------------------------------------\n"); 232 233 int32_t pos = -1; 234 const UHashElement *e = NULL; 235 for (;;) { 236 e = uhash_nextElement(fHashTable, &pos); 237 if (e == NULL ) { 238 break; 239 } 240 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 241 242 RBBI_DEBUG_printUnicodeString(s->key, 15); 243 RBBIDebugPrintf(" %8p ", (void *)s->val); 244 RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText); 245 RBBIDebugPrintf("\n"); 246 } 247 248 RBBIDebugPrintf("\nParsed Variable Definitions\n"); 249 pos = -1; 250 for (;;) { 251 e = uhash_nextElement(fHashTable, &pos); 252 if (e == NULL ) { 253 break; 254 } 255 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 256 RBBI_DEBUG_printUnicodeString(s->key); 257 s->val->fLeftChild->printTree(TRUE); 258 RBBIDebugPrintf("\n"); 259 } 260 } 261 #endif 262 263 264 265 266 267 U_NAMESPACE_END 268 269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 270