Home | History | Annotate | Download | only in common
      1 //
      2 //  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2006 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_BREAK_ITERATION
     14 
     15 #include "unicode/unistr.h"
     16 #include "unicode/uniset.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/parsepos.h"
     19 
     20 #include "umutex.h"
     21 
     22 #include "rbbirb.h"
     23 #include "rbbinode.h"
     24 
     25 
     26 //
     27 //  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
     28 //                                  when the hash table is deleted.
     29 //
     30 U_CDECL_BEGIN
     31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
     32     U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *px = (U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *)p;
     33     delete px;
     34 }
     35 U_CDECL_END
     36 
     37 
     38 
     39 U_NAMESPACE_BEGIN
     40 
     41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
     42     :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
     43 {
     44     fHashTable       = NULL;
     45     fCachedSetLookup = NULL;
     46 
     47     fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
     48     // uhash_open checks status
     49     if (U_FAILURE(status)) {
     50         return;
     51     }
     52     uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
     53 }
     54 
     55 
     56 
     57 RBBISymbolTable::~RBBISymbolTable()
     58 {
     59     uhash_close(fHashTable);
     60 }
     61 
     62 
     63 //
     64 //  RBBISymbolTable::lookup       This function from the abstract symbol table inteface
     65 //                                looks up a variable name and returns a UnicodeString
     66 //                                containing the substitution text.
     67 //
     68 //                                The variable name does NOT include the leading $.
     69 //
     70 const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
     71 {
     72     RBBISymbolTableEntry  *el;
     73     RBBINode              *varRefNode;
     74     RBBINode              *exprNode;
     75     RBBINode              *usetNode;
     76     const UnicodeString   *retString;
     77     RBBISymbolTable       *This = (RBBISymbolTable *)this;   // cast off const
     78 
     79     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
     80     if (el == NULL) {
     81         return NULL;
     82     }
     83 
     84     varRefNode = el->val;
     85     exprNode   = varRefNode->fLeftChild;     // Root node of expression for variable
     86     if (exprNode->fType == RBBINode::setRef) {
     87         // The $variable refers to a single UnicodeSet
     88         //   return the ffffString, which will subsequently be interpreted as a
     89         //   stand-in character for the set by RBBISymbolTable::lookupMatcher()
     90         usetNode = exprNode->fLeftChild;
     91         This->fCachedSetLookup = usetNode->fInputSet;
     92         retString = &ffffString;
     93     }
     94     else
     95     {
     96         // The variable refers to something other than just a set.
     97         // return the original source string for the expression
     98         retString = &exprNode->fText;
     99         This->fCachedSetLookup = NULL;
    100     }
    101     return retString;
    102 }
    103 
    104 
    105 
    106 //
    107 //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
    108 //                                   interface maps a single stand-in character to a
    109 //                                   pointer to a Unicode Set.   The Unicode Set code uses this
    110 //                                   mechanism to get all references to the same $variable
    111 //                                   name to refer to a single common Unicode Set instance.
    112 //
    113 //    This implementation cheats a little, and does not maintain a map of stand-in chars
    114 //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
    115 //    constructor will always call this function right after calling lookup(),
    116 //    and we just need to remember what set to return between these two calls.
    117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
    118 {
    119     UnicodeSet *retVal = NULL;
    120     RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
    121     if (ch == 0xffff) {
    122         retVal = fCachedSetLookup;
    123         This->fCachedSetLookup = 0;
    124     }
    125     return retVal;
    126 }
    127 
    128 //
    129 // RBBISymbolTable::parseReference   This function from the abstract symbol table interface
    130 //                                   looks for a $variable name in the source text.
    131 //                                   It does not look it up, only scans for it.
    132 //                                   It is used by the UnicodeSet parser.
    133 //
    134 //                                   This implementation is lifted pretty much verbatim
    135 //                                   from the rules based transliterator implementation.
    136 //                                   I didn't see an obvious way of sharing it.
    137 //
    138 UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
    139                                                 ParsePosition& pos, int32_t limit) const
    140 {
    141     int32_t start = pos.getIndex();
    142     int32_t i = start;
    143     UnicodeString result;
    144     while (i < limit) {
    145         UChar c = text.charAt(i);
    146         if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
    147             break;
    148         }
    149         ++i;
    150     }
    151     if (i == start) { // No valid name chars
    152         return result; // Indicate failure with empty string
    153     }
    154     pos.setIndex(i);
    155     text.extractBetween(start, i, result);
    156     return result;
    157 }
    158 
    159 
    160 
    161 //
    162 // RBBISymbolTable::lookupNode      Given a key (a variable name), return the
    163 //                                  corresponding RBBI Node.  If there is no entry
    164 //                                  in the table for this name, return NULL.
    165 //
    166 RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
    167 
    168     RBBINode             *retNode = NULL;
    169     RBBISymbolTableEntry *el;
    170 
    171     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
    172     if (el != NULL) {
    173         retNode = el->val;
    174     }
    175     return retNode;
    176 }
    177 
    178 
    179 //
    180 //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
    181 //                                  Indicate an error if the name already exists -
    182 //                                    this will only occur in the case of duplicate
    183 //                                    variable assignments.
    184 //
    185 void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
    186     RBBISymbolTableEntry *e;
    187     /* test for buffer overflows */
    188     if (U_FAILURE(err)) {
    189         return;
    190     }
    191     e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
    192     if (e != NULL) {
    193         err = U_BRK_VARIABLE_REDFINITION;
    194         return;
    195     }
    196 
    197     e = new RBBISymbolTableEntry;
    198     if (e == NULL) {
    199         err = U_MEMORY_ALLOCATION_ERROR;
    200         return;
    201     }
    202     e->key = key;
    203     e->val = val;
    204     uhash_put( fHashTable, &e->key, e, &err);
    205 }
    206 
    207 
    208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
    209 
    210 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
    211     // The "val" of a symbol table entry is a variable reference node.
    212     // The l. child of the val is the rhs expression from the assignment.
    213     // Unlike other node types, children of variable reference nodes are not
    214     //    automatically recursively deleted.  We do it manually here.
    215     delete val->fLeftChild;
    216     val->fLeftChild = NULL;
    217 
    218     delete  val;
    219 
    220     // Note: the key UnicodeString is destructed by virtue of being in the object by value.
    221 }
    222 
    223 
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//
    227 #ifdef RBBI_DEBUG
    228 void RBBISymbolTable::rbbiSymtablePrint() const {
    229     RBBIDebugPrintf("Variable Definitions\n"
    230            "Name               Node Val     String Val\n"
    231            "----------------------------------------------------------------------\n");
    232 
    233     int32_t pos = -1;
    234     const UHashElement  *e   = NULL;
    235     for (;;) {
    236         e = uhash_nextElement(fHashTable,  &pos);
    237         if (e == NULL ) {
    238             break;
    239         }
    240         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
    241 
    242         RBBI_DEBUG_printUnicodeString(s->key, 15);
    243         RBBIDebugPrintf("   %8p   ", (void *)s->val);
    244         RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
    245         RBBIDebugPrintf("\n");
    246     }
    247 
    248     RBBIDebugPrintf("\nParsed Variable Definitions\n");
    249     pos = -1;
    250     for (;;) {
    251         e = uhash_nextElement(fHashTable,  &pos);
    252         if (e == NULL ) {
    253             break;
    254         }
    255         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
    256         RBBI_DEBUG_printUnicodeString(s->key);
    257         s->val->fLeftChild->printTree(TRUE);
    258         RBBIDebugPrintf("\n");
    259     }
    260 }
    261 #endif
    262 
    263 
    264 
    265 
    266 
    267 U_NAMESPACE_END
    268 
    269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    270