Home | History | Annotate | Download | only in common
      1 //
      2 //  rbbirb.h
      3 //
      4 //  Copyright (C) 2002-2008, International Business Machines Corporation and others.
      5 //  All Rights Reserved.
      6 //
      7 //  This file contains declarations for several classes from the
      8 //    Rule Based Break Iterator rule builder.
      9 //
     10 
     11 
     12 #ifndef RBBIRB_H
     13 #define RBBIRB_H
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/uobject.h"
     17 #include "unicode/rbbi.h"
     18 #include "unicode/uniset.h"
     19 #include "unicode/parseerr.h"
     20 #include "uhash.h"
     21 #include "uvector.h"
     22 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
     23                           //    looks up references to $variables within a set.
     24 
     25 
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 class               RBBIRuleScanner;
     30 struct              RBBIRuleTableEl;
     31 class               RBBISetBuilder;
     32 class               RBBINode;
     33 class               RBBITableBuilder;
     34 
     35 
     36 
     37 //--------------------------------------------------------------------------------
     38 //
     39 //   RBBISymbolTable.    Implements SymbolTable interface that is used by the
     40 //                       UnicodeSet parser to resolve references to $variables.
     41 //
     42 //--------------------------------------------------------------------------------
     43 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
     44 public:                                       //   of these structs for each entry.
     45     RBBISymbolTableEntry();                   // Body is not in this header.
     46     UnicodeString          key;               // Key for this entry; presumably the name passed
                                                      //   to RBBISymbolTable::addEntry() -- confirm.
     47     RBBINode               *val;              // Node stored for 'key'.
                                                      // NOTE(review): whether this entry owns the node is
                                                      //   not visible here -- check the destructor body.
     48     ~RBBISymbolTableEntry();                  // Body is not in this header.
     49 
     50 private:
                // Both copy operations are declared private so that code outside the class
                //   cannot copy an entry.
     51     RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
     52     RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
     53 };
     54 
     55 
     56 class RBBISymbolTable : public UMemory, public SymbolTable {
                // Symbol table for $variables.  It implements the SymbolTable interface
                //   (lookup / lookupMatcher / parseReference) and adds lookupNode() and
                //   addEntry(), which work in terms of RBBINode values.
     57 private:
     58     const UnicodeString      &fRules;         // Reference member: the string it refers to
                                                      //   must outlive this object.
     59     UHashtable               *fHashTable;     // Presumably holds the RBBISymbolTableEntry
                                                      //   objects -- confirm in the implementation.
     60     RBBIRuleScanner          *fRuleScanner;   // Presumably the scanner handed to the
                                                      //   constructor -- confirm.
     61 
     62     // These next two fields are part of the mechanism for passing references to
     63     //   already-constructed UnicodeSets back to the UnicodeSet constructor
     64     //   when the pattern includes $variable references.
     65     const UnicodeString      ffffString;      // = "/uffff" as written; presumably the one-character
                                                      //   string U+FFFF -- confirm in the constructor.
     66     UnicodeSet              *fCachedSetLookup;
     67 
     68 public:
     69     //  API inherited from class SymbolTable.  All three are const and virtual;
                //    their bodies are not in this header.
     70     virtual const UnicodeString*  lookup(const UnicodeString& s) const;
     71     virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
     72     virtual UnicodeString parseReference(const UnicodeString& text,
     73                                          ParsePosition& pos, int32_t limit) const;
     74 
     75     //  Additional Functions
                //  Note: the constructor's second parameter is named fRules, the same as the
                //    data member declared above.
     76     RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
     77     virtual ~RBBISymbolTable();
     78 
                //  lookupNode() is const; addEntry() is not, and reports through 'err'.
     79     virtual RBBINode *lookupNode(const UnicodeString &key) const;
     80     virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
     81 
     82 #ifdef RBBI_DEBUG
     83     virtual void      rbbiSymtablePrint() const;
     84 #else
     85     // Non-debug builds: no rbbiSymtablePrint() member is declared.  Instead, a function-like
     86     //  macro of the same name expands to "fFakeField=0;" so that call sites have something to compile.
                // NOTE(review): a macro is not scoped to this class; it rewrites every later
                //  "rbbiSymtablePrint()" in any file that includes this header, and its
                //  expansion already ends in ';'.
                // NOTE(review): fFakeField exists only when RBBI_DEBUG is not defined, so debug
                //  and non-debug builds declare different members for this class.
     87     int32_t fFakeField;
     88     #define rbbiSymtablePrint() fFakeField=0;
     89 #endif
     90 
     91 private:
     92     RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
     93     RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
     94 };
     95 
     96 
     97 //--------------------------------------------------------------------------------
     98 //
     99 //  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
    100 //
    101 //--------------------------------------------------------------------------------
    102 class RBBIRuleBuilder : public UMemory {
    103 public:
    104 
    105     //  Create a rule based break iterator from a set of rules.
    106     //  This function is the main entry point into the rule builder.  The
    107     //   public ICU API for creating RBBIs uses this function to do the actual work.
    108     //
                //  rules       The rule source text.
                //  parseError  Passed as a pointer; NOTE(review): whether NULL is accepted is
                //              not visible from this header -- confirm in the implementation.
                //  status      Error code, passed by reference.
                //  Returns a BreakIterator pointer; presumably the caller takes ownership -- confirm.
    109     static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
    110                                     UParseError      *parseError,
    111                                     UErrorCode       &status);
    112 
    113 public:
    114     // The "public" functions and data members that appear below are accessed
    115     //  (and shared) by the various parts that make up the rule builder.  They
    116     //  are NOT intended to be accessed by anything outside of the
    117     //  rule builder implementation.
    118     RBBIRuleBuilder(const UnicodeString  &rules,
    119                     UParseError          *parseErr,
    120                     UErrorCode           &status
    121         );
    122 
    123     virtual    ~RBBIRuleBuilder();
    124     char                          *fDebugEnv;        // controls debug trace output
    125     UErrorCode                    *fStatus;          // Error reporting.  Keeping status
    126     UParseError                   *fParseError;      //   here avoids passing it everywhere.
    127     const UnicodeString           &fRules;           // The rule string that we are compiling.
                                                             //   Reference member: the referenced string
                                                             //   must outlive this builder.
    128 
    129     RBBIRuleScanner               *fScanner;         // The scanner.
    130     RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
    131     RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
    132     RBBINode                      *fSafeFwdTree;
    133     RBBINode                      *fSafeRevTree;
    134 
    135     RBBINode                      **fDefaultTree;    // For rules not qualified with a !,
    136                                                      //   the tree to which they belong.
                                                             //   Pointer to a tree pointer; presumably it
                                                             //   addresses one of the four fields above -- confirm.
    137 
    138     UBool                         fChainRules;       // True for chained Unicode TR style rules.
    139                                                      // False for traditional regexp rules.
    140 
    141     UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
    142                                                      //   chars with LineBreak property == CM.
    143 
    144     UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
    145                                                      // immediate break, no continuing for the
    146                                                      // longest match.
    147 
    148     RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
    149     UVector                       *fUSetNodes;       // Vector of all uset nodes.
    150 
    151     RBBITableBuilder              *fForwardTables;   // State transition tables
    152     RBBITableBuilder              *fReverseTables;
    153     RBBITableBuilder              *fSafeFwdTables;
    154     RBBITableBuilder              *fSafeRevTables;
    155 
    156     UVector                       *fRuleStatusVals;  // The values that can be returned
    157                                                      //   from getRuleStatus().
    158 
    159     RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
    160                                                      // data tables.
                                                             // NOTE(review): ownership of the returned
                                                             //   header is not visible here -- confirm.
    161 private:
    162     RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
    163     RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
    164 };
    165 
    166 
    167 
    168 
    169 //----------------------------------------------------------------------------
    170 //
    171 //   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
    172 //                    been encountered.  The val Node will be of nodetype uset
    173 //                    and contain pointers to the actual UnicodeSets.
    174 //                    The Key is the source string for initializing the set.
    175 //
    176 //                    The hash table is used to avoid creating duplicate
    177 //                    unnamed (not $var references) UnicodeSets.
    178 //
    179 //                    Memory Management:
    180 //                       The Hash Table owns these RBBISetTableEl structs and
    181 //                            the key strings.  It does NOT own the val nodes.
    182 //
    183 //----------------------------------------------------------------------------
    184 struct RBBISetTableEl {
    185     UnicodeString *key;   // Source string that initialized the set; owned by the hash table.
    186     RBBINode      *val;   // Node of type uset for that set; NOT owned by the hash table.
    187 };
    188 
    189 
    190 //----------------------------------------------------------------------------
    191 //
    192 //   RBBIDebugPrintf    Printf equivalent, for debugging output.
    193 //                      Conditional compilation of the implementation lets us
    194 //                      get rid of the stdio dependency in environments where it
    195 //                      is unavailable.
    196 //
    197 //----------------------------------------------------------------------------
    198 #ifdef RBBI_DEBUG
        // Debug builds: both names are plain aliases for the stdio functions.
        // NOTE(review): this #include sits between U_NAMESPACE_BEGIN and U_NAMESPACE_END;
        //   if that macro pair opens a namespace, a first-time inclusion of <stdio.h>
        //   here would be processed inside it -- confirm and consider hoisting the include.
    199 #include <stdio.h>
    200 #define RBBIDebugPrintf printf
    201 #define RBBIDebugPuts puts
    202 #else
        // Non-debug builds: RBBIDebugPuts(arg) expands to nothing, but RBBIDebugPrintf is
        //   only #undef'd and has no empty replacement, so every use of RBBIDebugPrintf
        //   must itself sit inside an RBBI_DEBUG conditional.
    203 #undef RBBIDebugPrintf
    204 #define RBBIDebugPuts(arg)
    205 #endif
    206 
    207 U_NAMESPACE_END
    208 #endif
    209 
    210 
    211 
    212