Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  rbbirb.h
      5 //
      6 //  Copyright (C) 2002-2008, International Business Machines Corporation and others.
      7 //  All Rights Reserved.
      8 //
      9 //  This file contains declarations for several classes from the
     10 //    Rule Based Break Iterator rule builder.
     11 //
     12 
     13 
     14 #ifndef RBBIRB_H
     15 #define RBBIRB_H
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_BREAK_ITERATION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/rbbi.h"
     23 #include "unicode/uniset.h"
     24 #include "unicode/parseerr.h"
     25 #include "uhash.h"
     26 #include "uvector.h"
     27 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
     28                           //    looks up references to $variables within a set.
     29 
     30 
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class               RBBIRuleScanner;
     35 struct              RBBIRuleTableEl;
     36 class               RBBISetBuilder;
     37 class               RBBINode;
     38 class               RBBITableBuilder;
     39 
     40 
     41 
     42 //--------------------------------------------------------------------------------
     43 //
     44 //   RBBISymbolTable.    Implements SymbolTable interface that is used by the
     45 //                       UnicodeSet parser to resolve references to $variables.
     46 //
     47 //--------------------------------------------------------------------------------
     48 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
     49 public:                                       //   of these structs for each entry.
     50     RBBISymbolTableEntry();
     51     UnicodeString          key;
     52     RBBINode               *val;
     53     ~RBBISymbolTableEntry();
     54 
     55 private:
     56     RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
     57     RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
     58 };
     59 
     60 
     61 class RBBISymbolTable : public UMemory, public SymbolTable {
     62 private:
     63     const UnicodeString      &fRules;
     64     UHashtable               *fHashTable;
     65     RBBIRuleScanner          *fRuleScanner;
     66 
     67     // These next two fields are part of the mechanism for passing references to
     68     //   already-constructed UnicodeSets back to the UnicodeSet constructor
     69     //   when the pattern includes $variable references.
     70     const UnicodeString      ffffString;      // = "/uffff"
     71     UnicodeSet              *fCachedSetLookup;
     72 
     73 public:
     74     //  API inherited from class SymbolTable
     75     virtual const UnicodeString*  lookup(const UnicodeString& s) const;
     76     virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
     77     virtual UnicodeString parseReference(const UnicodeString& text,
     78                                          ParsePosition& pos, int32_t limit) const;
     79 
     80     //  Additional Functions
     81     RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
     82     virtual ~RBBISymbolTable();
     83 
     84     virtual RBBINode *lookupNode(const UnicodeString &key) const;
     85     virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
     86 
     87 #ifdef RBBI_DEBUG
     88     virtual void      rbbiSymtablePrint() const;
     89 #else
     90     // A do-nothing inline function for non-debug builds.  Member funcs can't be empty
     91     //  or the call sites won't compile.
     92     int32_t fFakeField;
     93     #define rbbiSymtablePrint() fFakeField=0;
     94 #endif
     95 
     96 private:
     97     RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
     98     RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
     99 };
    100 
    101 
    102 //--------------------------------------------------------------------------------
    103 //
    104 //  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
    105 //
    106 //--------------------------------------------------------------------------------
    107 class RBBIRuleBuilder : public UMemory {
    108 public:
    109 
    110     //  Create a rule based break iterator from a set of rules.
    111     //  This function is the main entry point into the rule builder.  The
    112     //   public ICU API for creating RBBIs uses this function to do the actual work.
    113     //
    114     static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
    115                                     UParseError      *parseError,
    116                                     UErrorCode       &status);
    117 
    118 public:
    119     // The "public" functions and data members that appear below are accessed
    120     //  (and shared) by the various parts that make up the rule builder.  They
    121     //  are NOT intended to be accessed by anything outside of the
    122     //  rule builder implementation.
    123     RBBIRuleBuilder(const UnicodeString  &rules,
    124                     UParseError          *parseErr,
    125                     UErrorCode           &status
    126         );
    127 
    128     virtual    ~RBBIRuleBuilder();
    129     char                          *fDebugEnv;        // controls debug trace output
    130     UErrorCode                    *fStatus;          // Error reporting.  Keeping status
    131     UParseError                   *fParseError;      //   here avoids passing it everywhere.
    132     const UnicodeString           &fRules;           // The rule string that we are compiling
    133 
    134     RBBIRuleScanner               *fScanner;         // The scanner.
    135     RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
    136     RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
    137     RBBINode                      *fSafeFwdTree;
    138     RBBINode                      *fSafeRevTree;
    139 
    140     RBBINode                      **fDefaultTree;    // For rules not qualified with a !
    141                                                      //   the tree to which they belong to.
    142 
    143     UBool                         fChainRules;       // True for chained Unicode TR style rules.
    144                                                      // False for traditional regexp rules.
    145 
    146     UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
    147                                                      //   chars with LineBreak property == CM.
    148 
    149     UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
    150                                                      // immediate break, no continuing for the
    151                                                      // longest match.
    152 
    153     RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
    154     UVector                       *fUSetNodes;       // Vector of all uset nodes.
    155 
    156     RBBITableBuilder              *fForwardTables;   // State transition tables
    157     RBBITableBuilder              *fReverseTables;
    158     RBBITableBuilder              *fSafeFwdTables;
    159     RBBITableBuilder              *fSafeRevTables;
    160 
    161     UVector                       *fRuleStatusVals;  // The values that can be returned
    162                                                      //   from getRuleStatus().
    163 
    164     RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
    165                                                      // data tables..
    166 private:
    167     RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
    168     RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
    169 };
    170 
    171 
    172 
    173 
    174 //----------------------------------------------------------------------------
    175 //
    176 //   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
    177 //                    been encountered.  The val Node will be of nodetype uset
    178 //                    and contain pointers to the actual UnicodeSets.
    179 //                    The Key is the source string for initializing the set.
    180 //
    181 //                    The hash table is used to avoid creating duplicate
    182 //                    unnamed (not $var references) UnicodeSets.
    183 //
    184 //                    Memory Management:
    185 //                       The Hash Table owns these RBBISetTableEl structs and
    186 //                            the key strings.  It does NOT own the val nodes.
    187 //
    188 //----------------------------------------------------------------------------
    189 struct RBBISetTableEl {
    190     UnicodeString *key;
    191     RBBINode      *val;
    192 };
    193 
    194 
    195 //----------------------------------------------------------------------------
    196 //
    197 //   RBBIDebugPrintf    Printf equivalent, for debugging output.
    198 //                      Conditional compilation of the implementation lets us
    199 //                      get rid of the stdio dependency in environments where it
    200 //                      is unavailable.
    201 //
    202 //----------------------------------------------------------------------------
    203 #ifdef RBBI_DEBUG
    204 #include <stdio.h>
    205 #define RBBIDebugPrintf printf
    206 #define RBBIDebugPuts puts
    207 #else
    208 #undef RBBIDebugPrintf
    209 #define RBBIDebugPuts(arg)
    210 #endif
    211 
    212 U_NAMESPACE_END
    213 
    214 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    215 
    216 #endif
    217 
    218 
    219 
    220