Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 * Copyright (C) 1999-2010, International Business Machines Corporation and others.
      4 * All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/17/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 #ifndef UNIFILT_H
     11 #define UNIFILT_H
     12 
     13 #include "unicode/unifunct.h"
     14 #include "unicode/unimatch.h"
     15 
     16 /**
     17  * \file
     18  * \brief C++ API: Unicode Filter
     19  */
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 /**
     24  * U_ETHER is the sentinel UChar value (0xFFFF) used to represent character
     25  * values for positions outside a range.  For example, a transliterator uses it
     26  * to represent characters outside the range contextStart..contextLimit-1.  This
     27  * allows explicit matching by rules and UnicodeSets of text outside a
     28  * defined range.
     29  * @stable ICU 3.0
     30  */
     31 #define U_ETHER ((UChar)0xFFFF)
     32 
     33 /**
     34  *
     35  * <code>UnicodeFilter</code> defines a protocol for selecting a
     36  * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
     37  * Currently, filters are used in conjunction with classes like {@link
     38  * Transliterator} to only process selected characters through a
     39  * transformation.
     40  *
     41  * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
     42  * of its base class, UnicodeMatcher.  These methods are toPattern()
     43  * and matchesIndexValue().  This is done so that filter classes that
     44  * are not actually used as matchers -- specifically, those in the
     45  * UnicodeFilterLogic component, and those in tests -- can continue to
     46  * work without defining these methods.  As long as a filter is not
     47  * used in an RBT during real transliteration, these methods will not
     48  * be called.  However, this breaks the UnicodeMatcher base class
     49  * protocol, and it is not a correct solution.
     50  *
     51  * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
     52  * hierarchy and either redesign it, or simply remove the stubs in
     53  * UnicodeFilter and force subclasses to implement the full
     54  * UnicodeMatcher protocol.
     55  *
     56  * @see UnicodeFilterLogic
     57  * @stable ICU 2.0
     58  */
     59 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
     60 
     61 public:
     62     /**
     63      * Virtual destructor.
     64      * @stable ICU 2.0
     65      */
     66     virtual ~UnicodeFilter();
     67 
     68     /**
     69      * Tests whether a character is in the selected subset.  If a character is <b>to be
     70      * filtered</b>, then <tt>contains()</tt> returns <b><tt>false</tt></b>.  Pure virtual here.
     71      * @param c the character to test
     72      * @return <tt>true</tt> if <tt>c</tt> is in the selected subset
     73      * @stable ICU 2.0
     74      */
     75     virtual UBool contains(UChar32 c) const = 0;
     76 
     77     /**
     78      * UnicodeFunctor API.  Casts 'this' to a UnicodeMatcher* pointer.
     79      * @return the resulting UnicodeMatcher* pointer
     80      * @stable ICU 2.4
     81      */
     82     virtual UnicodeMatcher* toMatcher() const;
     83 
     84     /**
     85      * Implements the UnicodeMatcher API.  Note that offset is taken by non-const reference.
     86      * @stable ICU 2.4
     87      */
     88     virtual UMatchDegree matches(const Replaceable& text,
     89                                  int32_t& offset,
     90                                  int32_t limit,
     91                                  UBool incremental);
     92 
     93     /**
     94      * UnicodeFunctor API.  Nothing to do for a filter; the argument is left unnamed here.
     95      * @stable ICU 2.4
     96      */
     97     virtual void setData(const TransliterationRuleData*);
     98 
     99     /**
    100      * ICU "poor man's RTTI": static member, so no instance is needed to call it.
    101      * @return a UClassID for this class
    102      * @stable ICU 2.2
    103      */
    104     static UClassID U_EXPORT2 getStaticClassID();
    105 
    106 protected:
    107 
    108     /*
    109      * The constructor declaration below is left commented out, so this class declares none.
    110      * The class has pure virtual functions, so it cannot be instantiated directly anyway.
    111      * @stable ICU 2.0
    112      */
    113 /*    UnicodeFilter();*/
    114 };
    115 
    116 /*inline UnicodeFilter::UnicodeFilter() {}*/
    117 
    118 U_NAMESPACE_END
    119 
    120 #endif
    121