// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

     11 /**
     12  * <code>UnicodeFilter</code> defines a protocol for selecting a
     13  * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
     14  * Currently, filters are used in conjunction with classes like
     15  * {@link com.ibm.icu.text.Transliterator}
     16  * to only process selected characters through a
     17  * transformation.
     18  * @stable ICU 2.0
     19  */
     20 @SuppressWarnings("javadoc")    // com.imb.icu.text.Transliterator is in another project
     21 public abstract class UnicodeFilter implements UnicodeMatcher {
     22 
     23     /**
     24      * Returns <tt>true</tt> for characters that are in the selected
     25      * subset.  In other words, if a character is <b>to be
     26      * filtered</b>, then <tt>contains()</tt> returns
     27      * <b><tt>false</tt></b>.
     28      * @stable ICU 2.0
     29      */
     30     public abstract boolean contains(int c);
     31 
     32     /**
     33      * Default implementation of UnicodeMatcher::matches() for Unicode
     34      * filters.  Matches a single 16-bit code unit at offset.
     35      * @stable ICU 2.0
     36      */
     37     @Override
     38     public int matches(Replaceable text,
     39                        int[] offset,
     40                        int limit,
     41                        boolean incremental) {
     42         int c;
     43         if (offset[0] < limit &&
     44             contains(c = text.char32At(offset[0]))) {
     45             offset[0] += UTF16.getCharCount(c);
     46             return U_MATCH;
     47         }
     48         if (offset[0] > limit && contains(text.char32At(offset[0]))) {
     49             // Backup offset by 1, unless the preceding character is a
     50             // surrogate pair -- then backup by 2 (keep offset pointing at
     51             // the lead surrogate).
     52             --offset[0];
     53             if (offset[0] >= 0) {
     54                 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
     55             }
     56             return U_MATCH;
     57         }
     58         if (incremental && offset[0] == limit) {
     59             return U_PARTIAL_MATCH;
     60         }
     61         return U_MISMATCH;
     62     }
     63 
     64     // TODO Remove this when the JDK property implements MemberDoc.isSynthetic
     65     /**
     66      * (This should not be here; it is declared to make CheckTags
     67      * happy.  Java inserts a synthetic constructor and CheckTags
     68      * can't tell that it's synthetic.)
     69      *
     70      * @internal
     71      * @deprecated This API is ICU internal only.
     72      */
     73     @Deprecated
     74     protected UnicodeFilter() {}
     75 }
     76