Home | History | Annotate | Download | only in impl
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  *   Copyright (C) 2002-2010, International Business Machines
      6  *   Corporation and others.  All Rights Reserved.
      7  *******************************************************************************
      8 */
      9 
     10 package com.ibm.icu.impl;
     11 /**
     12  * @version     1.1
     13  * @author     Markus W. Scherer
     14  * Ram: Add documentation, remove unwanted methods, improve coverage.
     15  */
     16 
     /**
      * Simple class for handling serialized USet/UnicodeSet structures
      * without object creation. See ICU4C icu/source/common/uset.c.
      *
      * <p>The set is held as a sorted list of range boundaries that alternate
      * between an inclusive range start and an exclusive range limit; a missing
      * final limit means the last range extends to U+10FFFF. The first
      * {@code bmpLength} units of the backing array are boundaries at or below
      * U+FFFF, one 16-bit unit each. The remaining units up to {@code length}
      * are boundaries above U+FFFF, stored as two units each (bits 16 and up,
      * then the low 16 bits).
      *
      * @internal
      */
     public final class USerializedSet {
         /** Largest valid Unicode code point. */
         private static final int MAX_CODE_POINT = 0x10ffff;

         /** Largest code point that fits in a single 16-bit unit. */
         private static final int MAX_BMP = 0xffff;

         /** Header bit signalling that a separate BMP length unit follows. */
         private static final int SUPPLEMENTARY_FLAG = 0x8000;

         /** Header bits holding the total number of data units. */
         private static final int LENGTH_MASK = 0x7fff;

         /** Units needed by the largest layout {@link #setToOne(int)} writes. */
         private static final int SINGLE_CODE_POINT_CAPACITY = 4;

         /** Backing storage; only the first {@code length} units are meaningful. */
         private char[] array = new char[8];

         /** Number of leading single-unit (BMP) boundaries in {@code array}. */
         private int bmpLength;

         /** Total number of units in use in {@code array}. */
         private int length;

         /**
          * Fill in the given serialized set object.
          *
          * <p>The unit at {@code srcStart} is the header: its low 15 bits give
          * the number of data units, and if bit 15 is set the next unit gives
          * the number of BMP units among them. The data units are copied, so
          * later changes to {@code src} do not affect this set. If an exception
          * is thrown, this object is left as the empty set.
          *
          * @param src pointer to start of array
          * @param srcStart pointer to start of serialized data (length value)
          * @return true; invalid input is reported by exception rather than
          *         by returning false
          * @throws IndexOutOfBoundsException if {@code srcStart} is outside
          *         {@code src} or {@code src} is shorter than its header declares
          */
         public final boolean getSet(char src[], int srcStart) {
             // leave most argument checking up to Java exceptions

             // Reset first so a failed parse never leaves stale contents visible.
             bmpLength = length = 0;

             int header = src[srcStart++];
             int unitCount;
             int bmpUnitCount;
             if ((header & SUPPLEMENTARY_FLAG) != 0) {
                 /* there are supplementary values */
                 unitCount = header & LENGTH_MASK;
                 // +1 accounts for the BMP length unit that precedes the data.
                 if (src.length < (srcStart + 1 + unitCount)) {
                     throw new IndexOutOfBoundsException();
                 }
                 bmpUnitCount = src[srcStart++];
             } else {
                 /* only BMP values */
                 unitCount = header;
                 if (src.length < (srcStart + unitCount)) {
                     throw new IndexOutOfBoundsException();
                 }
                 bmpUnitCount = unitCount;
             }

             char[] copy = new char[unitCount];
             System.arraycopy(src, srcStart, copy, 0, unitCount);
             array = copy;
             length = unitCount;
             bmpLength = bmpUnitCount;
             return true;
         }

         /**
          * Set the USerializedSet to contain the given character (and nothing
          * else). Values outside {@code 0..0x10ffff} are ignored and leave the
          * set unchanged.
          *
          * @param c the code point the set should contain
          */
         public final void setToOne(int c) {
             // Java ints are signed, so negative values need their own check.
             if (c < 0 || c > MAX_CODE_POINT) {
                 return;
             }

             // getSet() may have installed a shorter array than the 4 units needed here.
             if (array == null || array.length < SINGLE_CODE_POINT_CAPACITY) {
                 array = new char[SINGLE_CODE_POINT_CAPACITY];
             }

             if (c < MAX_BMP) {
                 // Start and limit both fit in one unit.
                 bmpLength = length = 2;
                 array[0] = (char) c;
                 array[1] = (char) (c + 1);
             } else if (c == MAX_BMP) {
                 // Start is a BMP unit, limit U+10000 needs a two-unit pair.
                 bmpLength = 1;
                 length = 3;
                 array[0] = (char) MAX_BMP;
                 array[1] = 1;
                 array[2] = 0;
             } else if (c < MAX_CODE_POINT) {
                 // Start and limit are both two-unit pairs.
                 int limit = c + 1;
                 bmpLength = 0;
                 length = 4;
                 array[0] = (char) (c >> 16);
                 array[1] = (char) c;
                 array[2] = (char) (limit >> 16);
                 array[3] = (char) limit;
             } else /* c==0x10ffff */ {
                 // No limit is stored: the range is open-ended up to U+10FFFF.
                 bmpLength = 0;
                 length = 2;
                 array[0] = (char) (MAX_CODE_POINT >> 16);
                 array[1] = (char) MAX_CODE_POINT;
             }
         }

         /**
          * Returns a range of characters contained in the given serialized
          * set.
          *
          * @param rangeIndex a non-negative integer in the range <code>0..
          * countRanges()-1</code>
          * @param range variable to receive the data in the range:
          *        {@code range[0]} is set to the first code point and
          *        {@code range[1]} to the last code point (inclusive)
          * @return true if rangeIndex is valid, otherwise false
          * @throws IllegalArgumentException if {@code rangeIndex} is not
          *         negative and {@code range} is null or shorter than 2
          */
         public final boolean getRange(int rangeIndex, int[] range) {
             if (rangeIndex < 0) {
                 return false;
             }
             if (range == null || range.length < 2) {
                 throw new IllegalArgumentException();
             }
             // Bounding the index here also keeps rangeIndex * 2 from overflowing.
             if (rangeIndex >= countRanges()) {
                 return false;
             }

             int startBoundary = rangeIndex * 2; /* address start/limit pairs */
             int limitBoundary = startBoundary + 1;
             range[0] = boundaryAt(startBoundary);
             if (limitBoundary < boundaryCount()) {
                 // Stored limits are exclusive; callers get an inclusive end.
                 range[1] = boundaryAt(limitBoundary) - 1;
             } else {
                 range[1] = MAX_CODE_POINT;
             }
             return true;
         }

         /**
          * Returns true if the given USerializedSet contains the given
          * character. Values outside {@code 0..0x10ffff} are never contained.
          *
          * @param c the character to test for
          * @return true if set contains c
          */
         public final boolean contains(int c) {
             if (c < 0 || c > MAX_CODE_POINT) {
                 return false;
             }

             // c is inside a range exactly when an odd number of boundaries
             // (start, limit, start, ...) are less than or equal to it.
             int boundaries = boundaryCount();
             int passed = 0;
             while (passed < boundaries && c >= boundaryAt(passed)) {
                 ++passed;
             }
             return (passed & 1) != 0;
         }

         /**
          * Returns the number of disjoint ranges of characters contained in
          * the given serialized set.  Ignores any strings contained in the
          * set.
          *
          * @return a non-negative integer counting the character ranges
          * contained in set
          */
         public final int countRanges() {
             // Round up: an unpaired final start is an open-ended range.
             return (boundaryCount() + 1) / 2;
         }

         /** Returns the number of range boundaries stored (BMP units plus supplementary pairs). */
         private int boundaryCount() {
             return bmpLength + (length - bmpLength) / 2;
         }

         /** Returns the code point of the boundary at the given position in the boundary list. */
         private int boundaryAt(int boundaryIndex) {
             if (boundaryIndex < bmpLength) {
                 return array[boundaryIndex];
             }
             // Supplementary boundaries follow the BMP units and take two units each.
             int unit = bmpLength + 2 * (boundaryIndex - bmpLength);
             return (array[unit] << 16) | array[unit + 1];
         }
     }
    187