Home | History | Annotate | Download | only in impl
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  *
      6  *   Copyright (C) 2004-2015, International Business Machines
      7  *   Corporation and others.  All Rights Reserved.
      8  *
      9  *******************************************************************************
     10  *   file name:  UBiDiProps.java
     11  *   encoding:   US-ASCII
     12  *   tab size:   8 (not used)
     13  *   indentation:4
     14  *
     15  *   created on: 2005jan16
     16  *   created by: Markus W. Scherer
     17  *
     18  *   Low-level Unicode bidi/shaping properties access.
     19  *   Java port of ubidi_props.h/.c.
     20  */
     21 
     22 package com.ibm.icu.impl;
     23 
     24 import java.io.IOException;
     25 import java.nio.ByteBuffer;
     26 import java.util.Iterator;
     27 
     28 import com.ibm.icu.lang.UCharacter;
     29 import com.ibm.icu.lang.UProperty;
     30 import com.ibm.icu.text.UnicodeSet;
     31 import com.ibm.icu.util.ICUUncheckedIOException;
     32 
     33 public final class UBiDiProps {
     34     // constructors etc. --------------------------------------------------- ***
     35 
     36     // port of ubidi_openProps()
     37     private UBiDiProps() throws IOException{
     38         ByteBuffer bytes=ICUBinary.getData(DATA_FILE_NAME);
     39         readData(bytes);
     40     }
     41 
     42     private void readData(ByteBuffer bytes) throws IOException {
     43         // read the header
     44         ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
     45 
     46         // read indexes[]
     47         int i, count;
     48         count=bytes.getInt();
     49         if(count<IX_TOP) {
     50             throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
     51         }
     52         indexes=new int[count];
     53 
     54         indexes[0]=count;
     55         for(i=1; i<count; ++i) {
     56             indexes[i]=bytes.getInt();
     57         }
     58 
     59         // read the trie
     60         trie=Trie2_16.createFromSerialized(bytes);
     61         int expectedTrieLength=indexes[IX_TRIE_SIZE];
     62         int trieLength=trie.getSerializedLength();
     63         if(trieLength>expectedTrieLength) {
     64             throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
     65         }
     66         // skip padding after trie bytes
     67         ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
     68 
     69         // read mirrors[]
     70         count=indexes[IX_MIRROR_LENGTH];
     71         if(count>0) {
     72             mirrors=ICUBinary.getInts(bytes, count, 0);
     73         }
     74 
     75         // read jgArray[]
     76         count=indexes[IX_JG_LIMIT]-indexes[IX_JG_START];
     77         jgArray=new byte[count];
     78         bytes.get(jgArray);
     79 
     80         // read jgArray2[]
     81         count=indexes[IX_JG_LIMIT2]-indexes[IX_JG_START2];
     82         jgArray2=new byte[count];
     83         bytes.get(jgArray2);
     84     }
     85 
     86     // implement ICUBinary.Authenticate
     87     private final static class IsAcceptable implements ICUBinary.Authenticate {
     88         @Override
     89         public boolean isDataVersionAcceptable(byte version[]) {
     90             return version[0]==2;
     91         }
     92     }
     93 
     94     // set of property starts for UnicodeSet ------------------------------- ***
     95 
     96     public final void addPropertyStarts(UnicodeSet set) {
     97         int i, length;
     98         int c, start, limit;
     99 
    100         byte prev, jg;
    101 
    102         /* add the start code point of each same-value range of the trie */
    103         Iterator<Trie2.Range> trieIterator=trie.iterator();
    104         Trie2.Range range;
    105         while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
    106             set.add(range.startCodePoint);
    107         }
    108 
    109         /* add the code points from the bidi mirroring table */
    110         length=indexes[IX_MIRROR_LENGTH];
    111         for(i=0; i<length; ++i) {
    112             c=getMirrorCodePoint(mirrors[i]);
    113             set.add(c, c+1);
    114         }
    115 
    116         /* add the code points from the Joining_Group array where the value changes */
    117         start=indexes[IX_JG_START];
    118         limit=indexes[IX_JG_LIMIT];
    119         byte[] jga=jgArray;
    120         for(;;) {
    121             length=limit-start;
    122             prev=0;
    123             for(i=0; i<length; ++i) {
    124                 jg=jga[i];
    125                 if(jg!=prev) {
    126                     set.add(start);
    127                     prev=jg;
    128                 }
    129                 ++start;
    130             }
    131             if(prev!=0) {
    132                 /* add the limit code point if the last value was not 0 (it is now start==limit) */
    133                 set.add(limit);
    134             }
    135             if(limit==indexes[IX_JG_LIMIT]) {
    136                 /* switch to the second Joining_Group range */
    137                 start=indexes[IX_JG_START2];
    138                 limit=indexes[IX_JG_LIMIT2];
    139                 jga=jgArray2;
    140             } else {
    141                 break;
    142             }
    143         }
    144 
    145         /* add code points with hardcoded properties, plus the ones following them */
    146 
    147         /* (none right now) */
    148     }
    149 
    150     // property access functions ------------------------------------------- ***
    151 
    152     public final int getMaxValue(int which) {
    153         int max;
    154 
    155         max=indexes[IX_MAX_VALUES];
    156         switch(which) {
    157         case UProperty.BIDI_CLASS:
    158             return (max&CLASS_MASK);
    159         case UProperty.JOINING_GROUP:
    160             return (max&MAX_JG_MASK)>>MAX_JG_SHIFT;
    161         case UProperty.JOINING_TYPE:
    162             return (max&JT_MASK)>>JT_SHIFT;
    163         case UProperty.BIDI_PAIRED_BRACKET_TYPE:
    164             return (max&BPT_MASK)>>BPT_SHIFT;
    165         default:
    166             return -1; /* undefined */
    167         }
    168     }
    169 
    170     public final int getClass(int c) {
    171         return getClassFromProps(trie.get(c));
    172     }
    173 
    174     public final boolean isMirrored(int c) {
    175         return getFlagFromProps(trie.get(c), IS_MIRRORED_SHIFT);
    176     }
    177 
    178     private final int getMirror(int c, int props) {
    179         int delta=getMirrorDeltaFromProps(props);
    180         if(delta!=ESC_MIRROR_DELTA) {
    181             return c+delta;
    182         } else {
    183             /* look for mirror code point in the mirrors[] table */
    184             int m;
    185             int i, length;
    186             int c2;
    187 
    188             length=indexes[IX_MIRROR_LENGTH];
    189 
    190             /* linear search */
    191             for(i=0; i<length; ++i) {
    192                 m=mirrors[i];
    193                 c2=getMirrorCodePoint(m);
    194                 if(c==c2) {
    195                     /* found c, return its mirror code point using the index in m */
    196                     return getMirrorCodePoint(mirrors[getMirrorIndex(m)]);
    197                 } else if(c<c2) {
    198                     break;
    199                 }
    200             }
    201 
    202             /* c not found, return it itself */
    203             return c;
    204         }
    205     }
    206 
    207     public final int getMirror(int c) {
    208         int props=trie.get(c);
    209         return getMirror(c, props);
    210     }
    211 
    212     public final boolean isBidiControl(int c) {
    213         return getFlagFromProps(trie.get(c), BIDI_CONTROL_SHIFT);
    214     }
    215 
    216     public final boolean isJoinControl(int c) {
    217         return getFlagFromProps(trie.get(c), JOIN_CONTROL_SHIFT);
    218     }
    219 
    220     public final int getJoiningType(int c) {
    221         return (trie.get(c)&JT_MASK)>>JT_SHIFT;
    222     }
    223 
    224     public final int getJoiningGroup(int c) {
    225         int start, limit;
    226 
    227         start=indexes[IX_JG_START];
    228         limit=indexes[IX_JG_LIMIT];
    229         if(start<=c && c<limit) {
    230             return jgArray[c-start]&0xff;
    231         }
    232         start=indexes[IX_JG_START2];
    233         limit=indexes[IX_JG_LIMIT2];
    234         if(start<=c && c<limit) {
    235             return jgArray2[c-start]&0xff;
    236         }
    237         return UCharacter.JoiningGroup.NO_JOINING_GROUP;
    238     }
    239 
    240     public final int getPairedBracketType(int c) {
    241         return (trie.get(c)&BPT_MASK)>>BPT_SHIFT;
    242     }
    243 
    244     public final int getPairedBracket(int c) {
    245         int props=trie.get(c);
    246         if((props&BPT_MASK)==0) {
    247             return c;
    248         } else {
    249             return getMirror(c, props);
    250         }
    251     }
    252 
    253     // data members -------------------------------------------------------- ***
    254     private int indexes[];
    255     private int mirrors[];
    256     private byte jgArray[];
    257     private byte jgArray2[];
    258 
    259     private Trie2_16 trie;
    260 
    261     // data format constants ----------------------------------------------- ***
    262     private static final String DATA_NAME="ubidi";
    263     private static final String DATA_TYPE="icu";
    264     private static final String DATA_FILE_NAME=DATA_NAME+"."+DATA_TYPE;
    265 
    266     /* format "BiDi" */
    267     private static final int FMT=0x42694469;
    268 
    269     /* indexes into indexes[] */
    270     //private static final int IX_INDEX_TOP=0;
    271     //private static final int IX_LENGTH=1;
    272     private static final int IX_TRIE_SIZE=2;
    273     private static final int IX_MIRROR_LENGTH=3;
    274 
    275     private static final int IX_JG_START=4;
    276     private static final int IX_JG_LIMIT=5;
    277     private static final int IX_JG_START2=6;  /* new in format version 2.2, ICU 54 */
    278     private static final int IX_JG_LIMIT2=7;
    279 
    280     private static final int IX_MAX_VALUES=15;
    281     private static final int IX_TOP=16;
    282 
    283     // definitions for 16-bit bidi/shaping properties word ----------------- ***
    284 
    285                           /* CLASS_SHIFT=0, */     /* bidi class: 5 bits (4..0) */
    286     private static final int JT_SHIFT=5;           /* joining type: 3 bits (7..5) */
    287 
    288     private static final int BPT_SHIFT=8;          /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */
    289 
    290     private static final int JOIN_CONTROL_SHIFT=10;
    291     private static final int BIDI_CONTROL_SHIFT=11;
    292 
    293     private static final int IS_MIRRORED_SHIFT=12;         /* 'is mirrored' */
    294     private static final int MIRROR_DELTA_SHIFT=13;        /* bidi mirroring delta: 3 bits (15..13) */
    295 
    296     private static final int MAX_JG_SHIFT=16;              /* max JG value in indexes[MAX_VALUES_INDEX] bits 23..16 */
    297 
    298     private static final int CLASS_MASK=    0x0000001f;
    299     private static final int JT_MASK=       0x000000e0;
    300     private static final int BPT_MASK=      0x00000300;
    301 
    302     private static final int MAX_JG_MASK=   0x00ff0000;
    303 
    304     private static final int getClassFromProps(int props) {
    305         return props&CLASS_MASK;
    306     }
    307     private static final boolean getFlagFromProps(int props, int shift) {
    308         return ((props>>shift)&1)!=0;
    309     }
    310     private static final int getMirrorDeltaFromProps(int props) {
    311         return (short)props>>MIRROR_DELTA_SHIFT;
    312     }
    313 
    314     private static final int ESC_MIRROR_DELTA=-4;
    315     //private static final int MIN_MIRROR_DELTA=-3;
    316     //private static final int MAX_MIRROR_DELTA=3;
    317 
    318     // definitions for 32-bit mirror table entry --------------------------- ***
    319 
    320     /* the source Unicode code point takes 21 bits (20..0) */
    321     private static final int MIRROR_INDEX_SHIFT=21;
    322     //private static final int MAX_MIRROR_INDEX=0x7ff;
    323 
    324     private static final int getMirrorCodePoint(int m) {
    325         return m&0x1fffff;
    326     }
    327     private static final int getMirrorIndex(int m) {
    328         return m>>>MIRROR_INDEX_SHIFT;
    329     }
    330 
    331 
    332     /*
    333      * public singleton instance
    334      */
    335     public static final UBiDiProps INSTANCE;
    336 
    337     // This static initializer block must be placed after
    338     // other static member initialization
    339     static {
    340         try {
    341             INSTANCE = new UBiDiProps();
    342         } catch (IOException e) {
    343             throw new ICUUncheckedIOException(e);
    344         }
    345     }
    346 }
    347