Home | History | Annotate | Download | only in impl
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 1996-2015, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.impl;
     11 
     12 import java.io.IOException;
     13 import java.nio.ByteBuffer;
     14 import java.util.Arrays;
     15 
     16 /**
     17 * <p>Internal reader class for ICU data file uname.dat containing
     18 * Unicode codepoint name data.</p>
     19 * <p>This class simply reads unames.icu, authenticates that it is a valid
     20 * ICU data file and split its contents up into blocks of data for use in
     21 * <a href=UCharacterName.html>com.ibm.icu.impl.UCharacterName</a>.
     22 * </p>
     23 * <p>unames.icu which is in big-endian format is jared together with this
     24 * package.</p>
     25 * @author Syn Wee Quek
     26 * @since release 2.1, February 1st 2002
     27 */
     28 
     29 final class UCharacterNameReader implements ICUBinary.Authenticate
     30 {
     31     // public methods ----------------------------------------------------
     32 
     33     @Override
     34     public boolean isDataVersionAcceptable(byte version[])
     35     {
     36         return version[0] == 1;
     37     }
     38 
     39     // protected constructor ---------------------------------------------
     40 
     41     /**
     42     * <p>Protected constructor.</p>
     43     * @param bytes ICU uprop.dat file buffer
     44     * @exception IOException throw if data file fails authentication
     45     */
     46     protected UCharacterNameReader(ByteBuffer bytes) throws IOException
     47     {
     48         ICUBinary.readHeader(bytes, DATA_FORMAT_ID_, this);
     49         m_byteBuffer_ = bytes;
     50     }
     51 
     52     // protected methods -------------------------------------------------
     53 
     54     /**
     55     * Read and break up the stream of data passed in as arguments
     56     * and fills up UCharacterName.
     57     * If unsuccessful false will be returned.
     58     * @param data instance of datablock
     59     * @exception IOException thrown when there's a data error.
     60     */
     61     protected void read(UCharacterName data) throws IOException
     62     {
     63         // reading index
     64         m_tokenstringindex_ = m_byteBuffer_.getInt();
     65         m_groupindex_       = m_byteBuffer_.getInt();
     66         m_groupstringindex_ = m_byteBuffer_.getInt();
     67         m_algnamesindex_    = m_byteBuffer_.getInt();
     68 
     69         // reading tokens
     70         int count = m_byteBuffer_.getChar();
     71         char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
     72         int size = m_groupindex_ - m_tokenstringindex_;
     73         byte tokenstr[] = new byte[size];
     74         m_byteBuffer_.get(tokenstr);
     75         data.setToken(token, tokenstr);
     76 
     77         // reading the group information records
     78         count = m_byteBuffer_.getChar();
     79         data.setGroupCountSize(count, GROUP_INFO_SIZE_);
     80         count *= GROUP_INFO_SIZE_;
     81         char group[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
     82 
     83         size = m_algnamesindex_ - m_groupstringindex_;
     84         byte groupstring[] = new byte[size];
     85         m_byteBuffer_.get(groupstring);
     86 
     87         data.setGroup(group, groupstring);
     88 
     89         count = m_byteBuffer_.getInt();
     90         UCharacterName.AlgorithmName alg[] =
     91                                  new UCharacterName.AlgorithmName[count];
     92 
     93         for (int i = 0; i < count; i ++)
     94         {
     95             UCharacterName.AlgorithmName an = readAlg();
     96             if (an == null) {
     97                 throw new IOException("unames.icu read error: Algorithmic names creation error");
     98             }
     99             alg[i] = an;
    100         }
    101         data.setAlgorithm(alg);
    102     }
    103 
    104     /**
    105     * <p>Checking the file for the correct format.</p>
    106     * @param dataformatid
    107     * @param dataformatversion
    108     * @return true if the file format version is correct
    109     */
    110     ///CLOVER:OFF
    111     protected boolean authenticate(byte dataformatid[],
    112                                    byte dataformatversion[])
    113     {
    114         return Arrays.equals(
    115                 ICUBinary.getVersionByteArrayFromCompactInt(DATA_FORMAT_ID_),
    116                 dataformatid) &&
    117                isDataVersionAcceptable(dataformatversion);
    118     }
    119     ///CLOVER:ON
    120 
    121     // private variables -------------------------------------------------
    122 
    123     /**
    124     * Byte buffer for names
    125     */
    126     private ByteBuffer m_byteBuffer_;
    127     /**
    128     * Size of the group information block in number of char
    129     */
    130     private static final int GROUP_INFO_SIZE_ = 3;
    131 
    132     /**
    133     * Index of the offset information
    134     */
    135     private int m_tokenstringindex_;
    136     private int m_groupindex_;
    137     private int m_groupstringindex_;
    138     private int m_algnamesindex_;
    139 
    140     /**
    141     * Size of an algorithmic name information group
    142     * start code point size + end code point size + type size + variant size +
    143     * size of data size
    144     */
    145     private static final int ALG_INFO_SIZE_ = 12;
    146 
    147     /**
    148     * File format id that this class understands.
    149     */
    150     private static final int DATA_FORMAT_ID_ = 0x756E616D;
    151 
    152     // private methods ---------------------------------------------------
    153 
    154     /**
    155     * Reads an individual record of AlgorithmNames
    156     * @return an instance of AlgorithNames if read is successful otherwise null
    157     * @exception IOException thrown when file read error occurs or data is corrupted
    158     */
    159     private UCharacterName.AlgorithmName readAlg() throws IOException
    160     {
    161         UCharacterName.AlgorithmName result =
    162                                        new UCharacterName.AlgorithmName();
    163         int rangestart = m_byteBuffer_.getInt();
    164         int rangeend   = m_byteBuffer_.getInt();
    165         byte type      = m_byteBuffer_.get();
    166         byte variant   = m_byteBuffer_.get();
    167         if (!result.setInfo(rangestart, rangeend, type, variant)) {
    168             return null;
    169         }
    170 
    171         int size = m_byteBuffer_.getChar();
    172         if (type == UCharacterName.AlgorithmName.TYPE_1_)
    173         {
    174             char factor[] = ICUBinary.getChars(m_byteBuffer_, variant, 0);
    175 
    176             result.setFactor(factor);
    177             size -= (variant << 1);
    178         }
    179 
    180         StringBuilder prefix = new StringBuilder();
    181         char c = (char)(m_byteBuffer_.get() & 0x00FF);
    182         while (c != 0)
    183         {
    184             prefix.append(c);
    185             c = (char)(m_byteBuffer_.get() & 0x00FF);
    186         }
    187 
    188         result.setPrefix(prefix.toString());
    189 
    190         size -= (ALG_INFO_SIZE_ + prefix.length() + 1);
    191 
    192         if (size > 0)
    193         {
    194             byte string[] = new byte[size];
    195             m_byteBuffer_.get(string);
    196             result.setFactorString(string);
    197         }
    198         return result;
    199     }
    200 }
    201