Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2003, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File writejava.c
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   01/11/02    Ram        Creation.
     15 *******************************************************************************
     16 */
     17 #include "rle.h"
     18 /**
     19  * The ESCAPE character is used during run-length encoding.  It signals
     20  * a run of identical chars.
     21  */
     22 static const uint16_t ESCAPE = 0xA5A5;
     23 
     24 /**
     25  * The ESCAPE_BYTE character is used during run-length encoding.  It signals
     26  * a run of identical bytes.
     27  */
     28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
     29 
     30 /**
     31  * Append a byte to the given StringBuffer, packing two bytes into each
     32  * character.  The state parameter maintains intermediary data between
     33  * calls.
     34  * @param state A two-element array, with state[0] == 0 if this is the
     35  * first byte of a pair, or state[0] != 0 if this is the second byte
     36  * of a pair, in which case state[1] is the first byte.
     37  */
     38 static uint16_t*
     39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
     40     if(!status || U_FAILURE(*status)){
     41         return NULL;
     42     }
     43     if (state[0] != 0) {
     44         uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
     45         if(buffer < buffLimit){
     46             *buffer++ = c;
     47         }else{
     48             *status = U_BUFFER_OVERFLOW_ERROR;
     49         }
     50         state[0] = 0;
     51         return buffer;
     52     }
     53     else {
     54         state[0] = 1;
     55         state[1] = value;
     56         return buffer;
     57     }
     58 }
     59 /**
     60  * Encode a run, possibly a degenerate run (of < 4 values).
     61  * @param length The length of the run; must be > 0 && <= 0xFF.
     62  */
     63 static uint16_t*
     64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
     65     if(!status || U_FAILURE(*status)){
     66         return NULL;
     67     }
     68     if (length < 4) {
     69         int32_t j=0;
     70         for (; j<length; ++j) {
     71             if (value == ESCAPE_BYTE) {
     72                 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
     73             }
     74             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
     75         }
     76     }
     77     else {
     78         if (length == ESCAPE_BYTE) {
     79             if (value == ESCAPE_BYTE){
     80                buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
     81             }
     82             buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
     83             --length;
     84         }
     85         buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
     86         buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
     87         buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
     88     }
     89     return buffer;
     90 }
     91 
     92 #define APPEND( buffer, bufLimit, value, num, status){  \
     93     if(buffer<bufLimit){                    \
     94         *buffer++=(value);                  \
     95     }else{                                  \
     96         *status = U_BUFFER_OVERFLOW_ERROR;  \
     97     }                                       \
     98     num++;                                  \
     99 }
    100 
    101 /**
    102  * Encode a run, possibly a degenerate run (of < 4 values).
    103  * @param length The length of the run; must be > 0 && <= 0xFFFF.
    104  */
    105 static uint16_t*
    106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
    107     int32_t num=0;
    108     if (length < 4) {
    109         int j=0;
    110         for (; j<length; ++j) {
    111             if (value == (int32_t) ESCAPE){
    112                 APPEND(buffer,bufLimit,ESCAPE, num, status);
    113 
    114             }
    115             APPEND(buffer,bufLimit,value,num, status);
    116         }
    117     }
    118     else {
    119         if (length == (int32_t) ESCAPE) {
    120             if (value == (int32_t) ESCAPE){
    121                 APPEND(buffer,bufLimit,ESCAPE,num,status);
    122 
    123             }
    124             APPEND(buffer,bufLimit,value,num,status);
    125             --length;
    126         }
    127         APPEND(buffer,bufLimit,ESCAPE,num,status);
    128         APPEND(buffer,bufLimit,(uint16_t) length, num,status);
    129         APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
    130     }
    131     return buffer;
    132 }
    133 
    134 /**
    135  * Construct a string representing a char array.  Use run-length encoding.
    136  * A character represents itself, unless it is the ESCAPE character.  Then
    137  * the following notations are possible:
    138  *   ESCAPE ESCAPE   ESCAPE literal
    139  *   ESCAPE n c      n instances of character c
    140  * Since an encoded run occupies 3 characters, we only encode runs of 4 or
    141  * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
    142  * If we encounter a run where n == ESCAPE, we represent this as:
    143  *   c ESCAPE n-1 c
    144  * The ESCAPE value is chosen so as not to collide with commonly
    145  * seen values.
    146  */
    147 int32_t
    148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
    149     uint16_t* bufLimit =  buffer+bufLen;
    150     uint16_t* saveBuffer = buffer;
    151     if(buffer < bufLimit){
    152         *buffer++ =  (uint16_t)(srcLen>>16);
    153         if(buffer<bufLimit){
    154             uint16_t runValue = src[0];
    155             int32_t runLength = 1;
    156             int i=1;
    157             *buffer++ = (uint16_t) srcLen;
    158 
    159             for (; i<srcLen; ++i) {
    160                 uint16_t s = src[i];
    161                 if (s == runValue && runLength < 0xFFFF){
    162                     ++runLength;
    163                 }else {
    164                     buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
    165                     runValue = s;
    166                     runLength = 1;
    167                 }
    168             }
    169             buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
    170         }else{
    171             *status = U_BUFFER_OVERFLOW_ERROR;
    172         }
    173     }else{
    174         *status = U_BUFFER_OVERFLOW_ERROR;
    175     }
    176     return (int32_t)(buffer - saveBuffer);
    177 }
    178 
    179 /**
    180  * Construct a string representing a byte array.  Use run-length encoding.
    181  * Two bytes are packed into a single char, with a single extra zero byte at
    182  * the end if needed.  A byte represents itself, unless it is the
    183  * ESCAPE_BYTE.  Then the following notations are possible:
    184  *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
    185  *   ESCAPE_BYTE n b           n instances of byte b
    186  * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
    187  * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
    188  * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
    189  *   b ESCAPE_BYTE n-1 b
    190  * The ESCAPE_BYTE value is chosen so as not to collide with commonly
    191  * seen values.
    192  */
    193 int32_t
    194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
    195     const uint16_t* saveBuf = buffer;
    196     uint16_t* bufLimit =  buffer+bufLen;
    197     if(buffer < bufLimit){
    198         *buffer++ = ((uint16_t) (srcLen >> 16));
    199 
    200         if(buffer<bufLimit){
    201             uint8_t runValue = src[0];
    202             int runLength = 1;
    203             uint8_t state[2]= {0};
    204             int i=1;
    205             *buffer++=((uint16_t) srcLen);
    206             for (; i<srcLen; ++i) {
    207                 uint8_t b = src[i];
    208                 if (b == runValue && runLength < 0xFF){
    209                     ++runLength;
    210                 }
    211                 else {
    212                     buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
    213                     runValue = b;
    214                     runLength = 1;
    215                 }
    216             }
    217             buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
    218 
    219             /* We must save the final byte, if there is one, by padding
    220              * an extra zero.
    221              */
    222             if (state[0] != 0) {
    223                 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
    224             }
    225         }else{
    226             *status = U_BUFFER_OVERFLOW_ERROR;
    227         }
    228     }else{
    229         *status = U_BUFFER_OVERFLOW_ERROR;
    230     }
    231     return (int32_t) (buffer - saveBuf);
    232 }
    233 
    234 
    235 /**
    236  * Construct an array of shorts from a run-length encoded string.
    237  */
    238 int32_t
    239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
    240     int32_t length = 0;
    241     int32_t ai = 0;
    242     int i=2;
    243 
    244     if(!status || U_FAILURE(*status)){
    245         return 0;
    246     }
    247     /* the source is null terminated */
    248     if(srcLen == -1){
    249         srcLen = u_strlen(src);
    250     }
    251     if(srcLen <= 2){
    252         return 2;
    253     }
    254     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
    255 
    256     if(target == NULL){
    257         return length;
    258     }
    259     if(tgtLen < length){
    260         *status = U_BUFFER_OVERFLOW_ERROR;
    261         return length;
    262     }
    263 
    264     for (; i<srcLen; ++i) {
    265         uint16_t c = src[i];
    266         if (c == ESCAPE) {
    267             c = src[++i];
    268             if (c == ESCAPE) {
    269                 target[ai++] = c;
    270             } else {
    271                 int32_t runLength = (int32_t) c;
    272                 uint16_t runValue = src[++i];
    273                 int j=0;
    274                 for (; j<runLength; ++j) {
    275                     target[ai++] = runValue;
    276                 }
    277             }
    278         }
    279         else {
    280             target[ai++] = c;
    281         }
    282     }
    283 
    284     if (ai != length){
    285         *status = U_INTERNAL_PROGRAM_ERROR;
    286     }
    287 
    288     return length;
    289 }
    290 
    291 /**
    292  * Construct an array of bytes from a run-length encoded string.
    293  */
    294 int32_t
    295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
    296 
    297     int32_t length = 0;
    298     UBool nextChar = TRUE;
    299     uint16_t c = 0;
    300     int32_t node = 0;
    301     int32_t runLength = 0;
    302     int32_t i = 2;
    303     int32_t ai=0;
    304 
    305     if(!status || U_FAILURE(*status)){
    306         return 0;
    307     }
    308     /* the source is null terminated */
    309     if(srcLen == -1){
    310         srcLen = u_strlen(src);
    311     }
    312     if(srcLen <= 2){
    313         return 2;
    314     }
    315     length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
    316 
    317     if(target == NULL){
    318         return length;
    319     }
    320     if(tgtLen < length){
    321         *status = U_BUFFER_OVERFLOW_ERROR;
    322         return length;
    323     }
    324 
    325     for (; ai<tgtLen; ) {
    326        /* This part of the loop places the next byte into the local
    327         * variable 'b' each time through the loop.  It keeps the
    328         * current character in 'c' and uses the boolean 'nextChar'
    329         * to see if we've taken both bytes out of 'c' yet.
    330         */
    331         uint8_t b;
    332         if (nextChar) {
    333             c = src[i++];
    334             b = (uint8_t) (c >> 8);
    335             nextChar = FALSE;
    336         }
    337         else {
    338             b = (uint8_t) (c & 0xFF);
    339             nextChar = TRUE;
    340         }
    341 
    342        /* This part of the loop is a tiny state machine which handles
    343         * the parsing of the run-length encoding.  This would be simpler
    344         * if we could look ahead, but we can't, so we use 'node' to
    345         * move between three nodes in the state machine.
    346         */
    347         switch (node) {
    348         case 0:
    349             /* Normal idle node */
    350             if (b == ESCAPE_BYTE) {
    351                 node = 1;
    352             }
    353             else {
    354                 target[ai++] = b;
    355             }
    356             break;
    357         case 1:
    358            /* We have seen one ESCAPE_BYTE; we expect either a second
    359             * one, or a run length and value.
    360             */
    361             if (b == ESCAPE_BYTE) {
    362                 target[ai++] = ESCAPE_BYTE;
    363                 node = 0;
    364             }
    365             else {
    366                 runLength = b;
    367                 node = 2;
    368             }
    369             break;
    370         case 2:
    371             {
    372                 int j=0;
    373                /* We have seen an ESCAPE_BYTE and length byte.  We interpret
    374                 * the next byte as the value to be repeated.
    375                 */
    376                 for (; j<runLength; ++j){
    377                     if(ai<tgtLen){
    378                         target[ai++] = b;
    379                     }else{
    380                         *status = U_BUFFER_OVERFLOW_ERROR;
    381                         return ai;
    382                     }
    383                 }
    384                 node = 0;
    385                 break;
    386             }
    387         }
    388     }
    389 
    390     if (node != 0){
    391         *status = U_INTERNAL_PROGRAM_ERROR;
    392         /*("Bad run-length encoded byte array")*/
    393         return 0;
    394     }
    395 
    396 
    397     if (i != srcLen){
    398         /*("Excess data in RLE byte array string");*/
    399         *status = U_INTERNAL_PROGRAM_ERROR;
    400         return ai;
    401     }
    402 
    403     return ai;
    404 }
    405 
    406