1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2003, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File writejava.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 01/11/02 Ram Creation. 15 ******************************************************************************* 16 */ 17 #include "rle.h" 18 /** 19 * The ESCAPE character is used during run-length encoding. It signals 20 * a run of identical chars. 21 */ 22 static const uint16_t ESCAPE = 0xA5A5; 23 24 /** 25 * The ESCAPE_BYTE character is used during run-length encoding. It signals 26 * a run of identical bytes. 27 */ 28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; 29 30 /** 31 * Append a byte to the given StringBuffer, packing two bytes into each 32 * character. The state parameter maintains intermediary data between 33 * calls. 34 * @param state A two-element array, with state[0] == 0 if this is the 35 * first byte of a pair, or state[0] != 0 if this is the second byte 36 * of a pair, in which case state[1] is the first byte. 37 */ 38 static uint16_t* 39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { 40 if(!status || U_FAILURE(*status)){ 41 return NULL; 42 } 43 if (state[0] != 0) { 44 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); 45 if(buffer < buffLimit){ 46 *buffer++ = c; 47 }else{ 48 *status = U_BUFFER_OVERFLOW_ERROR; 49 } 50 state[0] = 0; 51 return buffer; 52 } 53 else { 54 state[0] = 1; 55 state[1] = value; 56 return buffer; 57 } 58 } 59 /** 60 * Encode a run, possibly a degenerate run (of < 4 values). 61 * @param length The length of the run; must be > 0 && <= 0xFF. 62 */ 63 static uint16_t* 64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { 65 if(!status || U_FAILURE(*status)){ 66 return NULL; 67 } 68 if (length < 4) { 69 int32_t j=0; 70 for (; j<length; ++j) { 71 if (value == ESCAPE_BYTE) { 72 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 73 } 74 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 75 } 76 } 77 else { 78 if (length == ESCAPE_BYTE) { 79 if (value == ESCAPE_BYTE){ 80 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); 81 } 82 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); 83 --length; 84 } 85 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); 86 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); 87 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ 88 } 89 return buffer; 90 } 91 92 #define APPEND( buffer, bufLimit, value, num, status){ \ 93 if(buffer<bufLimit){ \ 94 *buffer++=(value); \ 95 }else{ \ 96 *status = U_BUFFER_OVERFLOW_ERROR; \ 97 } \ 98 num++; \ 99 } 100 101 /** 102 * Encode a run, possibly a degenerate run (of < 4 values). 103 * @param length The length of the run; must be > 0 && <= 0xFFFF. 104 */ 105 static uint16_t* 106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { 107 int32_t num=0; 108 if (length < 4) { 109 int j=0; 110 for (; j<length; ++j) { 111 if (value == (int32_t) ESCAPE){ 112 APPEND(buffer,bufLimit,ESCAPE, num, status); 113 114 } 115 APPEND(buffer,bufLimit,value,num, status); 116 } 117 } 118 else { 119 if (length == (int32_t) ESCAPE) { 120 if (value == (int32_t) ESCAPE){ 121 APPEND(buffer,bufLimit,ESCAPE,num,status); 122 123 } 124 APPEND(buffer,bufLimit,value,num,status); 125 --length; 126 } 127 APPEND(buffer,bufLimit,ESCAPE,num,status); 128 APPEND(buffer,bufLimit,(uint16_t) length, num,status); 129 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ 130 } 131 return buffer; 132 } 133 134 /** 135 * Construct a string representing a char array. Use run-length encoding. 136 * A character represents itself, unless it is the ESCAPE character. Then 137 * the following notations are possible: 138 * ESCAPE ESCAPE ESCAPE literal 139 * ESCAPE n c n instances of character c 140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or 141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. 142 * If we encounter a run where n == ESCAPE, we represent this as: 143 * c ESCAPE n-1 c 144 * The ESCAPE value is chosen so as not to collide with commonly 145 * seen values. 146 */ 147 int32_t 148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { 149 uint16_t* bufLimit = buffer+bufLen; 150 uint16_t* saveBuffer = buffer; 151 if(buffer < bufLimit){ 152 *buffer++ = (uint16_t)(srcLen>>16); 153 if(buffer<bufLimit){ 154 uint16_t runValue = src[0]; 155 int32_t runLength = 1; 156 int i=1; 157 *buffer++ = (uint16_t) srcLen; 158 159 for (; i<srcLen; ++i) { 160 uint16_t s = src[i]; 161 if (s == runValue && runLength < 0xFFFF){ 162 ++runLength; 163 }else { 164 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); 165 runValue = s; 166 runLength = 1; 167 } 168 } 169 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); 170 }else{ 171 *status = U_BUFFER_OVERFLOW_ERROR; 172 } 173 }else{ 174 *status = U_BUFFER_OVERFLOW_ERROR; 175 } 176 return (int32_t)(buffer - saveBuffer); 177 } 178 179 /** 180 * Construct a string representing a byte array. Use run-length encoding. 181 * Two bytes are packed into a single char, with a single extra zero byte at 182 * the end if needed. A byte represents itself, unless it is the 183 * ESCAPE_BYTE. Then the following notations are possible: 184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal 185 * ESCAPE_BYTE n b n instances of byte b 186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or 187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. 188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as: 189 * b ESCAPE_BYTE n-1 b 190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly 191 * seen values. 192 */ 193 int32_t 194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { 195 const uint16_t* saveBuf = buffer; 196 uint16_t* bufLimit = buffer+bufLen; 197 if(buffer < bufLimit){ 198 *buffer++ = ((uint16_t) (srcLen >> 16)); 199 200 if(buffer<bufLimit){ 201 uint8_t runValue = src[0]; 202 int runLength = 1; 203 uint8_t state[2]= {0}; 204 int i=1; 205 *buffer++=((uint16_t) srcLen); 206 for (; i<srcLen; ++i) { 207 uint8_t b = src[i]; 208 if (b == runValue && runLength < 0xFF){ 209 ++runLength; 210 } 211 else { 212 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); 213 runValue = b; 214 runLength = 1; 215 } 216 } 217 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); 218 219 /* We must save the final byte, if there is one, by padding 220 * an extra zero. 221 */ 222 if (state[0] != 0) { 223 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); 224 } 225 }else{ 226 *status = U_BUFFER_OVERFLOW_ERROR; 227 } 228 }else{ 229 *status = U_BUFFER_OVERFLOW_ERROR; 230 } 231 return (int32_t) (buffer - saveBuf); 232 } 233 234 235 /** 236 * Construct an array of shorts from a run-length encoded string. 237 */ 238 int32_t 239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { 240 int32_t length = 0; 241 int32_t ai = 0; 242 int i=2; 243 244 if(!status || U_FAILURE(*status)){ 245 return 0; 246 } 247 /* the source is null terminated */ 248 if(srcLen == -1){ 249 srcLen = u_strlen(src); 250 } 251 if(srcLen <= 2){ 252 return 2; 253 } 254 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 255 256 if(target == NULL){ 257 return length; 258 } 259 if(tgtLen < length){ 260 *status = U_BUFFER_OVERFLOW_ERROR; 261 return length; 262 } 263 264 for (; i<srcLen; ++i) { 265 uint16_t c = src[i]; 266 if (c == ESCAPE) { 267 c = src[++i]; 268 if (c == ESCAPE) { 269 target[ai++] = c; 270 } else { 271 int32_t runLength = (int32_t) c; 272 uint16_t runValue = src[++i]; 273 int j=0; 274 for (; j<runLength; ++j) { 275 target[ai++] = runValue; 276 } 277 } 278 } 279 else { 280 target[ai++] = c; 281 } 282 } 283 284 if (ai != length){ 285 *status = U_INTERNAL_PROGRAM_ERROR; 286 } 287 288 return length; 289 } 290 291 /** 292 * Construct an array of bytes from a run-length encoded string. 293 */ 294 int32_t 295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { 296 297 int32_t length = 0; 298 UBool nextChar = TRUE; 299 uint16_t c = 0; 300 int32_t node = 0; 301 int32_t runLength = 0; 302 int32_t i = 2; 303 int32_t ai=0; 304 305 if(!status || U_FAILURE(*status)){ 306 return 0; 307 } 308 /* the source is null terminated */ 309 if(srcLen == -1){ 310 srcLen = u_strlen(src); 311 } 312 if(srcLen <= 2){ 313 return 2; 314 } 315 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); 316 317 if(target == NULL){ 318 return length; 319 } 320 if(tgtLen < length){ 321 *status = U_BUFFER_OVERFLOW_ERROR; 322 return length; 323 } 324 325 for (; ai<tgtLen; ) { 326 /* This part of the loop places the next byte into the local 327 * variable 'b' each time through the loop. It keeps the 328 * current character in 'c' and uses the boolean 'nextChar' 329 * to see if we've taken both bytes out of 'c' yet. 330 */ 331 uint8_t b; 332 if (nextChar) { 333 c = src[i++]; 334 b = (uint8_t) (c >> 8); 335 nextChar = FALSE; 336 } 337 else { 338 b = (uint8_t) (c & 0xFF); 339 nextChar = TRUE; 340 } 341 342 /* This part of the loop is a tiny state machine which handles 343 * the parsing of the run-length encoding. This would be simpler 344 * if we could look ahead, but we can't, so we use 'node' to 345 * move between three nodes in the state machine. 346 */ 347 switch (node) { 348 case 0: 349 /* Normal idle node */ 350 if (b == ESCAPE_BYTE) { 351 node = 1; 352 } 353 else { 354 target[ai++] = b; 355 } 356 break; 357 case 1: 358 /* We have seen one ESCAPE_BYTE; we expect either a second 359 * one, or a run length and value. 360 */ 361 if (b == ESCAPE_BYTE) { 362 target[ai++] = ESCAPE_BYTE; 363 node = 0; 364 } 365 else { 366 runLength = b; 367 node = 2; 368 } 369 break; 370 case 2: 371 { 372 int j=0; 373 /* We have seen an ESCAPE_BYTE and length byte. We interpret 374 * the next byte as the value to be repeated. 375 */ 376 for (; j<runLength; ++j){ 377 if(ai<tgtLen){ 378 target[ai++] = b; 379 }else{ 380 *status = U_BUFFER_OVERFLOW_ERROR; 381 return ai; 382 } 383 } 384 node = 0; 385 break; 386 } 387 } 388 } 389 390 if (node != 0){ 391 *status = U_INTERNAL_PROGRAM_ERROR; 392 /*("Bad run-length encoded byte array")*/ 393 return 0; 394 } 395 396 397 if (i != srcLen){ 398 /*("Excess data in RLE byte array string");*/ 399 *status = U_INTERNAL_PROGRAM_ERROR; 400 return ai; 401 } 402 403 return ai; 404 } 405 406