1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: udataswp.c 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003jun05 16 * created by: Markus W. Scherer 17 * 18 * Definitions for ICU data transformations for different platforms, 19 * changing between big- and little-endian data and/or between 20 * charset families (ASCII<->EBCDIC). 21 */ 22 23 #include <stdarg.h> 24 #include "unicode/utypes.h" 25 #include "unicode/udata.h" /* UDataInfo */ 26 #include "ucmndata.h" /* DataHeader */ 27 #include "cmemory.h" 28 #include "udataswp.h" 29 30 /* swapping primitives ------------------------------------------------------ */ 31 32 static int32_t U_CALLCONV 33 uprv_swapArray16(const UDataSwapper *ds, 34 const void *inData, int32_t length, void *outData, 35 UErrorCode *pErrorCode) { 36 const uint16_t *p; 37 uint16_t *q; 38 int32_t count; 39 uint16_t x; 40 41 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 42 return 0; 43 } 44 if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { 45 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 46 return 0; 47 } 48 49 /* setup and swapping */ 50 p=(const uint16_t *)inData; 51 q=(uint16_t *)outData; 52 count=length/2; 53 while(count>0) { 54 x=*p++; 55 *q++=(uint16_t)((x<<8)|(x>>8)); 56 --count; 57 } 58 59 return length; 60 } 61 62 static int32_t U_CALLCONV 63 uprv_copyArray16(const UDataSwapper *ds, 64 const void *inData, int32_t length, void *outData, 65 UErrorCode *pErrorCode) { 66 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 67 return 0; 68 } 69 if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { 70 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 71 return 0; 72 } 73 74 if(length>0 && inData!=outData) { 75 uprv_memcpy(outData, inData, length); 76 } 77 return length; 78 } 79 80 static int32_t U_CALLCONV 81 uprv_swapArray32(const UDataSwapper *ds, 82 const void *inData, int32_t length, void *outData, 83 UErrorCode *pErrorCode) { 84 const uint32_t *p; 85 uint32_t *q; 86 int32_t count; 87 uint32_t x; 88 89 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 90 return 0; 91 } 92 if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { 93 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 94 return 0; 95 } 96 97 /* setup and swapping */ 98 p=(const uint32_t *)inData; 99 q=(uint32_t *)outData; 100 count=length/4; 101 while(count>0) { 102 x=*p++; 103 *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); 104 --count; 105 } 106 107 return length; 108 } 109 110 static int32_t U_CALLCONV 111 uprv_copyArray32(const UDataSwapper *ds, 112 const void *inData, int32_t length, void *outData, 113 UErrorCode *pErrorCode) { 114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 115 return 0; 116 } 117 if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { 118 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 119 return 0; 120 } 121 122 if(length>0 && inData!=outData) { 123 uprv_memcpy(outData, inData, length); 124 } 125 return length; 126 } 127 128 static int32_t U_CALLCONV 129 uprv_swapArray64(const UDataSwapper *ds, 130 const void *inData, int32_t length, void *outData, 131 UErrorCode *pErrorCode) { 132 const uint64_t *p; 133 uint64_t *q; 134 int32_t count; 135 136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 137 return 0; 138 } 139 if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { 140 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 141 return 0; 142 } 143 144 /* setup and swapping */ 145 p=(const uint64_t *)inData; 146 q=(uint64_t *)outData; 147 count=length/8; 148 while(count>0) { 149 uint64_t x=*p++; 150 x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)| 151 ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56); 152 *q++=x; 153 --count; 154 } 155 156 return length; 157 } 158 159 static int32_t U_CALLCONV 160 uprv_copyArray64(const UDataSwapper *ds, 161 const void *inData, int32_t length, void *outData, 162 UErrorCode *pErrorCode) { 163 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 164 return 0; 165 } 166 if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { 167 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 168 return 0; 169 } 170 171 if(length>0 && inData!=outData) { 172 uprv_memcpy(outData, inData, length); 173 } 174 return length; 175 } 176 177 static uint16_t U_CALLCONV 178 uprv_readSwapUInt16(uint16_t x) { 179 return (uint16_t)((x<<8)|(x>>8)); 180 } 181 182 static uint16_t U_CALLCONV 183 uprv_readDirectUInt16(uint16_t x) { 184 return x; 185 } 186 187 static uint32_t U_CALLCONV 188 uprv_readSwapUInt32(uint32_t x) { 189 return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); 190 } 191 192 static uint32_t U_CALLCONV 193 uprv_readDirectUInt32(uint32_t x) { 194 return x; 195 } 196 197 static void U_CALLCONV 198 uprv_writeSwapUInt16(uint16_t *p, uint16_t x) { 199 *p=(uint16_t)((x<<8)|(x>>8)); 200 } 201 202 static void U_CALLCONV 203 uprv_writeDirectUInt16(uint16_t *p, uint16_t x) { 204 *p=x; 205 } 206 207 static void U_CALLCONV 208 uprv_writeSwapUInt32(uint32_t *p, uint32_t x) { 209 *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); 210 } 211 212 static void U_CALLCONV 213 uprv_writeDirectUInt32(uint32_t *p, uint32_t x) { 214 *p=x; 215 } 216 217 U_CAPI int16_t U_EXPORT2 218 udata_readInt16(const UDataSwapper *ds, int16_t x) { 219 return (int16_t)ds->readUInt16((uint16_t)x); 220 } 221 222 U_CAPI int32_t U_EXPORT2 223 udata_readInt32(const UDataSwapper *ds, int32_t x) { 224 return (int32_t)ds->readUInt32((uint32_t)x); 225 } 226 227 /** 228 * Swap a block of invariant, NUL-terminated strings, but not padding 229 * bytes after the last string. 230 * @internal 231 */ 232 U_CAPI int32_t U_EXPORT2 233 udata_swapInvStringBlock(const UDataSwapper *ds, 234 const void *inData, int32_t length, void *outData, 235 UErrorCode *pErrorCode) { 236 const char *inChars; 237 int32_t stringsLength; 238 239 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 240 return 0; 241 } 242 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 243 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 244 return 0; 245 } 246 247 /* reduce the strings length to not include bytes after the last NUL */ 248 inChars=(const char *)inData; 249 stringsLength=length; 250 while(stringsLength>0 && inChars[stringsLength-1]!=0) { 251 --stringsLength; 252 } 253 254 /* swap up to the last NUL */ 255 ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode); 256 257 /* copy the bytes after the last NUL */ 258 if(inData!=outData && length>stringsLength) { 259 uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength); 260 } 261 262 /* return the length including padding bytes */ 263 if(U_SUCCESS(*pErrorCode)) { 264 return length; 265 } else { 266 return 0; 267 } 268 } 269 270 U_CAPI void U_EXPORT2 271 udata_printError(const UDataSwapper *ds, 272 const char *fmt, 273 ...) { 274 va_list args; 275 276 if(ds->printError!=NULL) { 277 va_start(args, fmt); 278 ds->printError(ds->printErrorContext, fmt, args); 279 va_end(args); 280 } 281 } 282 283 /* swap a data header ------------------------------------------------------- */ 284 285 U_CAPI int32_t U_EXPORT2 286 udata_swapDataHeader(const UDataSwapper *ds, 287 const void *inData, int32_t length, void *outData, 288 UErrorCode *pErrorCode) { 289 const DataHeader *pHeader; 290 uint16_t headerSize, infoSize; 291 292 /* argument checking */ 293 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 294 return 0; 295 } 296 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 297 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 298 return 0; 299 } 300 301 /* check minimum length and magic bytes */ 302 pHeader=(const DataHeader *)inData; 303 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 304 pHeader->dataHeader.magic1!=0xda || 305 pHeader->dataHeader.magic2!=0x27 || 306 pHeader->info.sizeofUChar!=2 307 ) { 308 udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n"); 309 *pErrorCode=U_UNSUPPORTED_ERROR; 310 return 0; 311 } 312 313 headerSize=ds->readUInt16(pHeader->dataHeader.headerSize); 314 infoSize=ds->readUInt16(pHeader->info.size); 315 316 if( headerSize<sizeof(DataHeader) || 317 infoSize<sizeof(UDataInfo) || 318 headerSize<(sizeof(pHeader->dataHeader)+infoSize) || 319 (length>=0 && length<headerSize) 320 ) { 321 udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n", 322 headerSize, infoSize, length); 323 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 324 return 0; 325 } 326 327 if(length>0) { 328 DataHeader *outHeader; 329 const char *s; 330 int32_t maxLength; 331 332 /* Most of the fields are just bytes and need no swapping. */ 333 if(inData!=outData) { 334 uprv_memcpy(outData, inData, headerSize); 335 } 336 outHeader=(DataHeader *)outData; 337 338 outHeader->info.isBigEndian = ds->outIsBigEndian; 339 outHeader->info.charsetFamily = ds->outCharset; 340 341 /* swap headerSize */ 342 ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode); 343 344 /* swap UDataInfo size and reservedWord */ 345 ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode); 346 347 /* swap copyright statement after the UDataInfo */ 348 infoSize+=sizeof(pHeader->dataHeader); 349 s=(const char *)inData+infoSize; 350 maxLength=headerSize-infoSize; 351 /* get the length of the string */ 352 for(length=0; length<maxLength && s[length]!=0; ++length) {} 353 /* swap the string contents */ 354 ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode); 355 } 356 357 return headerSize; 358 } 359 360 /* API functions ------------------------------------------------------------ */ 361 362 U_CAPI UDataSwapper * U_EXPORT2 363 udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, 364 UBool outIsBigEndian, uint8_t outCharset, 365 UErrorCode *pErrorCode) { 366 UDataSwapper *swapper; 367 368 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 369 return NULL; 370 } 371 if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) { 372 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 373 return NULL; 374 } 375 376 /* allocate the swapper */ 377 swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper)); 378 if(swapper==NULL) { 379 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 380 return NULL; 381 } 382 uprv_memset(swapper, 0, sizeof(UDataSwapper)); 383 384 /* set values and functions pointers according to in/out parameters */ 385 swapper->inIsBigEndian=inIsBigEndian; 386 swapper->inCharset=inCharset; 387 swapper->outIsBigEndian=outIsBigEndian; 388 swapper->outCharset=outCharset; 389 390 swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16; 391 swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32; 392 393 swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16; 394 swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32; 395 396 swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic; 397 398 if(inIsBigEndian==outIsBigEndian) { 399 swapper->swapArray16=uprv_copyArray16; 400 swapper->swapArray32=uprv_copyArray32; 401 swapper->swapArray64=uprv_copyArray64; 402 } else { 403 swapper->swapArray16=uprv_swapArray16; 404 swapper->swapArray32=uprv_swapArray32; 405 swapper->swapArray64=uprv_swapArray64; 406 } 407 408 if(inCharset==U_ASCII_FAMILY) { 409 swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii; 410 } else /* U_EBCDIC_FAMILY */ { 411 swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic; 412 } 413 414 return swapper; 415 } 416 417 U_CAPI UDataSwapper * U_EXPORT2 418 udata_openSwapperForInputData(const void *data, int32_t length, 419 UBool outIsBigEndian, uint8_t outCharset, 420 UErrorCode *pErrorCode) { 421 const DataHeader *pHeader; 422 uint16_t headerSize, infoSize; 423 UBool inIsBigEndian; 424 int8_t inCharset; 425 426 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 427 return NULL; 428 } 429 if( data==NULL || 430 (length>=0 && length<(int32_t)sizeof(DataHeader)) || 431 outCharset>U_EBCDIC_FAMILY 432 ) { 433 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 434 return NULL; 435 } 436 437 pHeader=(const DataHeader *)data; 438 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 439 pHeader->dataHeader.magic1!=0xda || 440 pHeader->dataHeader.magic2!=0x27 || 441 pHeader->info.sizeofUChar!=2 442 ) { 443 *pErrorCode=U_UNSUPPORTED_ERROR; 444 return 0; 445 } 446 447 inIsBigEndian=(UBool)pHeader->info.isBigEndian; 448 inCharset=pHeader->info.charsetFamily; 449 450 if(inIsBigEndian==U_IS_BIG_ENDIAN) { 451 headerSize=pHeader->dataHeader.headerSize; 452 infoSize=pHeader->info.size; 453 } else { 454 headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize); 455 infoSize=uprv_readSwapUInt16(pHeader->info.size); 456 } 457 458 if( headerSize<sizeof(DataHeader) || 459 infoSize<sizeof(UDataInfo) || 460 headerSize<(sizeof(pHeader->dataHeader)+infoSize) || 461 (length>=0 && length<headerSize) 462 ) { 463 *pErrorCode=U_UNSUPPORTED_ERROR; 464 return 0; 465 } 466 467 return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode); 468 } 469 470 U_CAPI void U_EXPORT2 471 udata_closeSwapper(UDataSwapper *ds) { 472 uprv_free(ds); 473 } 474