1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2004-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: ubidi_props.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2004dec30 14 * created by: Markus W. Scherer 15 * 16 * Low-level Unicode bidi/shaping properties access. 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/uset.h" 21 #include "unicode/udata.h" /* UDataInfo */ 22 #include "ucmndata.h" /* DataHeader */ 23 #include "udatamem.h" 24 #include "umutex.h" 25 #include "uassert.h" 26 #include "cmemory.h" 27 #include "utrie2.h" 28 #include "ubidi_props.h" 29 #include "ucln_cmn.h" 30 31 struct UBiDiProps { 32 UDataMemory *mem; 33 const int32_t *indexes; 34 const uint32_t *mirrors; 35 const uint8_t *jgArray; 36 37 UTrie2 trie; 38 uint8_t formatVersion[4]; 39 }; 40 41 /* data loading etc. -------------------------------------------------------- */ 42 43 #if UBIDI_HARDCODE_DATA 44 45 /* ubidi_props_data.c is machine-generated by genbidi --csource */ 46 #include "ubidi_props_data.c" 47 48 #else 49 50 static UBool U_CALLCONV 51 isAcceptable(void *context, 52 const char *type, const char *name, 53 const UDataInfo *pInfo) { 54 if( 55 pInfo->size>=20 && 56 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 57 pInfo->charsetFamily==U_CHARSET_FAMILY && 58 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 59 pInfo->dataFormat[1]==UBIDI_FMT_1 && 60 pInfo->dataFormat[2]==UBIDI_FMT_2 && 61 pInfo->dataFormat[3]==UBIDI_FMT_3 && 62 pInfo->formatVersion[0]==1 && 63 pInfo->formatVersion[2]==UTRIE_SHIFT && 64 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 65 ) { 66 UBiDiProps *bdp=(UBiDiProps *)context; 67 uprv_memcpy(bdp->formatVersion, pInfo->formatVersion, 4); 68 return TRUE; 69 } else { 70 return FALSE; 71 } 72 } 73 74 static UBiDiProps * 75 ubidi_openData(UBiDiProps *bdpProto, 76 const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) { 77 UBiDiProps *bdp; 78 int32_t size; 79 80 bdpProto->indexes=(const int32_t *)bin; 81 if( (length>=0 && length<16*4) || 82 bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16 83 ) { 84 /* length or indexes[] too short for minimum indexes[] length of 16 */ 85 *pErrorCode=U_INVALID_FORMAT_ERROR; 86 return NULL; 87 } 88 size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4; 89 if(length>=0) { 90 if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) { 91 length-=size; 92 } else { 93 /* length too short for indexes[] or for the whole data length */ 94 *pErrorCode=U_INVALID_FORMAT_ERROR; 95 return NULL; 96 } 97 } 98 bin+=size; 99 /* from here on, assume that the sizes of the items fit into the total length */ 100 101 /* unserialize the trie, after indexes[] */ 102 size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE]; 103 utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode); 104 if(U_FAILURE(*pErrorCode)) { 105 return NULL; 106 } 107 bin+=size; 108 109 /* get mirrors[] */ 110 size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH]; 111 bdpProto->mirrors=(const uint32_t *)bin; 112 bin+=size; 113 114 /* get jgArray[] */ 115 size=bdpProto->indexes[UBIDI_IX_JG_LIMIT]-bdpProto->indexes[UBIDI_IX_JG_START]; 116 bdpProto->jgArray=bin; 117 bin+=size; 118 119 /* allocate, copy, and return the new UBiDiProps */ 120 bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps)); 121 if(bdp==NULL) { 122 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 123 return NULL; 124 } else { 125 uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps)); 126 return bdp; 127 } 128 } 129 130 U_CFUNC UBiDiProps * 131 ubidi_openProps(UErrorCode *pErrorCode) { 132 UBiDiProps bdpProto={ NULL }, *bdp; 133 134 bdpProto.mem=udata_openChoice(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, isAcceptable, &bdpProto, pErrorCode); 135 if(U_FAILURE(*pErrorCode)) { 136 return NULL; 137 } 138 139 bdp=ubidi_openData( 140 &bdpProto, 141 udata_getMemory(bdpProto.mem), 142 udata_getLength(bdpProto.mem), 143 pErrorCode); 144 if(U_FAILURE(*pErrorCode)) { 145 udata_close(bdpProto.mem); 146 return NULL; 147 } else { 148 return bdp; 149 } 150 } 151 152 U_CFUNC UBiDiProps * 153 ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) { 154 UBiDiProps bdpProto={ NULL }; 155 const DataHeader *hdr; 156 157 if(U_FAILURE(*pErrorCode)) { 158 return NULL; 159 } 160 if(bin==NULL) { 161 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 162 return NULL; 163 } 164 165 /* check the header */ 166 if(length>=0 && length<20) { 167 *pErrorCode=U_INVALID_FORMAT_ERROR; 168 return NULL; 169 } 170 hdr=(const DataHeader *)bin; 171 if( 172 !(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 && 173 hdr->info.isBigEndian==U_IS_BIG_ENDIAN && 174 isAcceptable(&bdpProto, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &hdr->info)) 175 ) { 176 *pErrorCode=U_INVALID_FORMAT_ERROR; 177 return NULL; 178 } 179 180 bin+=hdr->dataHeader.headerSize; 181 if(length>=0) { 182 length-=hdr->dataHeader.headerSize; 183 } 184 return ubidi_openData(&bdpProto, bin, length, pErrorCode); 185 } 186 187 #endif 188 189 U_CFUNC void 190 ubidi_closeProps(UBiDiProps *bdp) { 191 if(bdp!=NULL) { 192 #if !UBIDI_HARDCODE_DATA 193 udata_close(bdp->mem); 194 #endif 195 uprv_free(bdp); 196 } 197 } 198 199 /* UBiDiProps singleton ----------------------------------------------------- */ 200 201 #if !UBIDI_HARDCODE_DATA 202 static UBiDiProps *gBdpDummy=NULL; 203 static UBiDiProps *gBdp=NULL; 204 static UErrorCode gErrorCode=U_ZERO_ERROR; 205 static int8_t gHaveData=0; 206 207 static UBool U_CALLCONV 208 ubidi_cleanup(void) { 209 ubidi_closeProps(gBdpDummy); 210 gBdpDummy=NULL; 211 ubidi_closeProps(gBdp); 212 gBdp=NULL; 213 gErrorCode=U_ZERO_ERROR; 214 gHaveData=0; 215 return TRUE; 216 } 217 #endif 218 219 U_CFUNC const UBiDiProps * 220 ubidi_getSingleton(UErrorCode *pErrorCode) { 221 #if UBIDI_HARDCODE_DATA 222 if(U_FAILURE(*pErrorCode)) { 223 return NULL; 224 } 225 return &ubidi_props_singleton; 226 #else 227 int8_t haveData; 228 229 if(U_FAILURE(*pErrorCode)) { 230 return NULL; 231 } 232 233 UMTX_CHECK(NULL, gHaveData, haveData); 234 235 if(haveData>0) { 236 /* data was loaded */ 237 return gBdp; 238 } else if(haveData<0) { 239 /* data loading failed */ 240 *pErrorCode=gErrorCode; 241 return NULL; 242 } else /* haveData==0 */ { 243 /* load the data */ 244 UBiDiProps *bdp=ubidi_openProps(pErrorCode); 245 if(U_FAILURE(*pErrorCode)) { 246 gHaveData=-1; 247 gErrorCode=*pErrorCode; 248 return NULL; 249 } 250 251 /* set the static variables */ 252 umtx_lock(NULL); 253 if(gBdp==NULL) { 254 gBdp=bdp; 255 bdp=NULL; 256 gHaveData=1; 257 ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup); 258 } 259 umtx_unlock(NULL); 260 261 ubidi_closeProps(bdp); 262 return gBdp; 263 } 264 #endif 265 } 266 267 #if !UBIDI_HARDCODE_DATA 268 U_CAPI const UBiDiProps * 269 ubidi_getDummy(UErrorCode *pErrorCode) { 270 UBiDiProps *bdp; 271 272 if(U_FAILURE(*pErrorCode)) { 273 return NULL; 274 } 275 276 UMTX_CHECK(NULL, gBdpDummy, bdp); 277 278 if(bdp!=NULL) { 279 /* the dummy object was already created */ 280 return bdp; 281 } else /* bdp==NULL */ { 282 /* create the dummy object */ 283 int32_t *indexes; 284 285 bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps)+UBIDI_IX_TOP*4+UTRIE_DUMMY_SIZE); 286 if(bdp==NULL) { 287 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 288 return NULL; 289 } 290 uprv_memset(bdp, 0, sizeof(UBiDiProps)+UBIDI_IX_TOP*4); 291 292 bdp->indexes=indexes=(int32_t *)(bdp+1); 293 indexes[UBIDI_IX_INDEX_TOP]=UBIDI_IX_TOP; 294 295 indexes[UBIDI_IX_TRIE_SIZE]= 296 utrie_unserializeDummy(&bdp->trie, indexes+UBIDI_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode); 297 if(U_FAILURE(*pErrorCode)) { 298 uprv_free(bdp); 299 return NULL; 300 } 301 302 bdp->formatVersion[0]=1; 303 bdp->formatVersion[2]=UTRIE_SHIFT; 304 bdp->formatVersion[3]=UTRIE_INDEX_SHIFT; 305 306 /* set the static variables */ 307 umtx_lock(NULL); 308 if(gBdpDummy==NULL) { 309 gBdpDummy=bdp; 310 bdp=NULL; 311 ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup); 312 } 313 umtx_unlock(NULL); 314 315 uprv_free(bdp); 316 return gBdpDummy; 317 } 318 } 319 #endif 320 321 /* set of property starts for UnicodeSet ------------------------------------ */ 322 323 static UBool U_CALLCONV 324 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 325 /* add the start code point to the USet */ 326 const USetAdder *sa=(const USetAdder *)context; 327 sa->add(sa->set, start); 328 return TRUE; 329 } 330 331 U_CFUNC void 332 ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { 333 int32_t i, length; 334 UChar32 c, start, limit; 335 336 const uint8_t *jgArray; 337 uint8_t prev, jg; 338 339 if(U_FAILURE(*pErrorCode)) { 340 return; 341 } 342 343 /* add the start code point of each same-value range of the trie */ 344 utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); 345 346 /* add the code points from the bidi mirroring table */ 347 length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; 348 for(i=0; i<length; ++i) { 349 c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]); 350 sa->addRange(sa->set, c, c+1); 351 } 352 353 /* add the code points from the Joining_Group array where the value changes */ 354 start=bdp->indexes[UBIDI_IX_JG_START]; 355 limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; 356 jgArray=bdp->jgArray; 357 prev=0; 358 while(start<limit) { 359 jg=*jgArray++; 360 if(jg!=prev) { 361 sa->add(sa->set, start); 362 prev=jg; 363 } 364 ++start; 365 } 366 if(prev!=0) { 367 /* add the limit code point if the last value was not 0 (it is now start==limit) */ 368 sa->add(sa->set, limit); 369 } 370 371 /* add code points with hardcoded properties, plus the ones following them */ 372 373 /* (none right now) */ 374 } 375 376 /* property access functions ------------------------------------------------ */ 377 378 U_CFUNC int32_t 379 ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { 380 int32_t max; 381 382 if(bdp==NULL) { 383 return -1; 384 } 385 386 max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; 387 switch(which) { 388 case UCHAR_BIDI_CLASS: 389 return (max&UBIDI_CLASS_MASK); 390 case UCHAR_JOINING_GROUP: 391 return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; 392 case UCHAR_JOINING_TYPE: 393 return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; 394 default: 395 return -1; /* undefined */ 396 } 397 } 398 399 U_CAPI UCharDirection 400 ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { 401 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 402 return (UCharDirection)UBIDI_GET_CLASS(props); 403 } 404 405 U_CFUNC UBool 406 ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { 407 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 408 return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); 409 } 410 411 U_CFUNC UChar32 412 ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) { 413 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 414 int32_t delta=((int16_t)props)>>UBIDI_MIRROR_DELTA_SHIFT; 415 if(delta!=UBIDI_ESC_MIRROR_DELTA) { 416 return c+delta; 417 } else { 418 /* look for mirror code point in the mirrors[] table */ 419 const uint32_t *mirrors; 420 uint32_t m; 421 int32_t i, length; 422 UChar32 c2; 423 424 mirrors=bdp->mirrors; 425 length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; 426 427 /* linear search */ 428 for(i=0; i<length; ++i) { 429 m=mirrors[i]; 430 c2=UBIDI_GET_MIRROR_CODE_POINT(m); 431 if(c==c2) { 432 /* found c, return its mirror code point using the index in m */ 433 return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]); 434 } else if(c<c2) { 435 break; 436 } 437 } 438 439 /* c not found, return it itself */ 440 return c; 441 } 442 } 443 444 U_CFUNC UBool 445 ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { 446 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 447 return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); 448 } 449 450 U_CFUNC UBool 451 ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { 452 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 453 return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); 454 } 455 456 U_CFUNC UJoiningType 457 ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { 458 uint16_t props=UTRIE2_GET16(&bdp->trie, c); 459 return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); 460 } 461 462 U_CFUNC UJoiningGroup 463 ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { 464 UChar32 start, limit; 465 466 start=bdp->indexes[UBIDI_IX_JG_START]; 467 limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; 468 if(start<=c && c<limit) { 469 return (UJoiningGroup)bdp->jgArray[c-start]; 470 } else { 471 return U_JG_NO_JOINING_GROUP; 472 } 473 } 474 475 /* public API (see uchar.h) ------------------------------------------------- */ 476 477 U_CFUNC UCharDirection 478 u_charDirection(UChar32 c) { 479 UErrorCode errorCode=U_ZERO_ERROR; 480 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode); 481 if(bdp!=NULL) { 482 return ubidi_getClass(bdp, c); 483 } else { 484 return U_LEFT_TO_RIGHT; 485 } 486 } 487 488 U_CFUNC UBool 489 u_isMirrored(UChar32 c) { 490 UErrorCode errorCode=U_ZERO_ERROR; 491 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode); 492 return (UBool)(bdp!=NULL && ubidi_isMirrored(bdp, c)); 493 } 494 495 U_CFUNC UChar32 496 u_charMirror(UChar32 c) { 497 UErrorCode errorCode=U_ZERO_ERROR; 498 const UBiDiProps *bdp=ubidi_getSingleton(&errorCode); 499 if(bdp!=NULL) { 500 return ubidi_getMirror(bdp, c); 501 } else { 502 return c; 503 } 504 } 505