Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ubidi_props.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004dec30
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Low-level Unicode bidi/shaping properties access.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/uset.h"
     21 #include "unicode/udata.h" /* UDataInfo */
     22 #include "ucmndata.h" /* DataHeader */
     23 #include "udatamem.h"
     24 #include "umutex.h"
     25 #include "uassert.h"
     26 #include "cmemory.h"
     27 #include "utrie2.h"
     28 #include "ubidi_props.h"
     29 #include "ucln_cmn.h"
     30 
/*
 * In-memory view of the bidi/shaping properties data.
 * The pointer members refer into the loaded binary data; they are set up
 * by ubidi_openData() or, for the dummy object, by ubidi_getDummy().
 * Note: mem must stay the first member, the "{ NULL }" initializers in this
 * file rely on it.
 */
struct UBiDiProps {
    /* handle from udata_openChoice(); released with udata_close() in ubidi_closeProps() */
    UDataMemory *mem;
    /* int32_t indexes[] at the start of the data; indexed with UBIDI_IX_... constants */
    const int32_t *indexes;
    /* mirroring table with indexes[UBIDI_IX_MIRROR_LENGTH] entries */
    const uint32_t *mirrors;
    /* Joining_Group bytes for code points in [UBIDI_IX_JG_START, UBIDI_IX_JG_LIMIT) */
    const uint8_t *jgArray;

    /* per-code point properties words, read with UTRIE2_GET16() */
    UTrie2 trie;
    /* copy of UDataInfo.formatVersion, filled in by isAcceptable() */
    uint8_t formatVersion[4];
};
     40 
     41 /* data loading etc. -------------------------------------------------------- */
     42 
     43 #if UBIDI_HARDCODE_DATA
     44 
     45 /* ubidi_props_data.c is machine-generated by genbidi --csource */
     46 #include "ubidi_props_data.c"
     47 
     48 #else
     49 
     50 static UBool U_CALLCONV
     51 isAcceptable(void *context,
     52              const char *type, const char *name,
     53              const UDataInfo *pInfo) {
     54     if(
     55         pInfo->size>=20 &&
     56         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     57         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     58         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
     59         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
     60         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
     61         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
     62         pInfo->formatVersion[0]==1 &&
     63         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     64         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     65     ) {
     66         UBiDiProps *bdp=(UBiDiProps *)context;
     67         uprv_memcpy(bdp->formatVersion, pInfo->formatVersion, 4);
     68         return TRUE;
     69     } else {
     70         return FALSE;
     71     }
     72 }
     73 
     74 static UBiDiProps *
     75 ubidi_openData(UBiDiProps *bdpProto,
     76                const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
     77     UBiDiProps *bdp;
     78     int32_t size;
     79 
     80     bdpProto->indexes=(const int32_t *)bin;
     81     if( (length>=0 && length<16*4) ||
     82         bdpProto->indexes[UBIDI_IX_INDEX_TOP]<16
     83     ) {
     84         /* length or indexes[] too short for minimum indexes[] length of 16 */
     85         *pErrorCode=U_INVALID_FORMAT_ERROR;
     86         return NULL;
     87     }
     88     size=bdpProto->indexes[UBIDI_IX_INDEX_TOP]*4;
     89     if(length>=0) {
     90         if(length>=size && length>=bdpProto->indexes[UBIDI_IX_LENGTH]) {
     91             length-=size;
     92         } else {
     93             /* length too short for indexes[] or for the whole data length */
     94             *pErrorCode=U_INVALID_FORMAT_ERROR;
     95             return NULL;
     96         }
     97     }
     98     bin+=size;
     99     /* from here on, assume that the sizes of the items fit into the total length */
    100 
    101     /* unserialize the trie, after indexes[] */
    102     size=bdpProto->indexes[UBIDI_IX_TRIE_SIZE];
    103     utrie_unserialize(&bdpProto->trie, bin, size, pErrorCode);
    104     if(U_FAILURE(*pErrorCode)) {
    105         return NULL;
    106     }
    107     bin+=size;
    108 
    109     /* get mirrors[] */
    110     size=4*bdpProto->indexes[UBIDI_IX_MIRROR_LENGTH];
    111     bdpProto->mirrors=(const uint32_t *)bin;
    112     bin+=size;
    113 
    114     /* get jgArray[] */
    115     size=bdpProto->indexes[UBIDI_IX_JG_LIMIT]-bdpProto->indexes[UBIDI_IX_JG_START];
    116     bdpProto->jgArray=bin;
    117     bin+=size;
    118 
    119     /* allocate, copy, and return the new UBiDiProps */
    120     bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps));
    121     if(bdp==NULL) {
    122         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    123         return NULL;
    124     } else {
    125         uprv_memcpy(bdp, bdpProto, sizeof(UBiDiProps));
    126         return bdp;
    127     }
    128 }
    129 
    130 U_CFUNC UBiDiProps *
    131 ubidi_openProps(UErrorCode *pErrorCode) {
    132     UBiDiProps bdpProto={ NULL }, *bdp;
    133 
    134     bdpProto.mem=udata_openChoice(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, isAcceptable, &bdpProto, pErrorCode);
    135     if(U_FAILURE(*pErrorCode)) {
    136         return NULL;
    137     }
    138 
    139     bdp=ubidi_openData(
    140             &bdpProto,
    141             udata_getMemory(bdpProto.mem),
    142             udata_getLength(bdpProto.mem),
    143             pErrorCode);
    144     if(U_FAILURE(*pErrorCode)) {
    145         udata_close(bdpProto.mem);
    146         return NULL;
    147     } else {
    148         return bdp;
    149     }
    150 }
    151 
    152 U_CFUNC UBiDiProps *
    153 ubidi_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
    154     UBiDiProps bdpProto={ NULL };
    155     const DataHeader *hdr;
    156 
    157     if(U_FAILURE(*pErrorCode)) {
    158         return NULL;
    159     }
    160     if(bin==NULL) {
    161         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    162         return NULL;
    163     }
    164 
    165     /* check the header */
    166     if(length>=0 && length<20) {
    167         *pErrorCode=U_INVALID_FORMAT_ERROR;
    168         return NULL;
    169     }
    170     hdr=(const DataHeader *)bin;
    171     if(
    172         !(hdr->dataHeader.magic1==0xda && hdr->dataHeader.magic2==0x27 &&
    173           hdr->info.isBigEndian==U_IS_BIG_ENDIAN &&
    174           isAcceptable(&bdpProto, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &hdr->info))
    175     ) {
    176         *pErrorCode=U_INVALID_FORMAT_ERROR;
    177         return NULL;
    178     }
    179 
    180     bin+=hdr->dataHeader.headerSize;
    181     if(length>=0) {
    182         length-=hdr->dataHeader.headerSize;
    183     }
    184     return ubidi_openData(&bdpProto, bin, length, pErrorCode);
    185 }
    186 
    187 #endif
    188 
    189 U_CFUNC void
    190 ubidi_closeProps(UBiDiProps *bdp) {
    191     if(bdp!=NULL) {
    192 #if !UBIDI_HARDCODE_DATA
    193         udata_close(bdp->mem);
    194 #endif
    195         uprv_free(bdp);
    196     }
    197 }
    198 
    199 /* UBiDiProps singleton ----------------------------------------------------- */
    200 
    201 #if !UBIDI_HARDCODE_DATA
    202 static UBiDiProps *gBdpDummy=NULL;
    203 static UBiDiProps *gBdp=NULL;
    204 static UErrorCode gErrorCode=U_ZERO_ERROR;
    205 static int8_t gHaveData=0;
    206 
    207 static UBool U_CALLCONV
    208 ubidi_cleanup(void) {
    209     ubidi_closeProps(gBdpDummy);
    210     gBdpDummy=NULL;
    211     ubidi_closeProps(gBdp);
    212     gBdp=NULL;
    213     gErrorCode=U_ZERO_ERROR;
    214     gHaveData=0;
    215     return TRUE;
    216 }
    217 #endif
    218 
    219 U_CFUNC const UBiDiProps *
    220 ubidi_getSingleton(UErrorCode *pErrorCode) {
    221 #if UBIDI_HARDCODE_DATA
    222     if(U_FAILURE(*pErrorCode)) {
    223         return NULL;
    224     }
    225     return &ubidi_props_singleton;
    226 #else
    227     int8_t haveData;
    228 
    229     if(U_FAILURE(*pErrorCode)) {
    230         return NULL;
    231     }
    232 
    233     UMTX_CHECK(NULL, gHaveData, haveData);
    234 
    235     if(haveData>0) {
    236         /* data was loaded */
    237         return gBdp;
    238     } else if(haveData<0) {
    239         /* data loading failed */
    240         *pErrorCode=gErrorCode;
    241         return NULL;
    242     } else /* haveData==0 */ {
    243         /* load the data */
    244         UBiDiProps *bdp=ubidi_openProps(pErrorCode);
    245         if(U_FAILURE(*pErrorCode)) {
    246             gHaveData=-1;
    247             gErrorCode=*pErrorCode;
    248             return NULL;
    249         }
    250 
    251         /* set the static variables */
    252         umtx_lock(NULL);
    253         if(gBdp==NULL) {
    254             gBdp=bdp;
    255             bdp=NULL;
    256             gHaveData=1;
    257             ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
    258         }
    259         umtx_unlock(NULL);
    260 
    261         ubidi_closeProps(bdp);
    262         return gBdp;
    263     }
    264 #endif
    265 }
    266 
    267 #if !UBIDI_HARDCODE_DATA
    268 U_CAPI const UBiDiProps *
    269 ubidi_getDummy(UErrorCode *pErrorCode) {
    270     UBiDiProps *bdp;
    271 
    272     if(U_FAILURE(*pErrorCode)) {
    273         return NULL;
    274     }
    275 
    276     UMTX_CHECK(NULL, gBdpDummy, bdp);
    277 
    278     if(bdp!=NULL) {
    279         /* the dummy object was already created */
    280         return bdp;
    281     } else /* bdp==NULL */ {
    282         /* create the dummy object */
    283         int32_t *indexes;
    284 
    285         bdp=(UBiDiProps *)uprv_malloc(sizeof(UBiDiProps)+UBIDI_IX_TOP*4+UTRIE_DUMMY_SIZE);
    286         if(bdp==NULL) {
    287             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    288             return NULL;
    289         }
    290         uprv_memset(bdp, 0, sizeof(UBiDiProps)+UBIDI_IX_TOP*4);
    291 
    292         bdp->indexes=indexes=(int32_t *)(bdp+1);
    293         indexes[UBIDI_IX_INDEX_TOP]=UBIDI_IX_TOP;
    294 
    295         indexes[UBIDI_IX_TRIE_SIZE]=
    296             utrie_unserializeDummy(&bdp->trie, indexes+UBIDI_IX_TOP, UTRIE_DUMMY_SIZE, 0, 0, TRUE, pErrorCode);
    297         if(U_FAILURE(*pErrorCode)) {
    298             uprv_free(bdp);
    299             return NULL;
    300         }
    301 
    302         bdp->formatVersion[0]=1;
    303         bdp->formatVersion[2]=UTRIE_SHIFT;
    304         bdp->formatVersion[3]=UTRIE_INDEX_SHIFT;
    305 
    306         /* set the static variables */
    307         umtx_lock(NULL);
    308         if(gBdpDummy==NULL) {
    309             gBdpDummy=bdp;
    310             bdp=NULL;
    311             ucln_common_registerCleanup(UCLN_COMMON_UBIDI, ubidi_cleanup);
    312         }
    313         umtx_unlock(NULL);
    314 
    315         uprv_free(bdp);
    316         return gBdpDummy;
    317     }
    318 }
    319 #endif
    320 
    321 /* set of property starts for UnicodeSet ------------------------------------ */
    322 
    323 static UBool U_CALLCONV
    324 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    325     /* add the start code point to the USet */
    326     const USetAdder *sa=(const USetAdder *)context;
    327     sa->add(sa->set, start);
    328     return TRUE;
    329 }
    330 
    331 U_CFUNC void
    332 ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
    333     int32_t i, length;
    334     UChar32 c, start, limit;
    335 
    336     const uint8_t *jgArray;
    337     uint8_t prev, jg;
    338 
    339     if(U_FAILURE(*pErrorCode)) {
    340         return;
    341     }
    342 
    343     /* add the start code point of each same-value range of the trie */
    344     utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
    345 
    346     /* add the code points from the bidi mirroring table */
    347     length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
    348     for(i=0; i<length; ++i) {
    349         c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
    350         sa->addRange(sa->set, c, c+1);
    351     }
    352 
    353     /* add the code points from the Joining_Group array where the value changes */
    354     start=bdp->indexes[UBIDI_IX_JG_START];
    355     limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
    356     jgArray=bdp->jgArray;
    357     prev=0;
    358     while(start<limit) {
    359         jg=*jgArray++;
    360         if(jg!=prev) {
    361             sa->add(sa->set, start);
    362             prev=jg;
    363         }
    364         ++start;
    365     }
    366     if(prev!=0) {
    367         /* add the limit code point if the last value was not 0 (it is now start==limit) */
    368         sa->add(sa->set, limit);
    369     }
    370 
    371     /* add code points with hardcoded properties, plus the ones following them */
    372 
    373     /* (none right now) */
    374 }
    375 
    376 /* property access functions ------------------------------------------------ */
    377 
    378 U_CFUNC int32_t
    379 ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
    380     int32_t max;
    381 
    382     if(bdp==NULL) {
    383         return -1;
    384     }
    385 
    386     max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
    387     switch(which) {
    388     case UCHAR_BIDI_CLASS:
    389         return (max&UBIDI_CLASS_MASK);
    390     case UCHAR_JOINING_GROUP:
    391         return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT;
    392     case UCHAR_JOINING_TYPE:
    393         return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT;
    394     default:
    395         return -1; /* undefined */
    396     }
    397 }
    398 
    399 U_CAPI UCharDirection
    400 ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
    401     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    402     return (UCharDirection)UBIDI_GET_CLASS(props);
    403 }
    404 
    405 U_CFUNC UBool
    406 ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
    407     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    408     return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
    409 }
    410 
    411 U_CFUNC UChar32
    412 ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
    413     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    414     int32_t delta=((int16_t)props)>>UBIDI_MIRROR_DELTA_SHIFT;
    415     if(delta!=UBIDI_ESC_MIRROR_DELTA) {
    416         return c+delta;
    417     } else {
    418         /* look for mirror code point in the mirrors[] table */
    419         const uint32_t *mirrors;
    420         uint32_t m;
    421         int32_t i, length;
    422         UChar32 c2;
    423 
    424         mirrors=bdp->mirrors;
    425         length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
    426 
    427         /* linear search */
    428         for(i=0; i<length; ++i) {
    429             m=mirrors[i];
    430             c2=UBIDI_GET_MIRROR_CODE_POINT(m);
    431             if(c==c2) {
    432                 /* found c, return its mirror code point using the index in m */
    433                 return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]);
    434             } else if(c<c2) {
    435                 break;
    436             }
    437         }
    438 
    439         /* c not found, return it itself */
    440         return c;
    441     }
    442 }
    443 
    444 U_CFUNC UBool
    445 ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
    446     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    447     return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
    448 }
    449 
    450 U_CFUNC UBool
    451 ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
    452     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    453     return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
    454 }
    455 
    456 U_CFUNC UJoiningType
    457 ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
    458     uint16_t props=UTRIE2_GET16(&bdp->trie, c);
    459     return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
    460 }
    461 
    462 U_CFUNC UJoiningGroup
    463 ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
    464     UChar32 start, limit;
    465 
    466     start=bdp->indexes[UBIDI_IX_JG_START];
    467     limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
    468     if(start<=c && c<limit) {
    469         return (UJoiningGroup)bdp->jgArray[c-start];
    470     } else {
    471         return U_JG_NO_JOINING_GROUP;
    472     }
    473 }
    474 
    475 /* public API (see uchar.h) ------------------------------------------------- */
    476 
    477 U_CFUNC UCharDirection
    478 u_charDirection(UChar32 c) {
    479     UErrorCode errorCode=U_ZERO_ERROR;
    480     const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
    481     if(bdp!=NULL) {
    482         return ubidi_getClass(bdp, c);
    483     } else {
    484         return U_LEFT_TO_RIGHT;
    485     }
    486 }
    487 
    488 U_CFUNC UBool
    489 u_isMirrored(UChar32 c) {
    490     UErrorCode errorCode=U_ZERO_ERROR;
    491     const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
    492     return (UBool)(bdp!=NULL && ubidi_isMirrored(bdp, c));
    493 }
    494 
    495 U_CFUNC UChar32
    496 u_charMirror(UChar32 c) {
    497     UErrorCode errorCode=U_ZERO_ERROR;
    498     const UBiDiProps *bdp=ubidi_getSingleton(&errorCode);
    499     if(bdp!=NULL) {
    500         return ubidi_getMirror(bdp, c);
    501     } else {
    502         return c;
    503     }
    504 }
    505