Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  ubidiimp.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999aug06
     14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     15 */
     16 
     17 #ifndef UBIDIIMP_H
     18 #define UBIDIIMP_H
     19 
     20 /* set import/export definitions */
     21 #ifdef U_COMMON_IMPLEMENTATION
     22 
     23 #include "unicode/utypes.h"
     24 #include "unicode/uchar.h"
     25 #include "ubidi_props.h"
     26 
     27 /* miscellaneous definitions ---------------------------------------------- */
     28 
/* Directional property of one character, stored in 8 bits; the dirProps
 * arrays declared further down hold one DirProp per text character. */
typedef uint8_t DirProp;
/* 32-bit bit set with one bit position per directional property
 * (see DIRPROP_FLAG). */
typedef uint32_t Flags;
     31 
/*
 * Short aliases for the UCharDirection constants (see uchar.h).
 * Comparing the description of the BiDi algorithm with this implementation
 * is easier with the same names for the BiDi types in the code as there.
 */
enum {
    L=  U_LEFT_TO_RIGHT,
    R=  U_RIGHT_TO_LEFT,
    EN= U_EUROPEAN_NUMBER,
    ES= U_EUROPEAN_NUMBER_SEPARATOR,
    ET= U_EUROPEAN_NUMBER_TERMINATOR,
    AN= U_ARABIC_NUMBER,
    CS= U_COMMON_NUMBER_SEPARATOR,
    B=  U_BLOCK_SEPARATOR,
    S=  U_SEGMENT_SEPARATOR,
    WS= U_WHITE_SPACE_NEUTRAL,
    ON= U_OTHER_NEUTRAL,
    LRE=U_LEFT_TO_RIGHT_EMBEDDING,
    LRO=U_LEFT_TO_RIGHT_OVERRIDE,
    AL= U_RIGHT_TO_LEFT_ARABIC,
    RLE=U_RIGHT_TO_LEFT_EMBEDDING,
    RLO=U_RIGHT_TO_LEFT_OVERRIDE,
    PDF=U_POP_DIRECTIONAL_FORMAT,
    NSM=U_DIR_NON_SPACING_MARK,
    BN= U_BOUNDARY_NEUTRAL,
    /* no initializer, so this is BN+1 */
    /* NOTE(review): presumably the number of directional properties;
     * that holds only if BN is the largest UCharDirection value used - confirm */
    dirPropCount
};
     58 
     59 /*
     60  * Sometimes, bit values are more appropriate
     61  * to deal with directionality properties.
     62  * Abbreviations in these macro names refer to names
     63  * used in the BiDi algorithm.
     64  */
     65 #define DIRPROP_FLAG(dir) (1UL<<(dir))
     66 
     67 /* special flag for multiple runs from explicit embedding codes */
     68 #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
     69 
     70 /* are there any characters that are LTR or RTL? */
     71 #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
     72 #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
     73 #define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
     74 
     75 /* explicit embedding codes */
     76 #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
     77 #define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
     78 #define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))
     79 
     80 #define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
     81 #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
     82 
     83 /* paragraph and segment separators */
     84 #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
     85 
     86 /* all types that are counted as White Space or Neutral in some steps */
     87 #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
     88 #define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)
     89 
     90 /* all types that are included in a sequence of European Terminators for (W5) */
     91 #define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)
     92 
     93 /* types that are neutrals or could becomes neutrals in (Wn) */
     94 #define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)
     95 
     96 /*
     97  * These types may be changed to "e",
     98  * the embedding type (L or R) of the run,
     99  * in the BiDi algorithm (N2)
    100  */
    101 #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
    102 
/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
/* so the lowest bit of a level directly yields L (even level) or R (odd level) */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/* true for level values 0xfe and above */
/* NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels
 * mentioned at defaultParaLevel - confirm against ubidi.h */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
    107 
/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 * It is the top bit of the 8-bit DirProp.
 */
#define CONTEXT_RTL 0x80
/* the property value with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))
    118 
    119 #define GET_PARALEVEL(ubidi, index) \
    120             (UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
    121                                                    : (ubidi)->paraLevel)
    122 
/* Paragraph type for multiple paragraph support ---------------------------- */
/* one 32-bit entry per paragraph; the paras field of UBiDi describes the
 * entries as the limits of paragraphs */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR  0x000D
#define LF  0x000A
    128 
    129 /* Run structure for reordering --------------------------------------------- */
    130 enum {
    131     LRM_BEFORE=1,
    132     LRM_AFTER=2,
    133     RLM_BEFORE=4,
    134     RLM_AFTER=8
    135 };
    136 
    137 typedef struct Run {
    138     int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
    139             visualLimit,    /* last visual position of the run +1 */
    140             insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
    141                                if <0, count of bidi controls within run            */
    142 } Run;
    143 
    144 /* in a Run, logicalStart will get this bit set if the run level is odd */
    145 #define INDEX_ODD_BIT (1UL<<31)
    146 
    147 #define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
    148 #define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)(level)<<31))
    149 #define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
    150 
    151 #define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
    152 #define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
    153 #define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
    154 #define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
    155 
/*
 * Declared here, defined outside this header.
 * NOTE(review): presumably computes the runs of pBiDi (the runs/runCount
 * fields, where runCount==-1 means "not set up yet") and reports
 * success as a UBool, with failures in *pErrorCode - confirm against the
 * definition.
 */
U_CFUNC UBool
ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
    158 
    159 /** BiDi control code points */
    160 enum {
    161     ZWNJ_CHAR=0x200c,
    162     ZWJ_CHAR,
    163     LRM_CHAR,
    164     RLM_CHAR,
    165     LRE_CHAR=0x202a,
    166     RLE_CHAR,
    167     PDF_CHAR,
    168     LRO_CHAR,
    169     RLO_CHAR
    170 };
    171 
    172 #define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)
    173 
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one noted position together with the kind of mark wanted there */
typedef struct Point {
    int32_t pos;            /* position in text */
    int32_t flag;           /* flag for LRM/RLM, before/after */
} Point;

/* growable list of Point entries plus its bookkeeping */
typedef struct InsertPoints {
    int32_t capacity;       /* number of points allocated */
    int32_t size;           /* number of points used */
    int32_t confirmed;      /* number of points confirmed */
    UErrorCode errorCode;   /* for eventual memory shortage */
    Point *points;          /* pointer to array of points */
} InsertPoints;
    188 
    189 
    190 /* UBiDi structure ----------------------------------------------------------- */
    191 
    192 struct UBiDi {
    193     /* pointer to parent paragraph object (pointer to self if this object is
    194      * a paragraph object); set to NULL in a newly opened object; set to a
    195      * real value after a successful execution of ubidi_setPara or ubidi_setLine
    196      */
    197     const UBiDi * pParaBiDi;
    198 
    199     const UBiDiProps *bdp;
    200 
    201     /* alias pointer to the current text */
    202     const UChar *text;
    203 
    204     /* length of the current text */
    205     int32_t originalLength;
    206 
    207     /* if the UBIDI_OPTION_STREAMING option is set, this is the length
    208      * of text actually processed by ubidi_setPara, which may be shorter than
    209      * the original length.
    210      * Otherwise, it is identical to the original length.
    211      */
    212     int32_t length;
    213 
    214     /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
    215      * marks are allowed to be inserted in one of the reordering mode, the
    216      * length of the result string may be different from the processed length.
    217      */
    218     int32_t resultLength;
    219 
    220     /* memory sizes in bytes */
    221     int32_t dirPropsSize, levelsSize, parasSize, runsSize;
    222 
    223     /* allocated memory */
    224     DirProp *dirPropsMemory;
    225     UBiDiLevel *levelsMemory;
    226     Para *parasMemory;
    227     Run *runsMemory;
    228 
    229     /* indicators for whether memory may be allocated after ubidi_open() */
    230     UBool mayAllocateText, mayAllocateRuns;
    231 
    232     /* arrays with one value per text-character */
    233     const DirProp *dirProps;
    234     UBiDiLevel *levels;
    235 
    236     /* are we performing an approximation of the "inverse BiDi" algorithm? */
    237     UBool isInverse;
    238 
    239     /* are we using the basic algorithm or its variation? */
    240     UBiDiReorderingMode reorderingMode;
    241 
    242     /* UBIDI_REORDER_xxx values must be ordered so that all the regular
    243      * logical to visual modes come first, and all inverse BiDi modes
    244      * come last.
    245      */
    246     #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL
    247 
    248     /* bitmask for reordering options */
    249     uint32_t reorderingOptions;
    250 
    251     /* must block separators receive level 0? */
    252     UBool orderParagraphsLTR;
    253 
    254     /* the paragraph level */
    255     UBiDiLevel paraLevel;
    256     /* original paraLevel when contextual */
    257     /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
    258     UBiDiLevel defaultParaLevel;
    259 
    260     /* context data */
    261     const UChar *prologue;
    262     int32_t proLength;
    263     const UChar *epilogue;
    264     int32_t epiLength;
    265 
    266     /* the following is set in ubidi_setPara, used in processPropertySeq */
    267     const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
    268 
    269     /* the overall paragraph or line directionality - see UBiDiDirection */
    270     UBiDiDirection direction;
    271 
    272     /* flags is a bit set for which directional properties are in the text */
    273     Flags flags;
    274 
    275     /* lastArabicPos is index to the last AL in the text, -1 if none */
    276     int32_t lastArabicPos;
    277 
    278     /* characters after trailingWSStart are WS and are */
    279     /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
    280     int32_t trailingWSStart;
    281 
    282     /* fields for paragraph handling */
    283     int32_t paraCount;                  /* set in getDirProps() */
    284     Para *paras;                        /* limits of paragraphs, filled in
    285                             ResolveExplicitLevels() or CheckExplicitLevels() */
    286 
    287     /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
    288     Para simpleParas[1];
    289 
    290     /* fields for line reordering */
    291     int32_t runCount;     /* ==-1: runs not set up yet */
    292     Run *runs;
    293 
    294     /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
    295     Run simpleRuns[1];
    296 
    297     /* for inverse Bidi with insertion of directional marks */
    298     InsertPoints insertPoints;
    299 
    300     /* for option UBIDI_OPTION_REMOVE_CONTROLS */
    301     int32_t controlCount;
    302 
    303     /* for Bidi class callback */
    304     UBiDiClassCallback *fnClassCallback;    /* action pointer */
    305     const void *coClassCallback;            /* context pointer */
    306 };
    307 
    308 #define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
    309 #define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
    310 
/*
 * Union over the four pointer types of the xxxMemory fields of UBiDi.
 * The getXxxMemory macros cast the address of one of those fields to a
 * pointer to this union before passing it to ubidi_getMemory().
 */
typedef union {
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;
} BidiMemoryForAllocation;
    317 
    318 /* Macros for initial checks at function entry */
    319 #define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
    320         if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
    321 #define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
    322         if(!IS_VALID_PARA(bidi)) {  \
    323             errcode=U_INVALID_STATE_ERROR;  \
    324             return retvalue;                \
    325         }
    326 #define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
    327         if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
    328             errcode=U_INVALID_STATE_ERROR;  \
    329             return retvalue;                \
    330         }
    331 #define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
    332         if((arg)<(start) || (arg)>=(limit)) {       \
    333             (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
    334             return retvalue;                        \
    335         }
    336 
    337 #define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
    338         if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
    339 #define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
    340         if(!IS_VALID_PARA(bidi)) {  \
    341             errcode=U_INVALID_STATE_ERROR;  \
    342             return;                \
    343         }
    344 #define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
    345         if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
    346             errcode=U_INVALID_STATE_ERROR;  \
    347             return;                \
    348         }
    349 #define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
    350         if((arg)<(start) || (arg)>=(limit)) {       \
    351             (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
    352             return;                        \
    353         }
    354 
/*
 * helper function to (re)allocate memory if allowed
 *
 * Declared here, defined outside this header. The macros below call it with
 * the address of one xxxMemory pointer field (cast to the union type), the
 * address of the matching xxxSize field, an allocation permission flag and
 * the size needed.
 * NOTE(review): presumably sizeNeeded is in bytes (the size fields are
 * documented as byte sizes) and the return value tells whether enough
 * memory is available - confirm against the definition.
 */
U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
    358 
/*
 * helper macros for each allocated array in UBiDi
 *
 * Each macro forwards to ubidi_getMemory() with the array's own memory
 * pointer and size field and with the mayAllocateXxx flag stored in the
 * object. pBiDi is evaluated several times.
 */

/* "length" is passed unscaled as the size needed */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

/* "length" is passed unscaled as the size needed */
#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

/* "length" counts Run elements; it is scaled by sizeof(Run) */
#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
    371 
/*
 * additional macros used by ubidi_open() - always allow allocation
 *
 * Same forwarding to ubidi_getMemory() as the macros above, except that
 * TRUE is passed instead of the object's mayAllocateXxx flag.
 */

/* "length" is passed unscaled as the size needed */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

/* "length" is passed unscaled as the size needed */
#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

/* "length" counts Para elements; it is scaled by sizeof(Para) */
#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (length)*sizeof(Para))

/* "length" counts Run elements; it is scaled by sizeof(Run) */
#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (length)*sizeof(Run))
    388 
    389 #endif
    390 
    391 #endif
    392