/*
******************************************************************************
*
*   Copyright (C) 1999-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*/

#ifndef UBIDIIMP_H
#define UBIDIIMP_H

/* set import/export definitions */
/* Everything below is compiled only when U_COMMON_IMPLEMENTATION is defined. */
#ifdef U_COMMON_IMPLEMENTATION

#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "ubidi_props.h"

/* miscellaneous definitions ---------------------------------------------- */

/* one directional property value per text unit, see the enum below */
typedef uint8_t DirProp;
/* bit set built from DIRPROP_FLAG() values */
typedef uint32_t Flags;

/*  Comparing the description of the BiDi algorithm with this implementation
    is easier with the same names for the BiDi types in the code as there.
    See UCharDirection in uchar.h .
*/
enum {
    L=  U_LEFT_TO_RIGHT,
    R=  U_RIGHT_TO_LEFT,
    EN= U_EUROPEAN_NUMBER,
    ES= U_EUROPEAN_NUMBER_SEPARATOR,
    ET= U_EUROPEAN_NUMBER_TERMINATOR,
    AN= U_ARABIC_NUMBER,
    CS= U_COMMON_NUMBER_SEPARATOR,
    B=  U_BLOCK_SEPARATOR,
    S=  U_SEGMENT_SEPARATOR,
    WS= U_WHITE_SPACE_NEUTRAL,
    ON= U_OTHER_NEUTRAL,
    LRE=U_LEFT_TO_RIGHT_EMBEDDING,
    LRO=U_LEFT_TO_RIGHT_OVERRIDE,
    AL= U_RIGHT_TO_LEFT_ARABIC,
    RLE=U_RIGHT_TO_LEFT_EMBEDDING,
    RLO=U_RIGHT_TO_LEFT_OVERRIDE,
    PDF=U_POP_DIRECTIONAL_FORMAT,
    NSM=U_DIR_NON_SPACING_MARK,
    BN= U_BOUNDARY_NEUTRAL,
    dirPropCount
};

/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)

/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at a text index: when defaultParaLevel is
 * non-zero the level is taken from bit 7 (CONTEXT_RTL) of dirProps[index],
 * otherwise it is the object's paraLevel.
 */
#define GET_PARALEVEL(ubidi, index) \
            (UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
                                                   : (ubidi)->paraLevel)

/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;

#define CR  0x000D
#define LF  0x000A

/* Run structure for reordering --------------------------------------------- */
enum {
    LRM_BEFORE=1,
    LRM_AFTER=2,
    RLM_BEFORE=4,
    RLM_AFTER=8
};

typedef struct Run {
    int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
            visualLimit,    /* last visual position of the run +1 */
            insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
                               if <0, count of bidi controls within run            */
} Run;

/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Only the low (odd/even) bit of the level goes into bit 31.
 * The shift is done in uint32_t: shifting a non-zero int32_t left by 31
 * overflows the signed type, which is undefined behavior in C.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/*
 * NOTE(review): INDEX_ODD_BIT has type unsigned long. Where that type is
 * wider than 32 bits, GET_INDEX() of a negative int32_t yields a value with
 * the upper bits set; the result is only meaningful once converted back to
 * a 32-bit type - confirm that all callers do so.
 */
#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))

/* declared here for use across the implementation files; defined elsewhere */
U_CFUNC UBool
ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);

/** BiDi control code points */
enum {
    ZWNJ_CHAR=0x200c,
    ZWJ_CHAR,
    LRM_CHAR,
    RLM_CHAR,
    LRE_CHAR=0x202a,
    RLE_CHAR,
    PDF_CHAR,
    LRO_CHAR,
    RLO_CHAR
};

/* true for U+200C..U+200F (ZWNJ, ZWJ, LRM, RLM) and U+202A..U+202E (LRE..RLO) */
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)

/* InsertPoints structure for noting where to put BiDi marks ---------------- */

typedef struct Point {
    int32_t pos;            /* position in text */
    int32_t flag;           /* flag for LRM/RLM, before/after */
} Point;

typedef struct InsertPoints {
    int32_t capacity;       /* number of points allocated */
    int32_t size;           /* number of points used */
    int32_t confirmed;      /* number of points confirmed */
    UErrorCode errorCode;   /* for eventual memory shortage */
    Point *points;          /* pointer to array of points */
} InsertPoints;


/* UBiDi structure ----------------------------------------------------------- */

struct UBiDi {
    /* pointer to parent paragraph object (pointer to self if this object is
     * a paragraph object); set to NULL in a newly opened object; set to a
     * real value after a successful execution of ubidi_setPara or ubidi_setLine
     */
    const UBiDi * pParaBiDi;

    const UBiDiProps *bdp;

    /* alias pointer to the current text */
    const UChar *text;

    /* length of the current text */
    int32_t originalLength;

    /* if the UBIDI_OPTION_STREAMING option is set, this is the length
     * of text actually processed by ubidi_setPara, which may be shorter than
     * the original length.
     * Otherwise, it is identical to the original length.
     */
    int32_t length;

    /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
     * marks are allowed to be inserted in one of the reordering modes, the
     * length of the result string may be different from the processed length.
     */
    int32_t resultLength;

    /* memory sizes in bytes */
    int32_t dirPropsSize, levelsSize, parasSize, runsSize;

    /* allocated memory */
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;

    /* indicators for whether memory may be allocated after ubidi_open() */
    UBool mayAllocateText, mayAllocateRuns;

    /* arrays with one value per text-character */
    const DirProp *dirProps;
    UBiDiLevel *levels;

    /* are we performing an approximation of the "inverse BiDi" algorithm? */
    UBool isInverse;

    /* are we using the basic algorithm or its variation? */
    UBiDiReorderingMode reorderingMode;

    /* UBIDI_REORDER_xxx values must be ordered so that all the regular
     * logical to visual modes come first, and all inverse BiDi modes
     * come last.
     */
    #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL

    /* bitmask for reordering options */
    uint32_t reorderingOptions;

    /* must block separators receive level 0? */
    UBool orderParagraphsLTR;

    /* the paragraph level */
    UBiDiLevel paraLevel;
    /* original paraLevel when contextual */
    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
    UBiDiLevel defaultParaLevel;

    /* context data */
    const UChar *prologue;
    int32_t proLength;
    const UChar *epilogue;
    int32_t epiLength;

    /* the following is set in ubidi_setPara, used in processPropertySeq */
    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */

    /* the overall paragraph or line directionality - see UBiDiDirection */
    UBiDiDirection direction;

    /* flags is a bit set for which directional properties are in the text */
    Flags flags;

    /* lastArabicPos is index to the last AL in the text, -1 if none */
    int32_t lastArabicPos;

    /* characters after trailingWSStart are WS and are */
    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
    int32_t trailingWSStart;

    /* fields for paragraph handling */
    int32_t paraCount;                  /* set in getDirProps() */
    Para *paras;                        /* limits of paragraphs, filled in
                            ResolveExplicitLevels() or CheckExplicitLevels() */

    /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
    Para simpleParas[1];

    /* fields for line reordering */
    int32_t runCount;     /* ==-1: runs not set up yet */
    Run *runs;

    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
    Run simpleRuns[1];

    /* for inverse Bidi with insertion of directional marks */
    InsertPoints insertPoints;

    /* for option UBIDI_OPTION_REMOVE_CONTROLS */
    int32_t controlCount;

    /* for Bidi class callback */
    UBiDiClassCallback *fnClassCallback;    /* action pointer */
    const void *coClassCallback;            /* context pointer */
};

/* a paragraph object is non-NULL and points to itself through pParaBiDi */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
/* a line object is non-NULL and its pParaBiDi points to a valid paragraph object */
#define IS_VALID_PARA_OR_LINE(x) \
            ((x) && ((x)->pParaBiDi==(x) || \
                     (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))

/* the ubidi_getMemory() helper macros below cast the address of one of the
 * UBiDi xxxMemory pointer fields to a pointer to this union */
typedef union {
    DirProp *dirPropsMemory;
    UBiDiLevel *levelsMemory;
    Para *parasMemory;
    Run *runsMemory;
} BidiMemoryForAllocation;

/* Macros for initial checks at function entry */
/*
 * NOTE(review): these macros expand to a bare "if" statement (the braced
 * forms are followed by the caller's own ';'), so they are only safe as
 * complete statements - not as the unbraced body of an if/else.
 * The errcode argument is an lvalue (it is assigned to), pErrcode a pointer.
 */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
/* the accepted range is start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return retvalue;                        \
        }

#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                \
        }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                \
        }
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return;                        \
        }

/* helper function to (re)allocate memory if allowed */
U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);

/* helper macros for each allocated array in UBiDi */
/* sizeNeeded is an int32_t, so the size_t products are converted explicitly */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (int32_t)((length)*sizeof(Run)))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (int32_t)((length)*sizeof(Para)))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (int32_t)((length)*sizeof(Run)))

#endif /* U_COMMON_IMPLEMENTATION */

#endif /* UBIDIIMP_H */