1 /* 2 ****************************************************************************** 3 * 4 * 2016 and later: Unicode, Inc. and others. 5 * License & terms of use: http://www.unicode.org/copyright.html 6 * 7 ****************************************************************************** 8 * file name: ubiditransform.c 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2016jul24 14 * created by: Lina Kemmel 15 * 16 */ 17 18 #include "cmemory.h" 19 #include "unicode/ubidi.h" 20 #include "unicode/ustring.h" 21 #include "unicode/ushape.h" 22 #include "unicode/utf16.h" 23 #include "ustr_imp.h" 24 #include "unicode/ubiditransform.h" 25 26 /* Some convenience defines */ 27 #define LTR UBIDI_LTR 28 #define RTL UBIDI_RTL 29 #define LOGICAL UBIDI_LOGICAL 30 #define VISUAL UBIDI_VISUAL 31 #define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL 32 #define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 33 34 #define CHECK_LEN(STR, LEN, ERROR) { \ 35 if (LEN == 0) return 0; \ 36 if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ 37 if (LEN == -1) LEN = u_strlen(STR); \ 38 } 39 40 #define MAX_ACTIONS 7 41 42 /** 43 * Typedef for a pointer to a function, which performs some operation (such as 44 * reordering, setting "inverse" mode, character mirroring, etc.). Return value 45 * indicates whether the text was changed in the course of this operation or 46 * not. 47 */ 48 typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); 49 50 /** 51 * Structure that holds a predefined reordering scheme, including the following 52 * information: 53 * <ul> 54 * <li>an input base direction,</li> 55 * <li>an input order,</li> 56 * <li>an output base direction,</li> 57 * <li>an output order,</li> 58 * <li>a digit shaping direction,</li> 59 * <li>a letter shaping direction,</li> 60 * <li>a base direction that should be applied when the reordering engine is 61 * invoked (which can not always be derived from the caller-defined 62 * options),</li> 63 * <li>an array of pointers to functions that accomplish the bidi layout 64 * transformation.</li> 65 * </ul> 66 */ 67 typedef struct { 68 UBiDiLevel inLevel; /* input level */ 69 UBiDiOrder inOrder; /* input order */ 70 UBiDiLevel outLevel; /* output level */ 71 UBiDiOrder outOrder; /* output order */ 72 uint32_t digitsDir; /* digit shaping direction */ 73 uint32_t lettersDir; /* letter shaping direction */ 74 UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ 75 const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ 76 } ReorderingScheme; 77 78 struct UBiDiTransform { 79 UBiDi *pBidi; /* pointer to a UBiDi object */ 80 const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ 81 UChar *src; /* input text */ 82 UChar *dest; /* output text */ 83 uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ 84 uint32_t srcSize; /* input text capacity excluding the trailing zero */ 85 uint32_t destSize; /* output text capacity */ 86 uint32_t *pDestLength; /* number of UChars written to dest */ 87 uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ 88 uint32_t digits; /* digit option for ArabicShaping */ 89 uint32_t letters; /* letter option for ArabicShaping */ 90 }; 91 92 U_DRAFT UBiDiTransform* U_EXPORT2 93 ubiditransform_open(UErrorCode *pErrorCode) 94 { 95 UBiDiTransform *pBiDiTransform = NULL; 96 if (U_SUCCESS(*pErrorCode)) { 97 pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); 98 if (pBiDiTransform == NULL) { 99 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 100 } 101 } 102 return pBiDiTransform; 103 } 104 105 U_DRAFT void U_EXPORT2 106 ubiditransform_close(UBiDiTransform *pBiDiTransform) 107 { 108 if (pBiDiTransform != NULL) { 109 if (pBiDiTransform->pBidi != NULL) { 110 ubidi_close(pBiDiTransform->pBidi); 111 } 112 if (pBiDiTransform->src != NULL) { 113 uprv_free(pBiDiTransform->src); 114 } 115 uprv_free(pBiDiTransform); 116 } 117 } 118 119 /** 120 * Performs Bidi resolution of text. 121 * 122 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 123 * @param pErrorCode Pointer to the error code value. 124 * 125 * @return Whether or not this function modifies the text. Besides the return 126 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 127 */ 128 static UBool 129 action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 130 { 131 ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, 132 pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); 133 return FALSE; 134 } 135 136 /** 137 * Performs basic reordering of text (Logical -> Visual LTR). 138 * 139 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 140 * @param pErrorCode Pointer to the error code value. 141 * 142 * @return Whether or not this function modifies the text. Besides the return 143 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 144 */ 145 static UBool 146 action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 147 { 148 ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, 149 pTransform->reorderingOptions, pErrorCode); 150 151 *pTransform->pDestLength = pTransform->srcLength; 152 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; 153 return TRUE; 154 } 155 156 /** 157 * Sets "inverse" mode on the <code>UBiDi</code> object. 158 * 159 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 160 * @param pErrorCode Pointer to the error code value. 161 * 162 * @return Whether or not this function modifies the text. Besides the return 163 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 164 */ 165 static UBool 166 action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 167 { 168 (void)pErrorCode; 169 ubidi_setInverse(pTransform->pBidi, TRUE); 170 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); 171 return FALSE; 172 } 173 174 /** 175 * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL 176 * transformation. 177 * 178 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 179 * @param pErrorCode Pointer to the error code value. 180 * 181 * @return Whether or not this function modifies the text. Besides the return 182 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 183 */ 184 static UBool 185 action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 186 { 187 (void)pErrorCode; 188 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); 189 return FALSE; 190 } 191 192 /** 193 * Performs string reverse. 194 * 195 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 196 * @param pErrorCode Pointer to the error code value. 197 * 198 * @return Whether or not this function modifies the text. Besides the return 199 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 200 */ 201 static UBool 202 action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 203 { 204 ubidi_writeReverse(pTransform->src, pTransform->srcLength, 205 pTransform->dest, pTransform->destSize, 206 UBIDI_REORDER_DEFAULT, pErrorCode); 207 *pTransform->pDestLength = pTransform->srcLength; 208 return TRUE; 209 } 210 211 /** 212 * Applies a new value to the text that serves as input at the current 213 * processing step. This value is identical to the original one when we begin 214 * the processing, but usually changes as the transformation progresses. 215 * 216 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure. 217 * @param newSrc A pointer whose value is to be used as input text. 218 * @param newLength A length of the new text in <code>UChar</code>s. 219 * @param newSize A new source capacity in <code>UChar</code>s. 220 * @param pErrorCode Pointer to the error code value. 221 */ 222 static void 223 updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, 224 uint32_t newSize, UErrorCode *pErrorCode) 225 { 226 if (newSize < newLength) { 227 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 228 return; 229 } 230 if (newSize > pTransform->srcSize) { 231 newSize += 50; // allocate slightly more than needed right now 232 if (pTransform->src != NULL) { 233 uprv_free(pTransform->src); 234 pTransform->src = NULL; 235 } 236 pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); 237 if (pTransform->src == NULL) { 238 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 239 //pTransform->srcLength = pTransform->srcSize = 0; 240 return; 241 } 242 pTransform->srcSize = newSize; 243 } 244 u_strncpy(pTransform->src, newSrc, newLength); 245 pTransform->srcLength = u_terminateUChars(pTransform->src, 246 pTransform->srcSize, newLength, pErrorCode); 247 } 248 249 /** 250 * Calls a lower level shaping function. 251 * 252 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 253 * @param options Shaping options. 254 * @param pErrorCode Pointer to the error code value. 255 */ 256 static void 257 doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) 258 { 259 *pTransform->pDestLength = u_shapeArabic(pTransform->src, 260 pTransform->srcLength, pTransform->dest, pTransform->destSize, 261 options, pErrorCode); 262 } 263 264 /** 265 * Performs digit and letter shaping. 266 * 267 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 268 * @param pErrorCode Pointer to the error code value. 269 * 270 * @return Whether or not this function modifies the text. Besides the return 271 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 272 */ 273 static UBool 274 action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 275 { 276 if ((pTransform->letters | pTransform->digits) == 0) { 277 return FALSE; 278 } 279 if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { 280 doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, 281 pErrorCode); 282 } else { 283 doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); 284 if (U_SUCCESS(*pErrorCode)) { 285 updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, 286 *pTransform->pDestLength, pErrorCode); 287 doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, 288 pErrorCode); 289 } 290 } 291 return TRUE; 292 } 293 294 /** 295 * Performs character mirroring. 296 * 297 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 298 * @param pErrorCode Pointer to the error code value. 299 * 300 * @return Whether or not this function modifies the text. Besides the return 301 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 302 */ 303 static UBool 304 action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 305 { 306 UChar32 c; 307 uint32_t i = 0, j = 0; 308 if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { 309 return FALSE; 310 } 311 if (pTransform->destSize < pTransform->srcLength) { 312 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 313 return FALSE; 314 } 315 do { 316 UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; 317 U16_NEXT(pTransform->src, i, pTransform->srcLength, c); 318 U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); 319 } while (i < pTransform->srcLength); 320 321 *pTransform->pDestLength = pTransform->srcLength; 322 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; 323 return TRUE; 324 } 325 326 /** 327 * All possible reordering schemes. 328 * 329 */ 330 static const ReorderingScheme Schemes[] = 331 { 332 /* 0: Logical LTR => Visual LTR */ 333 {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 334 {action_shapeArabic, action_resolve, action_reorder, NULL}}, 335 /* 1: Logical RTL => Visual LTR */ 336 {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 337 {action_resolve, action_reorder, action_shapeArabic, NULL}}, 338 /* 2: Logical LTR => Visual RTL */ 339 {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 340 {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, 341 /* 3: Logical RTL => Visual RTL */ 342 {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 343 {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, 344 /* 4: Visual LTR => Logical RTL */ 345 {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 346 {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, 347 /* 5: Visual RTL => Logical RTL */ 348 {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 349 {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, 350 /* 6: Visual LTR => Logical LTR */ 351 {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 352 {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, 353 /* 7: Visual RTL => Logical LTR */ 354 {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 355 {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, 356 /* 8: Logical LTR => Logical RTL */ 357 {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 358 {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, 359 /* 9: Logical RTL => Logical LTR */ 360 {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, 361 {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, 362 /* 10: Visual LTR => Visual RTL */ 363 {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 364 {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, 365 /* 11: Visual RTL => Visual LTR */ 366 {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 367 {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, 368 /* 12: Logical LTR => Logical LTR */ 369 {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 370 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 371 /* 13: Logical RTL => Logical RTL */ 372 {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, 373 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 374 /* 14: Visual LTR => Visual LTR */ 375 {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 376 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 377 /* 15: Visual RTL => Visual RTL */ 378 {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 379 {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} 380 }; 381 382 static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); 383 384 /** 385 * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or 386 * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that 387 * of the first strong bidi character. 388 */ 389 static void 390 resolveBaseDirection(const UChar *text, uint32_t length, 391 UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) 392 { 393 switch (*pInLevel) { 394 case UBIDI_DEFAULT_LTR: 395 case UBIDI_DEFAULT_RTL: { 396 UBiDiLevel level = ubidi_getBaseDirection(text, length); 397 *pInLevel = level != UBIDI_NEUTRAL ? level 398 : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; 399 break; 400 } 401 default: 402 *pInLevel &= 1; 403 break; 404 } 405 switch (*pOutLevel) { 406 case UBIDI_DEFAULT_LTR: 407 case UBIDI_DEFAULT_RTL: 408 *pOutLevel = *pInLevel; 409 break; 410 default: 411 *pOutLevel &= 1; 412 break; 413 } 414 } 415 416 /** 417 * Finds a valid <code>ReorderingScheme</code> matching the 418 * caller-defined scheme. 419 * 420 * @return A valid <code>ReorderingScheme</code> object or NULL 421 */ 422 static const ReorderingScheme* 423 findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, 424 UBiDiOrder inOrder, UBiDiOrder outOrder) 425 { 426 uint32_t i; 427 for (i = 0; i < nSchemes; i++) { 428 const ReorderingScheme *pScheme = Schemes + i; 429 if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel 430 && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { 431 return pScheme; 432 } 433 } 434 return NULL; 435 } 436 437 U_DRAFT uint32_t U_EXPORT2 438 ubiditransform_transform(UBiDiTransform *pBiDiTransform, 439 const UChar *src, int32_t srcLength, 440 UChar *dest, int32_t destSize, 441 UBiDiLevel inParaLevel, UBiDiOrder inOrder, 442 UBiDiLevel outParaLevel, UBiDiOrder outOrder, 443 UBiDiMirroring doMirroring, uint32_t shapingOptions, 444 UErrorCode *pErrorCode) 445 { 446 uint32_t destLength = 0; 447 UBool textChanged = FALSE; 448 const UBiDiTransform *pOrigTransform = pBiDiTransform; 449 const UBiDiAction *action = NULL; 450 451 if (U_FAILURE(*pErrorCode)) { 452 return 0; 453 } 454 if (src == NULL || dest == NULL) { 455 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 456 return 0; 457 } 458 CHECK_LEN(src, srcLength, pErrorCode); 459 CHECK_LEN(dest, destSize, pErrorCode); 460 461 if (pBiDiTransform == NULL) { 462 pBiDiTransform = ubiditransform_open(pErrorCode); 463 if (U_FAILURE(*pErrorCode)) { 464 return 0; 465 } 466 } 467 /* Current limitation: in multiple paragraphs will be resolved according 468 to the 1st paragraph */ 469 resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); 470 471 pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, 472 inOrder, outOrder); 473 if (pBiDiTransform->pActiveScheme == NULL) { 474 goto cleanup; 475 } 476 pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING 477 : UBIDI_REORDER_DEFAULT; 478 479 /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text 480 scheme at the time shaping is invoked. */ 481 shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; 482 pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; 483 pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; 484 485 updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); 486 if (U_FAILURE(*pErrorCode)) { 487 goto cleanup; 488 } 489 if (pBiDiTransform->pBidi == NULL) { 490 pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); 491 if (U_FAILURE(*pErrorCode)) { 492 goto cleanup; 493 } 494 } 495 pBiDiTransform->dest = dest; 496 pBiDiTransform->destSize = destSize; 497 pBiDiTransform->pDestLength = &destLength; 498 499 /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ 500 for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { 501 if ((*action)(pBiDiTransform, pErrorCode)) { 502 if (action + 1) { 503 updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, 504 *pBiDiTransform->pDestLength, pErrorCode); 505 } 506 textChanged = TRUE; 507 } 508 } 509 ubidi_setInverse(pBiDiTransform->pBidi, FALSE); 510 511 if (!textChanged && U_SUCCESS(*pErrorCode)) { 512 /* Text was not changed - just copy src to dest */ 513 if (destSize < srcLength) { 514 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 515 } else { 516 u_strncpy(dest, src, srcLength); 517 destLength = srcLength; 518 } 519 } 520 cleanup: 521 if (pOrigTransform != pBiDiTransform) { 522 ubiditransform_close(pBiDiTransform); 523 } else { 524 pBiDiTransform->dest = NULL; 525 pBiDiTransform->pDestLength = NULL; 526 pBiDiTransform->srcLength = 0; 527 pBiDiTransform->destSize = 0; 528 } 529 return U_FAILURE(*pErrorCode) ? 0 : destLength; 530 } 531