1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2016 and later: Unicode, Inc. and others. 5 * License & terms of use: http://www.unicode.org/copyright.html 6 * 7 ****************************************************************************** 8 * file name: ubiditransform.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2016jul24 14 * created by: Lina Kemmel 15 * 16 */ 17 18 #include "cmemory.h" 19 #include "unicode/ubidi.h" 20 #include "unicode/ustring.h" 21 #include "unicode/ushape.h" 22 #include "unicode/utf16.h" 23 #include "ustr_imp.h" 24 #include "unicode/ubiditransform.h" 25 26 /* Some convenience defines */ 27 #define LTR UBIDI_LTR 28 #define RTL UBIDI_RTL 29 #define LOGICAL UBIDI_LOGICAL 30 #define VISUAL UBIDI_VISUAL 31 #define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL 32 #define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 33 34 #define CHECK_LEN(STR, LEN, ERROR) { \ 35 if (LEN == 0) return 0; \ 36 if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ 37 if (LEN == -1) LEN = u_strlen(STR); \ 38 } 39 40 #define MAX_ACTIONS 7 41 42 /** 43 * Typedef for a pointer to a function, which performs some operation (such as 44 * reordering, setting "inverse" mode, character mirroring, etc.). Return value 45 * indicates whether the text was changed in the course of this operation or 46 * not. 47 */ 48 typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); 49 50 /** 51 * Structure that holds a predefined reordering scheme, including the following 52 * information: 53 * <ul> 54 * <li>an input base direction,</li> 55 * <li>an input order,</li> 56 * <li>an output base direction,</li> 57 * <li>an output order,</li> 58 * <li>a digit shaping direction,</li> 59 * <li>a letter shaping direction,</li> 60 * <li>a base direction that should be applied when the reordering engine is 61 * invoked (which can not always be derived from the caller-defined 62 * options),</li> 63 * <li>an array of pointers to functions that accomplish the bidi layout 64 * transformation.</li> 65 * </ul> 66 */ 67 typedef struct { 68 UBiDiLevel inLevel; /* input level */ 69 UBiDiOrder inOrder; /* input order */ 70 UBiDiLevel outLevel; /* output level */ 71 UBiDiOrder outOrder; /* output order */ 72 uint32_t digitsDir; /* digit shaping direction */ 73 uint32_t lettersDir; /* letter shaping direction */ 74 UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ 75 const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ 76 } ReorderingScheme; 77 78 struct UBiDiTransform { 79 UBiDi *pBidi; /* pointer to a UBiDi object */ 80 const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ 81 UChar *src; /* input text */ 82 UChar *dest; /* output text */ 83 uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ 84 uint32_t srcSize; /* input text capacity excluding the trailing zero */ 85 uint32_t destSize; /* output text capacity */ 86 uint32_t *pDestLength; /* number of UChars written to dest */ 87 uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ 88 uint32_t digits; /* digit option for ArabicShaping */ 89 uint32_t letters; /* letter option for ArabicShaping */ 90 }; 91 92 U_DRAFT UBiDiTransform* U_EXPORT2 93 ubiditransform_open(UErrorCode *pErrorCode) 94 { 95 UBiDiTransform *pBiDiTransform = NULL; 96 if (U_SUCCESS(*pErrorCode)) { 97 pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); 98 if (pBiDiTransform == NULL) { 99 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 100 } 101 } 102 return pBiDiTransform; 103 } 104 105 U_DRAFT void U_EXPORT2 106 ubiditransform_close(UBiDiTransform *pBiDiTransform) 107 { 108 if (pBiDiTransform != NULL) { 109 if (pBiDiTransform->pBidi != NULL) { 110 ubidi_close(pBiDiTransform->pBidi); 111 } 112 if (pBiDiTransform->src != NULL) { 113 uprv_free(pBiDiTransform->src); 114 } 115 uprv_free(pBiDiTransform); 116 } 117 } 118 119 /** 120 * Performs Bidi resolution of text. 121 * 122 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 123 * @param pErrorCode Pointer to the error code value. 124 * 125 * @return Whether or not this function modifies the text. Besides the return 126 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 127 */ 128 static UBool 129 action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 130 { 131 ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, 132 pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); 133 return FALSE; 134 } 135 136 /** 137 * Performs basic reordering of text (Logical -> Visual LTR). 138 * 139 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 140 * @param pErrorCode Pointer to the error code value. 141 * 142 * @return Whether or not this function modifies the text. Besides the return 143 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 144 */ 145 static UBool 146 action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 147 { 148 ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, 149 pTransform->reorderingOptions, pErrorCode); 150 151 *pTransform->pDestLength = pTransform->srcLength; 152 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; 153 return TRUE; 154 } 155 156 /** 157 * Sets "inverse" mode on the <code>UBiDi</code> object. 158 * 159 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 160 * @param pErrorCode Pointer to the error code value. 161 * 162 * @return Whether or not this function modifies the text. Besides the return 163 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 164 */ 165 static UBool 166 action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 167 { 168 ubidi_setInverse(pTransform->pBidi, TRUE); 169 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); 170 return FALSE; 171 } 172 173 /** 174 * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL 175 * transformation. 176 * 177 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 178 * @param pErrorCode Pointer to the error code value. 179 * 180 * @return Whether or not this function modifies the text. Besides the return 181 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 182 */ 183 static UBool 184 action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 185 { 186 ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); 187 return FALSE; 188 } 189 190 /** 191 * Performs string reverse. 192 * 193 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 194 * @param pErrorCode Pointer to the error code value. 195 * 196 * @return Whether or not this function modifies the text. Besides the return 197 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 198 */ 199 static UBool 200 action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 201 { 202 ubidi_writeReverse(pTransform->src, pTransform->srcLength, 203 pTransform->dest, pTransform->destSize, 204 UBIDI_REORDER_DEFAULT, pErrorCode); 205 *pTransform->pDestLength = pTransform->srcLength; 206 return TRUE; 207 } 208 209 /** 210 * Applies a new value to the text that serves as input at the current 211 * processing step. This value is identical to the original one when we begin 212 * the processing, but usually changes as the transformation progresses. 213 * 214 * @param pTransform A pointer to the <code>UBiDiTransform</code> structure. 215 * @param newSrc A pointer whose value is to be used as input text. 216 * @param newLength A length of the new text in <code>UChar</code>s. 217 * @param newSize A new source capacity in <code>UChar</code>s. 218 * @param pErrorCode Pointer to the error code value. 219 */ 220 static void 221 updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, 222 uint32_t newSize, UErrorCode *pErrorCode) 223 { 224 if (newSize < newLength) { 225 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 226 return; 227 } 228 if (newSize > pTransform->srcSize) { 229 newSize += 50; // allocate slightly more than needed right now 230 if (pTransform->src != NULL) { 231 uprv_free(pTransform->src); 232 pTransform->src = NULL; 233 } 234 pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); 235 if (pTransform->src == NULL) { 236 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 237 //pTransform->srcLength = pTransform->srcSize = 0; 238 return; 239 } 240 pTransform->srcSize = newSize; 241 } 242 u_strncpy(pTransform->src, newSrc, newLength); 243 pTransform->srcLength = u_terminateUChars(pTransform->src, 244 pTransform->srcSize, newLength, pErrorCode); 245 } 246 247 /** 248 * Calls a lower level shaping function. 249 * 250 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 251 * @param options Shaping options. 252 * @param pErrorCode Pointer to the error code value. 253 */ 254 static void 255 doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) 256 { 257 *pTransform->pDestLength = u_shapeArabic(pTransform->src, 258 pTransform->srcLength, pTransform->dest, pTransform->destSize, 259 options, pErrorCode); 260 } 261 262 /** 263 * Performs digit and letter shaping. 264 * 265 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 266 * @param pErrorCode Pointer to the error code value. 267 * 268 * @return Whether or not this function modifies the text. Besides the return 269 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 270 */ 271 static UBool 272 action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 273 { 274 if ((pTransform->letters | pTransform->digits) == 0) { 275 return FALSE; 276 } 277 if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { 278 doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, 279 pErrorCode); 280 } else { 281 doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); 282 if (U_SUCCESS(*pErrorCode)) { 283 updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, 284 *pTransform->pDestLength, pErrorCode); 285 doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, 286 pErrorCode); 287 } 288 } 289 return TRUE; 290 } 291 292 /** 293 * Performs character mirroring. 294 * 295 * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. 296 * @param pErrorCode Pointer to the error code value. 297 * 298 * @return Whether or not this function modifies the text. Besides the return 299 * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. 300 */ 301 static UBool 302 action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) 303 { 304 UChar32 c; 305 uint32_t i = 0, j = 0; 306 if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { 307 return FALSE; 308 } 309 if (pTransform->destSize < pTransform->srcLength) { 310 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 311 return FALSE; 312 } 313 do { 314 UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; 315 U16_NEXT(pTransform->src, i, pTransform->srcLength, c); 316 U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); 317 } while (i < pTransform->srcLength); 318 319 *pTransform->pDestLength = pTransform->srcLength; 320 pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; 321 return TRUE; 322 } 323 324 /** 325 * All possible reordering schemes. 326 * 327 */ 328 static const ReorderingScheme Schemes[] = 329 { 330 /* 0: Logical LTR => Visual LTR */ 331 {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 332 {action_shapeArabic, action_resolve, action_reorder, NULL}}, 333 /* 1: Logical RTL => Visual LTR */ 334 {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 335 {action_resolve, action_reorder, action_shapeArabic, NULL}}, 336 /* 2: Logical LTR => Visual RTL */ 337 {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 338 {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, 339 /* 3: Logical RTL => Visual RTL */ 340 {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 341 {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, 342 /* 4: Visual LTR => Logical RTL */ 343 {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 344 {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, 345 /* 5: Visual RTL => Logical RTL */ 346 {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, 347 {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, 348 /* 6: Visual LTR => Logical LTR */ 349 {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 350 {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, 351 /* 7: Visual RTL => Logical LTR */ 352 {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 353 {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, 354 /* 8: Logical LTR => Logical RTL */ 355 {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 356 {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, 357 /* 9: Logical RTL => Logical LTR */ 358 {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, 359 {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, 360 /* 10: Visual LTR => Visual RTL */ 361 {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 362 {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, 363 /* 11: Visual RTL => Visual LTR */ 364 {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 365 {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, 366 /* 12: Logical LTR => Logical LTR */ 367 {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, 368 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 369 /* 13: Logical RTL => Logical RTL */ 370 {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, 371 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 372 /* 14: Visual LTR => Visual LTR */ 373 {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 374 {action_resolve, action_mirror, action_shapeArabic, NULL}}, 375 /* 15: Visual RTL => Visual RTL */ 376 {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, 377 {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} 378 }; 379 380 static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); 381 382 /** 383 * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or 384 * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that 385 * of the first strong bidi character. 386 */ 387 static void 388 resolveBaseDirection(const UChar *text, uint32_t length, 389 UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) 390 { 391 switch (*pInLevel) { 392 case UBIDI_DEFAULT_LTR: 393 case UBIDI_DEFAULT_RTL: { 394 UBiDiLevel level = ubidi_getBaseDirection(text, length); 395 *pInLevel = level != UBIDI_NEUTRAL ? level 396 : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; 397 break; 398 } 399 default: 400 *pInLevel &= 1; 401 break; 402 } 403 switch (*pOutLevel) { 404 case UBIDI_DEFAULT_LTR: 405 case UBIDI_DEFAULT_RTL: 406 *pOutLevel = *pInLevel; 407 break; 408 default: 409 *pOutLevel &= 1; 410 break; 411 } 412 } 413 414 /** 415 * Finds a valid <code>ReorderingScheme</code> matching the 416 * caller-defined scheme. 417 * 418 * @return A valid <code>ReorderingScheme</code> object or NULL 419 */ 420 static const ReorderingScheme* 421 findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, 422 UBiDiOrder inOrder, UBiDiOrder outOrder) 423 { 424 uint32_t i; 425 for (i = 0; i < nSchemes; i++) { 426 const ReorderingScheme *pScheme = Schemes + i; 427 if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel 428 && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { 429 return pScheme; 430 } 431 } 432 return NULL; 433 } 434 435 U_DRAFT uint32_t U_EXPORT2 436 ubiditransform_transform(UBiDiTransform *pBiDiTransform, 437 const UChar *src, int32_t srcLength, 438 UChar *dest, int32_t destSize, 439 UBiDiLevel inParaLevel, UBiDiOrder inOrder, 440 UBiDiLevel outParaLevel, UBiDiOrder outOrder, 441 UBiDiMirroring doMirroring, uint32_t shapingOptions, 442 UErrorCode *pErrorCode) 443 { 444 uint32_t destLength = 0; 445 UBool textChanged = FALSE; 446 const UBiDiTransform *pOrigTransform = pBiDiTransform; 447 const UBiDiAction *action = NULL; 448 449 if (U_FAILURE(*pErrorCode)) { 450 return 0; 451 } 452 if (src == NULL || dest == NULL) { 453 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 454 return 0; 455 } 456 CHECK_LEN(src, srcLength, pErrorCode); 457 CHECK_LEN(dest, destSize, pErrorCode); 458 459 if (pBiDiTransform == NULL) { 460 pBiDiTransform = ubiditransform_open(pErrorCode); 461 if (U_FAILURE(*pErrorCode)) { 462 return 0; 463 } 464 } 465 /* Current limitation: in multiple paragraphs will be resolved according 466 to the 1st paragraph */ 467 resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); 468 469 pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, 470 inOrder, outOrder); 471 if (pBiDiTransform->pActiveScheme == NULL) { 472 goto cleanup; 473 } 474 pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING 475 : UBIDI_REORDER_DEFAULT; 476 477 /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text 478 scheme at the time shaping is invoked. */ 479 shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; 480 pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; 481 pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; 482 483 updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); 484 if (U_FAILURE(*pErrorCode)) { 485 goto cleanup; 486 } 487 if (pBiDiTransform->pBidi == NULL) { 488 pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); 489 if (U_FAILURE(*pErrorCode)) { 490 goto cleanup; 491 } 492 } 493 pBiDiTransform->dest = dest; 494 pBiDiTransform->destSize = destSize; 495 pBiDiTransform->pDestLength = &destLength; 496 497 /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ 498 for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { 499 if ((*action)(pBiDiTransform, pErrorCode)) { 500 if (action + 1) { 501 updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, 502 *pBiDiTransform->pDestLength, pErrorCode); 503 } 504 textChanged = TRUE; 505 } 506 } 507 ubidi_setInverse(pBiDiTransform->pBidi, FALSE); 508 509 if (!textChanged && U_SUCCESS(*pErrorCode)) { 510 /* Text was not changed - just copy src to dest */ 511 if (destSize < srcLength) { 512 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 513 } else { 514 u_strncpy(dest, src, srcLength); 515 destLength = srcLength; 516 } 517 } 518 cleanup: 519 if (pOrigTransform != pBiDiTransform) { 520 ubiditransform_close(pBiDiTransform); 521 } else { 522 pBiDiTransform->dest = NULL; 523 pBiDiTransform->pDestLength = NULL; 524 pBiDiTransform->srcLength = 0; 525 pBiDiTransform->destSize = 0; 526 } 527 return U_FAILURE(*pErrorCode) ? 0 : destLength; 528 } 529