1 /* 2 / * 3 * (C) Copyright IBM Corp. 1998-2009 - All Rights Reserved 4 * 5 */ 6 7 #include "LETypes.h" 8 #include "OpenTypeTables.h" 9 #include "OpenTypeUtilities.h" 10 #include "IndicReordering.h" 11 #include "LEGlyphStorage.h" 12 #include "MPreFixups.h" 13 14 U_NAMESPACE_BEGIN 15 16 #define loclFeatureTag LE_LOCL_FEATURE_TAG 17 #define initFeatureTag LE_INIT_FEATURE_TAG 18 #define nuktFeatureTag LE_NUKT_FEATURE_TAG 19 #define akhnFeatureTag LE_AKHN_FEATURE_TAG 20 #define rphfFeatureTag LE_RPHF_FEATURE_TAG 21 #define rkrfFeatureTag LE_RKRF_FEATURE_TAG 22 #define blwfFeatureTag LE_BLWF_FEATURE_TAG 23 #define halfFeatureTag LE_HALF_FEATURE_TAG 24 #define pstfFeatureTag LE_PSTF_FEATURE_TAG 25 #define vatuFeatureTag LE_VATU_FEATURE_TAG 26 #define presFeatureTag LE_PRES_FEATURE_TAG 27 #define blwsFeatureTag LE_BLWS_FEATURE_TAG 28 #define abvsFeatureTag LE_ABVS_FEATURE_TAG 29 #define pstsFeatureTag LE_PSTS_FEATURE_TAG 30 #define halnFeatureTag LE_HALN_FEATURE_TAG 31 #define cjctFeatureTag LE_CJCT_FEATURE_TAG 32 #define blwmFeatureTag LE_BLWM_FEATURE_TAG 33 #define abvmFeatureTag LE_ABVM_FEATURE_TAG 34 #define distFeatureTag LE_DIST_FEATURE_TAG 35 #define caltFeatureTag LE_CALT_FEATURE_TAG 36 #define kernFeatureTag LE_KERN_FEATURE_TAG 37 38 #define loclFeatureMask 0x80000000UL 39 #define rphfFeatureMask 0x40000000UL 40 #define blwfFeatureMask 0x20000000UL 41 #define halfFeatureMask 0x10000000UL 42 #define pstfFeatureMask 0x08000000UL 43 #define nuktFeatureMask 0x04000000UL 44 #define akhnFeatureMask 0x02000000UL 45 #define vatuFeatureMask 0x01000000UL 46 #define presFeatureMask 0x00800000UL 47 #define blwsFeatureMask 0x00400000UL 48 #define abvsFeatureMask 0x00200000UL 49 #define pstsFeatureMask 0x00100000UL 50 #define halnFeatureMask 0x00080000UL 51 #define blwmFeatureMask 0x00040000UL 52 #define abvmFeatureMask 0x00020000UL 53 #define distFeatureMask 0x00010000UL 54 #define initFeatureMask 0x00008000UL 55 #define cjctFeatureMask 0x00004000UL 56 #define rkrfFeatureMask 0x00002000UL 57 #define caltFeatureMask 0x00001000UL 58 #define kernFeatureMask 0x00000800UL 59 60 // Syllable structure bits 61 #define baseConsonantMask 0x00000400UL 62 #define consonantMask 0x00000200UL 63 #define halfConsonantMask 0x00000100UL 64 #define rephConsonantMask 0x00000080UL 65 #define matraMask 0x00000040UL 66 #define vowelModifierMask 0x00000020UL 67 #define markPositionMask 0x00000018UL 68 69 #define postBasePosition 0x00000000UL 70 #define preBasePosition 0x00000008UL 71 #define aboveBasePosition 0x00000010UL 72 #define belowBasePosition 0x00000018UL 73 74 #define repositionedGlyphMask 0x00000002UL 75 76 #define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask ) 77 #define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask ) 78 #define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask ) 79 80 81 #define C_MALAYALAM_VOWEL_SIGN_U 0x0D41 82 #define C_DOTTED_CIRCLE 0x25CC 83 #define NO_GLYPH 0xFFFF 84 85 #define INDIC_BLOCK_SIZE 0x7F 86 87 class IndicReorderingOutput : public UMemory { 88 private: 89 le_int32 fSyllableCount; 90 le_int32 fOutIndex; 91 LEUnicode *fOutChars; 92 93 LEGlyphStorage &fGlyphStorage; 94 95 LEUnicode fMpre; 96 le_int32 fMpreIndex; 97 98 LEUnicode fMbelow; 99 le_int32 fMbelowIndex; 100 101 LEUnicode fMabove; 102 le_int32 fMaboveIndex; 103 104 LEUnicode fMpost; 105 le_int32 fMpostIndex; 106 107 LEUnicode fLengthMark; 108 le_int32 fLengthMarkIndex; 109 110 LEUnicode fAlLakuna; 111 le_int32 fAlLakunaIndex; 112 113 FeatureMask fMatraFeatures; 114 115 le_int32 fMPreOutIndex; 116 MPreFixups *fMPreFixups; 117 118 LEUnicode fVMabove; 119 LEUnicode fVMpost; 120 le_int32 fVMIndex; 121 FeatureMask fVMFeatures; 122 123 LEUnicode fSMabove; 124 LEUnicode fSMbelow; 125 le_int32 fSMIndex; 126 FeatureMask fSMFeatures; 127 128 129 void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass) 130 { 131 // FIXME: check if already set, or if not a matra... 132 if (IndicClassTable::isLengthMark(matraClass)) { 133 fLengthMark = matra; 134 fLengthMarkIndex = matraIndex; 135 } else if (IndicClassTable::isAlLakuna(matraClass)) { 136 fAlLakuna = matra; 137 fAlLakunaIndex = matraIndex; 138 } else { 139 switch (matraClass & CF_POS_MASK) { 140 case CF_POS_BEFORE: 141 fMpre = matra; 142 fMpreIndex = matraIndex; 143 break; 144 145 case CF_POS_BELOW: 146 fMbelow = matra; 147 fMbelowIndex = matraIndex; 148 break; 149 150 case CF_POS_ABOVE: 151 fMabove = matra; 152 fMaboveIndex = matraIndex; 153 break; 154 155 case CF_POS_AFTER: 156 fMpost = matra; 157 fMpostIndex = matraIndex; 158 break; 159 160 default: 161 // can't get here... 162 break; 163 } 164 } 165 } 166 167 public: 168 IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups) 169 : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage), 170 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0), 171 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0), 172 fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups), 173 fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0), 174 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0) 175 { 176 // nothing else to do... 177 } 178 179 ~IndicReorderingOutput() 180 { 181 // nothing to do here... 182 } 183 184 void reset() 185 { 186 fSyllableCount += 1; 187 188 fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0; 189 fMPreOutIndex = -1; 190 191 fVMabove = fVMpost = 0; 192 fSMabove = fSMbelow = 0; 193 } 194 195 void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) 196 { 197 LEErrorCode success = LE_NO_ERROR; 198 199 fOutChars[fOutIndex] = ch; 200 201 fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); 202 fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); 203 204 fOutIndex += 1; 205 } 206 207 void setFeatures ( le_uint32 charIndex, FeatureMask charFeatures) 208 { 209 LEErrorCode success = LE_NO_ERROR; 210 211 fGlyphStorage.setAuxData( charIndex, charFeatures, success ); 212 213 } 214 215 FeatureMask getFeatures ( le_uint32 charIndex ) 216 { 217 LEErrorCode success = LE_NO_ERROR; 218 return fGlyphStorage.getAuxData(charIndex,success); 219 } 220 221 void decomposeReorderMatras ( const IndicClassTable *classTable, le_int32 beginSyllable, le_int32 nextSyllable, le_int32 inv_count ) { 222 le_int32 i; 223 LEErrorCode success = LE_NO_ERROR; 224 225 for ( i = beginSyllable ; i < nextSyllable ; i++ ) { 226 if ( classTable->isMatra(fOutChars[i+inv_count])) { 227 IndicClassTable::CharClass matraClass = classTable->getCharClass(fOutChars[i+inv_count]); 228 if ( classTable->isSplitMatra(matraClass)) { 229 le_int32 saveIndex = fGlyphStorage.getCharIndex(i+inv_count,success); 230 le_uint32 saveAuxData = fGlyphStorage.getAuxData(i+inv_count,success); 231 const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); 232 int j; 233 for (j = 0 ; *(splitMatra)[j] != 0 ; j++) { 234 LEUnicode piece = (*splitMatra)[j]; 235 if ( j == 0 ) { 236 fOutChars[i+inv_count] = piece; 237 matraClass = classTable->getCharClass(piece); 238 } else { 239 insertCharacter(piece,i+1+inv_count,saveIndex,saveAuxData); 240 nextSyllable++; 241 } 242 } 243 } 244 245 if ((matraClass & CF_POS_MASK) == CF_POS_BEFORE) { 246 moveCharacter(i+inv_count,beginSyllable+inv_count); 247 } 248 } 249 } 250 } 251 252 void moveCharacter( le_int32 fromPosition, le_int32 toPosition ) { 253 le_int32 i,saveIndex; 254 le_uint32 saveAuxData; 255 LEUnicode saveChar = fOutChars[fromPosition]; 256 LEErrorCode success = LE_NO_ERROR; 257 LEErrorCode success2 = LE_NO_ERROR; 258 saveIndex = fGlyphStorage.getCharIndex(fromPosition,success); 259 saveAuxData = fGlyphStorage.getAuxData(fromPosition,success); 260 261 if ( fromPosition > toPosition ) { 262 for ( i = fromPosition ; i > toPosition ; i-- ) { 263 fOutChars[i] = fOutChars[i-1]; 264 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success2),success); 265 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success2), success); 266 267 } 268 } else { 269 for ( i = fromPosition ; i < toPosition ; i++ ) { 270 fOutChars[i] = fOutChars[i+1]; 271 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success2),success); 272 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success2), success); 273 } 274 275 } 276 fOutChars[toPosition] = saveChar; 277 fGlyphStorage.setCharIndex(toPosition,saveIndex,success); 278 fGlyphStorage.setAuxData(toPosition,saveAuxData,success); 279 280 } 281 void insertCharacter( LEUnicode ch, le_int32 toPosition, le_int32 charIndex, le_uint32 auxData ) { 282 LEErrorCode success = LE_NO_ERROR; 283 le_int32 i; 284 fOutIndex += 1; 285 286 for ( i = fOutIndex ; i > toPosition ; i--) { 287 fOutChars[i] = fOutChars[i-1]; 288 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success),success); 289 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success), success); 290 } 291 292 fOutChars[toPosition] = ch; 293 fGlyphStorage.setCharIndex(toPosition,charIndex,success); 294 fGlyphStorage.setAuxData(toPosition,auxData,success); 295 296 } 297 void removeCharacter( le_int32 fromPosition ) { 298 LEErrorCode success = LE_NO_ERROR; 299 le_int32 i; 300 fOutIndex -= 1; 301 302 for ( i = fromPosition ; i < fOutIndex ; i--) { 303 fOutChars[i] = fOutChars[i+1]; 304 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success),success); 305 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success), success); 306 } 307 } 308 309 le_bool noteMatra(const IndicClassTable *classTable, LEUnicode matra, le_uint32 matraIndex, FeatureMask matraFeatures, le_bool wordStart) 310 { 311 IndicClassTable::CharClass matraClass = classTable->getCharClass(matra); 312 313 fMatraFeatures = matraFeatures; 314 315 if (wordStart) { 316 fMatraFeatures |= initFeatureMask; 317 } 318 319 if (IndicClassTable::isMatra(matraClass)) { 320 if (IndicClassTable::isSplitMatra(matraClass)) { 321 const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); 322 int i; 323 324 for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) { 325 LEUnicode piece = (*splitMatra)[i]; 326 IndicClassTable::CharClass pieceClass = classTable->getCharClass(piece); 327 328 saveMatra(piece, matraIndex, pieceClass); 329 } 330 } else { 331 saveMatra(matra, matraIndex, matraClass); 332 } 333 334 return TRUE; 335 } 336 337 return FALSE; 338 } 339 340 void noteVowelModifier(const IndicClassTable *classTable, LEUnicode vowelModifier, le_uint32 vowelModifierIndex, FeatureMask vowelModifierFeatures) 341 { 342 IndicClassTable::CharClass vmClass = classTable->getCharClass(vowelModifier); 343 344 fVMIndex = vowelModifierIndex; 345 fVMFeatures = vowelModifierFeatures; 346 347 if (IndicClassTable::isVowelModifier(vmClass)) { 348 switch (vmClass & CF_POS_MASK) { 349 case CF_POS_ABOVE: 350 fVMabove = vowelModifier; 351 break; 352 353 case CF_POS_AFTER: 354 fVMpost = vowelModifier; 355 break; 356 357 default: 358 // FIXME: this is an error... 359 break; 360 } 361 } 362 } 363 364 void noteStressMark(const IndicClassTable *classTable, LEUnicode stressMark, le_uint32 stressMarkIndex, FeatureMask stressMarkFeatures) 365 { 366 IndicClassTable::CharClass smClass = classTable->getCharClass(stressMark); 367 368 fSMIndex = stressMarkIndex; 369 fSMFeatures = stressMarkFeatures; 370 371 if (IndicClassTable::isStressMark(smClass)) { 372 switch (smClass & CF_POS_MASK) { 373 case CF_POS_ABOVE: 374 fSMabove = stressMark; 375 break; 376 377 case CF_POS_BELOW: 378 fSMbelow = stressMark; 379 break; 380 381 default: 382 // FIXME: this is an error... 383 break; 384 } 385 } 386 } 387 388 void noteBaseConsonant() 389 { 390 if (fMPreFixups != NULL && fMPreOutIndex >= 0) { 391 fMPreFixups->add(fOutIndex, fMPreOutIndex); 392 } 393 } 394 395 // Handles Al-Lakuna in Sinhala split vowels. 396 void writeAlLakuna() 397 { 398 if (fAlLakuna != 0) { 399 writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures); 400 } 401 } 402 403 void writeMpre() 404 { 405 if (fMpre != 0) { 406 fMPreOutIndex = fOutIndex; 407 writeChar(fMpre, fMpreIndex, fMatraFeatures); 408 } 409 } 410 411 void writeMbelow() 412 { 413 if (fMbelow != 0) { 414 writeChar(fMbelow, fMbelowIndex, fMatraFeatures); 415 } 416 } 417 418 void writeMabove() 419 { 420 if (fMabove != 0) { 421 writeChar(fMabove, fMaboveIndex, fMatraFeatures); 422 } 423 } 424 425 void writeMpost() 426 { 427 if (fMpost != 0) { 428 writeChar(fMpost, fMpostIndex, fMatraFeatures); 429 } 430 } 431 432 void writeLengthMark() 433 { 434 if (fLengthMark != 0) { 435 writeChar(fLengthMark, fLengthMarkIndex, fMatraFeatures); 436 } 437 } 438 439 void writeVMabove() 440 { 441 if (fVMabove != 0) { 442 writeChar(fVMabove, fVMIndex, fVMFeatures); 443 } 444 } 445 446 void writeVMpost() 447 { 448 if (fVMpost != 0) { 449 writeChar(fVMpost, fVMIndex, fVMFeatures); 450 } 451 } 452 453 void writeSMabove() 454 { 455 if (fSMabove != 0) { 456 writeChar(fSMabove, fSMIndex, fSMFeatures); 457 } 458 } 459 460 void writeSMbelow() 461 { 462 if (fSMbelow != 0) { 463 writeChar(fSMbelow, fSMIndex, fSMFeatures); 464 } 465 } 466 467 le_int32 getOutputIndex() 468 { 469 return fOutIndex; 470 } 471 }; 472 473 474 475 // TODO: Find better names for these! 476 #define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask) 477 #define tagArray3 (pstfFeatureMask | tagArray4) 478 #define tagArray2 (halfFeatureMask | tagArray3) 479 #define tagArray1 (blwfFeatureMask | tagArray2) 480 #define tagArray0 (rphfFeatureMask | tagArray1) 481 482 static const FeatureMap featureMap[] = { 483 {loclFeatureTag, loclFeatureMask}, 484 {initFeatureTag, initFeatureMask}, 485 {nuktFeatureTag, nuktFeatureMask}, 486 {akhnFeatureTag, akhnFeatureMask}, 487 {rphfFeatureTag, rphfFeatureMask}, 488 {blwfFeatureTag, blwfFeatureMask}, 489 {halfFeatureTag, halfFeatureMask}, 490 {pstfFeatureTag, pstfFeatureMask}, 491 {vatuFeatureTag, vatuFeatureMask}, 492 {presFeatureTag, presFeatureMask}, 493 {blwsFeatureTag, blwsFeatureMask}, 494 {abvsFeatureTag, abvsFeatureMask}, 495 {pstsFeatureTag, pstsFeatureMask}, 496 {halnFeatureTag, halnFeatureMask}, 497 {blwmFeatureTag, blwmFeatureMask}, 498 {abvmFeatureTag, abvmFeatureMask}, 499 {distFeatureTag, distFeatureMask} 500 }; 501 502 static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap); 503 504 static const FeatureMap v2FeatureMap[] = { 505 {loclFeatureTag, loclFeatureMask}, 506 {nuktFeatureTag, nuktFeatureMask}, 507 {akhnFeatureTag, akhnFeatureMask}, 508 {rphfFeatureTag, rphfFeatureMask}, 509 {rkrfFeatureTag, rkrfFeatureMask}, 510 {blwfFeatureTag, blwfFeatureMask}, 511 {halfFeatureTag, halfFeatureMask}, 512 {vatuFeatureTag, vatuFeatureMask}, 513 {cjctFeatureTag, cjctFeatureMask}, 514 {presFeatureTag, presFeatureMask}, 515 {abvsFeatureTag, abvsFeatureMask}, 516 {blwsFeatureTag, blwsFeatureMask}, 517 {pstsFeatureTag, pstsFeatureMask}, 518 {halnFeatureTag, halnFeatureMask}, 519 {caltFeatureTag, caltFeatureMask}, 520 {kernFeatureTag, kernFeatureMask}, 521 {distFeatureTag, distFeatureMask}, 522 {abvmFeatureTag, abvmFeatureMask}, 523 {blwmFeatureTag, blwmFeatureMask} 524 }; 525 526 static const le_int32 v2FeatureMapCount = LE_ARRAY_SIZE(v2FeatureMap); 527 528 static const le_int8 stateTable[][CC_COUNT] = 529 { 530 // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al 531 { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state 532 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state 533 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta 534 {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant 535 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama 536 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels 537 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark 538 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama 539 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama 540 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel 541 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel 542 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv 543 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?) 544 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant 545 }; 546 547 548 const FeatureMap *IndicReordering::getFeatureMap(le_int32 &count) 549 { 550 count = featureCount; 551 552 return featureMap; 553 } 554 555 const FeatureMap *IndicReordering::getv2FeatureMap(le_int32 &count) 556 { 557 count = v2FeatureMapCount; 558 559 return v2FeatureMap; 560 } 561 562 le_int32 IndicReordering::findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) 563 { 564 le_int32 cursor = prev; 565 le_int8 state = 0; 566 567 while (cursor < charCount) { 568 IndicClassTable::CharClass charClass = classTable->getCharClass(chars[cursor]); 569 570 state = stateTable[state][charClass & CF_CLASS_MASK]; 571 572 if (state < 0) { 573 break; 574 } 575 576 cursor += 1; 577 } 578 579 return cursor; 580 } 581 582 le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, 583 LEUnicode *outChars, LEGlyphStorage &glyphStorage, 584 MPreFixups **outMPreFixups, LEErrorCode& success) 585 { 586 if (LE_FAILURE(success)) { 587 return 0; 588 } 589 590 MPreFixups *mpreFixups = NULL; 591 const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); 592 593 if (classTable->scriptFlags & SF_MPRE_FIXUP) { 594 mpreFixups = new MPreFixups(charCount); 595 if (mpreFixups == NULL) { 596 success = LE_MEMORY_ALLOCATION_ERROR; 597 return 0; 598 } 599 } 600 601 IndicReorderingOutput output(outChars, glyphStorage, mpreFixups); 602 le_int32 i, prev = 0; 603 le_bool lastInWord = FALSE; 604 605 while (prev < charCount) { 606 le_int32 syllable = findSyllable(classTable, chars, prev, charCount); 607 le_int32 matra, markStart = syllable; 608 609 output.reset(); 610 611 if (classTable->isStressMark(chars[markStart - 1])) { 612 markStart -= 1; 613 output.noteStressMark(classTable, chars[markStart], markStart, tagArray1); 614 } 615 616 if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) { 617 markStart -= 1; 618 output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1); 619 } 620 621 matra = markStart - 1; 622 623 while (output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) && matra != prev) { 624 matra -= 1; 625 } 626 627 lastInWord = TRUE; 628 629 switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) { 630 case CC_RESERVED: 631 lastInWord = FALSE; 632 /* fall through */ 633 634 case CC_INDEPENDENT_VOWEL: 635 case CC_ZERO_WIDTH_MARK: 636 for (i = prev; i < syllable; i += 1) { 637 output.writeChar(chars[i], i, tagArray1); 638 } 639 640 break; 641 642 case CC_AL_LAKUNA: 643 case CC_NUKTA: 644 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 645 output.writeChar(chars[prev], prev, tagArray1); 646 break; 647 648 case CC_VIRAMA: 649 // A lone virama is illegal unless it follows a 650 // MALAYALAM_VOWEL_SIGN_U. Such a usage is called 651 // "samvruthokaram". 652 if (chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) { 653 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 654 } 655 656 output.writeChar(chars[prev], prev, tagArray1); 657 break; 658 659 case CC_DEPENDENT_VOWEL: 660 case CC_SPLIT_VOWEL_PIECE_1: 661 case CC_SPLIT_VOWEL_PIECE_2: 662 case CC_SPLIT_VOWEL_PIECE_3: 663 case CC_VOWEL_MODIFIER: 664 case CC_STRESS_MARK: 665 output.writeMpre(); 666 667 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 668 669 output.writeMbelow(); 670 output.writeSMbelow(); 671 output.writeMabove(); 672 673 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { 674 output.writeMpost(); 675 } 676 677 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { 678 output.writeVMabove(); 679 output.writeSMabove(); // FIXME: there are no SM's in these scripts... 680 } 681 682 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 683 output.writeMpost(); 684 } 685 686 output.writeLengthMark(); 687 output.writeAlLakuna(); 688 689 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { 690 output.writeVMabove(); 691 output.writeSMabove(); 692 } 693 694 output.writeVMpost(); 695 break; 696 697 case CC_INDEPENDENT_VOWEL_2: 698 case CC_INDEPENDENT_VOWEL_3: 699 case CC_CONSONANT: 700 case CC_CONSONANT_WITH_NUKTA: 701 { 702 le_uint32 length = markStart - prev; 703 le_int32 lastConsonant = markStart - 1; 704 le_int32 baseLimit = prev; 705 706 // Check for REPH at front of syllable 707 if (length > 2 && classTable->isReph(chars[prev]) && classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) { 708 baseLimit += 2; 709 710 // Check for eyelash RA, if the script supports it 711 if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 && 712 chars[baseLimit] == C_SIGN_ZWJ) { 713 if (length > 3) { 714 baseLimit += 1; 715 } else { 716 baseLimit -= 2; 717 } 718 } 719 } 720 721 while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) { 722 lastConsonant -= 1; 723 } 724 725 IndicClassTable::CharClass charClass = CC_RESERVED; 726 IndicClassTable::CharClass nextClass = CC_RESERVED; 727 le_int32 baseConsonant = lastConsonant; 728 le_int32 postBase = lastConsonant + 1; 729 le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK; 730 le_bool seenVattu = FALSE; 731 le_bool seenBelowBaseForm = FALSE; 732 le_bool hasNukta = FALSE; 733 le_bool hasBelowBaseForm = FALSE; 734 le_bool hasPostBaseForm = FALSE; 735 736 if (postBase < markStart && classTable->isNukta(chars[postBase])) { 737 charClass = CC_NUKTA; 738 postBase += 1; 739 } 740 741 while (baseConsonant > baseLimit) { 742 nextClass = charClass; 743 hasNukta = IndicClassTable::isNukta(nextClass); 744 charClass = classTable->getCharClass(chars[baseConsonant]); 745 746 hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta; 747 hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta; 748 749 if (IndicClassTable::isConsonant(charClass)) { 750 if (postBaseLimit == 0 || seenVattu || 751 (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) || 752 !(hasBelowBaseForm || hasPostBaseForm)) { 753 break; 754 } 755 756 // consonants with nuktas are never vattus 757 seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta; 758 759 // consonants with nuktas never have below- or post-base forms 760 if (hasPostBaseForm) { 761 if (seenBelowBaseForm) { 762 break; 763 } 764 765 postBase = baseConsonant; 766 } else if (hasBelowBaseForm) { 767 seenBelowBaseForm = TRUE; 768 } 769 770 postBaseLimit -= 1; 771 } 772 773 baseConsonant -= 1; 774 } 775 776 // Write Mpre 777 output.writeMpre(); 778 779 // Write eyelash RA 780 // NOTE: baseLimit == prev + 3 iff eyelash RA present... 781 if (baseLimit == prev + 3) { 782 output.writeChar(chars[prev], prev, tagArray2); 783 output.writeChar(chars[prev + 1], prev + 1, tagArray2); 784 output.writeChar(chars[prev + 2], prev + 2, tagArray2); 785 } 786 787 // write any pre-base consonants 788 le_bool supressVattu = TRUE; 789 790 for (i = baseLimit; i < baseConsonant; i += 1) { 791 LEUnicode ch = chars[i]; 792 // Don't put 'blwf' on first consonant. 793 FeatureMask features = (i == baseLimit? tagArray2 : tagArray1); 794 795 charClass = classTable->getCharClass(ch); 796 nextClass = classTable->getCharClass(chars[i + 1]); 797 hasNukta = IndicClassTable::isNukta(nextClass); 798 799 if (IndicClassTable::isConsonant(charClass)) { 800 if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) { 801 features = tagArray4; 802 } 803 804 supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta; 805 } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ) 806 { 807 features = tagArray4; 808 } 809 810 output.writeChar(ch, i, features); 811 } 812 813 le_int32 bcSpan = baseConsonant + 1; 814 815 if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) { 816 bcSpan += 1; 817 } 818 819 if (baseConsonant == lastConsonant && bcSpan < markStart && 820 (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) { 821 bcSpan += 1; 822 823 if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) { 824 bcSpan += 1; 825 } 826 } 827 828 // note the base consonant for post-GSUB fixups 829 output.noteBaseConsonant(); 830 831 // write base consonant 832 for (i = baseConsonant; i < bcSpan; i += 1) { 833 output.writeChar(chars[i], i, tagArray4); 834 } 835 836 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { 837 output.writeMbelow(); 838 output.writeSMbelow(); // FIXME: there are no SMs in these scripts... 839 output.writeMabove(); 840 output.writeMpost(); 841 } 842 843 // write below-base consonants 844 if (baseConsonant != lastConsonant) { 845 for (i = bcSpan + 1; i < postBase; i += 1) { 846 output.writeChar(chars[i], i, tagArray1); 847 } 848 849 if (postBase > lastConsonant) { 850 // write halant that was after base consonant 851 output.writeChar(chars[bcSpan], bcSpan, tagArray1); 852 } 853 } 854 855 // write Mbelow, SMbelow, Mabove 856 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 857 output.writeMbelow(); 858 output.writeSMbelow(); 859 output.writeMabove(); 860 } 861 862 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { 863 if (baseLimit == prev + 2) { 864 output.writeChar(chars[prev], prev, tagArray0); 865 output.writeChar(chars[prev + 1], prev + 1, tagArray0); 866 } 867 868 output.writeVMabove(); 869 output.writeSMabove(); // FIXME: there are no SM's in these scripts... 870 } 871 872 // write post-base consonants 873 // FIXME: does this put the right tags on post-base consonants? 874 if (baseConsonant != lastConsonant) { 875 if (postBase <= lastConsonant) { 876 for (i = postBase; i <= lastConsonant; i += 1) { 877 output.writeChar(chars[i], i, tagArray3); 878 } 879 880 // write halant that was after base consonant 881 output.writeChar(chars[bcSpan], bcSpan, tagArray1); 882 } 883 884 // write the training halant, if there is one 885 if (lastConsonant < matra && classTable->isVirama(chars[matra])) { 886 output.writeChar(chars[matra], matra, tagArray4); 887 } 888 } 889 890 // write Mpost 891 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 892 output.writeMpost(); 893 } 894 895 output.writeLengthMark(); 896 output.writeAlLakuna(); 897 898 // write reph 899 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { 900 if (baseLimit == prev + 2) { 901 output.writeChar(chars[prev], prev, tagArray0); 902 output.writeChar(chars[prev + 1], prev + 1, tagArray0); 903 } 904 905 output.writeVMabove(); 906 output.writeSMabove(); 907 } 908 909 output.writeVMpost(); 910 911 break; 912 } 913 914 default: 915 break; 916 } 917 918 prev = syllable; 919 } 920 921 *outMPreFixups = mpreFixups; 922 923 return output.getOutputIndex(); 924 } 925 926 void IndicReordering::adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success) 927 { 928 if (mpreFixups != NULL) { 929 mpreFixups->apply(glyphStorage, success); 930 931 delete mpreFixups; 932 } 933 } 934 935 void IndicReordering::applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count) 936 { 937 LEErrorCode success = LE_NO_ERROR; 938 939 // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the 940 // GPOS table lookups 941 942 for ( le_int32 i = 0 ; i < count ; i++ ) { 943 glyphStorage.setAuxData(i, ( presentationFormsMask | positioningFormsMask ), success); 944 } 945 946 } 947 void IndicReordering::finalReordering(LEGlyphStorage &glyphStorage, le_int32 count) 948 { 949 LEErrorCode success = LE_NO_ERROR; 950 951 // Reposition REPH as appropriate 952 953 for ( le_int32 i = 0 ; i < count ; i++ ) { 954 955 le_int32 tmpAuxData = glyphStorage.getAuxData(i,success); 956 LEGlyphID tmpGlyph = glyphStorage.getGlyphID(i,success); 957 958 if ( ( tmpGlyph != NO_GLYPH ) && (tmpAuxData & rephConsonantMask) && !(tmpAuxData & repositionedGlyphMask)) { 959 960 le_bool targetPositionFound = false; 961 le_int32 targetPosition = i+1; 962 le_int32 baseConsonantData; 963 964 while (!targetPositionFound) { 965 tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); 966 tmpAuxData = glyphStorage.getAuxData(targetPosition,success); 967 968 if ( tmpAuxData & baseConsonantMask ) { 969 baseConsonantData = tmpAuxData; 970 targetPositionFound = true; 971 } else { 972 targetPosition++; 973 } 974 } 975 976 // Make sure we are not putting the reph into an empty hole 977 978 le_bool targetPositionHasGlyph = false; 979 while (!targetPositionHasGlyph) { 980 tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); 981 if ( tmpGlyph != NO_GLYPH ) { 982 targetPositionHasGlyph = true; 983 } else { 984 targetPosition--; 985 } 986 } 987 988 // Make sure that REPH is positioned after any above base or post base matras 989 // 990 le_bool checkMatraDone = false; 991 le_int32 checkMatraPosition = targetPosition+1; 992 while ( !checkMatraDone ) { 993 tmpAuxData = glyphStorage.getAuxData(checkMatraPosition,success); 994 if ( checkMatraPosition >= count || ( (tmpAuxData ^ baseConsonantData) & LE_GLYPH_GROUP_MASK)) { 995 checkMatraDone = true; 996 continue; 997 } 998 if ( (tmpAuxData & matraMask) && 999 (((tmpAuxData & markPositionMask) == aboveBasePosition) || 1000 ((tmpAuxData & markPositionMask) == postBasePosition))) { 1001 targetPosition = checkMatraPosition; 1002 } 1003 checkMatraPosition++; 1004 } 1005 1006 glyphStorage.moveGlyph(i,targetPosition,repositionedGlyphMask); 1007 } 1008 } 1009 } 1010 1011 1012 le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, 1013 LEUnicode *outChars, LEGlyphStorage &glyphStorage) 1014 { 1015 const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); 1016 1017 DynamicProperties dynProps[INDIC_BLOCK_SIZE]; 1018 IndicReordering::getDynamicProperties(dynProps,classTable); 1019 1020 IndicReorderingOutput output(outChars, glyphStorage, NULL); 1021 le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0; 1022 //le_bool lastInWord = FALSE; 1023 1024 while (beginSyllable < charCount) { 1025 le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount); 1026 1027 output.reset(); 1028 1029 // Find the First Consonant 1030 for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) { 1031 if ( classTable->isConsonant(chars[firstConsonant]) ) { 1032 break; 1033 } 1034 } 1035 1036 // Find the base consonant 1037 1038 baseConsonant = nextSyllable - 1; 1039 secondConsonant = firstConsonant; 1040 1041 // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm() 1042 1043 while ( baseConsonant > firstConsonant ) { 1044 if ( classTable->isConsonant(chars[baseConsonant]) && 1045 !classTable->hasBelowBaseForm(chars[baseConsonant]) && 1046 !classTable->hasPostBaseForm(chars[baseConsonant]) ) { 1047 break; 1048 } 1049 else { 1050 if ( classTable->isConsonant(chars[baseConsonant]) ) { 1051 secondConsonant = baseConsonant; 1052 } 1053 baseConsonant--; 1054 } 1055 } 1056 1057 // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one 1058 // consonant, Ra is excluced from candidates for base consonants 1059 1060 if ( classTable->isReph(chars[beginSyllable]) && 1061 beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) && 1062 secondConsonant != firstConsonant) { 1063 baseConsonant = secondConsonant; 1064 } 1065 1066 // Populate the output 1067 for ( i = beginSyllable ; i < nextSyllable ; i++ ) { 1068 1069 // Handle invalid combinartions 1070 1071 if ( classTable->isVirama(chars[beginSyllable]) || 1072 classTable->isMatra(chars[beginSyllable]) || 1073 classTable->isVowelModifier(chars[beginSyllable]) || 1074 classTable->isNukta(chars[beginSyllable]) ) { 1075 output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask); 1076 inv_count++; 1077 } 1078 output.writeChar(chars[i],i, basicShapingFormsMask); 1079 1080 } 1081 1082 // Adjust features and set syllable structure bits 1083 1084 for ( i = beginSyllable ; i < nextSyllable ; i++ ) { 1085 1086 FeatureMask outMask = output.getFeatures(i+inv_count); 1087 FeatureMask saveMask = outMask; 1088 1089 // Since reph can only validly occur at the beginning of a syllable 1090 // We only apply it to the first 2 characters in the syllable, to keep it from 1091 // conflicting with other features ( i.e. rkrf ) 1092 1093 // TODO : Use the dynamic property for determining isREPH 1094 if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) && 1095 i+1 < nextSyllable && classTable->isVirama(chars[i+1])) { 1096 outMask |= rphfFeatureMask; 1097 outMask |= rephConsonantMask; 1098 output.setFeatures(i+1+inv_count,outMask); 1099 1100 } 1101 1102 if ( i == baseConsonant ) { 1103 outMask |= baseConsonantMask; 1104 } 1105 1106 if ( classTable->isMatra(chars[i])) { 1107 outMask |= matraMask; 1108 if ( classTable->hasAboveBaseForm(chars[i])) { 1109 outMask |= aboveBasePosition; 1110 } else if ( classTable->hasBelowBaseForm(chars[i])) { 1111 outMask |= belowBasePosition; 1112 } 1113 } 1114 1115 // Don't apply half form to virama that stands alone at the end of a syllable 1116 // to prevent half forms from forming when syllable ends with virama 1117 1118 if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) { 1119 outMask ^= halfFeatureMask; 1120 if ( classTable->isConsonant(chars[i-1]) ) { 1121 FeatureMask tmp = output.getFeatures(i-1+inv_count); 1122 tmp ^= halfFeatureMask; 1123 output.setFeatures(i-1+inv_count,tmp); 1124 } 1125 } 1126 1127 if ( outMask != saveMask ) { 1128 output.setFeatures(i+inv_count,outMask); 1129 } 1130 } 1131 1132 output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count); 1133 1134 beginSyllable = nextSyllable; 1135 } 1136 1137 1138 return output.getOutputIndex(); 1139 } 1140 1141 1142 void IndicReordering::getDynamicProperties( DynamicProperties */*dProps*/, const IndicClassTable *classTable ) { 1143 1144 1145 LEUnicode currentChar; 1146 LEUnicode virama; 1147 LEUnicode workChars[2]; 1148 LEGlyphStorage workGlyphs; 1149 1150 IndicReorderingOutput workOutput(workChars, workGlyphs, NULL); 1151 1152 //le_int32 offset = 0; 1153 1154 // First find the relevant virama for the script we are dealing with 1155 1156 for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { 1157 if ( classTable->isVirama(currentChar)) { 1158 virama = currentChar; 1159 break; 1160 } 1161 } 1162 1163 for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { 1164 if ( classTable->isConsonant(currentChar)) { 1165 workOutput.reset(); 1166 } 1167 } 1168 1169 1170 } 1171 1172 U_NAMESPACE_END 1173