/*
 *
 * (C) Copyright IBM Corp. 1998-2009 - All Rights Reserved
 *
 */

#include "LETypes.h"
#include "OpenTypeTables.h"
#include "OpenTypeUtilities.h"
#include "IndicReordering.h"
#include "LEGlyphStorage.h"
#include "MPreFixups.h"

U_NAMESPACE_BEGIN

// Short local aliases for the OpenType feature tags referenced by the
// feature maps defined later in this file.
#define loclFeatureTag LE_LOCL_FEATURE_TAG
#define initFeatureTag LE_INIT_FEATURE_TAG
#define nuktFeatureTag LE_NUKT_FEATURE_TAG
#define akhnFeatureTag LE_AKHN_FEATURE_TAG
#define rphfFeatureTag LE_RPHF_FEATURE_TAG
#define rkrfFeatureTag LE_RKRF_FEATURE_TAG
#define blwfFeatureTag LE_BLWF_FEATURE_TAG
#define halfFeatureTag LE_HALF_FEATURE_TAG
#define pstfFeatureTag LE_PSTF_FEATURE_TAG
#define vatuFeatureTag LE_VATU_FEATURE_TAG
#define presFeatureTag LE_PRES_FEATURE_TAG
#define blwsFeatureTag LE_BLWS_FEATURE_TAG
#define abvsFeatureTag LE_ABVS_FEATURE_TAG
#define pstsFeatureTag LE_PSTS_FEATURE_TAG
#define halnFeatureTag LE_HALN_FEATURE_TAG
#define cjctFeatureTag LE_CJCT_FEATURE_TAG
#define blwmFeatureTag LE_BLWM_FEATURE_TAG
#define abvmFeatureTag LE_ABVM_FEATURE_TAG
#define distFeatureTag LE_DIST_FEATURE_TAG
#define caltFeatureTag LE_CALT_FEATURE_TAG
#define kernFeatureTag LE_KERN_FEATURE_TAG

// One bit per feature. These occupy the high bits (0x80000000 down to
// 0x00000800) of the per-glyph auxiliary data word; the feature maps pair
// each tag above with the mask of the same name.
#define loclFeatureMask 0x80000000UL
#define rphfFeatureMask 0x40000000UL
#define blwfFeatureMask 0x20000000UL
#define halfFeatureMask 0x10000000UL
#define pstfFeatureMask 0x08000000UL
#define nuktFeatureMask 0x04000000UL
#define akhnFeatureMask 0x02000000UL
#define vatuFeatureMask 0x01000000UL
#define presFeatureMask 0x00800000UL
#define blwsFeatureMask 0x00400000UL
#define abvsFeatureMask 0x00200000UL
#define pstsFeatureMask 0x00100000UL
#define halnFeatureMask 0x00080000UL
#define blwmFeatureMask 0x00040000UL
#define abvmFeatureMask 0x00020000UL
#define distFeatureMask 0x00010000UL
#define initFeatureMask 0x00008000UL
#define cjctFeatureMask 0x00004000UL
#define rkrfFeatureMask 0x00002000UL
#define caltFeatureMask 0x00001000UL
#define kernFeatureMask 0x00000800UL

// Syllable structure bits
// These share the auxiliary data word with the feature bits above and sit
// directly below them (0x00000400 and down).
#define baseConsonantMask 0x00000400UL
#define consonantMask 0x00000200UL
#define halfConsonantMask 0x00000100UL
#define rephConsonantMask 0x00000080UL
#define matraMask 0x00000040UL
#define vowelModifierMask 0x00000020UL
// Two-bit field; the four *Position values below are the encodings stored in it.
#define markPositionMask 0x00000018UL

#define postBasePosition 0x00000000UL
#define preBasePosition 0x00000008UL
#define aboveBasePosition 0x00000010UL
#define belowBasePosition 0x00000018UL

// Marker handed to LEGlyphStorage::moveGlyph() by finalReordering(); a glyph
// carrying it is not considered for repositioning again.
#define repositionedGlyphMask 0x00000002UL

// Feature groups: the first is attached to every character written by the
// v2 processing path, the other two are OR-ed together and applied to every
// glyph by applyPresentationForms().
#define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
#define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask )
#define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask )


// Code points and sentinel values used by the reordering code.
#define C_MALAYALAM_VOWEL_SIGN_U 0x0D41
#define C_DOTTED_CIRCLE 0x25CC
// Glyph ID that finalReordering() treats as an empty slot.
#define NO_GLYPH 0xFFFF

// Some level of debate as to the proper value for MAX_CONSONANTS_PER_SYLLABLE. Ticket 5588 states that 4
// is the magic number according to ISCII, but 5 seems to be the more consistent with XP.
87 #define MAX_CONSONANTS_PER_SYLLABLE 5 88 89 #define INDIC_BLOCK_SIZE 0x7F 90 91 class IndicReorderingOutput : public UMemory { 92 private: 93 le_int32 fSyllableCount; 94 le_int32 fOutIndex; 95 LEUnicode *fOutChars; 96 97 LEGlyphStorage &fGlyphStorage; 98 99 LEUnicode fMpre; 100 le_int32 fMpreIndex; 101 102 LEUnicode fMbelow; 103 le_int32 fMbelowIndex; 104 105 LEUnicode fMabove; 106 le_int32 fMaboveIndex; 107 108 LEUnicode fMpost; 109 le_int32 fMpostIndex; 110 111 LEUnicode fLengthMark; 112 le_int32 fLengthMarkIndex; 113 114 LEUnicode fAlLakuna; 115 le_int32 fAlLakunaIndex; 116 117 FeatureMask fMatraFeatures; 118 119 le_int32 fMPreOutIndex; 120 MPreFixups *fMPreFixups; 121 122 LEUnicode fVMabove; 123 LEUnicode fVMpost; 124 le_int32 fVMIndex; 125 FeatureMask fVMFeatures; 126 127 LEUnicode fSMabove; 128 LEUnicode fSMbelow; 129 le_int32 fSMIndex; 130 FeatureMask fSMFeatures; 131 132 LEUnicode fPreBaseConsonant; 133 LEUnicode fPreBaseVirama; 134 le_int32 fPBCIndex; 135 FeatureMask fPBCFeatures; 136 137 void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass) 138 { 139 // FIXME: check if already set, or if not a matra... 140 if (IndicClassTable::isLengthMark(matraClass)) { 141 fLengthMark = matra; 142 fLengthMarkIndex = matraIndex; 143 } else if (IndicClassTable::isAlLakuna(matraClass)) { 144 fAlLakuna = matra; 145 fAlLakunaIndex = matraIndex; 146 } else { 147 switch (matraClass & CF_POS_MASK) { 148 case CF_POS_BEFORE: 149 fMpre = matra; 150 fMpreIndex = matraIndex; 151 break; 152 153 case CF_POS_BELOW: 154 fMbelow = matra; 155 fMbelowIndex = matraIndex; 156 break; 157 158 case CF_POS_ABOVE: 159 fMabove = matra; 160 fMaboveIndex = matraIndex; 161 break; 162 163 case CF_POS_AFTER: 164 fMpost = matra; 165 fMpostIndex = matraIndex; 166 break; 167 168 default: 169 // can't get here... 
170 break; 171 } 172 } 173 } 174 175 public: 176 IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups) 177 : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage), 178 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0), 179 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0), 180 fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups), 181 fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0), 182 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0), 183 fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0) 184 { 185 // nothing else to do... 186 } 187 188 ~IndicReorderingOutput() 189 { 190 // nothing to do here... 191 } 192 193 void reset() 194 { 195 fSyllableCount += 1; 196 197 fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0; 198 fMPreOutIndex = -1; 199 200 fVMabove = fVMpost = 0; 201 fSMabove = fSMbelow = 0; 202 203 fPreBaseConsonant = fPreBaseVirama = 0; 204 } 205 206 void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) 207 { 208 LEErrorCode success = LE_NO_ERROR; 209 210 fOutChars[fOutIndex] = ch; 211 212 fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); 213 fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); 214 215 fOutIndex += 1; 216 } 217 218 void setFeatures ( le_uint32 charIndex, FeatureMask charFeatures) 219 { 220 LEErrorCode success = LE_NO_ERROR; 221 222 fGlyphStorage.setAuxData( charIndex, charFeatures, success ); 223 224 } 225 226 FeatureMask getFeatures ( le_uint32 charIndex ) 227 { 228 LEErrorCode success = LE_NO_ERROR; 229 return fGlyphStorage.getAuxData(charIndex,success); 230 } 231 232 void decomposeReorderMatras ( const IndicClassTable *classTable, le_int32 beginSyllable, le_int32 nextSyllable, le_int32 inv_count ) { 233 le_int32 i; 234 LEErrorCode success = LE_NO_ERROR; 235 236 for 
( i = beginSyllable ; i < nextSyllable ; i++ ) { 237 if ( classTable->isMatra(fOutChars[i+inv_count])) { 238 IndicClassTable::CharClass matraClass = classTable->getCharClass(fOutChars[i+inv_count]); 239 if ( classTable->isSplitMatra(matraClass)) { 240 le_int32 saveIndex = fGlyphStorage.getCharIndex(i+inv_count,success); 241 le_uint32 saveAuxData = fGlyphStorage.getAuxData(i+inv_count,success); 242 const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); 243 int j; 244 for (j = 0 ; *(splitMatra)[j] != 0 ; j++) { 245 LEUnicode piece = (*splitMatra)[j]; 246 if ( j == 0 ) { 247 fOutChars[i+inv_count] = piece; 248 matraClass = classTable->getCharClass(piece); 249 } else { 250 insertCharacter(piece,i+1+inv_count,saveIndex,saveAuxData); 251 nextSyllable++; 252 } 253 } 254 } 255 256 if ((matraClass & CF_POS_MASK) == CF_POS_BEFORE) { 257 moveCharacter(i+inv_count,beginSyllable+inv_count); 258 } 259 } 260 } 261 } 262 263 void moveCharacter( le_int32 fromPosition, le_int32 toPosition ) { 264 le_int32 i,saveIndex; 265 le_uint32 saveAuxData; 266 LEUnicode saveChar = fOutChars[fromPosition]; 267 LEErrorCode success = LE_NO_ERROR; 268 LEErrorCode success2 = LE_NO_ERROR; 269 saveIndex = fGlyphStorage.getCharIndex(fromPosition,success); 270 saveAuxData = fGlyphStorage.getAuxData(fromPosition,success); 271 272 if ( fromPosition > toPosition ) { 273 for ( i = fromPosition ; i > toPosition ; i-- ) { 274 fOutChars[i] = fOutChars[i-1]; 275 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success2),success); 276 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success2), success); 277 278 } 279 } else { 280 for ( i = fromPosition ; i < toPosition ; i++ ) { 281 fOutChars[i] = fOutChars[i+1]; 282 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success2),success); 283 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success2), success); 284 } 285 286 } 287 fOutChars[toPosition] = saveChar; 288 
fGlyphStorage.setCharIndex(toPosition,saveIndex,success); 289 fGlyphStorage.setAuxData(toPosition,saveAuxData,success); 290 291 } 292 void insertCharacter( LEUnicode ch, le_int32 toPosition, le_int32 charIndex, le_uint32 auxData ) { 293 LEErrorCode success = LE_NO_ERROR; 294 le_int32 i; 295 fOutIndex += 1; 296 297 for ( i = fOutIndex ; i > toPosition ; i--) { 298 fOutChars[i] = fOutChars[i-1]; 299 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success),success); 300 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success), success); 301 } 302 303 fOutChars[toPosition] = ch; 304 fGlyphStorage.setCharIndex(toPosition,charIndex,success); 305 fGlyphStorage.setAuxData(toPosition,auxData,success); 306 307 } 308 void removeCharacter( le_int32 fromPosition ) { 309 LEErrorCode success = LE_NO_ERROR; 310 le_int32 i; 311 fOutIndex -= 1; 312 313 for ( i = fromPosition ; i < fOutIndex ; i--) { 314 fOutChars[i] = fOutChars[i+1]; 315 fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success),success); 316 fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success), success); 317 } 318 } 319 320 le_bool noteMatra(const IndicClassTable *classTable, LEUnicode matra, le_uint32 matraIndex, FeatureMask matraFeatures, le_bool wordStart) 321 { 322 IndicClassTable::CharClass matraClass = classTable->getCharClass(matra); 323 324 fMatraFeatures = matraFeatures; 325 326 if (wordStart) { 327 fMatraFeatures |= initFeatureMask; 328 } 329 330 if (IndicClassTable::isMatra(matraClass)) { 331 if (IndicClassTable::isSplitMatra(matraClass)) { 332 const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); 333 int i; 334 335 for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) { 336 LEUnicode piece = (*splitMatra)[i]; 337 IndicClassTable::CharClass pieceClass = classTable->getCharClass(piece); 338 339 saveMatra(piece, matraIndex, pieceClass); 340 } 341 } else { 342 saveMatra(matra, matraIndex, matraClass); 343 } 344 345 return TRUE; 346 } 347 348 return 
FALSE; 349 } 350 351 void noteVowelModifier(const IndicClassTable *classTable, LEUnicode vowelModifier, le_uint32 vowelModifierIndex, FeatureMask vowelModifierFeatures) 352 { 353 IndicClassTable::CharClass vmClass = classTable->getCharClass(vowelModifier); 354 355 fVMIndex = vowelModifierIndex; 356 fVMFeatures = vowelModifierFeatures; 357 358 if (IndicClassTable::isVowelModifier(vmClass)) { 359 switch (vmClass & CF_POS_MASK) { 360 case CF_POS_ABOVE: 361 fVMabove = vowelModifier; 362 break; 363 364 case CF_POS_AFTER: 365 fVMpost = vowelModifier; 366 break; 367 368 default: 369 // FIXME: this is an error... 370 break; 371 } 372 } 373 } 374 375 void noteStressMark(const IndicClassTable *classTable, LEUnicode stressMark, le_uint32 stressMarkIndex, FeatureMask stressMarkFeatures) 376 { 377 IndicClassTable::CharClass smClass = classTable->getCharClass(stressMark); 378 379 fSMIndex = stressMarkIndex; 380 fSMFeatures = stressMarkFeatures; 381 382 if (IndicClassTable::isStressMark(smClass)) { 383 switch (smClass & CF_POS_MASK) { 384 case CF_POS_ABOVE: 385 fSMabove = stressMark; 386 break; 387 388 case CF_POS_BELOW: 389 fSMbelow = stressMark; 390 break; 391 392 default: 393 // FIXME: this is an error... 394 break; 395 } 396 } 397 } 398 399 void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features) 400 { 401 fPBCIndex = index; 402 fPreBaseConsonant = PBConsonant; 403 fPreBaseVirama = PBVirama; 404 fPBCFeatures = features; 405 } 406 407 void noteBaseConsonant() 408 { 409 if (fMPreFixups != NULL && fMPreOutIndex >= 0) { 410 fMPreFixups->add(fOutIndex, fMPreOutIndex); 411 } 412 } 413 414 // Handles Al-Lakuna in Sinhala split vowels. 
415 void writeAlLakuna() 416 { 417 if (fAlLakuna != 0) { 418 writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures); 419 } 420 } 421 422 void writeMpre() 423 { 424 if (fMpre != 0) { 425 fMPreOutIndex = fOutIndex; 426 writeChar(fMpre, fMpreIndex, fMatraFeatures); 427 } 428 } 429 430 void writeMbelow() 431 { 432 if (fMbelow != 0) { 433 writeChar(fMbelow, fMbelowIndex, fMatraFeatures); 434 } 435 } 436 437 void writeMabove() 438 { 439 if (fMabove != 0) { 440 writeChar(fMabove, fMaboveIndex, fMatraFeatures); 441 } 442 } 443 444 void writeMpost() 445 { 446 if (fMpost != 0) { 447 writeChar(fMpost, fMpostIndex, fMatraFeatures); 448 } 449 } 450 451 void writeLengthMark() 452 { 453 if (fLengthMark != 0) { 454 writeChar(fLengthMark, fLengthMarkIndex, fMatraFeatures); 455 } 456 } 457 458 void writeVMabove() 459 { 460 if (fVMabove != 0) { 461 writeChar(fVMabove, fVMIndex, fVMFeatures); 462 } 463 } 464 465 void writeVMpost() 466 { 467 if (fVMpost != 0) { 468 writeChar(fVMpost, fVMIndex, fVMFeatures); 469 } 470 } 471 472 void writeSMabove() 473 { 474 if (fSMabove != 0) { 475 writeChar(fSMabove, fSMIndex, fSMFeatures); 476 } 477 } 478 479 void writeSMbelow() 480 { 481 if (fSMbelow != 0) { 482 writeChar(fSMbelow, fSMIndex, fSMFeatures); 483 } 484 } 485 486 void writePreBaseConsonant() 487 { 488 // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However, 489 // it seems that almost none of the fonts for Malayalam are set up to handle this. 490 // So, we're going to force the issue here by using the rakar as defined with RA in most fonts. 491 492 if (fPreBaseConsonant == 0x0d31) { // RRA 493 fPreBaseConsonant = 0x0d30; // RA 494 } 495 496 if (fPreBaseConsonant != 0) { 497 writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures); 498 writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures); 499 } 500 } 501 502 le_int32 getOutputIndex() 503 { 504 return fOutIndex; 505 } 506 }; 507 508 509 510 // TODO: Find better names for these! 
511 #define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask) 512 #define tagArray3 (pstfFeatureMask | tagArray4) 513 #define tagArray2 (halfFeatureMask | tagArray3) 514 #define tagArray1 (blwfFeatureMask | tagArray2) 515 #define tagArray0 (rphfFeatureMask | tagArray1) 516 517 static const FeatureMap featureMap[] = { 518 {loclFeatureTag, loclFeatureMask}, 519 {initFeatureTag, initFeatureMask}, 520 {nuktFeatureTag, nuktFeatureMask}, 521 {akhnFeatureTag, akhnFeatureMask}, 522 {rphfFeatureTag, rphfFeatureMask}, 523 {blwfFeatureTag, blwfFeatureMask}, 524 {halfFeatureTag, halfFeatureMask}, 525 {pstfFeatureTag, pstfFeatureMask}, 526 {vatuFeatureTag, vatuFeatureMask}, 527 {presFeatureTag, presFeatureMask}, 528 {blwsFeatureTag, blwsFeatureMask}, 529 {abvsFeatureTag, abvsFeatureMask}, 530 {pstsFeatureTag, pstsFeatureMask}, 531 {halnFeatureTag, halnFeatureMask}, 532 {blwmFeatureTag, blwmFeatureMask}, 533 {abvmFeatureTag, abvmFeatureMask}, 534 {distFeatureTag, distFeatureMask} 535 }; 536 537 static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap); 538 539 static const FeatureMap v2FeatureMap[] = { 540 {loclFeatureTag, loclFeatureMask}, 541 {nuktFeatureTag, nuktFeatureMask}, 542 {akhnFeatureTag, akhnFeatureMask}, 543 {rphfFeatureTag, rphfFeatureMask}, 544 {rkrfFeatureTag, rkrfFeatureMask}, 545 {blwfFeatureTag, blwfFeatureMask}, 546 {halfFeatureTag, halfFeatureMask}, 547 {vatuFeatureTag, vatuFeatureMask}, 548 {cjctFeatureTag, cjctFeatureMask}, 549 {presFeatureTag, presFeatureMask}, 550 {abvsFeatureTag, abvsFeatureMask}, 551 {blwsFeatureTag, blwsFeatureMask}, 552 {pstsFeatureTag, pstsFeatureMask}, 553 {halnFeatureTag, halnFeatureMask}, 554 {caltFeatureTag, caltFeatureMask}, 555 {kernFeatureTag, kernFeatureMask}, 556 {distFeatureTag, distFeatureMask}, 557 {abvmFeatureTag, abvmFeatureMask}, 558 
{blwmFeatureTag, blwmFeatureMask} 559 }; 560 561 static const le_int32 v2FeatureMapCount = LE_ARRAY_SIZE(v2FeatureMap); 562 563 static const le_int8 stateTable[][CC_COUNT] = 564 { 565 // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al 566 { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state 567 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state 568 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta 569 {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant 570 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama 571 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels 572 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark 573 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama 574 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama 575 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel 576 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel 577 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv 578 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?) 
579 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant 580 }; 581 582 583 const FeatureMap *IndicReordering::getFeatureMap(le_int32 &count) 584 { 585 count = featureCount; 586 587 return featureMap; 588 } 589 590 const FeatureMap *IndicReordering::getv2FeatureMap(le_int32 &count) 591 { 592 count = v2FeatureMapCount; 593 594 return v2FeatureMap; 595 } 596 597 le_int32 IndicReordering::findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) 598 { 599 le_int32 cursor = prev; 600 le_int8 state = 0; 601 le_int8 consonant_count = 0; 602 603 while (cursor < charCount) { 604 IndicClassTable::CharClass charClass = classTable->getCharClass(chars[cursor]); 605 606 if ( IndicClassTable::isConsonant(charClass) ) { 607 consonant_count++; 608 if ( consonant_count > MAX_CONSONANTS_PER_SYLLABLE ) { 609 break; 610 } 611 } 612 613 state = stateTable[state][charClass & CF_CLASS_MASK]; 614 615 if (state < 0) { 616 break; 617 } 618 619 cursor += 1; 620 } 621 622 return cursor; 623 } 624 625 le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, 626 LEUnicode *outChars, LEGlyphStorage &glyphStorage, 627 MPreFixups **outMPreFixups, LEErrorCode& success) 628 { 629 if (LE_FAILURE(success)) { 630 return 0; 631 } 632 633 MPreFixups *mpreFixups = NULL; 634 const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); 635 636 if (classTable->scriptFlags & SF_MPRE_FIXUP) { 637 mpreFixups = new MPreFixups(charCount); 638 if (mpreFixups == NULL) { 639 success = LE_MEMORY_ALLOCATION_ERROR; 640 return 0; 641 } 642 } 643 644 IndicReorderingOutput output(outChars, glyphStorage, mpreFixups); 645 le_int32 i, prev = 0; 646 le_bool lastInWord = FALSE; 647 648 while (prev < charCount) { 649 le_int32 syllable = findSyllable(classTable, chars, prev, charCount); 650 le_int32 matra, markStart = syllable; 651 652 output.reset(); 653 654 if 
(classTable->isStressMark(chars[markStart - 1])) { 655 markStart -= 1; 656 output.noteStressMark(classTable, chars[markStart], markStart, tagArray1); 657 } 658 659 if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) { 660 markStart -= 1; 661 output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1); 662 } 663 664 matra = markStart - 1; 665 666 while (output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) && matra != prev) { 667 matra -= 1; 668 } 669 670 lastInWord = TRUE; 671 672 switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) { 673 case CC_RESERVED: 674 lastInWord = FALSE; 675 /* fall through */ 676 677 case CC_INDEPENDENT_VOWEL: 678 case CC_ZERO_WIDTH_MARK: 679 for (i = prev; i < syllable; i += 1) { 680 output.writeChar(chars[i], i, tagArray1); 681 } 682 683 break; 684 685 case CC_AL_LAKUNA: 686 case CC_NUKTA: 687 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 688 output.writeChar(chars[prev], prev, tagArray1); 689 break; 690 691 case CC_VIRAMA: 692 // A lone virama is illegal unless it follows a 693 // MALAYALAM_VOWEL_SIGN_U. Such a usage is called 694 // "samvruthokaram". 695 if (chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) { 696 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 697 } 698 699 output.writeChar(chars[prev], prev, tagArray1); 700 break; 701 702 case CC_DEPENDENT_VOWEL: 703 case CC_SPLIT_VOWEL_PIECE_1: 704 case CC_SPLIT_VOWEL_PIECE_2: 705 case CC_SPLIT_VOWEL_PIECE_3: 706 case CC_VOWEL_MODIFIER: 707 case CC_STRESS_MARK: 708 output.writeMpre(); 709 710 output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); 711 712 output.writeMbelow(); 713 output.writeSMbelow(); 714 output.writeMabove(); 715 716 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { 717 output.writeMpost(); 718 } 719 720 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { 721 output.writeVMabove(); 722 output.writeSMabove(); // FIXME: there are no SM's in these scripts... 
723 } 724 725 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 726 output.writeMpost(); 727 } 728 729 output.writeLengthMark(); 730 output.writeAlLakuna(); 731 732 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { 733 output.writeVMabove(); 734 output.writeSMabove(); 735 } 736 737 output.writeVMpost(); 738 break; 739 740 case CC_INDEPENDENT_VOWEL_2: 741 case CC_INDEPENDENT_VOWEL_3: 742 case CC_CONSONANT: 743 case CC_CONSONANT_WITH_NUKTA: 744 { 745 le_uint32 length = markStart - prev; 746 le_int32 lastConsonant = markStart - 1; 747 le_int32 baseLimit = prev; 748 749 // Check for REPH at front of syllable 750 if (length > 2 && classTable->isReph(chars[prev]) && classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) { 751 baseLimit += 2; 752 753 // Check for eyelash RA, if the script supports it 754 if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 && 755 chars[baseLimit] == C_SIGN_ZWJ) { 756 if (length > 3) { 757 baseLimit += 1; 758 } else { 759 baseLimit -= 2; 760 } 761 } 762 } 763 764 while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) { 765 lastConsonant -= 1; 766 } 767 768 769 IndicClassTable::CharClass charClass = CC_RESERVED; 770 IndicClassTable::CharClass nextClass = CC_RESERVED; 771 le_int32 baseConsonant = lastConsonant; 772 le_int32 postBase = lastConsonant + 1; 773 le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK; 774 le_bool seenVattu = FALSE; 775 le_bool seenBelowBaseForm = FALSE; 776 le_bool seenPreBaseForm = FALSE; 777 le_bool hasNukta = FALSE; 778 le_bool hasBelowBaseForm = FALSE; 779 le_bool hasPostBaseForm = FALSE; 780 le_bool hasPreBaseForm = FALSE; 781 782 if (postBase < markStart && classTable->isNukta(chars[postBase])) { 783 charClass = CC_NUKTA; 784 postBase += 1; 785 } 786 787 while (baseConsonant > baseLimit) { 788 nextClass = charClass; 789 hasNukta = IndicClassTable::isNukta(nextClass); 790 charClass = 
classTable->getCharClass(chars[baseConsonant]); 791 792 hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta; 793 hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta; 794 hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta; 795 796 if (IndicClassTable::isConsonant(charClass)) { 797 if (postBaseLimit == 0 || seenVattu || 798 (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) || 799 !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) { 800 break; 801 } 802 803 // Note any pre-base consonants 804 if ( baseConsonant == lastConsonant && lastConsonant > 0 && 805 hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) { 806 output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2); 807 seenPreBaseForm = TRUE; 808 809 } 810 // consonants with nuktas are never vattus 811 seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta; 812 813 // consonants with nuktas never have below- or post-base forms 814 if (hasPostBaseForm) { 815 if (seenBelowBaseForm) { 816 break; 817 } 818 819 postBase = baseConsonant; 820 } else if (hasBelowBaseForm) { 821 seenBelowBaseForm = TRUE; 822 } 823 824 postBaseLimit -= 1; 825 } 826 827 baseConsonant -= 1; 828 } 829 830 // Write Mpre 831 output.writeMpre(); 832 833 // Write eyelash RA 834 // NOTE: baseLimit == prev + 3 iff eyelash RA present... 835 if (baseLimit == prev + 3) { 836 output.writeChar(chars[prev], prev, tagArray2); 837 output.writeChar(chars[prev + 1], prev + 1, tagArray2); 838 output.writeChar(chars[prev + 2], prev + 2, tagArray2); 839 } 840 841 // write any pre-base consonants 842 output.writePreBaseConsonant(); 843 844 le_bool supressVattu = TRUE; 845 846 for (i = baseLimit; i < baseConsonant; i += 1) { 847 LEUnicode ch = chars[i]; 848 // Don't put 'pstf' or 'blwf' on anything before the base consonant. 
849 FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask ); 850 851 charClass = classTable->getCharClass(ch); 852 nextClass = classTable->getCharClass(chars[i + 1]); 853 hasNukta = IndicClassTable::isNukta(nextClass); 854 855 if (IndicClassTable::isConsonant(charClass)) { 856 if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) { 857 features = tagArray4; 858 } 859 860 supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta; 861 } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ) 862 { 863 features = tagArray4; 864 } 865 866 output.writeChar(ch, i, features); 867 } 868 869 le_int32 bcSpan = baseConsonant + 1; 870 871 if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) { 872 bcSpan += 1; 873 } 874 875 if (baseConsonant == lastConsonant && bcSpan < markStart && 876 (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) { 877 bcSpan += 1; 878 879 if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) { 880 bcSpan += 1; 881 } 882 } 883 884 // note the base consonant for post-GSUB fixups 885 output.noteBaseConsonant(); 886 887 // write base consonant 888 for (i = baseConsonant; i < bcSpan; i += 1) { 889 output.writeChar(chars[i], i, tagArray4); 890 } 891 892 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { 893 output.writeMbelow(); 894 output.writeSMbelow(); // FIXME: there are no SMs in these scripts... 
895 output.writeMabove(); 896 output.writeMpost(); 897 } 898 899 // write below-base consonants 900 if (baseConsonant != lastConsonant && !seenPreBaseForm) { 901 for (i = bcSpan + 1; i < postBase; i += 1) { 902 output.writeChar(chars[i], i, tagArray1); 903 } 904 905 if (postBase > lastConsonant) { 906 // write halant that was after base consonant 907 output.writeChar(chars[bcSpan], bcSpan, tagArray1); 908 } 909 } 910 911 // write Mbelow, SMbelow, Mabove 912 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 913 output.writeMbelow(); 914 output.writeSMbelow(); 915 output.writeMabove(); 916 } 917 918 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { 919 if (baseLimit == prev + 2) { 920 output.writeChar(chars[prev], prev, tagArray0); 921 output.writeChar(chars[prev + 1], prev + 1, tagArray0); 922 } 923 924 output.writeVMabove(); 925 output.writeSMabove(); // FIXME: there are no SM's in these scripts... 926 } 927 928 // write post-base consonants 929 // FIXME: does this put the right tags on post-base consonants? 
930 if (baseConsonant != lastConsonant && !seenPreBaseForm) { 931 if (postBase <= lastConsonant) { 932 for (i = postBase; i <= lastConsonant; i += 1) { 933 output.writeChar(chars[i], i, tagArray3); 934 } 935 936 // write halant that was after base consonant 937 output.writeChar(chars[bcSpan], bcSpan, tagArray1); 938 } 939 940 // write the training halant, if there is one 941 if (lastConsonant < matra && classTable->isVirama(chars[matra])) { 942 output.writeChar(chars[matra], matra, tagArray4); 943 } 944 } 945 946 // write Mpost 947 if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { 948 output.writeMpost(); 949 } 950 951 output.writeLengthMark(); 952 output.writeAlLakuna(); 953 954 // write reph 955 if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { 956 if (baseLimit == prev + 2) { 957 output.writeChar(chars[prev], prev, tagArray0); 958 output.writeChar(chars[prev + 1], prev + 1, tagArray0); 959 } 960 961 output.writeVMabove(); 962 output.writeSMabove(); 963 } 964 965 output.writeVMpost(); 966 967 break; 968 } 969 970 default: 971 break; 972 } 973 974 prev = syllable; 975 } 976 977 *outMPreFixups = mpreFixups; 978 979 return output.getOutputIndex(); 980 } 981 982 void IndicReordering::adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success) 983 { 984 if (mpreFixups != NULL) { 985 mpreFixups->apply(glyphStorage, success); 986 987 delete mpreFixups; 988 } 989 } 990 991 void IndicReordering::applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count) 992 { 993 LEErrorCode success = LE_NO_ERROR; 994 995 // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the 996 // GPOS table lookups 997 998 for ( le_int32 i = 0 ; i < count ; i++ ) { 999 glyphStorage.setAuxData(i, ( presentationFormsMask | positioningFormsMask ), success); 1000 } 1001 1002 } 1003 void IndicReordering::finalReordering(LEGlyphStorage &glyphStorage, le_int32 count) 1004 { 1005 LEErrorCode success = 
LE_NO_ERROR; 1006 1007 // Reposition REPH as appropriate 1008 1009 for ( le_int32 i = 0 ; i < count ; i++ ) { 1010 1011 le_int32 tmpAuxData = glyphStorage.getAuxData(i,success); 1012 LEGlyphID tmpGlyph = glyphStorage.getGlyphID(i,success); 1013 1014 if ( ( tmpGlyph != NO_GLYPH ) && (tmpAuxData & rephConsonantMask) && !(tmpAuxData & repositionedGlyphMask)) { 1015 1016 le_bool targetPositionFound = false; 1017 le_int32 targetPosition = i+1; 1018 le_int32 baseConsonantData; 1019 1020 while (!targetPositionFound) { 1021 tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); 1022 tmpAuxData = glyphStorage.getAuxData(targetPosition,success); 1023 1024 if ( tmpAuxData & baseConsonantMask ) { 1025 baseConsonantData = tmpAuxData; 1026 targetPositionFound = true; 1027 } else { 1028 targetPosition++; 1029 } 1030 } 1031 1032 // Make sure we are not putting the reph into an empty hole 1033 1034 le_bool targetPositionHasGlyph = false; 1035 while (!targetPositionHasGlyph) { 1036 tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); 1037 if ( tmpGlyph != NO_GLYPH ) { 1038 targetPositionHasGlyph = true; 1039 } else { 1040 targetPosition--; 1041 } 1042 } 1043 1044 // Make sure that REPH is positioned after any above base or post base matras 1045 // 1046 le_bool checkMatraDone = false; 1047 le_int32 checkMatraPosition = targetPosition+1; 1048 while ( !checkMatraDone ) { 1049 tmpAuxData = glyphStorage.getAuxData(checkMatraPosition,success); 1050 if ( checkMatraPosition >= count || ( (tmpAuxData ^ baseConsonantData) & LE_GLYPH_GROUP_MASK)) { 1051 checkMatraDone = true; 1052 continue; 1053 } 1054 if ( (tmpAuxData & matraMask) && 1055 (((tmpAuxData & markPositionMask) == aboveBasePosition) || 1056 ((tmpAuxData & markPositionMask) == postBasePosition))) { 1057 targetPosition = checkMatraPosition; 1058 } 1059 checkMatraPosition++; 1060 } 1061 1062 glyphStorage.moveGlyph(i,targetPosition,repositionedGlyphMask); 1063 } 1064 } 1065 } 1066 1067 1068 le_int32 
IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, 1069 LEUnicode *outChars, LEGlyphStorage &glyphStorage) 1070 { 1071 const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); 1072 1073 DynamicProperties dynProps[INDIC_BLOCK_SIZE]; 1074 IndicReordering::getDynamicProperties(dynProps,classTable); 1075 1076 IndicReorderingOutput output(outChars, glyphStorage, NULL); 1077 le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0; 1078 //le_bool lastInWord = FALSE; 1079 1080 while (beginSyllable < charCount) { 1081 le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount); 1082 1083 output.reset(); 1084 1085 // Find the First Consonant 1086 for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) { 1087 if ( classTable->isConsonant(chars[firstConsonant]) ) { 1088 break; 1089 } 1090 } 1091 1092 // Find the base consonant 1093 1094 baseConsonant = nextSyllable - 1; 1095 secondConsonant = firstConsonant; 1096 1097 // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm() 1098 1099 while ( baseConsonant > firstConsonant ) { 1100 if ( classTable->isConsonant(chars[baseConsonant]) && 1101 !classTable->hasBelowBaseForm(chars[baseConsonant]) && 1102 !classTable->hasPostBaseForm(chars[baseConsonant]) ) { 1103 break; 1104 } 1105 else { 1106 if ( classTable->isConsonant(chars[baseConsonant]) ) { 1107 secondConsonant = baseConsonant; 1108 } 1109 baseConsonant--; 1110 } 1111 } 1112 1113 // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one 1114 // consonant, Ra is excluced from candidates for base consonants 1115 1116 if ( classTable->isReph(chars[beginSyllable]) && 1117 beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) && 1118 secondConsonant != firstConsonant) { 1119 baseConsonant = secondConsonant; 1120 } 1121 1122 // Populate the 
output 1123 for ( i = beginSyllable ; i < nextSyllable ; i++ ) { 1124 1125 // Handle invalid combinartions 1126 1127 if ( classTable->isVirama(chars[beginSyllable]) || 1128 classTable->isMatra(chars[beginSyllable]) || 1129 classTable->isVowelModifier(chars[beginSyllable]) || 1130 classTable->isNukta(chars[beginSyllable]) ) { 1131 output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask); 1132 inv_count++; 1133 } 1134 output.writeChar(chars[i],i, basicShapingFormsMask); 1135 1136 } 1137 1138 // Adjust features and set syllable structure bits 1139 1140 for ( i = beginSyllable ; i < nextSyllable ; i++ ) { 1141 1142 FeatureMask outMask = output.getFeatures(i+inv_count); 1143 FeatureMask saveMask = outMask; 1144 1145 // Since reph can only validly occur at the beginning of a syllable 1146 // We only apply it to the first 2 characters in the syllable, to keep it from 1147 // conflicting with other features ( i.e. rkrf ) 1148 1149 // TODO : Use the dynamic property for determining isREPH 1150 if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) && 1151 i+1 < nextSyllable && classTable->isVirama(chars[i+1])) { 1152 outMask |= rphfFeatureMask; 1153 outMask |= rephConsonantMask; 1154 output.setFeatures(i+1+inv_count,outMask); 1155 1156 } 1157 1158 if ( i == baseConsonant ) { 1159 outMask |= baseConsonantMask; 1160 } 1161 1162 if ( classTable->isMatra(chars[i])) { 1163 outMask |= matraMask; 1164 if ( classTable->hasAboveBaseForm(chars[i])) { 1165 outMask |= aboveBasePosition; 1166 } else if ( classTable->hasBelowBaseForm(chars[i])) { 1167 outMask |= belowBasePosition; 1168 } 1169 } 1170 1171 // Don't apply half form to virama that stands alone at the end of a syllable 1172 // to prevent half forms from forming when syllable ends with virama 1173 1174 if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) { 1175 outMask ^= halfFeatureMask; 1176 if ( classTable->isConsonant(chars[i-1]) ) { 1177 FeatureMask tmp = 
output.getFeatures(i-1+inv_count); 1178 tmp ^= halfFeatureMask; 1179 output.setFeatures(i-1+inv_count,tmp); 1180 } 1181 } 1182 1183 if ( outMask != saveMask ) { 1184 output.setFeatures(i+inv_count,outMask); 1185 } 1186 } 1187 1188 output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count); 1189 1190 beginSyllable = nextSyllable; 1191 } 1192 1193 1194 return output.getOutputIndex(); 1195 } 1196 1197 1198 void IndicReordering::getDynamicProperties( DynamicProperties *, const IndicClassTable *classTable ) { 1199 1200 1201 LEUnicode currentChar; 1202 LEUnicode virama; 1203 LEUnicode workChars[2]; 1204 LEGlyphStorage workGlyphs; 1205 1206 IndicReorderingOutput workOutput(workChars, workGlyphs, NULL); 1207 1208 //le_int32 offset = 0; 1209 1210 // First find the relevant virama for the script we are dealing with 1211 1212 for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { 1213 if ( classTable->isVirama(currentChar)) { 1214 virama = currentChar; 1215 break; 1216 } 1217 } 1218 1219 for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { 1220 if ( classTable->isConsonant(currentChar)) { 1221 workOutput.reset(); 1222 } 1223 } 1224 1225 1226 } 1227 1228 U_NAMESPACE_END 1229