1 /* 2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 3 * 4 * This is part of HarfBuzz, an OpenType Layout engine library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 */ 24 25 #include "harfbuzz-shaper.h" 26 #include "harfbuzz-shaper-private.h" 27 28 #include <assert.h> 29 #include <stdio.h> 30 31 /* 32 // Vocabulary 33 // Base -> A consonant or an independent vowel in its full (not subscript) form. It is the 34 // center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels, 35 // split vowels, signs... but there is only one base in a syllable, it has to be coded as 36 // the first character of the syllable. 37 // split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant). 38 // Khmer language has five of them. Khmer split vowels either have one part before the 39 // base and one after the base or they have a part before the base and a part above the base. 40 // The first part of all Khmer split vowels is the same character, identical to 41 // the glyph of Khmer dependent vowel SRA EI 42 // coeng --> modifier used in Khmer to construct coeng (subscript) consonants 43 // Differently than indian languages, the coeng modifies the consonant that follows it, 44 // not the one preceding it Each consonant has two forms, the base form and the subscript form 45 // the base form is the normal one (using the consonants code-point), the subscript form is 46 // displayed when the combination coeng + consonant is encountered. 47 // Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant 48 // Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO) 49 // Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) 50 // Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds 51 // if it is attached to a consonant of the first series or a consonant of the second series 52 // Most consonants have an equivalent in the other series, but some of theme exist only in 53 // one series (for example SA). If we want to use the consonant SA with a vowel sound that 54 // can only be done with a vowel sound that corresponds to a vowel accompanying a consonant 55 // of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN 56 // x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and 57 // MUSIKATOAN a second series consonant to have a first series vowel sound. 58 // Consonant shifter are both normally supercript marks, but, when they are followed by a 59 // superscript, they change shape and take the form of subscript dependent vowel SRA U. 60 // If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they 61 // should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should 62 // be placed after the coeng consonant. 63 // Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base 64 // Each vowel has its own position. Only one vowel per syllable is allowed. 65 // Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are 66 // Allowed in a syllable. 67 // 68 // 69 // order is important here! This order must be the same that is found in each horizontal 70 // line in the statetable for Khmer (see khmerStateTable) . 71 */ 72 enum KhmerCharClassValues { 73 CC_RESERVED = 0, 74 CC_CONSONANT = 1, /* Consonant of type 1 or independent vowel */ 75 CC_CONSONANT2 = 2, /* Consonant of type 2 */ 76 CC_CONSONANT3 = 3, /* Consonant of type 3 */ 77 CC_ZERO_WIDTH_NJ_MARK = 4, /* Zero Width non joiner character (0x200C) */ 78 CC_CONSONANT_SHIFTER = 5, 79 CC_ROBAT = 6, /* Khmer special diacritic accent -treated differently in state table */ 80 CC_COENG = 7, /* Subscript consonant combining character */ 81 CC_DEPENDENT_VOWEL = 8, 82 CC_SIGN_ABOVE = 9, 83 CC_SIGN_AFTER = 10, 84 CC_ZERO_WIDTH_J_MARK = 11, /* Zero width joiner character */ 85 CC_COUNT = 12 /* This is the number of character classes */ 86 }; 87 88 89 enum KhmerCharClassFlags { 90 CF_CLASS_MASK = 0x0000FFFF, 91 92 CF_CONSONANT = 0x01000000, /* flag to speed up comparing */ 93 CF_SPLIT_VOWEL = 0x02000000, /* flag for a split vowel -> the first part is added in front of the syllable */ 94 CF_DOTTED_CIRCLE = 0x04000000, /* add a dotted circle if a character with this flag is the first in a syllable */ 95 CF_COENG = 0x08000000, /* flag to speed up comparing */ 96 CF_SHIFTER = 0x10000000, /* flag to speed up comparing */ 97 CF_ABOVE_VOWEL = 0x20000000, /* flag to speed up comparing */ 98 99 /* position flags */ 100 CF_POS_BEFORE = 0x00080000, 101 CF_POS_BELOW = 0x00040000, 102 CF_POS_ABOVE = 0x00020000, 103 CF_POS_AFTER = 0x00010000, 104 CF_POS_MASK = 0x000f0000 105 }; 106 107 108 /* Characters that get referred to by name */ 109 enum KhmerChar { 110 C_SIGN_ZWNJ = 0x200C, 111 C_SIGN_ZWJ = 0x200D, 112 C_RO = 0x179A, 113 C_VOWEL_AA = 0x17B6, 114 C_SIGN_NIKAHIT = 0x17C6, 115 C_VOWEL_E = 0x17C1, 116 C_COENG = 0x17D2 117 }; 118 119 120 /* 121 // simple classes, they are used in the statetable (in this file) to control the length of a syllable 122 // they are also used to know where a character should be placed (location in reference to the base character) 123 // and also to know if a character, when independently displayed, should be displayed with a dotted-circle to 124 // indicate error in syllable construction 125 */ 126 enum { 127 _xx = CC_RESERVED, 128 _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE, 129 _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER, 130 _c1 = CC_CONSONANT | CF_CONSONANT, 131 _c2 = CC_CONSONANT2 | CF_CONSONANT, 132 _c3 = CC_CONSONANT3 | CF_CONSONANT, 133 _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE, 134 _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER, 135 _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE, 136 _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE, 137 _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL, 138 _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE, 139 _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE, 140 141 /* split vowel */ 142 _va = _da | CF_SPLIT_VOWEL, 143 _vr = _dr | CF_SPLIT_VOWEL 144 }; 145 146 147 /* 148 // Character class: a character class value 149 // ORed with character class flags. 150 */ 151 typedef unsigned long KhmerCharClass; 152 153 154 /* 155 // Character class tables 156 // _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs... 157 // _sa Sign placed above the base 158 // _sp Sign placed after the base 159 // _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants) 160 // _c2 Consonant of type 2 (only RO) 161 // _c3 Consonant of type 3 162 // _rb Khmer sign robat u17CC. combining mark for subscript consonants 163 // _cd Consonant-shifter 164 // _dl Dependent vowel placed before the base (left of the base) 165 // _db Dependent vowel placed below the base 166 // _da Dependent vowel placed above the base 167 // _dr Dependent vowel placed behind the base (right of the base) 168 // _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following 169 // it to create a subscript consonant or independent vowel 170 // _va Khmer split vowel in which the first part is before the base and the second one above the base 171 // _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base 172 */ 173 static const KhmerCharClass khmerCharClasses[] = { 174 _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */ 175 _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */ 176 _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */ 177 _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */ 178 _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */ 179 _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx /* 17D0 - 17DF */ 180 }; 181 182 /* this enum must reflect the range of khmerCharClasses */ 183 enum KhmerCharClassesRange { 184 KhmerFirstChar = 0x1780, 185 KhmerLastChar = 0x17df 186 }; 187 188 /* 189 // Below we define how a character in the input string is either in the khmerCharClasses table 190 // (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear 191 // within the syllable, but are not in the table) we also get their type back, or an unknown object 192 // in which case we get _xx (CC_RESERVED) back 193 */ 194 static KhmerCharClass getKhmerCharClass(HB_UChar16 uc) 195 { 196 if (uc == C_SIGN_ZWJ) { 197 return CC_ZERO_WIDTH_J_MARK; 198 } 199 200 if (uc == C_SIGN_ZWNJ) { 201 return CC_ZERO_WIDTH_NJ_MARK; 202 } 203 204 if (uc < KhmerFirstChar || uc > KhmerLastChar) { 205 return CC_RESERVED; 206 } 207 208 return khmerCharClasses[uc - KhmerFirstChar]; 209 } 210 211 212 /* 213 // The stateTable is used to calculate the end (the length) of a well 214 // formed Khmer Syllable. 215 // 216 // Each horizontal line is ordered exactly the same way as the values in KhmerClassTable 217 // CharClassValues. This coincidence of values allows the follow up of the table. 218 // 219 // Each line corresponds to a state, which does not necessarily need to be a type 220 // of component... for example, state 2 is a base, with is always a first character 221 // in the syllable, but the state could be produced a consonant of any type when 222 // it is the first character that is analysed (in ground state). 223 // 224 // Differentiating 3 types of consonants is necessary in order to 225 // forbid the use of certain combinations, such as having a second 226 // coeng after a coeng RO, 227 // The inexistent possibility of having a type 3 after another type 3 is permitted, 228 // eliminating it would very much complicate the table, and it does not create typing 229 // problems, as the case above. 230 // 231 // The table is quite complex, in order to limit the number of coeng consonants 232 // to 2 (by means of the table). 233 // 234 // There a peculiarity, as far as Unicode is concerned: 235 // - The consonant-shifter is considered in two possible different 236 // locations, the one considered in Unicode 3.0 and the one considered in 237 // Unicode 4.0. (there is a backwards compatibility problem in this standard). 238 // 239 // 240 // xx independent character, such as a number, punctuation sign or non-khmer char 241 // 242 // c1 Khmer consonant of type 1 or an independent vowel 243 // that is, a letter in which the subscript for is only under the 244 // base, not taking any space to the right or to the left 245 // 246 // c2 Khmer consonant of type 2, the coeng form takes space under 247 // and to the left of the base (only RO is of this type) 248 // 249 // c3 Khmer consonant of type 3. Its subscript form takes space under 250 // and to the right of the base. 251 // 252 // cs Khmer consonant shifter 253 // 254 // rb Khmer robat 255 // 256 // co coeng character (u17D2) 257 // 258 // dv dependent vowel (including split vowels, they are treated in the same way). 259 // even if dv is not defined above, the component that is really tested for is 260 // KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels 261 // 262 // zwj Zero Width joiner 263 // 264 // zwnj Zero width non joiner 265 // 266 // sa above sign 267 // 268 // sp post sign 269 // 270 // there are lines with equal content but for an easier understanding 271 // (and maybe change in the future) we did not join them 272 */ 273 static const signed char khmerStateTable[][CC_COUNT] = 274 { 275 /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */ 276 { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */ 277 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */ 278 {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */ 279 {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */ 280 {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */ 281 {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */ 282 {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */ 283 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */ 284 {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */ 285 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or type 3 after ceong */ 286 {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */ 287 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */ 288 {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */ 289 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */ 290 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */ 291 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */ 292 {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */ 293 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */ 294 {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */ 295 {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */ 296 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */ 297 }; 298 299 300 /* #define KHMER_DEBUG */ 301 #ifdef KHMER_DEBUG 302 #define KHDEBUG qDebug 303 #else 304 #define KHDEBUG if(0) printf 305 #endif 306 307 /* 308 // Given an input string of characters and a location in which to start looking 309 // calculate, using the state table, which one is the last character of the syllable 310 // that starts in the starting position. 311 */ 312 static int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid) 313 { 314 const HB_UChar16 *uc = s + start; 315 int state = 0; 316 int pos = start; 317 *invalid = FALSE; 318 319 while (pos < end) { 320 KhmerCharClass charClass = getKhmerCharClass(*uc); 321 if (pos == start) { 322 *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT); 323 } 324 state = khmerStateTable[state][charClass & CF_CLASS_MASK]; 325 326 KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state, 327 charClass, *uc ); 328 329 if (state < 0) { 330 break; 331 } 332 ++uc; 333 ++pos; 334 } 335 return pos; 336 } 337 338 #ifndef NO_OPENTYPE 339 static const HB_OpenTypeFeature khmer_features[] = { 340 { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty }, 341 { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty }, 342 { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty }, 343 { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty }, 344 { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty }, 345 { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty }, 346 { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty }, 347 { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty }, 348 { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty }, 349 { 0, 0 } 350 }; 351 #endif 352 353 354 static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item) 355 { 356 /* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length, 357 item->string->mid(item->from, item->length).toUtf8().data()); */ 358 359 int len = 0; 360 int syllableEnd = item->item.pos + item->item.length; 361 unsigned short reordered[16]; 362 unsigned char properties[16]; 363 enum { 364 AboveForm = 0x01, 365 PreForm = 0x02, 366 PostForm = 0x04, 367 BelowForm = 0x08 368 }; 369 #ifndef NO_OPENTYPE 370 const int availableGlyphs = item->num_glyphs; 371 #endif 372 int coengRo; 373 int i; 374 375 /* according to the specs this is the max length one can get 376 ### the real value should be smaller */ 377 assert(item->item.length < 13); 378 379 memset(properties, 0, 16*sizeof(unsigned char)); 380 381 #ifdef KHMER_DEBUG 382 qDebug("original:"); 383 for (int i = from; i < syllableEnd; i++) { 384 qDebug(" %d: %4x", i, string[i]); 385 } 386 #endif 387 388 /* 389 // write a pre vowel or the pre part of a split vowel first 390 // and look out for coeng + ro. RO is the only vowel of type 2, and 391 // therefore the only one that requires saving space before the base. 392 */ 393 coengRo = -1; /* There is no Coeng Ro, if found this value will change */ 394 for (i = item->item.pos; i < syllableEnd; i += 1) { 395 KhmerCharClass charClass = getKhmerCharClass(item->string[i]); 396 397 /* if a split vowel, write the pre part. In Khmer the pre part 398 is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */ 399 if (charClass & CF_SPLIT_VOWEL) { 400 reordered[len] = C_VOWEL_E; 401 properties[len] = PreForm; 402 ++len; 403 break; /* there can be only one vowel */ 404 } 405 /* if a vowel with pos before write it out */ 406 if (charClass & CF_POS_BEFORE) { 407 reordered[len] = item->string[i]; 408 properties[len] = PreForm; 409 ++len; 410 break; /* there can be only one vowel */ 411 } 412 /* look for coeng + ro and remember position 413 works because coeng + ro is always in front of a vowel (if there is a vowel) 414 and because CC_CONSONANT2 is enough to identify it, as it is the only consonant 415 with this flag */ 416 if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) && 417 ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) { 418 coengRo = i; 419 } 420 } 421 422 /* write coeng + ro if found */ 423 if (coengRo > -1) { 424 reordered[len] = C_COENG; 425 properties[len] = PreForm; 426 ++len; 427 reordered[len] = C_RO; 428 properties[len] = PreForm; 429 ++len; 430 } 431 432 /* 433 shall we add a dotted circle? 434 If in the position in which the base should be (first char in the string) there is 435 a character that has the Dotted circle flag (a character that cannot be a base) 436 then write a dotted circle */ 437 if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) { 438 reordered[len] = C_DOTTED_CIRCLE; 439 ++len; 440 } 441 442 /* copy what is left to the output, skipping before vowels and 443 coeng Ro if they are present */ 444 for (i = item->item.pos; i < syllableEnd; i += 1) { 445 HB_UChar16 uc = item->string[i]; 446 KhmerCharClass charClass = getKhmerCharClass(uc); 447 448 /* skip a before vowel, it was already processed */ 449 if (charClass & CF_POS_BEFORE) { 450 continue; 451 } 452 453 /* skip coeng + ro, it was already processed */ 454 if (i == coengRo) { 455 i += 1; 456 continue; 457 } 458 459 switch (charClass & CF_POS_MASK) 460 { 461 case CF_POS_ABOVE : 462 reordered[len] = uc; 463 properties[len] = AboveForm; 464 ++len; 465 break; 466 467 case CF_POS_AFTER : 468 reordered[len] = uc; 469 properties[len] = PostForm; 470 ++len; 471 break; 472 473 case CF_POS_BELOW : 474 reordered[len] = uc; 475 properties[len] = BelowForm; 476 ++len; 477 break; 478 479 default: 480 /* assign the correct flags to a coeng consonant 481 Consonants of type 3 are taged as Post forms and those type 1 as below forms */ 482 if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) { 483 unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ? 484 PostForm : BelowForm; 485 reordered[len] = uc; 486 properties[len] = property; 487 ++len; 488 i += 1; 489 reordered[len] = item->string[i]; 490 properties[len] = property; 491 ++len; 492 break; 493 } 494 495 /* if a shifter is followed by an above vowel change the shifter to below form, 496 an above vowel can have two possible positions i + 1 or i + 3 497 (position i+1 corresponds to unicode 3, position i+3 to Unicode 4) 498 and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two 499 different positions, right after the shifter or after a vowel (Unicode 4) */ 500 if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) { 501 if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) { 502 reordered[len] = uc; 503 properties[len] = BelowForm; 504 ++len; 505 break; 506 } 507 if (i + 2 < syllableEnd && 508 (item->string[i+1] == C_VOWEL_AA) && 509 (item->string[i+2] == C_SIGN_NIKAHIT) ) 510 { 511 reordered[len] = uc; 512 properties[len] = BelowForm; 513 ++len; 514 break; 515 } 516 if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) { 517 reordered[len] = uc; 518 properties[len] = BelowForm; 519 ++len; 520 break; 521 } 522 if (i + 4 < syllableEnd && 523 (item->string[i+3] == C_VOWEL_AA) && 524 (item->string[i+4] == C_SIGN_NIKAHIT) ) 525 { 526 reordered[len] = uc; 527 properties[len] = BelowForm; 528 ++len; 529 break; 530 } 531 } 532 533 /* default - any other characters */ 534 reordered[len] = uc; 535 ++len; 536 break; 537 } /* switch */ 538 } /* for */ 539 540 if (!item->font->klass->convertStringToGlyphIndices(item->font, 541 reordered, len, 542 item->glyphs, &item->num_glyphs, 543 item->item.bidiLevel % 2)) 544 return FALSE; 545 546 547 KHDEBUG("after shaping: len=%d", len); 548 for (i = 0; i < len; i++) { 549 item->attributes[i].mark = FALSE; 550 item->attributes[i].clusterStart = FALSE; 551 item->attributes[i].justification = 0; 552 item->attributes[i].zeroWidth = FALSE; 553 KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]); 554 } 555 556 /* now we have the syllable in the right order, and can start running it through open type. */ 557 558 #ifndef NO_OPENTYPE 559 if (openType) { 560 hb_uint32 where[16]; 561 for (i = 0; i < len; ++i) { 562 where[i] = ~(PreSubstProperty 563 | BelowSubstProperty 564 | AboveSubstProperty 565 | PostSubstProperty 566 | CligProperty 567 | PositioningProperties); 568 if (properties[i] == PreForm) 569 where[i] &= ~PreFormProperty; 570 else if (properties[i] == BelowForm) 571 where[i] &= ~BelowFormProperty; 572 else if (properties[i] == AboveForm) 573 where[i] &= ~AboveFormProperty; 574 else if (properties[i] == PostForm) 575 where[i] &= ~PostFormProperty; 576 } 577 578 HB_OpenTypeShape(item, where); 579 if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE)) 580 return FALSE; 581 } else 582 #endif 583 { 584 KHDEBUG("Not using openType"); 585 HB_HeuristicPosition(item); 586 } 587 588 item->attributes[0].clusterStart = TRUE; 589 return TRUE; 590 } 591 592 HB_Bool HB_KhmerShape(HB_ShaperItem *item) 593 { 594 HB_Bool openType = FALSE; 595 unsigned short *logClusters = item->log_clusters; 596 int i; 597 598 HB_ShaperItem syllable = *item; 599 int first_glyph = 0; 600 601 int sstart = item->item.pos; 602 int end = sstart + item->item.length; 603 604 assert(item->item.script == HB_Script_Khmer); 605 606 #ifndef NO_OPENTYPE 607 openType = HB_SelectScript(item, khmer_features); 608 #endif 609 610 KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length); 611 while (sstart < end) { 612 HB_Bool invalid; 613 int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid); 614 KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, 615 invalid ? "TRUE" : "FALSE"); 616 syllable.item.pos = sstart; 617 syllable.item.length = send-sstart; 618 syllable.glyphs = item->glyphs + first_glyph; 619 syllable.attributes = item->attributes + first_glyph; 620 syllable.offsets = item->offsets + first_glyph; 621 syllable.advances = item->advances + first_glyph; 622 syllable.num_glyphs = item->num_glyphs - first_glyph; 623 if (!khmer_shape_syllable(openType, &syllable)) { 624 KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); 625 item->num_glyphs += syllable.num_glyphs; 626 return FALSE; 627 } 628 /* fix logcluster array */ 629 KHDEBUG("syllable:"); 630 for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i) 631 KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]); 632 KHDEBUG(" logclusters:"); 633 for (i = sstart; i < send; ++i) { 634 KHDEBUG(" %d -> glyph %d", i, first_glyph); 635 logClusters[i-item->item.pos] = first_glyph; 636 } 637 sstart = send; 638 first_glyph += syllable.num_glyphs; 639 } 640 item->num_glyphs = first_glyph; 641 return TRUE; 642 } 643 644 void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) 645 { 646 int end = from + len; 647 const HB_UChar16 *uc = text + from; 648 hb_uint32 i = 0; 649 HB_UNUSED(script); 650 attributes += from; 651 while ( i < len ) { 652 HB_Bool invalid; 653 hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from; 654 655 attributes[i].charStop = TRUE; 656 657 if ( boundary > len-1 ) boundary = len; 658 i++; 659 while ( i < boundary ) { 660 attributes[i].charStop = FALSE; 661 ++uc; 662 ++i; 663 } 664 assert( i == boundary ); 665 } 666 } 667 668