1 /* 2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 3 * 4 * This is part of HarfBuzz, an OpenType Layout engine library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 */ 24 25 #include "harfbuzz-shaper.h" 26 #include "harfbuzz-shaper-private.h" 27 28 #include <assert.h> 29 #include <stdio.h> 30 31 #define FLAG(x) (1 << (x)) 32 33 static HB_Bool isLetter(HB_UChar16 ucs) 34 { 35 const int test = FLAG(HB_Letter_Uppercase) | 36 FLAG(HB_Letter_Lowercase) | 37 FLAG(HB_Letter_Titlecase) | 38 FLAG(HB_Letter_Modifier) | 39 FLAG(HB_Letter_Other); 40 return FLAG(HB_GetUnicodeCharCategory(ucs)) & test; 41 } 42 43 static HB_Bool isMark(HB_UChar16 ucs) 44 { 45 const int test = FLAG(HB_Mark_NonSpacing) | 46 FLAG(HB_Mark_SpacingCombining) | 47 FLAG(HB_Mark_Enclosing); 48 return FLAG(HB_GetUnicodeCharCategory(ucs)) & test; 49 } 50 51 enum Form { 52 Invalid = 0x0, 53 UnknownForm = Invalid, 54 Consonant, 55 Nukta, 56 Halant, 57 Matra, 58 VowelMark, 59 StressMark, 60 IndependentVowel, 61 LengthMark, 62 Control, 63 Other 64 }; 65 66 static const unsigned char indicForms[0xe00-0x900] = { 67 // Devangari 68 Invalid, VowelMark, VowelMark, VowelMark, 69 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 70 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 71 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 72 73 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 74 IndependentVowel, Consonant, Consonant, Consonant, 75 Consonant, Consonant, Consonant, Consonant, 76 Consonant, Consonant, Consonant, Consonant, 77 78 Consonant, Consonant, Consonant, Consonant, 79 Consonant, Consonant, Consonant, Consonant, 80 Consonant, Consonant, Consonant, Consonant, 81 Consonant, Consonant, Consonant, Consonant, 82 83 Consonant, Consonant, Consonant, Consonant, 84 Consonant, Consonant, Consonant, Consonant, 85 Consonant, Consonant, UnknownForm, UnknownForm, 86 Nukta, Other, Matra, Matra, 87 88 Matra, Matra, Matra, Matra, 89 Matra, Matra, Matra, Matra, 90 Matra, Matra, Matra, Matra, 91 Matra, Halant, UnknownForm, UnknownForm, 92 93 Other, StressMark, StressMark, StressMark, 94 StressMark, UnknownForm, UnknownForm, UnknownForm, 95 Consonant, Consonant, Consonant, Consonant, 96 Consonant, Consonant, Consonant, Consonant, 97 98 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 99 Other, Other, Other, Other, 100 Other, Other, Other, Other, 101 Other, Other, Other, Other, 102 103 Other, Other, Other, Other, 104 Other, Other, Other, Other, 105 Other, Other, Other, Consonant, 106 Consonant, Consonant /* ??? */, Consonant, Consonant, 107 108 // Bengali 109 Invalid, VowelMark, VowelMark, VowelMark, 110 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 111 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 112 IndependentVowel, Invalid, Invalid, IndependentVowel, 113 114 IndependentVowel, Invalid, Invalid, IndependentVowel, 115 IndependentVowel, Consonant, Consonant, Consonant, 116 Consonant, Consonant, Consonant, Consonant, 117 Consonant, Consonant, Consonant, Consonant, 118 119 Consonant, Consonant, Consonant, Consonant, 120 Consonant, Consonant, Consonant, Consonant, 121 Consonant, Invalid, Consonant, Consonant, 122 Consonant, Consonant, Consonant, Consonant, 123 124 Consonant, Invalid, Consonant, Invalid, 125 Invalid, Invalid, Consonant, Consonant, 126 Consonant, Consonant, UnknownForm, UnknownForm, 127 Nukta, Other, Matra, Matra, 128 129 Matra, Matra, Matra, Matra, 130 Matra, Invalid, Invalid, Matra, 131 Matra, Invalid, Invalid, Matra, 132 Matra, Halant, Consonant, UnknownForm, 133 134 Invalid, Invalid, Invalid, Invalid, 135 Invalid, Invalid, Invalid, VowelMark, 136 Invalid, Invalid, Invalid, Invalid, 137 Consonant, Consonant, Invalid, Consonant, 138 139 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 140 Other, Other, Other, Other, 141 Other, Other, Other, Other, 142 Other, Other, Other, Other, 143 144 Consonant, Consonant, Other, Other, 145 Other, Other, Other, Other, 146 Other, Other, Other, Other, 147 Other, Other, Other, Other, 148 149 // Gurmukhi 150 Invalid, VowelMark, VowelMark, VowelMark, 151 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 152 IndependentVowel, IndependentVowel, IndependentVowel, Invalid, 153 Invalid, Invalid, Invalid, IndependentVowel, 154 155 IndependentVowel, Invalid, Invalid, IndependentVowel, 156 IndependentVowel, Consonant, Consonant, Consonant, 157 Consonant, Consonant, Consonant, Consonant, 158 Consonant, Consonant, Consonant, Consonant, 159 160 Consonant, Consonant, Consonant, Consonant, 161 Consonant, Consonant, Consonant, Consonant, 162 Consonant, Invalid, Consonant, Consonant, 163 Consonant, Consonant, Consonant, Consonant, 164 165 Consonant, Invalid, Consonant, Consonant, 166 Invalid, Consonant, Consonant, Invalid, 167 Consonant, Consonant, UnknownForm, UnknownForm, 168 Nukta, Other, Matra, Matra, 169 170 Matra, Matra, Matra, Invalid, 171 Invalid, Invalid, Invalid, Matra, 172 Matra, Invalid, Invalid, Matra, 173 Matra, Halant, UnknownForm, UnknownForm, 174 175 Invalid, Invalid, Invalid, Invalid, 176 Invalid, UnknownForm, UnknownForm, UnknownForm, 177 Invalid, Consonant, Consonant, Consonant, 178 Consonant, Invalid, Consonant, Invalid, 179 180 Other, Other, Invalid, Invalid, 181 Other, Other, Other, Other, 182 Other, Other, Other, Other, 183 Other, Other, Other, Other, 184 185 StressMark, StressMark, Consonant, Consonant, 186 Other, Other, Other, Other, 187 Other, Other, Other, Other, 188 Other, Other, Other, Other, 189 190 // Gujarati 191 Invalid, VowelMark, VowelMark, VowelMark, 192 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 193 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 194 IndependentVowel, IndependentVowel, Invalid, IndependentVowel, 195 196 IndependentVowel, IndependentVowel, Invalid, IndependentVowel, 197 IndependentVowel, Consonant, Consonant, Consonant, 198 Consonant, Consonant, Consonant, Consonant, 199 Consonant, Consonant, Consonant, Consonant, 200 201 Consonant, Consonant, Consonant, Consonant, 202 Consonant, Consonant, Consonant, Consonant, 203 Consonant, Invalid, Consonant, Consonant, 204 Consonant, Consonant, Consonant, Consonant, 205 206 Consonant, Invalid, Consonant, Consonant, 207 Invalid, Consonant, Consonant, Consonant, 208 Consonant, Consonant, UnknownForm, UnknownForm, 209 Nukta, Other, Matra, Matra, 210 211 Matra, Matra, Matra, Matra, 212 Matra, Matra, Invalid, Matra, 213 Matra, Matra, Invalid, Matra, 214 Matra, Halant, UnknownForm, UnknownForm, 215 216 Other, UnknownForm, UnknownForm, UnknownForm, 217 UnknownForm, UnknownForm, UnknownForm, UnknownForm, 218 UnknownForm, UnknownForm, UnknownForm, UnknownForm, 219 UnknownForm, UnknownForm, UnknownForm, UnknownForm, 220 221 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 222 Other, Other, Other, Other, 223 Other, Other, Other, Other, 224 Other, Other, Other, Other, 225 226 Other, Other, Other, Other, 227 Other, Other, Other, Other, 228 Other, Other, Other, Other, 229 Other, Other, Other, Other, 230 231 // Oriya 232 Invalid, VowelMark, VowelMark, VowelMark, 233 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 234 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 235 IndependentVowel, Invalid, Invalid, IndependentVowel, 236 237 IndependentVowel, Invalid, Invalid, IndependentVowel, 238 IndependentVowel, Consonant, Consonant, Consonant, 239 Consonant, Consonant, Consonant, Consonant, 240 Consonant, Consonant, Consonant, Consonant, 241 242 Consonant, Consonant, Consonant, Consonant, 243 Consonant, Consonant, Consonant, Consonant, 244 Consonant, Invalid, Consonant, Consonant, 245 Consonant, Consonant, Consonant, Consonant, 246 247 Consonant, Invalid, Consonant, Consonant, 248 Invalid, Consonant, Consonant, Consonant, 249 Consonant, Consonant, UnknownForm, UnknownForm, 250 Nukta, Other, Matra, Matra, 251 252 Matra, Matra, Matra, Matra, 253 Invalid, Invalid, Invalid, Matra, 254 Matra, Invalid, Invalid, Matra, 255 Matra, Halant, UnknownForm, UnknownForm, 256 257 Other, Invalid, Invalid, Invalid, 258 Invalid, UnknownForm, LengthMark, LengthMark, 259 Invalid, Invalid, Invalid, Invalid, 260 Consonant, Consonant, Invalid, Consonant, 261 262 IndependentVowel, IndependentVowel, Invalid, Invalid, 263 Invalid, Invalid, Other, Other, 264 Other, Other, Other, Other, 265 Other, Other, Other, Other, 266 267 Other, Consonant, Other, Other, 268 Other, Other, Other, Other, 269 Other, Other, Other, Other, 270 Other, Other, Other, Other, 271 272 //Tamil 273 Invalid, Invalid, VowelMark, Other, 274 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 275 IndependentVowel, IndependentVowel, IndependentVowel, Invalid, 276 Invalid, Invalid, IndependentVowel, IndependentVowel, 277 278 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 279 IndependentVowel, Consonant, Invalid, Invalid, 280 Invalid, Consonant, Consonant, Invalid, 281 Consonant, Invalid, Consonant, Consonant, 282 283 Invalid, Invalid, Invalid, Consonant, 284 Consonant, Invalid, Invalid, Invalid, 285 Consonant, Consonant, Consonant, Invalid, 286 Invalid, Invalid, Consonant, Consonant, 287 288 Consonant, Consonant, Consonant, Consonant, 289 Consonant, Consonant, Consonant, Consonant, 290 Consonant, Consonant, UnknownForm, UnknownForm, 291 Invalid, Invalid, Matra, Matra, 292 293 Matra, Matra, Matra, Invalid, 294 Invalid, Invalid, Matra, Matra, 295 Matra, Invalid, Matra, Matra, 296 Matra, Halant, Invalid, Invalid, 297 298 Invalid, Invalid, Invalid, Invalid, 299 Invalid, Invalid, Invalid, LengthMark, 300 Invalid, Invalid, Invalid, Invalid, 301 Invalid, Invalid, Invalid, Invalid, 302 303 Invalid, Invalid, Invalid, Invalid, 304 Invalid, Invalid, Other, Other, 305 Other, Other, Other, Other, 306 Other, Other, Other, Other, 307 308 Other, Other, Other, Other, 309 Other, Other, Other, Other, 310 Other, Other, Other, Other, 311 Other, Other, Other, Other, 312 313 // Telugu 314 Invalid, VowelMark, VowelMark, VowelMark, 315 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 316 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 317 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 318 319 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 320 IndependentVowel, Consonant, Consonant, Consonant, 321 Consonant, Consonant, Consonant, Consonant, 322 Consonant, Consonant, Consonant, Consonant, 323 324 Consonant, Consonant, Consonant, Consonant, 325 Consonant, Consonant, Consonant, Consonant, 326 Consonant, Invalid, Consonant, Consonant, 327 Consonant, Consonant, Consonant, Consonant, 328 329 Consonant, Consonant, Consonant, Consonant, 330 Invalid, Consonant, Consonant, Consonant, 331 Consonant, Consonant, UnknownForm, UnknownForm, 332 Invalid, Invalid, Matra, Matra, 333 334 Matra, Matra, Matra, Matra, 335 Matra, Invalid, Matra, Matra, 336 Matra, Invalid, Matra, Matra, 337 Matra, Halant, Invalid, Invalid, 338 339 Invalid, Invalid, Invalid, Invalid, 340 Invalid, LengthMark, Matra, Invalid, 341 Invalid, Invalid, Invalid, Invalid, 342 Invalid, Invalid, Invalid, Invalid, 343 344 IndependentVowel, IndependentVowel, Invalid, Invalid, 345 Invalid, Invalid, Other, Other, 346 Other, Other, Other, Other, 347 Other, Other, Other, Other, 348 349 Other, Other, Other, Other, 350 Other, Other, Other, Other, 351 Other, Other, Other, Other, 352 Other, Other, Other, Other, 353 354 // Kannada 355 Invalid, Invalid, VowelMark, VowelMark, 356 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 357 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 358 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 359 360 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 361 IndependentVowel, Consonant, Consonant, Consonant, 362 Consonant, Consonant, Consonant, Consonant, 363 Consonant, Consonant, Consonant, Consonant, 364 365 Consonant, Consonant, Consonant, Consonant, 366 Consonant, Consonant, Consonant, Consonant, 367 Consonant, Invalid, Consonant, Consonant, 368 Consonant, Consonant, Consonant, Consonant, 369 370 Consonant, Consonant, Consonant, Consonant, 371 Invalid, Consonant, Consonant, Consonant, 372 Consonant, Consonant, UnknownForm, UnknownForm, 373 Nukta, Other, Matra, Matra, 374 375 Matra, Matra, Matra, Matra, 376 Matra, Invalid, Matra, Matra, 377 Matra, Invalid, Matra, Matra, 378 Matra, Halant, Invalid, Invalid, 379 380 Invalid, Invalid, Invalid, Invalid, 381 Invalid, LengthMark, LengthMark, Invalid, 382 Invalid, Invalid, Invalid, Invalid, 383 Invalid, Invalid, Consonant, Invalid, 384 385 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 386 Invalid, Invalid, Other, Other, 387 Other, Other, Other, Other, 388 Other, Other, Other, Other, 389 390 Other, Other, Other, Other, 391 Other, Other, Other, Other, 392 Other, Other, Other, Other, 393 Other, Other, Other, Other, 394 395 // Malayalam 396 Invalid, Invalid, VowelMark, VowelMark, 397 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 398 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 399 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 400 401 IndependentVowel, Invalid, IndependentVowel, IndependentVowel, 402 IndependentVowel, Consonant, Consonant, Consonant, 403 Consonant, Consonant, Consonant, Consonant, 404 Consonant, Consonant, Consonant, Consonant, 405 406 Consonant, Consonant, Consonant, Consonant, 407 Consonant, Consonant, Consonant, Consonant, 408 Consonant, Invalid, Consonant, Consonant, 409 Consonant, Consonant, Consonant, Consonant, 410 411 Consonant, Consonant, Consonant, Consonant, 412 Consonant, Consonant, Consonant, Consonant, 413 Consonant, Consonant, UnknownForm, UnknownForm, 414 Invalid, Invalid, Matra, Matra, 415 416 Matra, Matra, Matra, Matra, 417 Invalid, Invalid, Matra, Matra, 418 Matra, Invalid, Matra, Matra, 419 Matra, Halant, Invalid, Invalid, 420 421 Invalid, Invalid, Invalid, Invalid, 422 Invalid, Invalid, Invalid, Matra, 423 Invalid, Invalid, Invalid, Invalid, 424 Invalid, Invalid, Invalid, Invalid, 425 426 IndependentVowel, IndependentVowel, Invalid, Invalid, 427 Invalid, Invalid, Other, Other, 428 Other, Other, Other, Other, 429 Other, Other, Other, Other, 430 431 Other, Other, Other, Other, 432 Other, Other, Other, Other, 433 Other, Other, Other, Other, 434 Other, Other, Other, Other, 435 436 // Sinhala 437 Invalid, Invalid, VowelMark, VowelMark, 438 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 439 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 440 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 441 442 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 443 IndependentVowel, IndependentVowel, IndependentVowel, Invalid, 444 Invalid, Invalid, Consonant, Consonant, 445 Consonant, Consonant, Consonant, Consonant, 446 447 Consonant, Consonant, Consonant, Consonant, 448 Consonant, Consonant, Consonant, Consonant, 449 Consonant, Consonant, Consonant, Consonant, 450 Consonant, Consonant, Consonant, Consonant, 451 452 Consonant, Consonant, Invalid, Consonant, 453 Consonant, Consonant, Consonant, Consonant, 454 Consonant, Consonant, Consonant, Consonant, 455 Invalid, Consonant, Invalid, Invalid, 456 457 Consonant, Consonant, Consonant, Consonant, 458 Consonant, Consonant, Consonant, Invalid, 459 Invalid, Invalid, Halant, Invalid, 460 Invalid, Invalid, Invalid, Matra, 461 462 Matra, Matra, Matra, Matra, 463 Matra, Invalid, Matra, Invalid, 464 Matra, Matra, Matra, Matra, 465 Matra, Matra, Matra, Matra, 466 467 Invalid, Invalid, Invalid, Invalid, 468 Invalid, Invalid, Invalid, Invalid, 469 Invalid, Invalid, Invalid, Invalid, 470 Invalid, Invalid, Invalid, Invalid, 471 472 Invalid, Invalid, Matra, Matra, 473 Other, Other, Other, Other, 474 Other, Other, Other, Other, 475 Other, Other, Other, Other, 476 }; 477 478 enum Position { 479 None, 480 Pre, 481 Above, 482 Below, 483 Post, 484 Split, 485 Base, 486 Reph, 487 Vattu, 488 Inherit 489 }; 490 491 static const unsigned char indicPosition[0xe00-0x900] = { 492 // Devanagari 493 None, Above, Above, Post, 494 None, None, None, None, 495 None, None, None, None, 496 None, None, None, None, 497 498 None, None, None, None, 499 None, None, None, None, 500 None, None, None, None, 501 None, None, None, None, 502 503 None, None, None, None, 504 None, None, None, None, 505 None, None, None, None, 506 None, None, None, None, 507 508 Below, None, None, None, 509 None, None, None, None, 510 None, None, None, None, 511 None, None, Post, Pre, 512 513 Post, Below, Below, Below, 514 Below, Above, Above, Above, 515 Above, Post, Post, Post, 516 Post, None, None, None, 517 518 None, Above, Below, Above, 519 Above, None, None, None, 520 None, None, None, None, 521 None, None, None, None, 522 523 None, None, Below, Below, 524 None, None, None, None, 525 None, None, None, None, 526 None, None, None, None, 527 528 None, None, None, None, 529 None, None, None, None, 530 None, None, None, None, 531 None, None, None, None, 532 533 // Bengali 534 None, Above, Post, Post, 535 None, None, None, None, 536 None, None, None, None, 537 None, None, None, None, 538 539 None, None, None, None, 540 None, None, None, None, 541 None, None, None, None, 542 None, None, None, None, 543 544 None, None, None, None, 545 None, None, None, None, 546 None, None, None, None, 547 Below, None, None, Post, 548 549 Below, None, None, None, 550 None, None, None, None, 551 None, None, None, None, 552 Below, None, Post, Pre, 553 554 Post, Below, Below, Below, 555 Below, None, None, Pre, 556 Pre, None, None, Split, 557 Split, Below, None, None, 558 559 None, None, None, None, 560 None, None, None, Post, 561 None, None, None, None, 562 None, None, None, None, 563 564 None, None, Below, Below, 565 None, None, None, None, 566 None, None, None, None, 567 None, None, None, None, 568 569 Below, None, None, None, 570 None, None, None, None, 571 None, None, None, None, 572 None, None, None, None, 573 574 // Gurmukhi 575 None, Above, Above, Post, 576 None, None, None, None, 577 None, None, None, None, 578 None, None, None, None, 579 580 None, None, None, None, 581 None, None, None, None, 582 None, None, None, None, 583 None, None, None, None, 584 585 None, None, None, None, 586 None, None, None, None, 587 None, None, None, None, 588 None, None, None, Post, 589 590 Below, None, None, None, 591 None, Below, None, None, 592 None, Below, None, None, 593 Below, None, Post, Pre, 594 595 Post, Below, Below, None, 596 None, None, None, Above, 597 Above, None, None, Above, 598 Above, None, None, None, 599 600 None, None, None, None, 601 None, None, None, None, 602 None, None, None, None, 603 None, None, None, None, 604 605 None, None, None, None, 606 None, None, None, None, 607 None, None, None, None, 608 None, None, None, None, 609 610 Above, Above, None, None, 611 None, None, None, None, 612 None, None, None, None, 613 None, None, None, None, 614 615 // Gujarati 616 None, Above, Above, Post, 617 None, None, None, None, 618 None, None, None, None, 619 None, None, None, None, 620 621 None, None, None, None, 622 None, None, None, None, 623 None, None, None, None, 624 None, None, None, None, 625 626 None, None, None, None, 627 None, None, None, None, 628 None, None, None, None, 629 None, None, None, None, 630 631 Below, None, None, None, 632 None, None, None, None, 633 None, None, None, None, 634 None, None, Post, Pre, 635 636 Post, Below, Below, Below, 637 Below, Above, None, Above, 638 Above, Post, None, Post, 639 Post, None, None, None, 640 641 None, None, None, None, 642 None, None, None, None, 643 None, None, None, None, 644 None, None, None, None, 645 646 None, None, Below, Below, 647 None, None, None, None, 648 None, None, None, None, 649 None, None, None, None, 650 651 None, None, None, None, 652 None, None, None, None, 653 None, None, None, None, 654 None, None, None, None, 655 656 // Oriya 657 None, Above, Post, Post, 658 None, None, None, None, 659 None, None, None, None, 660 None, None, None, None, 661 662 None, None, None, None, 663 None, None, None, None, 664 None, None, None, None, 665 None, None, None, None, 666 667 None, None, None, None, 668 Below, None, None, None, 669 Below, None, None, None, 670 Below, Below, Below, Post, 671 672 Below, None, Below, Below, 673 None, None, None, None, 674 None, None, None, None, 675 None, None, Post, Above, 676 677 Post, Below, Below, Below, 678 None, None, None, Pre, 679 Split, None, None, Split, 680 Split, None, None, None, 681 682 None, None, None, None, 683 None, None, Above, Post, 684 None, None, None, None, 685 None, None, None, Post, 686 687 None, None, None, None, 688 None, None, None, None, 689 None, None, None, None, 690 None, None, None, None, 691 692 None, Below, None, None, 693 None, None, None, None, 694 None, None, None, None, 695 None, None, None, None, 696 697 // Tamil 698 None, None, Above, None, 699 None, None, None, None, 700 None, None, None, None, 701 None, None, None, None, 702 703 None, None, None, None, 704 None, None, None, None, 705 None, None, None, None, 706 None, None, None, None, 707 708 None, None, None, None, 709 None, None, None, None, 710 None, None, None, None, 711 None, None, None, None, 712 713 None, None, None, None, 714 None, None, None, None, 715 None, None, None, None, 716 None, None, Post, Post, 717 718 Above, Below, Below, None, 719 None, None, Pre, Pre, 720 Pre, None, Split, Split, 721 Split, Halant, None, None, 722 723 None, None, None, None, 724 None, None, None, Post, 725 None, None, None, None, 726 None, None, None, None, 727 728 None, None, None, None, 729 None, None, None, None, 730 None, None, None, None, 731 None, None, None, None, 732 733 None, None, None, None, 734 None, None, None, None, 735 None, None, None, None, 736 None, None, None, None, 737 738 // Telugu 739 None, Post, Post, Post, 740 None, None, None, None, 741 None, None, None, None, 742 None, None, None, None, 743 744 None, None, None, None, 745 None, Below, Below, Below, 746 Below, Below, Below, Below, 747 Below, Below, Below, Below, 748 749 Below, Below, Below, Below, 750 Below, Below, Below, Below, 751 Below, None, Below, Below, 752 Below, Below, Below, Below, 753 754 Below, None, Below, Below, 755 None, Below, Below, Below, 756 Below, Below, None, None, 757 None, None, Post, Above, 758 759 Above, Post, Post, Post, 760 Post, None, Above, Above, 761 Split, None, Post, Above, 762 Above, Halant, None, None, 763 764 None, None, None, None, 765 None, Above, Below, None, 766 None, None, None, None, 767 None, None, None, None, 768 769 None, None, None, None, 770 None, None, None, None, 771 None, None, None, None, 772 None, None, None, None, 773 774 None, None, None, None, 775 None, None, None, None, 776 None, None, None, None, 777 None, None, None, None, 778 779 // Kannada 780 None, None, Post, Post, 781 None, None, None, None, 782 None, None, None, None, 783 None, None, None, None, 784 785 None, None, None, None, 786 None, Below, Below, Below, 787 Below, Below, Below, Below, 788 Below, Below, Below, Below, 789 790 Below, Below, Below, Below, 791 Below, Below, Below, Below, 792 Below, Below, Below, Below, 793 Below, Below, Below, Below, 794 795 Below, None, Below, Below, 796 None, Below, Below, Below, 797 Below, Below, None, None, 798 None, None, Post, Above, 799 800 Split, Post, Post, Post, 801 Post, None, Above, Split, 802 Split, None, Split, Split, 803 Above, Halant, None, None, 804 805 None, None, None, None, 806 None, Post, Post, None, 807 None, None, None, None, 808 None, None, Below, None, 809 810 None, None, Below, Below, 811 None, None, None, None, 812 None, None, None, None, 813 None, None, None, None, 814 815 None, None, None, None, 816 None, None, None, None, 817 None, None, None, None, 818 None, None, None, None, 819 820 // Malayalam 821 None, None, Post, Post, 822 None, None, None, None, 823 None, None, None, None, 824 None, None, None, None, 825 826 None, None, None, None, 827 None, None, None, None, 828 None, None, None, None, 829 None, None, None, None, 830 831 None, None, None, None, 832 None, None, None, None, 833 None, None, None, None, 834 None, None, None, Post, 835 836 Post, None, Below, None, 837 None, Post, None, None, 838 None, None, None, None, 839 None, None, Post, Post, 840 841 Post, Post, Post, Post, 842 None, None, Pre, Pre, 843 Pre, None, Split, Split, 844 Split, Halant, None, None, 845 846 None, None, None, None, 847 None, None, None, Post, 848 None, None, None, None, 849 None, None, None, None, 850 851 None, None, None, None, 852 None, None, None, None, 853 None, None, None, None, 854 None, None, None, None, 855 856 None, None, None, None, 857 None, None, None, None, 858 None, None, None, None, 859 None, None, None, None, 860 861 // Sinhala 862 None, None, Post, Post, 863 None, None, None, None, 864 None, None, None, None, 865 None, None, None, None, 866 867 None, None, None, None, 868 None, None, None, None, 869 None, None, None, None, 870 None, None, None, None, 871 872 None, None, None, None, 873 None, None, None, None, 874 None, None, None, None, 875 None, None, None, None, 876 877 None, None, None, None, 878 None, None, None, None, 879 None, None, None, None, 880 None, None, None, None, 881 882 None, None, None, None, 883 None, None, None, None, 884 None, None, None, None, 885 None, None, None, Post, 886 887 Post, Post, Above, Above, 888 Below, None, Below, None, 889 Post, Pre, Split, Pre, 890 Split, Split, Split, Post, 891 892 None, None, None, None, 893 None, None, None, None, 894 None, None, None, None, 895 None, None, None, None, 896 897 None, None, Post, Post, 898 None, None, None, None, 899 None, None, None, None, 900 None, None, None, None 901 }; 902 903 static inline Form form(unsigned short uc) { 904 if (uc < 0x900 || uc > 0xdff) { 905 if (uc == 0x25cc) 906 return Consonant; 907 if (uc == 0x200c || uc == 0x200d) 908 return Control; 909 return Other; 910 } 911 return (Form)indicForms[uc-0x900]; 912 } 913 914 static inline Position indic_position(unsigned short uc) { 915 if (uc < 0x900 || uc > 0xdff) 916 return None; 917 return (Position) indicPosition[uc-0x900]; 918 } 919 920 921 enum IndicScriptProperties { 922 HasReph = 0x01, 923 HasSplit = 0x02 924 }; 925 926 const hb_uint8 scriptProperties[10] = { 927 // Devanagari, 928 HasReph, 929 // Bengali, 930 HasReph|HasSplit, 931 // Gurmukhi, 932 0, 933 // Gujarati, 934 HasReph, 935 // Oriya, 936 HasReph|HasSplit, 937 // Tamil, 938 HasSplit, 939 // Telugu, 940 HasSplit, 941 // Kannada, 942 HasSplit|HasReph, 943 // Malayalam, 944 HasSplit, 945 // Sinhala, 946 HasSplit 947 }; 948 949 struct IndicOrdering { 950 Form form; 951 Position position; 952 }; 953 954 static const IndicOrdering devanagari_order [] = { 955 { Consonant, Below }, 956 { Matra, Below }, 957 { VowelMark, Below }, 958 { StressMark, Below }, 959 { Matra, Above }, 960 { Matra, Post }, 961 { Consonant, Reph }, 962 { VowelMark, Above }, 963 { StressMark, Above }, 964 { VowelMark, Post }, 965 { (Form)0, None } 966 }; 967 968 static const IndicOrdering bengali_order [] = { 969 { Consonant, Below }, 970 { Matra, Below }, 971 { Matra, Above }, 972 { Consonant, Reph }, 973 { VowelMark, Above }, 974 { Consonant, Post }, 975 { Matra, Post }, 976 { VowelMark, Post }, 977 { (Form)0, None } 978 }; 979 980 static const IndicOrdering gurmukhi_order [] = { 981 { Consonant, Below }, 982 { Matra, Below }, 983 { Matra, Above }, 984 { Consonant, Post }, 985 { Matra, Post }, 986 { VowelMark, Above }, 987 { (Form)0, None } 988 }; 989 990 static const IndicOrdering tamil_order [] = { 991 { Matra, Above }, 992 { Matra, Post }, 993 { VowelMark, Post }, 994 { (Form)0, None } 995 }; 996 997 static const IndicOrdering telugu_order [] = { 998 { Matra, Above }, 999 { Matra, Below }, 1000 { Matra, Post }, 1001 { Consonant, Below }, 1002 { Consonant, Post }, 1003 { VowelMark, Post }, 1004 { (Form)0, None } 1005 }; 1006 1007 static const IndicOrdering kannada_order [] = { 1008 { Matra, Above }, 1009 { Matra, Post }, 1010 { Consonant, Below }, 1011 { Consonant, Post }, 1012 { LengthMark, Post }, 1013 { Consonant, Reph }, 1014 { VowelMark, Post }, 1015 { (Form)0, None } 1016 }; 1017 1018 static const IndicOrdering malayalam_order [] = { 1019 { Consonant, Below }, 1020 { Matra, Below }, 1021 { Consonant, Reph }, 1022 { Consonant, Post }, 1023 { Matra, Post }, 1024 { VowelMark, Post }, 1025 { (Form)0, None } 1026 }; 1027 1028 static const IndicOrdering sinhala_order [] = { 1029 { Matra, Below }, 1030 { Matra, Above }, 1031 { Matra, Post }, 1032 { VowelMark, Post }, 1033 { (Form)0, None } 1034 }; 1035 1036 static const IndicOrdering * const indic_order[] = { 1037 devanagari_order, // Devanagari 1038 bengali_order, // Bengali 1039 gurmukhi_order, // Gurmukhi 1040 devanagari_order, // Gujarati 1041 bengali_order, // Oriya 1042 tamil_order, // Tamil 1043 telugu_order, // Telugu 1044 kannada_order, // Kannada 1045 malayalam_order, // Malayalam 1046 sinhala_order // Sinhala 1047 }; 1048 1049 1050 1051 // vowel matras that have to be split into two parts. 1052 static const unsigned short split_matras[] = { 1053 // matra, split1, split2, split3 1054 1055 // bengalis 1056 0x9cb, 0x9c7, 0x9be, 0x0, 1057 0x9cc, 0x9c7, 0x9d7, 0x0, 1058 // oriya 1059 0xb48, 0xb47, 0xb56, 0x0, 1060 0xb4b, 0xb47, 0xb3e, 0x0, 1061 0xb4c, 0xb47, 0xb57, 0x0, 1062 // tamil 1063 0xbca, 0xbc6, 0xbbe, 0x0, 1064 0xbcb, 0xbc7, 0xbbe, 0x0, 1065 0xbcc, 0xbc6, 0xbd7, 0x0, 1066 // telugu 1067 0xc48, 0xc46, 0xc56, 0x0, 1068 // kannada 1069 0xcc0, 0xcbf, 0xcd5, 0x0, 1070 0xcc7, 0xcc6, 0xcd5, 0x0, 1071 0xcc8, 0xcc6, 0xcd6, 0x0, 1072 0xcca, 0xcc6, 0xcc2, 0x0, 1073 0xccb, 0xcc6, 0xcc2, 0xcd5, 1074 // malayalam 1075 0xd4a, 0xd46, 0xd3e, 0x0, 1076 0xd4b, 0xd47, 0xd3e, 0x0, 1077 0xd4c, 0xd46, 0xd57, 0x0, 1078 // sinhala 1079 0xdda, 0xdd9, 0xdca, 0x0, 1080 0xddc, 0xdd9, 0xdcf, 0x0, 1081 0xddd, 0xdd9, 0xdcf, 0xdca, 1082 0xdde, 0xdd9, 0xddf, 0x0, 1083 0xffff 1084 }; 1085 1086 static inline void splitMatra(unsigned short *reordered, int matra, int &len) 1087 { 1088 unsigned short matra_uc = reordered[matra]; 1089 //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]); 1090 1091 const unsigned short *split = split_matras; 1092 while (split[0] < matra_uc) 1093 split += 4; 1094 1095 assert(*split == matra_uc); 1096 ++split; 1097 1098 int added_chars = split[2] == 0x0 ? 1 : 2; 1099 1100 memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short)); 1101 reordered[matra] = split[0]; 1102 reordered[matra+1] = split[1]; 1103 if(added_chars == 2) 1104 reordered[matra+2] = split[2]; 1105 len += added_chars; 1106 } 1107 1108 #ifndef NO_OPENTYPE 1109 static const HB_OpenTypeFeature indic_features[] = { 1110 { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty }, 1111 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, 1112 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, 1113 { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty }, 1114 { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty }, 1115 { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty }, 1116 { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty }, 1117 { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty }, 1118 { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty }, 1119 { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty }, 1120 { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty }, 1121 { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty }, 1122 { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty }, 1123 { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty }, 1124 { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty }, 1125 { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty }, 1126 { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty }, 1127 { 0, 0 } 1128 }; 1129 #endif 1130 1131 // #define INDIC_DEBUG 1132 #ifdef INDIC_DEBUG 1133 #define IDEBUG hb_debug 1134 #include <stdarg.h> 1135 1136 static void hb_debug(const char *msg, ...) 1137 { 1138 va_list ap; 1139 va_start(ap, msg); // use variable arg list 1140 vfprintf(stderr, msg, ap); 1141 va_end(ap); 1142 fprintf(stderr, "\n"); 1143 } 1144 1145 #else 1146 #define IDEBUG if(0) printf 1147 #endif 1148 1149 #if 0 //def INDIC_DEBUG 1150 static QString propertiesToString(int properties) 1151 { 1152 QString res; 1153 properties = ~properties; 1154 if (properties & LocaProperty) 1155 res += "Loca "; 1156 if (properties & CcmpProperty) 1157 res += "Ccmp "; 1158 if (properties & InitProperty) 1159 res += "Init "; 1160 if (properties & NuktaProperty) 1161 res += "Nukta "; 1162 if (properties & AkhantProperty) 1163 res += "Akhant "; 1164 if (properties & RephProperty) 1165 res += "Reph "; 1166 if (properties & PreFormProperty) 1167 res += "PreForm "; 1168 if (properties & BelowFormProperty) 1169 res += "BelowForm "; 1170 if (properties & AboveFormProperty) 1171 res += "AboveForm "; 1172 if (properties & HalfFormProperty) 1173 res += "HalfForm "; 1174 if (properties & PostFormProperty) 1175 res += "PostForm "; 1176 if (properties & ConjunctFormProperty) 1177 res += "PostForm "; 1178 if (properties & VattuProperty) 1179 res += "Vattu "; 1180 if (properties & PreSubstProperty) 1181 res += "PreSubst "; 1182 if (properties & BelowSubstProperty) 1183 res += "BelowSubst "; 1184 if (properties & AboveSubstProperty) 1185 res += "AboveSubst "; 1186 if (properties & PostSubstProperty) 1187 res += "PostSubst "; 1188 if (properties & HalantProperty) 1189 res += "Halant "; 1190 if (properties & CligProperty) 1191 res += "Clig "; 1192 if (properties & IndicCaltProperty) 1193 res += "Calt "; 1194 return res; 1195 } 1196 #endif 1197 1198 static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid) 1199 { 1200 HB_Script script = item->item.script; 1201 assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala); 1202 const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari); 1203 const unsigned short ra = script_base + 0x30; 1204 const unsigned short halant = script_base + 0x4d; 1205 const unsigned short nukta = script_base + 0x3c; 1206 bool control = false; 1207 1208 int len = (int)item->item.length; 1209 IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid); 1210 1211 if ((int)item->num_glyphs < len+4) { 1212 item->num_glyphs = len+4; 1213 return false; 1214 } 1215 1216 HB_STACKARRAY(HB_UChar16, reordered, len + 4); 1217 HB_STACKARRAY(hb_uint8, position, len + 4); 1218 1219 unsigned char properties = scriptProperties[script-HB_Script_Devanagari]; 1220 1221 if (invalid) { 1222 *reordered = 0x25cc; 1223 memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16)); 1224 len++; 1225 } else { 1226 memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16)); 1227 } 1228 if (reordered[len-1] == 0x200c) // zero width non joiner 1229 len--; 1230 1231 int i; 1232 int base = 0; 1233 int reph = -1; 1234 1235 #ifdef INDIC_DEBUG 1236 IDEBUG("original:"); 1237 for (i = 0; i < len; i++) { 1238 IDEBUG(" %d: %4x", i, reordered[i]); 1239 } 1240 #endif 1241 1242 if (len != 1) { 1243 HB_UChar16 *uc = reordered; 1244 bool beginsWithRa = false; 1245 1246 // Rule 1: find base consonant 1247 // 1248 // The shaping engine finds the base consonant of the 1249 // syllable, using the following algorithm: starting from the 1250 // end of the syllable, move backwards until a consonant is 1251 // found that does not have a below-base or post-base form 1252 // (post-base forms have to follow below-base forms), or 1253 // arrive at the first consonant. The consonant stopped at 1254 // will be the base. 1255 // 1256 // * If the syllable starts with Ra + H (in a script that has 1257 // 'Reph'), Ra is excluded from candidates for base 1258 // consonants. 1259 // 1260 // * In Kannada and Telugu, the base consonant cannot be 1261 // farther than 3 consonants from the end of the syllable. 1262 // #### replace the HasReph property by testing if the feature exists in the font! 1263 if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) { 1264 if ((properties & HasReph) && (len > 2) && 1265 (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant) 1266 beginsWithRa = true; 1267 1268 if (beginsWithRa && form(*(uc+2)) == Control) 1269 beginsWithRa = false; 1270 1271 base = (beginsWithRa ? 2 : 0); 1272 IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base); 1273 1274 int lastConsonant = 0; 1275 int matra = -1; 1276 // we remember: 1277 // * the last consonant since we need it for rule 2 1278 // * the matras position for rule 3 and 4 1279 1280 // figure out possible base glyphs 1281 memset(position, 0, len); 1282 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { 1283 bool vattu = false; 1284 for (i = base; i < len; ++i) { 1285 position[i] = form(uc[i]); 1286 if (position[i] == Consonant) { 1287 lastConsonant = i; 1288 vattu = (!vattu && uc[i] == ra); 1289 if (vattu) { 1290 IDEBUG("excluding vattu glyph at %d from base candidates", i); 1291 position[i] = Vattu; 1292 } 1293 } else if (position[i] == Matra) { 1294 matra = i; 1295 } 1296 } 1297 } else { 1298 for (i = base; i < len; ++i) { 1299 position[i] = form(uc[i]); 1300 if (position[i] == Consonant) 1301 lastConsonant = i; 1302 else if (matra < 0 && position[i] == Matra) 1303 matra = i; 1304 } 1305 } 1306 int skipped = 0; 1307 Position pos = Post; 1308 for (i = len-1; i >= base; i--) { 1309 if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada)) 1310 continue; 1311 1312 if (i < len-1 && position[i] == Control && position[i+1] == Consonant) { 1313 base = i+1; 1314 break; 1315 } 1316 1317 Position charPosition = indic_position(uc[i]); 1318 if (pos == Post && charPosition == Post) { 1319 pos = Post; 1320 } else if ((pos == Post || pos == Below) && charPosition == Below) { 1321 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) 1322 base = i; 1323 pos = Below; 1324 } else { 1325 base = i; 1326 break; 1327 } 1328 if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) { 1329 base = i; 1330 break; 1331 } 1332 ++skipped; 1333 } 1334 1335 IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant); 1336 1337 // Rule 2: 1338 // 1339 // If the base consonant is not the last one, Uniscribe 1340 // moves the halant from the base consonant to the last 1341 // one. 1342 if (lastConsonant > base) { 1343 int halantPos = 0; 1344 if (uc[base+1] == halant) 1345 halantPos = base + 1; 1346 else if (uc[base+1] == nukta && uc[base+2] == halant) 1347 halantPos = base + 2; 1348 if (halantPos > 0) { 1349 IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant); 1350 for (i = halantPos; i < lastConsonant; i++) 1351 uc[i] = uc[i+1]; 1352 uc[lastConsonant] = halant; 1353 } 1354 } 1355 1356 // Rule 3: 1357 // 1358 // If the syllable starts with Ra + H, Uniscribe moves 1359 // this combination so that it follows either: 1360 1361 // * the post-base 'matra' (if any) or the base consonant 1362 // (in scripts that show similarity to Devanagari, i.e., 1363 // Devanagari, Gujarati, Bengali) 1364 // * the base consonant (other scripts) 1365 // * the end of the syllable (Kannada) 1366 1367 Position matra_position = None; 1368 if (matra > 0) 1369 matra_position = indic_position(uc[matra]); 1370 IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base); 1371 1372 if (beginsWithRa && base != 0) { 1373 int toPos = base+1; 1374 if (toPos < len && uc[toPos] == nukta) 1375 toPos++; 1376 if (toPos < len && uc[toPos] == halant) 1377 toPos++; 1378 if (toPos < len && uc[toPos] == 0x200d) 1379 toPos++; 1380 if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant) 1381 toPos += 2; 1382 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) { 1383 if (matra_position == Post || matra_position == Split) { 1384 toPos = matra+1; 1385 matra -= 2; 1386 } 1387 } else if (script == HB_Script_Kannada) { 1388 toPos = len; 1389 matra -= 2; 1390 } 1391 1392 IDEBUG("moving leading ra+halant to position %d", toPos); 1393 for (i = 2; i < toPos; i++) 1394 uc[i-2] = uc[i]; 1395 uc[toPos-2] = ra; 1396 uc[toPos-1] = halant; 1397 base -= 2; 1398 if (properties & HasReph) 1399 reph = toPos-2; 1400 } 1401 1402 // Rule 4: 1403 1404 // Uniscribe splits two- or three-part matras into their 1405 // parts. This splitting is a character-to-character 1406 // operation). 1407 // 1408 // Uniscribe describes some moving operations for these 1409 // matras here. For shaping however all pre matras need 1410 // to be at the beginning of the syllable, so we just move 1411 // them there now. 1412 if (matra_position == Split) { 1413 splitMatra(uc, matra, len); 1414 // Handle three-part matras (0xccb in Kannada) 1415 matra_position = indic_position(uc[matra]); 1416 } 1417 1418 if (matra_position == Pre) { 1419 unsigned short m = uc[matra]; 1420 while (matra--) 1421 uc[matra+1] = uc[matra]; 1422 uc[0] = m; 1423 base++; 1424 } 1425 } 1426 1427 // Rule 5: 1428 // 1429 // Uniscribe classifies consonants and 'matra' parts as 1430 // pre-base, above-base (Reph), below-base or post-base. This 1431 // classification exists on the character code level and is 1432 // language-dependent, not font-dependent. 1433 for (i = 0; i < base; ++i) 1434 position[i] = Pre; 1435 position[base] = Base; 1436 for (i = base+1; i < len; ++i) { 1437 position[i] = indic_position(uc[i]); 1438 // #### replace by adjusting table 1439 if (uc[i] == nukta || uc[i] == halant) 1440 position[i] = Inherit; 1441 } 1442 if (reph > 0) { 1443 // recalculate reph, it might have changed. 1444 for (i = base+1; i < len; ++i) 1445 if (uc[i] == ra) 1446 reph = i; 1447 position[reph] = Reph; 1448 position[reph+1] = Inherit; 1449 } 1450 1451 // all reordering happens now to the chars after the base 1452 int fixed = base+1; 1453 if (fixed < len && uc[fixed] == nukta) 1454 fixed++; 1455 if (fixed < len && uc[fixed] == halant) 1456 fixed++; 1457 if (fixed < len && uc[fixed] == 0x200d) 1458 fixed++; 1459 1460 #ifdef INDIC_DEBUG 1461 for (i = fixed; i < len; ++i) 1462 IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]); 1463 #endif 1464 // we continuosly position the matras and vowel marks and increase the fixed 1465 // until we reached the end. 1466 const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari]; 1467 1468 IDEBUG(" reordering pass:"); 1469 IDEBUG(" base=%d fixed=%d", base, fixed); 1470 int toMove = 0; 1471 while (finalOrder[toMove].form && fixed < len-1) { 1472 IDEBUG(" fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position); 1473 for (i = fixed; i < len; i++) { 1474 // IDEBUG() << " i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i]) 1475 // << "position=" << position[i]; 1476 if (form(uc[i]) == finalOrder[toMove].form && 1477 position[i] == finalOrder[toMove].position) { 1478 // need to move this glyph 1479 int to = fixed; 1480 if (i < len-1 && position[i+1] == Inherit) { 1481 IDEBUG(" moving two chars from %d to %d", i, to); 1482 unsigned short ch = uc[i]; 1483 unsigned short ch2 = uc[i+1]; 1484 unsigned char pos = position[i]; 1485 for (int j = i+1; j > to+1; j--) { 1486 uc[j] = uc[j-2]; 1487 position[j] = position[j-2]; 1488 } 1489 uc[to] = ch; 1490 uc[to+1] = ch2; 1491 position[to] = pos; 1492 position[to+1] = pos; 1493 fixed += 2; 1494 } else { 1495 IDEBUG(" moving one char from %d to %d", i, to); 1496 unsigned short ch = uc[i]; 1497 unsigned char pos = position[i]; 1498 for (int j = i; j > to; j--) { 1499 uc[j] = uc[j-1]; 1500 position[j] = position[j-1]; 1501 } 1502 uc[to] = ch; 1503 position[to] = pos; 1504 fixed++; 1505 } 1506 } 1507 } 1508 toMove++; 1509 } 1510 1511 } 1512 1513 if (reph > 0) { 1514 // recalculate reph, it might have changed. 1515 for (i = base+1; i < len; ++i) 1516 if (reordered[i] == ra) 1517 reph = i; 1518 } 1519 1520 #ifndef NO_OPENTYPE 1521 const int availableGlyphs = item->num_glyphs; 1522 #endif 1523 if (!item->font->klass->convertStringToGlyphIndices(item->font, 1524 reordered, len, 1525 item->glyphs, &item->num_glyphs, 1526 item->item.bidiLevel % 2)) 1527 goto error; 1528 1529 1530 IDEBUG(" base=%d, reph=%d", base, reph); 1531 IDEBUG("reordered:"); 1532 for (i = 0; i < len; i++) { 1533 item->attributes[i].mark = false; 1534 item->attributes[i].clusterStart = false; 1535 item->attributes[i].justification = 0; 1536 item->attributes[i].zeroWidth = false; 1537 IDEBUG(" %d: %4x", i, reordered[i]); 1538 } 1539 1540 // now we have the syllable in the right order, and can start running it through open type. 1541 1542 for (i = 0; i < len; ++i) 1543 control |= (form(reordered[i]) == Control); 1544 1545 #ifndef NO_OPENTYPE 1546 if (openType) { 1547 1548 // we need to keep track of where the base glyph is for some 1549 // scripts and use the cluster feature for this. This 1550 // also means we have to correct the logCluster output from 1551 // the open type engine manually afterwards. for indic this 1552 // is rather simple, as all chars just point to the first 1553 // glyph in the syllable. 1554 HB_STACKARRAY(unsigned short, clusters, len); 1555 HB_STACKARRAY(unsigned int, properties, len); 1556 1557 for (i = 0; i < len; ++i) 1558 clusters[i] = i; 1559 1560 // features we should always apply 1561 for (i = 0; i < len; ++i) 1562 properties[i] = ~(LocaProperty 1563 | CcmpProperty 1564 | NuktaProperty 1565 | VattuProperty 1566 | ConjunctFormProperty 1567 | PreSubstProperty 1568 | BelowSubstProperty 1569 | AboveSubstProperty 1570 | PostSubstProperty 1571 | HalantProperty 1572 | IndicCaltProperty 1573 | PositioningProperties); 1574 1575 // Loca always applies 1576 // Ccmp always applies 1577 // Init 1578 if (item->item.pos == 0 1579 || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1]))) 1580 properties[0] &= ~InitProperty; 1581 1582 // Nukta always applies 1583 // Akhant 1584 for (i = 0; i <= base; ++i) 1585 properties[i] &= ~AkhantProperty; 1586 // Reph 1587 if (reph >= 0) { 1588 properties[reph] &= ~RephProperty; 1589 properties[reph+1] &= ~RephProperty; 1590 } 1591 // BelowForm 1592 for (i = base+1; i < len; ++i) 1593 properties[i] &= ~BelowFormProperty; 1594 1595 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { 1596 // vattu glyphs need this aswell 1597 bool vattu = false; 1598 for (i = base-2; i > 1; --i) { 1599 if (form(reordered[i]) == Consonant) { 1600 vattu = (!vattu && reordered[i] == ra); 1601 if (vattu) { 1602 IDEBUG("forming vattu ligature at %d", i); 1603 properties[i] &= ~BelowFormProperty; 1604 properties[i+1] &= ~BelowFormProperty; 1605 } 1606 } 1607 } 1608 } 1609 // HalfFormProperty 1610 for (i = 0; i < base; ++i) 1611 properties[i] &= ~HalfFormProperty; 1612 if (control) { 1613 for (i = 2; i < len; ++i) { 1614 if (reordered[i] == 0x200d /* ZWJ */) { 1615 properties[i-1] &= ~HalfFormProperty; 1616 properties[i-2] &= ~HalfFormProperty; 1617 } else if (reordered[i] == 0x200c /* ZWNJ */) { 1618 properties[i-1] &= ~HalfFormProperty; 1619 properties[i-2] &= ~HalfFormProperty; 1620 } 1621 } 1622 } 1623 // PostFormProperty 1624 for (i = base+1; i < len; ++i) 1625 properties[i] &= ~PostFormProperty; 1626 // vattu always applies 1627 // pres always applies 1628 // blws always applies 1629 // abvs always applies 1630 // psts always applies 1631 // halant always applies 1632 // calt always applies 1633 1634 #ifdef INDIC_DEBUG 1635 // { 1636 // IDEBUG("OT properties:"); 1637 // for (int i = 0; i < len; ++i) 1638 // qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data()); 1639 // } 1640 #endif 1641 1642 // initialize 1643 item->log_clusters = clusters; 1644 HB_OpenTypeShape(item, properties); 1645 1646 int newLen = item->face->buffer->in_length; 1647 HB_GlyphItem otl_glyphs = item->face->buffer->in_string; 1648 1649 // move the left matra back to its correct position in malayalam and tamil 1650 if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) { 1651 // qDebug("reordering matra, len=%d", newLen); 1652 // need to find the base in the shaped string and move the matra there 1653 int basePos = 0; 1654 while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base) 1655 basePos++; 1656 --basePos; 1657 if (basePos < newLen && basePos > 1) { 1658 // qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen); 1659 HB_GlyphItemRec m = otl_glyphs[0]; 1660 --basePos; 1661 for (i = 0; i < basePos; ++i) 1662 otl_glyphs[i] = otl_glyphs[i+1]; 1663 otl_glyphs[basePos] = m; 1664 } 1665 } 1666 1667 HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false); 1668 1669 HB_FREE_STACKARRAY(clusters); 1670 HB_FREE_STACKARRAY(properties); 1671 1672 if (!positioned) 1673 goto error; 1674 1675 if (control) { 1676 IDEBUG("found a control char in the syllable"); 1677 hb_uint32 i = 0, j = 0; 1678 while (i < item->num_glyphs) { 1679 if (form(reordered[otl_glyphs[i].cluster]) == Control) { 1680 ++i; 1681 if (i >= item->num_glyphs) 1682 break; 1683 } 1684 item->glyphs[j] = item->glyphs[i]; 1685 item->attributes[j] = item->attributes[i]; 1686 ++i; 1687 ++j; 1688 } 1689 item->num_glyphs = j; 1690 } 1691 1692 } else { 1693 HB_HeuristicPosition(item); 1694 } 1695 #endif // NO_OPENTYPE 1696 item->attributes[0].clusterStart = true; 1697 1698 HB_FREE_STACKARRAY(reordered); 1699 HB_FREE_STACKARRAY(position); 1700 1701 IDEBUG("<<<<<<"); 1702 return true; 1703 1704 error: 1705 HB_FREE_STACKARRAY(reordered); 1706 HB_FREE_STACKARRAY(position); 1707 return false; 1708 } 1709 1710 /* syllables are of the form: 1711 1712 (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark? 1713 (Consonant Nukta? Halant)* Consonant Halant 1714 IndependentVowel VowelMark? StressMark? 1715 1716 We return syllable boundaries on invalid combinations aswell 1717 */ 1718 static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid) 1719 { 1720 *invalid = false; 1721 IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end); 1722 const HB_UChar16 *uc = s+start; 1723 1724 int pos = 0; 1725 Form state = form(uc[pos]); 1726 IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]); 1727 pos++; 1728 1729 if (state != Consonant && state != IndependentVowel) { 1730 if (state != Other) 1731 *invalid = true; 1732 goto finish; 1733 } 1734 1735 while (pos < end - start) { 1736 Form newState = form(uc[pos]); 1737 IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]); 1738 switch(newState) { 1739 case Control: 1740 newState = state; 1741 if (state == Halant && uc[pos] == 0x200d /* ZWJ */) 1742 break; 1743 // the control character should be the last char in the item 1744 ++pos; 1745 goto finish; 1746 case Consonant: 1747 if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */)) 1748 break; 1749 goto finish; 1750 case Halant: 1751 if (state == Nukta || state == Consonant) 1752 break; 1753 // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya 1754 if (script == HB_Script_Bengali && pos == 1 && 1755 (uc[0] == 0x0985 || uc[0] == 0x098f)) 1756 break; 1757 // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra. 1758 if (script == HB_Script_Sinhala && state == Matra) { 1759 ++pos; 1760 continue; 1761 } 1762 if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) { 1763 ++pos; 1764 continue; 1765 } 1766 goto finish; 1767 case Nukta: 1768 if (state == Consonant) 1769 break; 1770 goto finish; 1771 case StressMark: 1772 if (state == VowelMark) 1773 break; 1774 // fall through 1775 case VowelMark: 1776 if (state == Matra || state == LengthMark || state == IndependentVowel) 1777 break; 1778 // fall through 1779 case Matra: 1780 if (state == Consonant || state == Nukta) 1781 break; 1782 if (state == Matra) { 1783 // ### needs proper testing for correct two/three part matras 1784 break; 1785 } 1786 // ### not sure if this is correct. If it is, does it apply only to Bengali or should 1787 // it work for all Indic languages? 1788 // the combination Independent_A + Vowel Sign AA is allowed. 1789 if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985) 1790 break; 1791 if (script == HB_Script_Tamil && state == Matra) { 1792 if (uc[pos-1] == 0x0bc6 && 1793 (uc[pos] == 0xbbe || uc[pos] == 0xbd7)) 1794 break; 1795 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe) 1796 break; 1797 } 1798 goto finish; 1799 1800 case LengthMark: 1801 if (state == Matra) { 1802 // ### needs proper testing for correct two/three part matras 1803 break; 1804 } 1805 case IndependentVowel: 1806 case Invalid: 1807 case Other: 1808 goto finish; 1809 } 1810 state = newState; 1811 pos++; 1812 } 1813 finish: 1814 return pos+start; 1815 } 1816 1817 HB_Bool HB_IndicShape(HB_ShaperItem *item) 1818 { 1819 assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala); 1820 1821 HB_Bool openType = false; 1822 #ifndef NO_OPENTYPE 1823 openType = HB_SelectScript(item, indic_features); 1824 #endif 1825 unsigned short *logClusters = item->log_clusters; 1826 1827 HB_ShaperItem syllable = *item; 1828 int first_glyph = 0; 1829 1830 int sstart = item->item.pos; 1831 int end = sstart + item->item.length; 1832 IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length); 1833 while (sstart < end) { 1834 bool invalid; 1835 int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid); 1836 IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, 1837 invalid ? "true" : "false"); 1838 syllable.item.pos = sstart; 1839 syllable.item.length = send-sstart; 1840 syllable.glyphs = item->glyphs + first_glyph; 1841 syllable.attributes = item->attributes + first_glyph; 1842 syllable.offsets = item->offsets + first_glyph; 1843 syllable.advances = item->advances + first_glyph; 1844 syllable.num_glyphs = item->num_glyphs - first_glyph; 1845 if (!indic_shape_syllable(openType, &syllable, invalid)) { 1846 IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); 1847 item->num_glyphs += syllable.num_glyphs; 1848 return false; 1849 } 1850 // fix logcluster array 1851 IDEBUG("syllable:"); 1852 hb_uint32 g; 1853 for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g) 1854 IDEBUG(" %d -> glyph %x", g, item->glyphs[g]); 1855 IDEBUG(" logclusters:"); 1856 int i; 1857 for (i = sstart; i < send; ++i) { 1858 IDEBUG(" %d -> glyph %d", i, first_glyph); 1859 logClusters[i-item->item.pos] = first_glyph; 1860 } 1861 sstart = send; 1862 first_glyph += syllable.num_glyphs; 1863 } 1864 item->num_glyphs = first_glyph; 1865 return true; 1866 } 1867 1868 void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) 1869 { 1870 int end = from + len; 1871 const HB_UChar16 *uc = text + from; 1872 attributes += from; 1873 hb_uint32 i = 0; 1874 while (i < len) { 1875 bool invalid; 1876 hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from; 1877 attributes[i].charStop = true; 1878 1879 if (boundary > len-1) boundary = len; 1880 i++; 1881 while (i < boundary) { 1882 attributes[i].charStop = false; 1883 ++uc; 1884 ++i; 1885 } 1886 assert(i == boundary); 1887 } 1888 1889 1890 } 1891 1892 1893