Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      3  *
      4  * This is part of HarfBuzz, an OpenType Layout engine library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  */
     24 
     25 #include "harfbuzz-shaper.h"
     26 #include "harfbuzz-shaper-private.h"
     27 
     28 #include <assert.h>
     29 #include <stdio.h>
     30 
     31 #define FLAG(x) (1 << (x))
     32 
     33 static HB_Bool isLetter(HB_UChar16 ucs)
     34 {
     35     const int test = FLAG(HB_Letter_Uppercase) |
     36                      FLAG(HB_Letter_Lowercase) |
     37                      FLAG(HB_Letter_Titlecase) |
     38                      FLAG(HB_Letter_Modifier) |
     39                      FLAG(HB_Letter_Other);
     40     return FLAG(HB_GetUnicodeCharCategory(ucs)) & test;
     41 }
     42 
     43 static HB_Bool isMark(HB_UChar16 ucs)
     44 {
     45     const int test = FLAG(HB_Mark_NonSpacing) |
     46                      FLAG(HB_Mark_SpacingCombining) |
     47                      FLAG(HB_Mark_Enclosing);
     48     return FLAG(HB_GetUnicodeCharCategory(ucs)) & test;
     49 }
     50 
     51 enum Form {
     52     Invalid = 0x0,
     53     UnknownForm = Invalid,
     54     Consonant,
     55     Nukta,
     56     Halant,
     57     Matra,
     58     VowelMark,
     59     StressMark,
     60     IndependentVowel,
     61     LengthMark,
     62     Control,
     63     Other
     64 };
     65 
     66 static const unsigned char indicForms[0xe00-0x900] = {
     67     // Devangari
     68     Invalid, VowelMark, VowelMark, VowelMark,
     69     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
     70     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
     71     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
     72 
     73     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
     74     IndependentVowel, Consonant, Consonant, Consonant,
     75     Consonant, Consonant, Consonant, Consonant,
     76     Consonant, Consonant, Consonant, Consonant,
     77 
     78     Consonant, Consonant, Consonant, Consonant,
     79     Consonant, Consonant, Consonant, Consonant,
     80     Consonant, Consonant, Consonant, Consonant,
     81     Consonant, Consonant, Consonant, Consonant,
     82 
     83     Consonant, Consonant, Consonant, Consonant,
     84     Consonant, Consonant, Consonant, Consonant,
     85     Consonant, Consonant, UnknownForm, UnknownForm,
     86     Nukta, Other, Matra, Matra,
     87 
     88     Matra, Matra, Matra, Matra,
     89     Matra, Matra, Matra, Matra,
     90     Matra, Matra, Matra, Matra,
     91     Matra, Halant, UnknownForm, UnknownForm,
     92 
     93     Other, StressMark, StressMark, StressMark,
     94     StressMark, UnknownForm, UnknownForm, UnknownForm,
     95     Consonant, Consonant, Consonant, Consonant,
     96     Consonant, Consonant, Consonant, Consonant,
     97 
     98     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
     99     Other, Other, Other, Other,
    100     Other, Other, Other, Other,
    101     Other, Other, Other, Other,
    102 
    103     Other, Other, Other, Other,
    104     Other, Other, Other, Other,
    105     Other, Other, Other, Consonant,
    106     Consonant, Consonant /* ??? */, Consonant, Consonant,
    107 
    108     // Bengali
    109     Invalid, VowelMark, VowelMark, VowelMark,
    110     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    111     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    112     IndependentVowel, Invalid, Invalid, IndependentVowel,
    113 
    114     IndependentVowel, Invalid, Invalid, IndependentVowel,
    115     IndependentVowel, Consonant, Consonant, Consonant,
    116     Consonant, Consonant, Consonant, Consonant,
    117     Consonant, Consonant, Consonant, Consonant,
    118 
    119     Consonant, Consonant, Consonant, Consonant,
    120     Consonant, Consonant, Consonant, Consonant,
    121     Consonant, Invalid, Consonant, Consonant,
    122     Consonant, Consonant, Consonant, Consonant,
    123 
    124     Consonant, Invalid, Consonant, Invalid,
    125     Invalid, Invalid, Consonant, Consonant,
    126     Consonant, Consonant, UnknownForm, UnknownForm,
    127     Nukta, Other, Matra, Matra,
    128 
    129     Matra, Matra, Matra, Matra,
    130     Matra, Invalid, Invalid, Matra,
    131     Matra, Invalid, Invalid, Matra,
    132     Matra, Halant, Consonant, UnknownForm,
    133 
    134     Invalid, Invalid, Invalid, Invalid,
    135     Invalid, Invalid, Invalid, VowelMark,
    136     Invalid, Invalid, Invalid, Invalid,
    137     Consonant, Consonant, Invalid, Consonant,
    138 
    139     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    140     Other, Other, Other, Other,
    141     Other, Other, Other, Other,
    142     Other, Other, Other, Other,
    143 
    144     Consonant, Consonant, Other, Other,
    145     Other, Other, Other, Other,
    146     Other, Other, Other, Other,
    147     Other, Other, Other, Other,
    148 
    149     // Gurmukhi
    150     Invalid, VowelMark, VowelMark, VowelMark,
    151     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    152     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    153     Invalid, Invalid, Invalid, IndependentVowel,
    154 
    155     IndependentVowel, Invalid, Invalid, IndependentVowel,
    156     IndependentVowel, Consonant, Consonant, Consonant,
    157     Consonant, Consonant, Consonant, Consonant,
    158     Consonant, Consonant, Consonant, Consonant,
    159 
    160     Consonant, Consonant, Consonant, Consonant,
    161     Consonant, Consonant, Consonant, Consonant,
    162     Consonant, Invalid, Consonant, Consonant,
    163     Consonant, Consonant, Consonant, Consonant,
    164 
    165     Consonant, Invalid, Consonant, Consonant,
    166     Invalid, Consonant, Consonant, Invalid,
    167     Consonant, Consonant, UnknownForm, UnknownForm,
    168     Nukta, Other, Matra, Matra,
    169 
    170     Matra, Matra, Matra, Invalid,
    171     Invalid, Invalid, Invalid, Matra,
    172     Matra, Invalid, Invalid, Matra,
    173     Matra, Halant, UnknownForm, UnknownForm,
    174 
    175     Invalid, Invalid, Invalid, Invalid,
    176     Invalid, UnknownForm, UnknownForm, UnknownForm,
    177     Invalid, Consonant, Consonant, Consonant,
    178     Consonant, Invalid, Consonant, Invalid,
    179 
    180     Other, Other, Invalid, Invalid,
    181     Other, Other, Other, Other,
    182     Other, Other, Other, Other,
    183     Other, Other, Other, Other,
    184 
    185     StressMark, StressMark, Consonant, Consonant,
    186     Other, Other, Other, Other,
    187     Other, Other, Other, Other,
    188     Other, Other, Other, Other,
    189 
    190     // Gujarati
    191     Invalid, VowelMark, VowelMark, VowelMark,
    192     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    193     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    194     IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
    195 
    196     IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
    197     IndependentVowel, Consonant, Consonant, Consonant,
    198     Consonant, Consonant, Consonant, Consonant,
    199     Consonant, Consonant, Consonant, Consonant,
    200 
    201     Consonant, Consonant, Consonant, Consonant,
    202     Consonant, Consonant, Consonant, Consonant,
    203     Consonant, Invalid, Consonant, Consonant,
    204     Consonant, Consonant, Consonant, Consonant,
    205 
    206     Consonant, Invalid, Consonant, Consonant,
    207     Invalid, Consonant, Consonant, Consonant,
    208     Consonant, Consonant, UnknownForm, UnknownForm,
    209     Nukta, Other, Matra, Matra,
    210 
    211     Matra, Matra, Matra, Matra,
    212     Matra, Matra, Invalid, Matra,
    213     Matra, Matra, Invalid, Matra,
    214     Matra, Halant, UnknownForm, UnknownForm,
    215 
    216     Other, UnknownForm, UnknownForm, UnknownForm,
    217     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
    218     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
    219     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
    220 
    221     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    222     Other, Other, Other, Other,
    223     Other, Other, Other, Other,
    224     Other, Other, Other, Other,
    225 
    226     Other, Other, Other, Other,
    227     Other, Other, Other, Other,
    228     Other, Other, Other, Other,
    229     Other, Other, Other, Other,
    230 
    231     // Oriya
    232     Invalid, VowelMark, VowelMark, VowelMark,
    233     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    234     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    235     IndependentVowel, Invalid, Invalid, IndependentVowel,
    236 
    237     IndependentVowel, Invalid, Invalid, IndependentVowel,
    238     IndependentVowel, Consonant, Consonant, Consonant,
    239     Consonant, Consonant, Consonant, Consonant,
    240     Consonant, Consonant, Consonant, Consonant,
    241 
    242     Consonant, Consonant, Consonant, Consonant,
    243     Consonant, Consonant, Consonant, Consonant,
    244     Consonant, Invalid, Consonant, Consonant,
    245     Consonant, Consonant, Consonant, Consonant,
    246 
    247     Consonant, Invalid, Consonant, Consonant,
    248     Invalid, Consonant, Consonant, Consonant,
    249     Consonant, Consonant, UnknownForm, UnknownForm,
    250     Nukta, Other, Matra, Matra,
    251 
    252     Matra, Matra, Matra, Matra,
    253     Invalid, Invalid, Invalid, Matra,
    254     Matra, Invalid, Invalid, Matra,
    255     Matra, Halant, UnknownForm, UnknownForm,
    256 
    257     Other, Invalid, Invalid, Invalid,
    258     Invalid, UnknownForm, LengthMark, LengthMark,
    259     Invalid, Invalid, Invalid, Invalid,
    260     Consonant, Consonant, Invalid, Consonant,
    261 
    262     IndependentVowel, IndependentVowel, Invalid, Invalid,
    263     Invalid, Invalid, Other, Other,
    264     Other, Other, Other, Other,
    265     Other, Other, Other, Other,
    266 
    267     Other, Consonant, Other, Other,
    268     Other, Other, Other, Other,
    269     Other, Other, Other, Other,
    270     Other, Other, Other, Other,
    271 
    272     //Tamil
    273     Invalid, Invalid, VowelMark, Other,
    274     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    275     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    276     Invalid, Invalid, IndependentVowel, IndependentVowel,
    277 
    278     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    279     IndependentVowel, Consonant, Invalid, Invalid,
    280     Invalid, Consonant, Consonant, Invalid,
    281     Consonant, Invalid, Consonant, Consonant,
    282 
    283     Invalid, Invalid, Invalid, Consonant,
    284     Consonant, Invalid, Invalid, Invalid,
    285     Consonant, Consonant, Consonant, Invalid,
    286     Invalid, Invalid, Consonant, Consonant,
    287 
    288     Consonant, Consonant, Consonant, Consonant,
    289     Consonant, Consonant, Consonant, Consonant,
    290     Consonant, Consonant, UnknownForm, UnknownForm,
    291     Invalid, Invalid, Matra, Matra,
    292 
    293     Matra, Matra, Matra, Invalid,
    294     Invalid, Invalid, Matra, Matra,
    295     Matra, Invalid, Matra, Matra,
    296     Matra, Halant, Invalid, Invalid,
    297 
    298     Invalid, Invalid, Invalid, Invalid,
    299     Invalid, Invalid, Invalid, LengthMark,
    300     Invalid, Invalid, Invalid, Invalid,
    301     Invalid, Invalid, Invalid, Invalid,
    302 
    303     Invalid, Invalid, Invalid, Invalid,
    304     Invalid, Invalid, Other, Other,
    305     Other, Other, Other, Other,
    306     Other, Other, Other, Other,
    307 
    308     Other, Other, Other, Other,
    309     Other, Other, Other, Other,
    310     Other, Other, Other, Other,
    311     Other, Other, Other, Other,
    312 
    313     // Telugu
    314     Invalid, VowelMark, VowelMark, VowelMark,
    315     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    316     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    317     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    318 
    319     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    320     IndependentVowel, Consonant, Consonant, Consonant,
    321     Consonant, Consonant, Consonant, Consonant,
    322     Consonant, Consonant, Consonant, Consonant,
    323 
    324     Consonant, Consonant, Consonant, Consonant,
    325     Consonant, Consonant, Consonant, Consonant,
    326     Consonant, Invalid, Consonant, Consonant,
    327     Consonant, Consonant, Consonant, Consonant,
    328 
    329     Consonant, Consonant, Consonant, Consonant,
    330     Invalid, Consonant, Consonant, Consonant,
    331     Consonant, Consonant, UnknownForm, UnknownForm,
    332     Invalid, Invalid, Matra, Matra,
    333 
    334     Matra, Matra, Matra, Matra,
    335     Matra, Invalid, Matra, Matra,
    336     Matra, Invalid, Matra, Matra,
    337     Matra, Halant, Invalid, Invalid,
    338 
    339     Invalid, Invalid, Invalid, Invalid,
    340     Invalid, LengthMark, Matra, Invalid,
    341     Invalid, Invalid, Invalid, Invalid,
    342     Invalid, Invalid, Invalid, Invalid,
    343 
    344     IndependentVowel, IndependentVowel, Invalid, Invalid,
    345     Invalid, Invalid, Other, Other,
    346     Other, Other, Other, Other,
    347     Other, Other, Other, Other,
    348 
    349     Other, Other, Other, Other,
    350     Other, Other, Other, Other,
    351     Other, Other, Other, Other,
    352     Other, Other, Other, Other,
    353 
    354     // Kannada
    355     Invalid, Invalid, VowelMark, VowelMark,
    356     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    357     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    358     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    359 
    360     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    361     IndependentVowel, Consonant, Consonant, Consonant,
    362     Consonant, Consonant, Consonant, Consonant,
    363     Consonant, Consonant, Consonant, Consonant,
    364 
    365     Consonant, Consonant, Consonant, Consonant,
    366     Consonant, Consonant, Consonant, Consonant,
    367     Consonant, Invalid, Consonant, Consonant,
    368     Consonant, Consonant, Consonant, Consonant,
    369 
    370     Consonant, Consonant, Consonant, Consonant,
    371     Invalid, Consonant, Consonant, Consonant,
    372     Consonant, Consonant, UnknownForm, UnknownForm,
    373     Nukta, Other, Matra, Matra,
    374 
    375     Matra, Matra, Matra, Matra,
    376     Matra, Invalid, Matra, Matra,
    377     Matra, Invalid, Matra, Matra,
    378     Matra, Halant, Invalid, Invalid,
    379 
    380     Invalid, Invalid, Invalid, Invalid,
    381     Invalid, LengthMark, LengthMark, Invalid,
    382     Invalid, Invalid, Invalid, Invalid,
    383     Invalid, Invalid, Consonant, Invalid,
    384 
    385     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    386     Invalid, Invalid, Other, Other,
    387     Other, Other, Other, Other,
    388     Other, Other, Other, Other,
    389 
    390     Other, Other, Other, Other,
    391     Other, Other, Other, Other,
    392     Other, Other, Other, Other,
    393     Other, Other, Other, Other,
    394 
    395     // Malayalam
    396     Invalid, Invalid, VowelMark, VowelMark,
    397     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    398     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    399     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    400 
    401     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    402     IndependentVowel, Consonant, Consonant, Consonant,
    403     Consonant, Consonant, Consonant, Consonant,
    404     Consonant, Consonant, Consonant, Consonant,
    405 
    406     Consonant, Consonant, Consonant, Consonant,
    407     Consonant, Consonant, Consonant, Consonant,
    408     Consonant, Invalid, Consonant, Consonant,
    409     Consonant, Consonant, Consonant, Consonant,
    410 
    411     Consonant, Consonant, Consonant, Consonant,
    412     Consonant, Consonant, Consonant, Consonant,
    413     Consonant, Consonant, UnknownForm, UnknownForm,
    414     Invalid, Invalid, Matra, Matra,
    415 
    416     Matra, Matra, Matra, Matra,
    417     Invalid, Invalid, Matra, Matra,
    418     Matra, Invalid, Matra, Matra,
    419     Matra, Halant, Invalid, Invalid,
    420 
    421     Invalid, Invalid, Invalid, Invalid,
    422     Invalid, Invalid, Invalid, Matra,
    423     Invalid, Invalid, Invalid, Invalid,
    424     Invalid, Invalid, Invalid, Invalid,
    425 
    426     IndependentVowel, IndependentVowel, Invalid, Invalid,
    427     Invalid, Invalid, Other, Other,
    428     Other, Other, Other, Other,
    429     Other, Other, Other, Other,
    430 
    431     Other, Other, Other, Other,
    432     Other, Other, Other, Other,
    433     Other, Other, Other, Other,
    434     Other, Other, Other, Other,
    435 
    436     // Sinhala
    437     Invalid, Invalid, VowelMark, VowelMark,
    438     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    439     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    440     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    441 
    442     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    443     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    444     Invalid, Invalid, Consonant, Consonant,
    445     Consonant, Consonant, Consonant, Consonant,
    446 
    447     Consonant, Consonant, Consonant, Consonant,
    448     Consonant, Consonant, Consonant, Consonant,
    449     Consonant, Consonant, Consonant, Consonant,
    450     Consonant, Consonant, Consonant, Consonant,
    451 
    452     Consonant, Consonant, Invalid, Consonant,
    453     Consonant, Consonant, Consonant, Consonant,
    454     Consonant, Consonant, Consonant, Consonant,
    455     Invalid, Consonant, Invalid, Invalid,
    456 
    457     Consonant, Consonant, Consonant, Consonant,
    458     Consonant, Consonant, Consonant, Invalid,
    459     Invalid, Invalid, Halant, Invalid,
    460     Invalid, Invalid, Invalid, Matra,
    461 
    462     Matra, Matra, Matra, Matra,
    463     Matra, Invalid, Matra, Invalid,
    464     Matra, Matra, Matra, Matra,
    465     Matra, Matra, Matra, Matra,
    466 
    467     Invalid, Invalid, Invalid, Invalid,
    468     Invalid, Invalid, Invalid, Invalid,
    469     Invalid, Invalid, Invalid, Invalid,
    470     Invalid, Invalid, Invalid, Invalid,
    471 
    472     Invalid, Invalid, Matra, Matra,
    473     Other, Other, Other, Other,
    474     Other, Other, Other, Other,
    475     Other, Other, Other, Other,
    476 };
    477 
    478 enum Position {
    479     None,
    480     Pre,
    481     Above,
    482     Below,
    483     Post,
    484     Split,
    485     Base,
    486     Reph,
    487     Vattu,
    488     Inherit
    489 };
    490 
    491 static const unsigned char indicPosition[0xe00-0x900] = {
    492     // Devanagari
    493     None, Above, Above, Post,
    494     None, None, None, None,
    495     None, None, None, None,
    496     None, None, None, None,
    497 
    498     None, None, None, None,
    499     None, None, None, None,
    500     None, None, None, None,
    501     None, None, None, None,
    502 
    503     None, None, None, None,
    504     None, None, None, None,
    505     None, None, None, None,
    506     None, None, None, None,
    507 
    508     Below, None, None, None,
    509     None, None, None, None,
    510     None, None, None, None,
    511     None, None, Post, Pre,
    512 
    513     Post, Below, Below, Below,
    514     Below, Above, Above, Above,
    515     Above, Post, Post, Post,
    516     Post, None, None, None,
    517 
    518     None, Above, Below, Above,
    519     Above, None, None, None,
    520     None, None, None, None,
    521     None, None, None, None,
    522 
    523     None, None, Below, Below,
    524     None, None, None, None,
    525     None, None, None, None,
    526     None, None, None, None,
    527 
    528     None, None, None, None,
    529     None, None, None, None,
    530     None, None, None, None,
    531     None, None, None, None,
    532 
    533     // Bengali
    534     None, Above, Post, Post,
    535     None, None, None, None,
    536     None, None, None, None,
    537     None, None, None, None,
    538 
    539     None, None, None, None,
    540     None, None, None, None,
    541     None, None, None, None,
    542     None, None, None, None,
    543 
    544     None, None, None, None,
    545     None, None, None, None,
    546     None, None, None, None,
    547     Below, None, None, Post,
    548 
    549     Below, None, None, None,
    550     None, None, None, None,
    551     None, None, None, None,
    552     Below, None, Post, Pre,
    553 
    554     Post, Below, Below, Below,
    555     Below, None, None, Pre,
    556     Pre, None, None, Split,
    557     Split, Below, None, None,
    558 
    559     None, None, None, None,
    560     None, None, None, Post,
    561     None, None, None, None,
    562     None, None, None, None,
    563 
    564     None, None, Below, Below,
    565     None, None, None, None,
    566     None, None, None, None,
    567     None, None, None, None,
    568 
    569     Below, None, None, None,
    570     None, None, None, None,
    571     None, None, None, None,
    572     None, None, None, None,
    573 
    574     // Gurmukhi
    575     None, Above, Above, Post,
    576     None, None, None, None,
    577     None, None, None, None,
    578     None, None, None, None,
    579 
    580     None, None, None, None,
    581     None, None, None, None,
    582     None, None, None, None,
    583     None, None, None, None,
    584 
    585     None, None, None, None,
    586     None, None, None, None,
    587     None, None, None, None,
    588     None, None, None, Post,
    589 
    590     Below, None, None, None,
    591     None, Below, None, None,
    592     None, Below, None, None,
    593     Below, None, Post, Pre,
    594 
    595     Post, Below, Below, None,
    596     None, None, None, Above,
    597     Above, None, None, Above,
    598     Above, None, None, None,
    599 
    600     None, None, None, None,
    601     None, None, None, None,
    602     None, None, None, None,
    603     None, None, None, None,
    604 
    605     None, None, None, None,
    606     None, None, None, None,
    607     None, None, None, None,
    608     None, None, None, None,
    609 
    610     Above, Above, None, None,
    611     None, None, None, None,
    612     None, None, None, None,
    613     None, None, None, None,
    614 
    615     // Gujarati
    616     None, Above, Above, Post,
    617     None, None, None, None,
    618     None, None, None, None,
    619     None, None, None, None,
    620 
    621     None, None, None, None,
    622     None, None, None, None,
    623     None, None, None, None,
    624     None, None, None, None,
    625 
    626     None, None, None, None,
    627     None, None, None, None,
    628     None, None, None, None,
    629     None, None, None, None,
    630 
    631     Below, None, None, None,
    632     None, None, None, None,
    633     None, None, None, None,
    634     None, None, Post, Pre,
    635 
    636     Post, Below, Below, Below,
    637     Below, Above, None, Above,
    638     Above, Post, None, Post,
    639     Post, None, None, None,
    640 
    641     None, None, None, None,
    642     None, None, None, None,
    643     None, None, None, None,
    644     None, None, None, None,
    645 
    646     None, None, Below, Below,
    647     None, None, None, None,
    648     None, None, None, None,
    649     None, None, None, None,
    650 
    651     None, None, None, None,
    652     None, None, None, None,
    653     None, None, None, None,
    654     None, None, None, None,
    655 
    656     // Oriya
    657     None, Above, Post, Post,
    658     None, None, None, None,
    659     None, None, None, None,
    660     None, None, None, None,
    661 
    662     None, None, None, None,
    663     None, None, None, None,
    664     None, None, None, None,
    665     None, None, None, None,
    666 
    667     None, None, None, None,
    668     Below, None, None, None,
    669     Below, None, None, None,
    670     Below, Below, Below, Post,
    671 
    672     Below, None, Below, Below,
    673     None, None, None, None,
    674     None, None, None, None,
    675     None, None, Post, Above,
    676 
    677     Post, Below, Below, Below,
    678     None, None, None, Pre,
    679     Split, None, None, Split,
    680     Split, None, None, None,
    681 
    682     None, None, None, None,
    683     None, None, Above, Post,
    684     None, None, None, None,
    685     None, None, None, Post,
    686 
    687     None, None, None, None,
    688     None, None, None, None,
    689     None, None, None, None,
    690     None, None, None, None,
    691 
    692     None, Below, None, None,
    693     None, None, None, None,
    694     None, None, None, None,
    695     None, None, None, None,
    696 
    697     // Tamil
    698     None, None, Above, None,
    699     None, None, None, None,
    700     None, None, None, None,
    701     None, None, None, None,
    702 
    703     None, None, None, None,
    704     None, None, None, None,
    705     None, None, None, None,
    706     None, None, None, None,
    707 
    708     None, None, None, None,
    709     None, None, None, None,
    710     None, None, None, None,
    711     None, None, None, None,
    712 
    713     None, None, None, None,
    714     None, None, None, None,
    715     None, None, None, None,
    716     None, None, Post, Post,
    717 
    718     Above, Below, Below, None,
    719     None, None, Pre, Pre,
    720     Pre, None, Split, Split,
    721     Split, Halant, None, None,
    722 
    723     None, None, None, None,
    724     None, None, None, Post,
    725     None, None, None, None,
    726     None, None, None, None,
    727 
    728     None, None, None, None,
    729     None, None, None, None,
    730     None, None, None, None,
    731     None, None, None, None,
    732 
    733     None, None, None, None,
    734     None, None, None, None,
    735     None, None, None, None,
    736     None, None, None, None,
    737 
    738     // Telugu
    739     None, Post, Post, Post,
    740     None, None, None, None,
    741     None, None, None, None,
    742     None, None, None, None,
    743 
    744     None, None, None, None,
    745     None, Below, Below, Below,
    746     Below, Below, Below, Below,
    747     Below, Below, Below, Below,
    748 
    749     Below, Below, Below, Below,
    750     Below, Below, Below, Below,
    751     Below, None, Below, Below,
    752     Below, Below, Below, Below,
    753 
    754     Below, None, Below, Below,
    755     None, Below, Below, Below,
    756     Below, Below, None, None,
    757     None, None, Post, Above,
    758 
    759     Above, Post, Post, Post,
    760     Post, None, Above, Above,
    761     Split, None, Post, Above,
    762     Above, Halant, None, None,
    763 
    764     None, None, None, None,
    765     None, Above, Below, None,
    766     None, None, None, None,
    767     None, None, None, None,
    768 
    769     None, None, None, None,
    770     None, None, None, None,
    771     None, None, None, None,
    772     None, None, None, None,
    773 
    774     None, None, None, None,
    775     None, None, None, None,
    776     None, None, None, None,
    777     None, None, None, None,
    778 
    779     // Kannada
    780     None, None, Post, Post,
    781     None, None, None, None,
    782     None, None, None, None,
    783     None, None, None, None,
    784 
    785     None, None, None, None,
    786     None, Below, Below, Below,
    787     Below, Below, Below, Below,
    788     Below, Below, Below, Below,
    789 
    790     Below, Below, Below, Below,
    791     Below, Below, Below, Below,
    792     Below, Below, Below, Below,
    793     Below, Below, Below, Below,
    794 
    795     Below, None, Below, Below,
    796     None, Below, Below, Below,
    797     Below, Below, None, None,
    798     None, None, Post, Above,
    799 
    800     Split, Post, Post, Post,
    801     Post, None, Above, Split,
    802     Split, None, Split, Split,
    803     Above, Halant, None, None,
    804 
    805     None, None, None, None,
    806     None, Post, Post, None,
    807     None, None, None, None,
    808     None, None, Below, None,
    809 
    810     None, None, Below, Below,
    811     None, None, None, None,
    812     None, None, None, None,
    813     None, None, None, None,
    814 
    815     None, None, None, None,
    816     None, None, None, None,
    817     None, None, None, None,
    818     None, None, None, None,
    819 
    820     // Malayalam
    821     None, None, Post, Post,
    822     None, None, None, None,
    823     None, None, None, None,
    824     None, None, None, None,
    825 
    826     None, None, None, None,
    827     None, None, None, None,
    828     None, None, None, None,
    829     None, None, None, None,
    830 
    831     None, None, None, None,
    832     None, None, None, None,
    833     None, None, None, None,
    834     None, None, None, Post,
    835 
    836     Post, None, Below, None,
    837     None, Post, None, None,
    838     None, None, None, None,
    839     None, None, Post, Post,
    840 
    841     Post, Post, Post, Post,
    842     None, None, Pre, Pre,
    843     Pre, None, Split, Split,
    844     Split, Halant, None, None,
    845 
    846     None, None, None, None,
    847     None, None, None, Post,
    848     None, None, None, None,
    849     None, None, None, None,
    850 
    851     None, None, None, None,
    852     None, None, None, None,
    853     None, None, None, None,
    854     None, None, None, None,
    855 
    856     None, None, None, None,
    857     None, None, None, None,
    858     None, None, None, None,
    859     None, None, None, None,
    860 
    861     // Sinhala
    862     None, None, Post, Post,
    863     None, None, None, None,
    864     None, None, None, None,
    865     None, None, None, None,
    866 
    867     None, None, None, None,
    868     None, None, None, None,
    869     None, None, None, None,
    870     None, None, None, None,
    871 
    872     None, None, None, None,
    873     None, None, None, None,
    874     None, None, None, None,
    875     None, None, None, None,
    876 
    877     None, None, None, None,
    878     None, None, None, None,
    879     None, None, None, None,
    880     None, None, None, None,
    881 
    882     None, None, None, None,
    883     None, None, None, None,
    884     None, None, None, None,
    885     None, None, None, Post,
    886 
    887     Post, Post, Above, Above,
    888     Below, None, Below, None,
    889     Post, Pre, Split, Pre,
    890     Split, Split, Split, Post,
    891 
    892     None, None, None, None,
    893     None, None, None, None,
    894     None, None, None, None,
    895     None, None, None, None,
    896 
    897     None, None, Post, Post,
    898     None, None, None, None,
    899     None, None, None, None,
    900     None, None, None, None
    901 };
    902 
    903 static inline Form form(unsigned short uc) {
    904     if (uc < 0x900 || uc > 0xdff) {
    905         if (uc == 0x25cc)
    906             return Consonant;
    907         if (uc == 0x200c || uc == 0x200d)
    908             return Control;
    909         return Other;
    910     }
    911     return (Form)indicForms[uc-0x900];
    912 }
    913 
    914 static inline Position indic_position(unsigned short uc) {
    915     if (uc < 0x900 || uc > 0xdff)
    916         return None;
    917     return (Position) indicPosition[uc-0x900];
    918 }
    919 
    920 
    // Per-script flags; they are OR-ed together in the scriptProperties table.
    enum IndicScriptProperties {
        HasReph  = 1 << 0,
        HasSplit = 1 << 1
    };
    925 
    926 const hb_uint8 scriptProperties[10] = {
    927     // Devanagari,
    928     HasReph,
    929     // Bengali,
    930     HasReph|HasSplit,
    931     // Gurmukhi,
    932     0,
    933     // Gujarati,
    934     HasReph,
    935     // Oriya,
    936     HasReph|HasSplit,
    937     // Tamil,
    938     HasSplit,
    939     // Telugu,
    940     HasSplit,
    941     // Kannada,
    942     HasSplit|HasReph,
    943     // Malayalam,
    944     HasSplit,
    945     // Sinhala,
    946     HasSplit
    947 };
    948 
    949 struct IndicOrdering {
    950     Form form;
    951     Position position;
    952 };
    953 
    954 static const IndicOrdering devanagari_order [] = {
    955     { Consonant, Below },
    956     { Matra, Below },
    957     { VowelMark, Below },
    958     { StressMark, Below },
    959     { Matra, Above },
    960     { Matra, Post },
    961     { Consonant, Reph },
    962     { VowelMark, Above },
    963     { StressMark, Above },
    964     { VowelMark, Post },
    965     { (Form)0, None }
    966 };
    967 
    968 static const IndicOrdering bengali_order [] = {
    969     { Consonant, Below },
    970     { Matra, Below },
    971     { Matra, Above },
    972     { Consonant, Reph },
    973     { VowelMark, Above },
    974     { Consonant, Post },
    975     { Matra, Post },
    976     { VowelMark, Post },
    977     { (Form)0, None }
    978 };
    979 
    980 static const IndicOrdering gurmukhi_order [] = {
    981     { Consonant, Below },
    982     { Matra, Below },
    983     { Matra, Above },
    984     { Consonant, Post },
    985     { Matra, Post },
    986     { VowelMark, Above },
    987     { (Form)0, None }
    988 };
    989 
    990 static const IndicOrdering tamil_order [] = {
    991     { Matra, Above },
    992     { Matra, Post },
    993     { VowelMark, Post },
    994     { (Form)0, None }
    995 };
    996 
    997 static const IndicOrdering telugu_order [] = {
    998     { Matra, Above },
    999     { Matra, Below },
   1000     { Matra, Post },
   1001     { Consonant, Below },
   1002     { Consonant, Post },
   1003     { VowelMark, Post },
   1004     { (Form)0, None }
   1005 };
   1006 
   1007 static const IndicOrdering kannada_order [] = {
   1008     { Matra, Above },
   1009     { Matra, Post },
   1010     { Consonant, Below },
   1011     { Consonant, Post },
   1012     { LengthMark, Post },
   1013     { Consonant, Reph },
   1014     { VowelMark, Post },
   1015     { (Form)0, None }
   1016 };
   1017 
   1018 static const IndicOrdering malayalam_order [] = {
   1019     { Consonant, Below },
   1020     { Matra, Below },
   1021     { Consonant, Reph },
   1022     { Consonant, Post },
   1023     { Matra, Post },
   1024     { VowelMark, Post },
   1025     { (Form)0, None }
   1026 };
   1027 
   1028 static const IndicOrdering sinhala_order [] = {
   1029     { Matra, Below },
   1030     { Matra, Above },
   1031     { Matra, Post },
   1032     { VowelMark, Post },
   1033     { (Form)0, None }
   1034 };
   1035 
   1036 static const IndicOrdering * const indic_order[] = {
   1037     devanagari_order, // Devanagari
   1038     bengali_order, // Bengali
   1039     gurmukhi_order, // Gurmukhi
   1040     devanagari_order, // Gujarati
   1041     bengali_order, // Oriya
   1042     tamil_order, // Tamil
   1043     telugu_order, // Telugu
   1044     kannada_order, // Kannada
   1045     malayalam_order, // Malayalam
   1046     sinhala_order // Sinhala
   1047 };
   1048 
   1049 
   1050 
   // Vowel matras that have to be split into two or three parts.
   // Each row holds: matra, split1, split2, split3 (0x0 when there is no third
   // part).  Rows are sorted by ascending matra code, which the linear search
   // in splitMatra() relies on, and the list ends with the 0xffff sentinel.
   static const unsigned short split_matras[]  = {
       //  matra, split1, split2, split3

       // bengalis
       0x9cb, 0x9c7, 0x9be, 0x0,
       0x9cc, 0x9c7, 0x9d7, 0x0,
       // oriya
       0xb48, 0xb47, 0xb56, 0x0,
       0xb4b, 0xb47, 0xb3e, 0x0,
       0xb4c, 0xb47, 0xb57, 0x0,
       // tamil
       0xbca, 0xbc6, 0xbbe, 0x0,
       0xbcb, 0xbc7, 0xbbe, 0x0,
       0xbcc, 0xbc6, 0xbd7, 0x0,
       // telugu
       0xc48, 0xc46, 0xc56, 0x0,
       // kannada
       0xcc0, 0xcbf, 0xcd5, 0x0,
       0xcc7, 0xcc6, 0xcd5, 0x0,
       0xcc8, 0xcc6, 0xcd6, 0x0,
       0xcca, 0xcc6, 0xcc2, 0x0,
       0xccb, 0xcc6, 0xcc2, 0xcd5,
       // malayalam
       0xd4a, 0xd46, 0xd3e, 0x0,
       0xd4b, 0xd47, 0xd3e, 0x0,
       0xd4c, 0xd46, 0xd57, 0x0,
       // sinhala
       0xdda, 0xdd9, 0xdca, 0x0,
       0xddc, 0xdd9, 0xdcf, 0x0,
       0xddd, 0xdd9, 0xdcf, 0xdca,
       0xdde, 0xdd9, 0xddf, 0x0,
       0xffff
   };

   // Replace the matra at reordered[matra] by its parts from split_matras.
   //
   // The characters from index matra up to len are shifted towards the end to
   // make room, and len grows by the number of characters added (1 for a
   // two-part matra, 2 for a three-part one).  The caller has to provide a
   // buffer with room for those extra characters.
   //
   // A character without a row in split_matras still trips the assert in
   // debug builds; when asserts are compiled out the buffer and len are left
   // untouched instead of using an unrelated row or reading past the table.
   static inline void splitMatra(unsigned short *reordered, int matra, int &len)
   {
       const unsigned short matra_uc = reordered[matra];

       // Linear search; the 0xffff sentinel stops it for any unknown value.
       const unsigned short *split = split_matras;
       while (split[0] < matra_uc)
           split += 4;

       assert(*split == matra_uc);
       // 0xffff compares equal to the sentinel, which has no parts after it.
       if (*split != matra_uc || matra_uc == 0xffff)
           return;
       ++split;    // now split[0..2] are the parts

       const int added_chars = split[2] == 0x0 ? 1 : 2;

       memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short));
       reordered[matra] = split[0];
       reordered[matra+1] = split[1];
       if (added_chars == 2)
           reordered[matra+2] = split[2];
       len += added_chars;
   }
   1107 
   1108 #ifndef NO_OPENTYPE
   1109 static const HB_OpenTypeFeature indic_features[] = {
   1110     { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
   1111     { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
   1112     { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
   1113     { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
   1114     { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
   1115     { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
   1116     { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
   1117     { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
   1118     { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
   1119     { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
   1120     { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
   1121     { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
   1122     { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
   1123     { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
   1124     { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
   1125     { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
   1126     { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
   1127     { 0, 0 }
   1128 };
   1129 #endif
   1130 
   // Uncomment to get a trace of the Indic shaper on stderr.
   // #define INDIC_DEBUG
   #ifdef INDIC_DEBUG
   #define IDEBUG hb_debug
   #include <stdarg.h>

   // printf-style trace helper: writes the formatted message followed by a
   // newline to stderr.
   static void hb_debug(const char *msg, ...)
   {
       va_list ap;
       va_start(ap, msg); // use variable arg list
       vfprintf(stderr, msg, ap);
       va_end(ap);
       fprintf(stderr, "\n");
   }

   #else
   // Tracing disabled: the arguments are still parsed as a printf call but the
   // call is never executed.  `while (0)` is used instead of `if (0)` so that an
   // unbraced `if (c) IDEBUG(...); else ...` keeps its `else` attached to the
   // outer `if` rather than to the macro.
   #define IDEBUG while (0) printf
   #endif
   1148 
   1149 #if 0 //def INDIC_DEBUG
   1150 static QString propertiesToString(int properties)
   1151 {
   1152     QString res;
   1153     properties = ~properties;
   1154     if (properties & LocaProperty)
   1155         res += "Loca ";
   1156     if (properties & CcmpProperty)
   1157         res += "Ccmp ";
   1158     if (properties & InitProperty)
   1159         res += "Init ";
   1160     if (properties & NuktaProperty)
   1161         res += "Nukta ";
   1162     if (properties & AkhantProperty)
   1163         res += "Akhant ";
   1164     if (properties & RephProperty)
   1165         res += "Reph ";
   1166     if (properties & PreFormProperty)
   1167         res += "PreForm ";
   1168     if (properties & BelowFormProperty)
   1169         res += "BelowForm ";
   1170     if (properties & AboveFormProperty)
   1171         res += "AboveForm ";
   1172     if (properties & HalfFormProperty)
   1173         res += "HalfForm ";
   1174     if (properties & PostFormProperty)
   1175         res += "PostForm ";
   1176     if (properties & ConjunctFormProperty)
   1177         res += "PostForm ";
   1178     if (properties & VattuProperty)
   1179         res += "Vattu ";
   1180     if (properties & PreSubstProperty)
   1181         res += "PreSubst ";
   1182     if (properties & BelowSubstProperty)
   1183         res += "BelowSubst ";
   1184     if (properties & AboveSubstProperty)
   1185         res += "AboveSubst ";
   1186     if (properties & PostSubstProperty)
   1187         res += "PostSubst ";
   1188     if (properties & HalantProperty)
   1189         res += "Halant ";
   1190     if (properties & CligProperty)
   1191         res += "Clig ";
   1192     if (properties & IndicCaltProperty)
   1193         res += "Calt ";
   1194     return res;
   1195 }
   1196 #endif
   1197 
   1198 static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
   1199 {
   1200     HB_Script script = item->item.script;
   1201     assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
   1202     const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
   1203     const unsigned short ra = script_base + 0x30;
   1204     const unsigned short halant = script_base + 0x4d;
   1205     const unsigned short nukta = script_base + 0x3c;
   1206     bool control = false;
   1207 
   1208     int len = (int)item->item.length;
   1209     IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);
   1210 
   1211     if ((int)item->num_glyphs < len+4) {
   1212         item->num_glyphs = len+4;
   1213         return false;
   1214     }
   1215 
   1216     HB_STACKARRAY(HB_UChar16, reordered, len + 4);
   1217     HB_STACKARRAY(hb_uint8, position, len + 4);
   1218 
   1219     unsigned char properties = scriptProperties[script-HB_Script_Devanagari];
   1220 
   1221     if (invalid) {
   1222         *reordered = 0x25cc;
   1223         memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
   1224         len++;
   1225     } else {
   1226         memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
   1227     }
   1228     if (reordered[len-1] == 0x200c) // zero width non joiner
   1229         len--;
   1230 
   1231     int i;
   1232     int base = 0;
   1233     int reph = -1;
   1234 
   1235 #ifdef INDIC_DEBUG
   1236     IDEBUG("original:");
   1237     for (i = 0; i < len; i++) {
   1238         IDEBUG("    %d: %4x", i, reordered[i]);
   1239     }
   1240 #endif
   1241 
   1242     if (len != 1) {
   1243         HB_UChar16 *uc = reordered;
   1244         bool beginsWithRa = false;
   1245 
   1246         // Rule 1: find base consonant
   1247         //
   1248         // The shaping engine finds the base consonant of the
   1249         // syllable, using the following algorithm: starting from the
   1250         // end of the syllable, move backwards until a consonant is
   1251         // found that does not have a below-base or post-base form
   1252         // (post-base forms have to follow below-base forms), or
   1253         // arrive at the first consonant. The consonant stopped at
   1254         // will be the base.
   1255         //
   1256         //  * If the syllable starts with Ra + H (in a script that has
   1257         //    'Reph'), Ra is excluded from candidates for base
   1258         //    consonants.
   1259         //
   1260         // * In Kannada and Telugu, the base consonant cannot be
   1261         //   farther than 3 consonants from the end of the syllable.
   1262         // #### replace the HasReph property by testing if the feature exists in the font!
   1263         if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
   1264             if ((properties & HasReph) && (len > 2) &&
   1265                 (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
   1266                 beginsWithRa = true;
   1267 
   1268             if (beginsWithRa && form(*(uc+2)) == Control)
   1269                 beginsWithRa = false;
   1270 
   1271             base = (beginsWithRa ? 2 : 0);
   1272             IDEBUG("    length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);
   1273 
   1274             int lastConsonant = 0;
   1275             int matra = -1;
   1276             // we remember:
   1277             // * the last consonant since we need it for rule 2
   1278             // * the matras position for rule 3 and 4
   1279 
   1280             // figure out possible base glyphs
   1281             memset(position, 0, len);
   1282             if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
   1283                 bool vattu = false;
   1284                 for (i = base; i < len; ++i) {
   1285                     position[i] = form(uc[i]);
   1286                     if (position[i] == Consonant) {
   1287                         lastConsonant = i;
   1288                         vattu = (!vattu && uc[i] == ra);
   1289                         if (vattu) {
   1290                             IDEBUG("excluding vattu glyph at %d from base candidates", i);
   1291                             position[i] = Vattu;
   1292                         }
   1293                     } else if (position[i] == Matra) {
   1294                         matra = i;
   1295                     }
   1296                 }
   1297             } else {
   1298                 for (i = base; i < len; ++i) {
   1299                     position[i] = form(uc[i]);
   1300                     if (position[i] == Consonant)
   1301                         lastConsonant = i;
   1302                     else if (matra < 0 && position[i] == Matra)
   1303                         matra = i;
   1304                 }
   1305             }
   1306             int skipped = 0;
   1307             Position pos = Post;
   1308             for (i = len-1; i >= base; i--) {
   1309                 if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
   1310                     continue;
   1311 
   1312                 if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
   1313                     base = i+1;
   1314                     break;
   1315                 }
   1316 
   1317                 Position charPosition = indic_position(uc[i]);
   1318                 if (pos == Post && charPosition == Post) {
   1319                     pos = Post;
   1320                 } else if ((pos == Post || pos == Below) && charPosition == Below) {
   1321                     if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
   1322                         base = i;
   1323                     pos = Below;
   1324                 } else {
   1325                     base = i;
   1326                     break;
   1327                 }
   1328                 if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
   1329                     base = i;
   1330                     break;
   1331                 }
   1332                 ++skipped;
   1333             }
   1334 
   1335             IDEBUG("    base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);
   1336 
   1337             // Rule 2:
   1338             //
   1339             // If the base consonant is not the last one, Uniscribe
   1340             // moves the halant from the base consonant to the last
   1341             // one.
   1342             if (lastConsonant > base) {
   1343                 int halantPos = 0;
   1344                 if (uc[base+1] == halant)
   1345                     halantPos = base + 1;
   1346                 else if (uc[base+1] == nukta && uc[base+2] == halant)
   1347                     halantPos = base + 2;
   1348                 if (halantPos > 0) {
   1349                     IDEBUG("    moving halant from %d to %d!", base+1, lastConsonant);
   1350                     for (i = halantPos; i < lastConsonant; i++)
   1351                         uc[i] = uc[i+1];
   1352                     uc[lastConsonant] = halant;
   1353                 }
   1354             }
   1355 
   1356             // Rule 3:
   1357             //
   1358             // If the syllable starts with Ra + H, Uniscribe moves
   1359             // this combination so that it follows either:
   1360 
   1361             // * the post-base 'matra' (if any) or the base consonant
   1362             //   (in scripts that show similarity to Devanagari, i.e.,
   1363             //   Devanagari, Gujarati, Bengali)
   1364             // * the base consonant (other scripts)
   1365             // * the end of the syllable (Kannada)
   1366 
   1367             Position matra_position = None;
   1368             if (matra > 0)
   1369                 matra_position = indic_position(uc[matra]);
   1370             IDEBUG("    matra at %d with form %d, base=%d", matra, matra_position, base);
   1371 
   1372             if (beginsWithRa && base != 0) {
   1373                 int toPos = base+1;
   1374                 if (toPos < len && uc[toPos] == nukta)
   1375                     toPos++;
   1376                 if (toPos < len && uc[toPos] == halant)
   1377                     toPos++;
   1378                 if (toPos < len && uc[toPos] == 0x200d)
   1379                     toPos++;
   1380                 if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
   1381                     toPos += 2;
   1382                 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
   1383                     if (matra_position == Post || matra_position == Split) {
   1384                         toPos = matra+1;
   1385                         matra -= 2;
   1386                     }
   1387                 } else if (script == HB_Script_Kannada) {
   1388                     toPos = len;
   1389                     matra -= 2;
   1390                 }
   1391 
   1392                 IDEBUG("moving leading ra+halant to position %d", toPos);
   1393                 for (i = 2; i < toPos; i++)
   1394                     uc[i-2] = uc[i];
   1395                 uc[toPos-2] = ra;
   1396                 uc[toPos-1] = halant;
   1397                 base -= 2;
   1398                 if (properties & HasReph)
   1399                     reph = toPos-2;
   1400             }
   1401 
   1402             // Rule 4:
   1403 
   1404             // Uniscribe splits two- or three-part matras into their
   1405             // parts. This splitting is a character-to-character
   1406             // operation).
   1407             //
   1408             //      Uniscribe describes some moving operations for these
   1409             //      matras here. For shaping however all pre matras need
   1410             //      to be at the beginning of the syllable, so we just move
   1411             //      them there now.
   1412             if (matra_position == Split) {
   1413                 splitMatra(uc, matra, len);
   1414                 // Handle three-part matras (0xccb in Kannada)
   1415                 matra_position = indic_position(uc[matra]);
   1416             }
   1417 
   1418             if (matra_position == Pre) {
   1419                 unsigned short m = uc[matra];
   1420                 while (matra--)
   1421                     uc[matra+1] = uc[matra];
   1422                 uc[0] = m;
   1423                 base++;
   1424             }
   1425         }
   1426 
   1427         // Rule 5:
   1428         //
   1429         // Uniscribe classifies consonants and 'matra' parts as
   1430         // pre-base, above-base (Reph), below-base or post-base. This
   1431         // classification exists on the character code level and is
   1432         // language-dependent, not font-dependent.
   1433         for (i = 0; i < base; ++i)
   1434             position[i] = Pre;
   1435         position[base] = Base;
   1436         for (i = base+1; i < len; ++i) {
   1437             position[i] = indic_position(uc[i]);
   1438             // #### replace by adjusting table
   1439             if (uc[i] == nukta || uc[i] == halant)
   1440                 position[i] = Inherit;
   1441         }
   1442         if (reph > 0) {
   1443             // recalculate reph, it might have changed.
   1444             for (i = base+1; i < len; ++i)
   1445                 if (uc[i] == ra)
   1446                     reph = i;
   1447             position[reph] = Reph;
   1448             position[reph+1] = Inherit;
   1449         }
   1450 
   1451         // all reordering happens now to the chars after the base
   1452         int fixed = base+1;
   1453         if (fixed < len && uc[fixed] == nukta)
   1454             fixed++;
   1455         if (fixed < len && uc[fixed] == halant)
   1456             fixed++;
   1457         if (fixed < len && uc[fixed] == 0x200d)
   1458             fixed++;
   1459 
   1460 #ifdef INDIC_DEBUG
   1461         for (i = fixed; i < len; ++i)
   1462             IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
   1463 #endif
   1464         // we continuously position the matras and vowel marks and increase the fixed
   1465         // until we reached the end.
   1466         const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];
   1467 
   1468         IDEBUG("    reordering pass:");
   1469         IDEBUG("        base=%d fixed=%d", base, fixed);
   1470         int toMove = 0;
   1471         while (finalOrder[toMove].form && fixed < len-1) {
   1472             IDEBUG("        fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
   1473             for (i = fixed; i < len; i++) {
   1474 //                IDEBUG() << "           i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i])
   1475 //                         << "position=" << position[i];
   1476                 if (form(uc[i]) == finalOrder[toMove].form &&
   1477                      position[i] == finalOrder[toMove].position) {
   1478                     // need to move this glyph
   1479                     int to = fixed;
   1480                     if (i < len-1 && position[i+1] == Inherit) {
   1481                         IDEBUG("         moving two chars from %d to %d", i, to);
   1482                         unsigned short ch = uc[i];
   1483                         unsigned short ch2 = uc[i+1];
   1484                         unsigned char pos = position[i];
   1485                         for (int j = i+1; j > to+1; j--) {
   1486                             uc[j] = uc[j-2];
   1487                             position[j] = position[j-2];
   1488                         }
   1489                         uc[to] = ch;
   1490                         uc[to+1] = ch2;
   1491                         position[to] = pos;
   1492                         position[to+1] = pos;
   1493                         fixed += 2;
   1494                     } else {
   1495                         IDEBUG("         moving one char from %d to %d", i, to);
   1496                         unsigned short ch = uc[i];
   1497                         unsigned char pos = position[i];
   1498                         for (int j = i; j > to; j--) {
   1499                             uc[j] = uc[j-1];
   1500                             position[j] = position[j-1];
   1501                         }
   1502                         uc[to] = ch;
   1503                         position[to] = pos;
   1504                         fixed++;
   1505                     }
   1506                 }
   1507             }
   1508             toMove++;
   1509         }
   1510 
   1511     }
   1512 
   1513     if (reph > 0) {
   1514         // recalculate reph, it might have changed.
   1515         for (i = base+1; i < len; ++i)
   1516             if (reordered[i] == ra)
   1517                 reph = i;
   1518     }
   1519 
   1520 #ifndef NO_OPENTYPE
   1521     const int availableGlyphs = item->num_glyphs;
   1522 #endif
   1523     if (!item->font->klass->convertStringToGlyphIndices(item->font,
   1524                                                         reordered, len,
   1525                                                         item->glyphs, &item->num_glyphs,
   1526                                                         item->item.bidiLevel % 2))
   1527         goto error;
   1528 
   1529 
   1530     IDEBUG("  base=%d, reph=%d", base, reph);
   1531     IDEBUG("reordered:");
   1532     for (i = 0; i < len; i++) {
   1533         item->attributes[i].mark = false;
   1534         item->attributes[i].clusterStart = false;
   1535         item->attributes[i].justification = 0;
   1536         item->attributes[i].zeroWidth = false;
   1537         IDEBUG("    %d: %4x", i, reordered[i]);
   1538     }
   1539 
   1540     // now we have the syllable in the right order, and can start running it through open type.
   1541 
   1542     for (i = 0; i < len; ++i)
   1543         control |= (form(reordered[i]) == Control);
   1544 
   1545 #ifndef NO_OPENTYPE
   1546     if (openType) {
   1547 
   1548         // we need to keep track of where the base glyph is for some
   1549         // scripts and use the cluster feature for this.  This
   1550         // also means we have to correct the logCluster output from
   1551         // the open type engine manually afterwards.  for indic this
   1552         // is rather simple, as all chars just point to the first
   1553         // glyph in the syllable.
   1554         HB_STACKARRAY(unsigned short, clusters, len);
   1555         HB_STACKARRAY(unsigned int, properties, len);
   1556 
   1557         for (i = 0; i < len; ++i)
   1558             clusters[i] = i;
   1559 
   1560         // features we should always apply
   1561         for (i = 0; i < len; ++i)
   1562             properties[i] = ~(LocaProperty
   1563                               | CcmpProperty
   1564                               | NuktaProperty
   1565                               | VattuProperty
   1566                               | ConjunctFormProperty
   1567                               | PreSubstProperty
   1568                               | BelowSubstProperty
   1569                               | AboveSubstProperty
   1570                               | PostSubstProperty
   1571                               | HalantProperty
   1572                               | IndicCaltProperty
   1573                               | PositioningProperties);
   1574 
   1575         // Loca always applies
   1576         // Ccmp always applies
   1577         // Init
   1578         if (item->item.pos == 0
   1579             || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
   1580             properties[0] &= ~InitProperty;
   1581 
   1582         // Nukta always applies
   1583         // Akhant
   1584         for (i = 0; i <= base; ++i)
   1585             properties[i] &= ~AkhantProperty;
   1586         // Reph
   1587         if (reph >= 0) {
   1588             properties[reph] &= ~RephProperty;
   1589             properties[reph+1] &= ~RephProperty;
   1590         }
   1591         // BelowForm
   1592         for (i = base+1; i < len; ++i)
   1593             properties[i] &= ~BelowFormProperty;
   1594 
   1595         if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
   1596             // vattu glyphs need this as well
   1597             bool vattu = false;
   1598             for (i = base-2; i > 1; --i) {
   1599                 if (form(reordered[i]) == Consonant) {
   1600                     vattu = (!vattu && reordered[i] == ra);
   1601                     if (vattu) {
   1602                         IDEBUG("forming vattu ligature at %d", i);
   1603                         properties[i] &= ~BelowFormProperty;
   1604                         properties[i+1] &= ~BelowFormProperty;
   1605                     }
   1606                 }
   1607             }
   1608         }
   1609         // HalfFormProperty
   1610         for (i = 0; i < base; ++i)
   1611             properties[i] &= ~HalfFormProperty;
   1612         if (control) {
   1613             for (i = 2; i < len; ++i) {
   1614                 if (reordered[i] == 0x200d /* ZWJ */) {
   1615                     properties[i-1] &= ~HalfFormProperty;
   1616                     properties[i-2] &= ~HalfFormProperty;
   1617                 } else if (reordered[i] == 0x200c /* ZWNJ */) {
   1618                     properties[i-1] &= ~HalfFormProperty;
   1619                     properties[i-2] &= ~HalfFormProperty;
   1620                 }
   1621             }
   1622         }
   1623         // PostFormProperty
   1624         for (i = base+1; i < len; ++i)
   1625             properties[i] &= ~PostFormProperty;
   1626         // vattu always applies
   1627         // pres always applies
   1628         // blws always applies
   1629         // abvs always applies
   1630         // psts always applies
   1631         // halant always applies
   1632         // calt always applies
   1633 
   1634 #ifdef INDIC_DEBUG
   1635 //        {
   1636 //            IDEBUG("OT properties:");
   1637 //            for (int i = 0; i < len; ++i)
   1638 //                qDebug("    i: %s", ::propertiesToString(properties[i]).toLatin1().data());
   1639 //        }
   1640 #endif
   1641 
   1642         // initialize
   1643         item->log_clusters = clusters;
   1644         HB_OpenTypeShape(item, properties);
   1645 
   1646         int newLen = item->face->buffer->in_length;
   1647         HB_GlyphItem otl_glyphs = item->face->buffer->in_string;
   1648 
   1649         // move the left matra back to its correct position in malayalam and tamil
   1650         if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
   1651 //             qDebug("reordering matra, len=%d", newLen);
   1652             // need to find the base in the shaped string and move the matra there
   1653             int basePos = 0;
   1654             while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
   1655                 basePos++;
   1656             --basePos;
   1657             if (basePos < newLen && basePos > 1) {
   1658 //                 qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
   1659                 HB_GlyphItemRec m = otl_glyphs[0];
   1660                 --basePos;
   1661                 for (i = 0; i < basePos; ++i)
   1662                     otl_glyphs[i] = otl_glyphs[i+1];
   1663                 otl_glyphs[basePos] = m;
   1664             }
   1665         }
   1666 
   1667         HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);
   1668 
   1669         HB_FREE_STACKARRAY(clusters);
   1670         HB_FREE_STACKARRAY(properties);
   1671 
   1672         if (!positioned)
   1673             goto error;
   1674 
   1675         if (control) {
   1676             IDEBUG("found a control char in the syllable");
   1677             hb_uint32 i = 0, j = 0;
   1678             while (i < item->num_glyphs) {
   1679                 if (form(reordered[otl_glyphs[i].cluster]) == Control) {
   1680                     ++i;
   1681                     if (i >= item->num_glyphs)
   1682                         break;
   1683                 }
   1684                 item->glyphs[j] = item->glyphs[i];
   1685                 item->attributes[j] = item->attributes[i];
   1686                 ++i;
   1687                 ++j;
   1688             }
   1689             item->num_glyphs = j;
   1690         }
   1691 
   1692     } else {
   1693         HB_HeuristicPosition(item);
   1694     }
   1695 #endif // NO_OPENTYPE
   1696     item->attributes[0].clusterStart = true;
   1697 
   1698     HB_FREE_STACKARRAY(reordered);
   1699     HB_FREE_STACKARRAY(position);
   1700 
   1701     IDEBUG("<<<<<<");
   1702     return true;
   1703 
   1704 error:
   1705     HB_FREE_STACKARRAY(reordered);
   1706     HB_FREE_STACKARRAY(position);
   1707     return false;
   1708 }
   1709 
   1710 /* syllables are of the form:
   1711 
   1712    (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
   1713    (Consonant Nukta? Halant)* Consonant Halant
   1714    IndependentVowel VowelMark? StressMark?
   1715 
   1716    We return syllable boundaries on invalid combinations aswell
   1717 */
   1718 static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
   1719 {
   1720     *invalid = false;
   1721     IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
   1722     const HB_UChar16 *uc = s+start;
   1723 
   1724     int pos = 0;
   1725     Form state = form(uc[pos]);
   1726     IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
   1727     pos++;
   1728 
   1729     if (state != Consonant && state != IndependentVowel) {
   1730         if (state != Other)
   1731             *invalid = true;
   1732         goto finish;
   1733     }
   1734 
   1735     while (pos < end - start) {
   1736         Form newState = form(uc[pos]);
   1737         IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
   1738         switch(newState) {
   1739         case Control:
   1740             newState = state;
   1741  	    if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
   1742   		break;
   1743             // the control character should be the last char in the item
   1744             ++pos;
   1745             goto finish;
   1746         case Consonant:
   1747 	    if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
   1748                 break;
   1749             goto finish;
   1750         case Halant:
   1751             if (state == Nukta || state == Consonant)
   1752                 break;
   1753             // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
   1754             if (script == HB_Script_Bengali && pos == 1 &&
   1755                  (uc[0] == 0x0985 || uc[0] == 0x098f))
   1756                 break;
   1757             // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
   1758             if (script == HB_Script_Sinhala && state == Matra) {
   1759                 ++pos;
   1760                 continue;
   1761             }
   1762             if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
   1763                 ++pos;
   1764                 continue;
   1765             }
   1766             goto finish;
   1767         case Nukta:
   1768             if (state == Consonant)
   1769                 break;
   1770             goto finish;
   1771         case StressMark:
   1772             if (state == VowelMark)
   1773                 break;
   1774             // fall through
   1775         case VowelMark:
   1776             if (state == Matra || state == LengthMark || state == IndependentVowel)
   1777                 break;
   1778             // fall through
   1779         case Matra:
   1780             if (state == Consonant || state == Nukta)
   1781                 break;
   1782             if (state == Matra) {
   1783                 // ### needs proper testing for correct two/three part matras
   1784                 break;
   1785             }
   1786             // ### not sure if this is correct. If it is, does it apply only to Bengali or should
   1787             // it work for all Indic languages?
   1788             // the combination Independent_A + Vowel Sign AA is allowed.
   1789             if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
   1790                 break;
   1791             if (script == HB_Script_Tamil && state == Matra) {
   1792                 if (uc[pos-1] == 0x0bc6 &&
   1793                      (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
   1794                     break;
   1795                 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
   1796                     break;
   1797             }
   1798             goto finish;
   1799 
   1800         case LengthMark:
   1801             if (state == Matra) {
   1802                 // ### needs proper testing for correct two/three part matras
   1803                 break;
   1804             }
   1805         case IndependentVowel:
   1806         case Invalid:
   1807         case Other:
   1808             goto finish;
   1809         }
   1810         state = newState;
   1811         pos++;
   1812     }
   1813  finish:
   1814     return pos+start;
   1815 }
   1816 
   1817 HB_Bool HB_IndicShape(HB_ShaperItem *item)
   1818 {
   1819     assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
   1820 
   1821     HB_Bool openType = false;
   1822 #ifndef NO_OPENTYPE
   1823     openType = HB_SelectScript(item, indic_features);
   1824 #endif
   1825     unsigned short *logClusters = item->log_clusters;
   1826 
   1827     HB_ShaperItem syllable = *item;
   1828     int first_glyph = 0;
   1829 
   1830     int sstart = item->item.pos;
   1831     int end = sstart + item->item.length;
   1832     IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
   1833     while (sstart < end) {
   1834         bool invalid;
   1835         int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
   1836         IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
   1837                invalid ? "true" : "false");
   1838         syllable.item.pos = sstart;
   1839         syllable.item.length = send-sstart;
   1840         syllable.glyphs = item->glyphs + first_glyph;
   1841         syllable.attributes = item->attributes + first_glyph;
   1842         syllable.offsets = item->offsets + first_glyph;
   1843         syllable.advances = item->advances + first_glyph;
   1844         syllable.num_glyphs = item->num_glyphs - first_glyph;
   1845         if (!indic_shape_syllable(openType, &syllable, invalid)) {
   1846             IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
   1847             item->num_glyphs += syllable.num_glyphs;
   1848             return false;
   1849         }
   1850         // fix logcluster array
   1851         IDEBUG("syllable:");
   1852         hb_uint32 g;
   1853         for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
   1854             IDEBUG("        %d -> glyph %x", g, item->glyphs[g]);
   1855         IDEBUG("    logclusters:");
   1856         int i;
   1857         for (i = sstart; i < send; ++i) {
   1858             IDEBUG("        %d -> glyph %d", i, first_glyph);
   1859             logClusters[i-item->item.pos] = first_glyph;
   1860         }
   1861         sstart = send;
   1862         first_glyph += syllable.num_glyphs;
   1863     }
   1864     item->num_glyphs = first_glyph;
   1865     return true;
   1866 }
   1867 
   1868 void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
   1869 {
   1870     int end = from + len;
   1871     const HB_UChar16 *uc = text + from;
   1872     attributes += from;
   1873     hb_uint32 i = 0;
   1874     while (i < len) {
   1875         bool invalid;
   1876         hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
   1877          attributes[i].charStop = true;
   1878 
   1879         if (boundary > len-1) boundary = len;
   1880         i++;
   1881         while (i < boundary) {
   1882             attributes[i].charStop = false;
   1883             ++uc;
   1884             ++i;
   1885         }
   1886         assert(i == boundary);
   1887     }
   1888 
   1889 
   1890 }
   1891 
   1892 
   1893