Home | History | Annotate | Download | only in api
      1 /*
      2  * Copyright  2011  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #include "hb-test.h"
     28 
     29 /* Unit tests for hb-buffer.h */
     30 
     31 
     32 static const char utf8[10] = "ab\360\240\200\200defg";
     33 static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
     34 static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
     35 
     36 
     37 typedef enum {
     38   BUFFER_EMPTY,
     39   BUFFER_ONE_BY_ONE,
     40   BUFFER_UTF32,
     41   BUFFER_UTF16,
     42   BUFFER_UTF8,
     43   BUFFER_NUM_TYPES,
     44 } buffer_type_t;
     45 
     46 static const char *buffer_names[] = {
     47   "empty",
     48   "one-by-one",
     49   "utf32",
     50   "utf16",
     51   "utf8"
     52 };
     53 
     54 typedef struct
     55 {
     56   hb_buffer_t *buffer;
     57 } fixture_t;
     58 
     59 static void
     60 fixture_init (fixture_t *fixture, gconstpointer user_data)
     61 {
     62   hb_buffer_t *b;
     63   unsigned int i;
     64 
     65   b = fixture->buffer = hb_buffer_create ();
     66 
     67   switch (GPOINTER_TO_INT (user_data))
     68   {
     69     case BUFFER_EMPTY:
     70       break;
     71 
     72     case BUFFER_ONE_BY_ONE:
     73       for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
     74 	hb_buffer_add (b, utf32[i], i);
     75       break;
     76 
     77     case BUFFER_UTF32:
     78       hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
     79       break;
     80 
     81     case BUFFER_UTF16:
     82       hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
     83       break;
     84 
     85     case BUFFER_UTF8:
     86       hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
     87       break;
     88 
     89     default:
     90       g_assert_not_reached ();
     91   }
     92 }
     93 
     94 static void
     95 fixture_finish (fixture_t *fixture, gconstpointer user_data)
     96 {
     97   hb_buffer_destroy (fixture->buffer);
     98 }
     99 
    100 
    101 static void
    102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data)
    103 {
    104   hb_buffer_t *b = fixture->buffer;
    105   hb_unicode_funcs_t *ufuncs;
    106 
    107   /* test default properties */
    108 
    109   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    110   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    111   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    112   g_assert (hb_buffer_get_language (b) == NULL);
    113 
    114 
    115   /* test property changes are retained */
    116   ufuncs = hb_unicode_funcs_create (NULL);
    117   hb_buffer_set_unicode_funcs (b, ufuncs);
    118   hb_unicode_funcs_destroy (ufuncs);
    119   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    120 
    121   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    122   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    123 
    124   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    125   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    126 
    127   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    128   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    129 
    130   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    131   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    132 
    133   hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
    134   g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
    135 
    136 
    137   /* test clear_contents clears all these properties: */
    138 
    139   hb_buffer_clear_contents (b);
    140 
    141   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    142   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    143   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    144   g_assert (hb_buffer_get_language (b) == NULL);
    145 
    146   /* but not these: */
    147 
    148   g_assert (hb_buffer_get_flags (b) != HB_BUFFER_FLAGS_DEFAULT);
    149   g_assert (hb_buffer_get_replacement_codepoint (b) != HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
    150 
    151 
    152   /* test reset clears all properties */
    153 
    154   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    155   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    156 
    157   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    158   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    159 
    160   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    161   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    162 
    163   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    164   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    165 
    166   hb_buffer_set_replacement_codepoint (b, (unsigned int) -1);
    167   g_assert (hb_buffer_get_replacement_codepoint (b) == (unsigned int) -1);
    168 
    169   hb_buffer_reset (b);
    170 
    171   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    172   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    173   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    174   g_assert (hb_buffer_get_language (b) == NULL);
    175   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    176   g_assert (hb_buffer_get_replacement_codepoint (b) == HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
    177 }
    178 
    179 static void
    180 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
    181 {
    182   hb_buffer_t *b = fixture->buffer;
    183   unsigned int i, len, len2;
    184   buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
    185   hb_glyph_info_t *glyphs;
    186 
    187   if (buffer_type == BUFFER_EMPTY) {
    188     g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    189     return;
    190   }
    191 
    192   len = hb_buffer_get_length (b);
    193   hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
    194   glyphs = hb_buffer_get_glyph_infos (b, &len2);
    195   g_assert_cmpint (len, ==, len2);
    196   g_assert_cmpint (len, ==, 5);
    197 
    198   for (i = 0; i < len; i++) {
    199     g_assert_cmphex (glyphs[i].mask,      ==, 0);
    200     g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
    201     g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
    202   }
    203 
    204   for (i = 0; i < len; i++) {
    205     unsigned int cluster;
    206     cluster = 1+i;
    207     if (i >= 2) {
    208       if (buffer_type == BUFFER_UTF16)
    209 	cluster++;
    210       else if (buffer_type == BUFFER_UTF8)
    211         cluster += 3;
    212     }
    213     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    214     g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
    215   }
    216 
    217   /* reverse, test, and reverse back */
    218 
    219   hb_buffer_reverse (b);
    220   for (i = 0; i < len; i++)
    221     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    222 
    223   hb_buffer_reverse (b);
    224   for (i = 0; i < len; i++)
    225     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    226 
    227   /* reverse_clusters works same as reverse for now since each codepoint is
    228    * in its own cluster */
    229 
    230   hb_buffer_reverse_clusters (b);
    231   for (i = 0; i < len; i++)
    232     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    233 
    234   hb_buffer_reverse_clusters (b);
    235   for (i = 0; i < len; i++)
    236     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    237 
    238   /* now form a cluster and test again */
    239   glyphs[2].cluster = glyphs[1].cluster;
    240 
    241   /* reverse, test, and reverse back */
    242 
    243   hb_buffer_reverse (b);
    244   for (i = 0; i < len; i++)
    245     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    246 
    247   hb_buffer_reverse (b);
    248   for (i = 0; i < len; i++)
    249     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    250 
    251   /* reverse_clusters twice still should return the original string,
    252    * but when applied once, the 1-2 cluster should be retained. */
    253 
    254   hb_buffer_reverse_clusters (b);
    255   for (i = 0; i < len; i++) {
    256     unsigned int j = len-1-i;
    257     if (j == 1)
    258       j = 2;
    259     else if (j == 2)
    260       j = 1;
    261     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
    262   }
    263 
    264   hb_buffer_reverse_clusters (b);
    265   for (i = 0; i < len; i++)
    266     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    267 
    268 
    269   /* test setting length */
    270 
    271   /* enlarge */
    272   g_assert (hb_buffer_set_length (b, 10));
    273   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    274   g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
    275   for (i = 0; i < 5; i++)
    276     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    277   for (i = 5; i < 10; i++)
    278     g_assert_cmphex (glyphs[i].codepoint, ==, 0);
    279   /* shrink */
    280   g_assert (hb_buffer_set_length (b, 3));
    281   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    282   g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
    283   for (i = 0; i < 3; i++)
    284     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    285 
    286 
    287   g_assert (hb_buffer_allocation_successful (b));
    288 
    289 
    290   /* test reset clears content */
    291 
    292   hb_buffer_reset (b);
    293   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    294 }
    295 
    296 static void
    297 test_buffer_positions (fixture_t *fixture, gconstpointer user_data)
    298 {
    299   hb_buffer_t *b = fixture->buffer;
    300   unsigned int i, len, len2;
    301   hb_glyph_position_t *positions;
    302 
    303   /* Without shaping, positions should all be zero */
    304   len = hb_buffer_get_length (b);
    305   hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
    306   positions = hb_buffer_get_glyph_positions (b, &len2);
    307   g_assert_cmpint (len, ==, len2);
    308   for (i = 0; i < len; i++) {
    309     g_assert_cmpint (0, ==, positions[i].x_advance);
    310     g_assert_cmpint (0, ==, positions[i].y_advance);
    311     g_assert_cmpint (0, ==, positions[i].x_offset);
    312     g_assert_cmpint (0, ==, positions[i].y_offset);
    313     g_assert_cmpint (0, ==, positions[i].var.i32);
    314   }
    315 
    316   /* test reset clears content */
    317   hb_buffer_reset (b);
    318   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    319 }
    320 
    321 static void
    322 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data)
    323 {
    324   hb_buffer_t *b = fixture->buffer;
    325 
    326   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    327 
    328   g_assert (hb_buffer_pre_allocate (b, 100));
    329   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    330   g_assert (hb_buffer_allocation_successful (b));
    331 
    332   /* lets try a huge allocation, make sure it fails */
    333   g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
    334   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    335   g_assert (!hb_buffer_allocation_successful (b));
    336 
    337   /* small one again */
    338   g_assert (hb_buffer_pre_allocate (b, 50));
    339   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    340   g_assert (!hb_buffer_allocation_successful (b));
    341 
    342   hb_buffer_reset (b);
    343   g_assert (hb_buffer_allocation_successful (b));
    344 
    345   /* all allocation and size  */
    346   g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
    347   g_assert (!hb_buffer_allocation_successful (b));
    348 
    349   hb_buffer_reset (b);
    350   g_assert (hb_buffer_allocation_successful (b));
    351 
    352   /* technically, this one can actually pass on 64bit machines, but
    353    * I'm doubtful that any malloc allows 4GB allocations at a time.
    354    * But let's only enable it on a 32-bit machine. */
    355   if (sizeof (long) == 4) {
    356     g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
    357     g_assert (!hb_buffer_allocation_successful (b));
    358   }
    359 
    360   hb_buffer_reset (b);
    361   g_assert (hb_buffer_allocation_successful (b));
    362 }
    363 
    364 
    365 typedef struct {
    366   const char utf8[8];
    367   const uint32_t codepoints[8];
    368 } utf8_conversion_test_t;
    369 
    370 /* note: we skip the first and last byte when adding to buffer */
    371 static const utf8_conversion_test_t utf8_conversion_tests[] = {
    372   {"a\303\207", {-1}},
    373   {"a\303\207b", {0xC7}},
    374   {"ab\303cd", {'b', -1, 'c'}},
    375   {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}
    376 };
    377 
    378 static void
    379 test_buffer_utf8_conversion (void)
    380 {
    381   hb_buffer_t *b;
    382   hb_glyph_info_t *glyphs;
    383   unsigned int bytes, chars, i, j, len;
    384 
    385   b = hb_buffer_create ();
    386   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    387 
    388   for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
    389   {
    390     const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
    391     char *escaped;
    392 
    393     escaped = g_strescape (test->utf8, NULL);
    394     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    395     g_free (escaped);
    396 
    397     bytes = strlen (test->utf8);
    398     for (chars = 0; test->codepoints[chars]; chars++)
    399       ;
    400 
    401     hb_buffer_clear_contents (b);
    402     hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
    403 
    404     glyphs = hb_buffer_get_glyph_infos (b, &len);
    405     g_assert_cmpint (len, ==, chars);
    406     for (j = 0; j < chars; j++)
    407       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    408   }
    409 
    410   hb_buffer_destroy (b);
    411 }
    412 
    413 
    414 
    415 /* Following test table is adapted from glib/glib/tests/utf8-validate.c
    416  * with relicensing permission from Matthias Clasen. */
    417 
    418 typedef struct {
    419   const char *utf8;
    420   int max_len;
    421   unsigned int offset;
    422   gboolean valid;
    423 } utf8_validity_test_t;
    424 
    425 static const utf8_validity_test_t utf8_validity_tests[] = {
    426   /* some tests to check max_len handling */
    427   /* length 1 */
    428   { "abcde", -1, 5, TRUE },
    429   { "abcde", 3, 3, TRUE },
    430   { "abcde", 5, 5, TRUE },
    431   /* length 2 */
    432   { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
    433   { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
    434   { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
    435   { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
    436   { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
    437   { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
    438   { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
    439   /* length 3 */
    440   { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    441   { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    442   { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    443   { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    444   { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    445   { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    446   { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    447 
    448   /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    449   /* greek 'kosme' */
    450   { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    451   /* first sequence of each length */
    452   { "\x00", -1, 0, TRUE },
    453   { "\xc2\x80", -1, 2, TRUE },
    454   { "\xe0\xa0\x80", -1, 3, TRUE },
    455   { "\xf0\x90\x80\x80", -1, 4, TRUE },
    456   { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    457   { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    458   /* last sequence of each length */
    459   { "\x7f", -1, 1, TRUE },
    460   { "\xdf\xbf", -1, 2, TRUE },
    461   { "\xef\xbf\xbf", -1, 0, TRUE },
    462   { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
    463   { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
    464   { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
    465   { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    466   { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    467   /* other boundary conditions */
    468   { "\xed\x9f\xbf", -1, 3, TRUE },
    469   { "\xed\xa0\x80", -1, 0, FALSE },
    470   { "\xed\xbf\xbf", -1, 0, FALSE },
    471   { "\xee\x80\x80", -1, 3, TRUE },
    472   { "\xef\xbf\xbd", -1, 3, TRUE },
    473   { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
    474   /* malformed sequences */
    475   /* continuation bytes */
    476   { "\x80", -1, 0, FALSE },
    477   { "\xbf", -1, 0, FALSE },
    478   { "\x80\xbf", -1, 0, FALSE },
    479   { "\x80\xbf\x80", -1, 0, FALSE },
    480   { "\x80\xbf\x80\xbf", -1, 0, FALSE },
    481   { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    482   { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
    483   { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    484 
    485   /* all possible continuation byte */
    486   { "\x80", -1, 0, FALSE },
    487   { "\x81", -1, 0, FALSE },
    488   { "\x82", -1, 0, FALSE },
    489   { "\x83", -1, 0, FALSE },
    490   { "\x84", -1, 0, FALSE },
    491   { "\x85", -1, 0, FALSE },
    492   { "\x86", -1, 0, FALSE },
    493   { "\x87", -1, 0, FALSE },
    494   { "\x88", -1, 0, FALSE },
    495   { "\x89", -1, 0, FALSE },
    496   { "\x8a", -1, 0, FALSE },
    497   { "\x8b", -1, 0, FALSE },
    498   { "\x8c", -1, 0, FALSE },
    499   { "\x8d", -1, 0, FALSE },
    500   { "\x8e", -1, 0, FALSE },
    501   { "\x8f", -1, 0, FALSE },
    502   { "\x90", -1, 0, FALSE },
    503   { "\x91", -1, 0, FALSE },
    504   { "\x92", -1, 0, FALSE },
    505   { "\x93", -1, 0, FALSE },
    506   { "\x94", -1, 0, FALSE },
    507   { "\x95", -1, 0, FALSE },
    508   { "\x96", -1, 0, FALSE },
    509   { "\x97", -1, 0, FALSE },
    510   { "\x98", -1, 0, FALSE },
    511   { "\x99", -1, 0, FALSE },
    512   { "\x9a", -1, 0, FALSE },
    513   { "\x9b", -1, 0, FALSE },
    514   { "\x9c", -1, 0, FALSE },
    515   { "\x9d", -1, 0, FALSE },
    516   { "\x9e", -1, 0, FALSE },
    517   { "\x9f", -1, 0, FALSE },
    518   { "\xa0", -1, 0, FALSE },
    519   { "\xa1", -1, 0, FALSE },
    520   { "\xa2", -1, 0, FALSE },
    521   { "\xa3", -1, 0, FALSE },
    522   { "\xa4", -1, 0, FALSE },
    523   { "\xa5", -1, 0, FALSE },
    524   { "\xa6", -1, 0, FALSE },
    525   { "\xa7", -1, 0, FALSE },
    526   { "\xa8", -1, 0, FALSE },
    527   { "\xa9", -1, 0, FALSE },
    528   { "\xaa", -1, 0, FALSE },
    529   { "\xab", -1, 0, FALSE },
    530   { "\xac", -1, 0, FALSE },
    531   { "\xad", -1, 0, FALSE },
    532   { "\xae", -1, 0, FALSE },
    533   { "\xaf", -1, 0, FALSE },
    534   { "\xb0", -1, 0, FALSE },
    535   { "\xb1", -1, 0, FALSE },
    536   { "\xb2", -1, 0, FALSE },
    537   { "\xb3", -1, 0, FALSE },
    538   { "\xb4", -1, 0, FALSE },
    539   { "\xb5", -1, 0, FALSE },
    540   { "\xb6", -1, 0, FALSE },
    541   { "\xb7", -1, 0, FALSE },
    542   { "\xb8", -1, 0, FALSE },
    543   { "\xb9", -1, 0, FALSE },
    544   { "\xba", -1, 0, FALSE },
    545   { "\xbb", -1, 0, FALSE },
    546   { "\xbc", -1, 0, FALSE },
    547   { "\xbd", -1, 0, FALSE },
    548   { "\xbe", -1, 0, FALSE },
    549   { "\xbf", -1, 0, FALSE },
    550   /* lone start characters */
    551   { "\xc0\x20", -1, 0, FALSE },
    552   { "\xc1\x20", -1, 0, FALSE },
    553   { "\xc2\x20", -1, 0, FALSE },
    554   { "\xc3\x20", -1, 0, FALSE },
    555   { "\xc4\x20", -1, 0, FALSE },
    556   { "\xc5\x20", -1, 0, FALSE },
    557   { "\xc6\x20", -1, 0, FALSE },
    558   { "\xc7\x20", -1, 0, FALSE },
    559   { "\xc8\x20", -1, 0, FALSE },
    560   { "\xc9\x20", -1, 0, FALSE },
    561   { "\xca\x20", -1, 0, FALSE },
    562   { "\xcb\x20", -1, 0, FALSE },
    563   { "\xcc\x20", -1, 0, FALSE },
    564   { "\xcd\x20", -1, 0, FALSE },
    565   { "\xce\x20", -1, 0, FALSE },
    566   { "\xcf\x20", -1, 0, FALSE },
    567   { "\xd0\x20", -1, 0, FALSE },
    568   { "\xd1\x20", -1, 0, FALSE },
    569   { "\xd2\x20", -1, 0, FALSE },
    570   { "\xd3\x20", -1, 0, FALSE },
    571   { "\xd4\x20", -1, 0, FALSE },
    572   { "\xd5\x20", -1, 0, FALSE },
    573   { "\xd6\x20", -1, 0, FALSE },
    574   { "\xd7\x20", -1, 0, FALSE },
    575   { "\xd8\x20", -1, 0, FALSE },
    576   { "\xd9\x20", -1, 0, FALSE },
    577   { "\xda\x20", -1, 0, FALSE },
    578   { "\xdb\x20", -1, 0, FALSE },
    579   { "\xdc\x20", -1, 0, FALSE },
    580   { "\xdd\x20", -1, 0, FALSE },
    581   { "\xde\x20", -1, 0, FALSE },
    582   { "\xdf\x20", -1, 0, FALSE },
    583   { "\xe0\x20", -1, 0, FALSE },
    584   { "\xe1\x20", -1, 0, FALSE },
    585   { "\xe2\x20", -1, 0, FALSE },
    586   { "\xe3\x20", -1, 0, FALSE },
    587   { "\xe4\x20", -1, 0, FALSE },
    588   { "\xe5\x20", -1, 0, FALSE },
    589   { "\xe6\x20", -1, 0, FALSE },
    590   { "\xe7\x20", -1, 0, FALSE },
    591   { "\xe8\x20", -1, 0, FALSE },
    592   { "\xe9\x20", -1, 0, FALSE },
    593   { "\xea\x20", -1, 0, FALSE },
    594   { "\xeb\x20", -1, 0, FALSE },
    595   { "\xec\x20", -1, 0, FALSE },
    596   { "\xed\x20", -1, 0, FALSE },
    597   { "\xee\x20", -1, 0, FALSE },
    598   { "\xef\x20", -1, 0, FALSE },
    599   { "\xf0\x20", -1, 0, FALSE },
    600   { "\xf1\x20", -1, 0, FALSE },
    601   { "\xf2\x20", -1, 0, FALSE },
    602   { "\xf3\x20", -1, 0, FALSE },
    603   { "\xf4\x20", -1, 0, FALSE },
    604   { "\xf5\x20", -1, 0, FALSE },
    605   { "\xf6\x20", -1, 0, FALSE },
    606   { "\xf7\x20", -1, 0, FALSE },
    607   { "\xf8\x20", -1, 0, FALSE },
    608   { "\xf9\x20", -1, 0, FALSE },
    609   { "\xfa\x20", -1, 0, FALSE },
    610   { "\xfb\x20", -1, 0, FALSE },
    611   { "\xfc\x20", -1, 0, FALSE },
    612   { "\xfd\x20", -1, 0, FALSE },
    613   /* missing continuation bytes */
    614   { "\x20\xc0", -1, 1, FALSE },
    615   { "\x20\xe0\x80", -1, 1, FALSE },
    616   { "\x20\xf0\x80\x80", -1, 1, FALSE },
    617   { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
    618   { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
    619   { "\x20\xdf", -1, 1, FALSE },
    620   { "\x20\xef\xbf", -1, 1, FALSE },
    621   { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
    622   { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
    623   { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
    624   /* impossible bytes */
    625   { "\x20\xfe\x20", -1, 1, FALSE },
    626   { "\x20\xff\x20", -1, 1, FALSE },
    627   /* overlong sequences */
    628   { "\x20\xc0\xaf\x20", -1, 1, FALSE },
    629   { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
    630   { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
    631   { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    632   { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    633   { "\x20\xc1\xbf\x20", -1, 1, FALSE },
    634   { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
    635   { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
    636   { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
    637   { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
    638   { "\x20\xc0\x80\x20", -1, 1, FALSE },
    639   { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
    640   { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
    641   { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
    642   { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
    643   /* illegal code positions */
    644   { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
    645   { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
    646   { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
    647   { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
    648   { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
    649   { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
    650   { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
    651   { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    652   { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    653   { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    654   { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    655   { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    656   { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    657   { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    658   { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    659 #if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
    660   { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
    661   { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
    662 #endif
    663   { "", -1, 0, TRUE }
    664 };
    665 
    666 static void
    667 test_buffer_utf8_validity (void)
    668 {
    669   hb_buffer_t *b;
    670   unsigned int i;
    671 
    672   b = hb_buffer_create ();
    673   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    674 
    675   for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
    676   {
    677     const utf8_validity_test_t *test = &utf8_validity_tests[i];
    678     unsigned int text_bytes, segment_bytes, j, len;
    679     hb_glyph_info_t *glyphs;
    680     char *escaped;
    681 
    682     escaped = g_strescape (test->utf8, NULL);
    683     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    684     g_free (escaped);
    685 
    686     text_bytes = strlen (test->utf8);
    687     if (test->max_len == -1)
    688       segment_bytes = text_bytes;
    689     else
    690       segment_bytes = test->max_len;
    691 
    692     hb_buffer_clear_contents (b);
    693     hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
    694 
    695     glyphs = hb_buffer_get_glyph_infos (b, &len);
    696     for (j = 0; j < len; j++)
    697       if (glyphs[j].codepoint == (hb_codepoint_t) -1)
    698 	break;
    699 
    700     g_assert (test->valid ? j == len : j < len);
    701     if (!test->valid)
    702       g_assert (glyphs[j].cluster == test->offset);
    703   }
    704 
    705   hb_buffer_destroy (b);
    706 }
    707 
    708 
    709 typedef struct {
    710   const uint16_t utf16[8];
    711   const uint32_t codepoints[8];
    712 } utf16_conversion_test_t;
    713 
    714 /* note: we skip the first and last item from utf16 when adding to buffer */
    715 static const utf16_conversion_test_t utf16_conversion_tests[] = {
    716   {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
    717   {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
    718   {{0x41, 0xD800, 0xDF02}, {-1}},
    719   {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
    720   {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
    721   {{0x41, 0xDF00, 0x61}, {-1}},
    722   {{0x41, 0x61}, {0}}
    723 };
    724 
    725 static void
    726 test_buffer_utf16_conversion (void)
    727 {
    728   hb_buffer_t *b;
    729   unsigned int i;
    730 
    731   b = hb_buffer_create ();
    732   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    733 
    734   for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
    735   {
    736     const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
    737     unsigned int u_len, chars, j, len;
    738     hb_glyph_info_t *glyphs;
    739 
    740     g_test_message ("UTF-16 test #%d", i);
    741 
    742     for (u_len = 0; test->utf16[u_len]; u_len++)
    743       ;
    744     for (chars = 0; test->codepoints[chars]; chars++)
    745       ;
    746 
    747     hb_buffer_clear_contents (b);
    748     hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
    749 
    750     glyphs = hb_buffer_get_glyph_infos (b, &len);
    751     g_assert_cmpint (len, ==, chars);
    752     for (j = 0; j < chars; j++)
    753       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    754   }
    755 
    756   hb_buffer_destroy (b);
    757 }
    758 
    759 
    760 typedef struct {
    761   const uint32_t utf32[8];
    762   const uint32_t codepoints[8];
    763 } utf32_conversion_test_t;
    764 
    765 /* note: we skip the first and last item from utf32 when adding to buffer */
    766 static const utf32_conversion_test_t utf32_conversion_tests[] = {
    767   {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -3, -3}},
    768   {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
    769   {{0x41, 0xD800, 0xDF02, 0x61}, {-3, -3}},
    770   {{0x41, 0xD800, 0xDF02}, {-3}},
    771   {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -3}},
    772   {{0x41, 0xD800, 0x61, 0xDF02}, {-3, 0x61}},
    773   {{0x41, 0xDF00, 0x61}, {-3}},
    774   {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
    775   {{0x41, 0x110000, 0x61}, {-3}},
    776   {{0x41, 0x61}, {0}}
    777 };
    778 
    779 static void
    780 test_buffer_utf32_conversion (void)
    781 {
    782   hb_buffer_t *b;
    783   unsigned int i;
    784 
    785   b = hb_buffer_create ();
    786   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
    787 
    788   for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
    789   {
    790     const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
    791     unsigned int u_len, chars, j, len;
    792     hb_glyph_info_t *glyphs;
    793 
    794     g_test_message ("UTF-32 test #%d", i);
    795 
    796     for (u_len = 0; test->utf32[u_len]; u_len++)
    797       ;
    798     for (chars = 0; test->codepoints[chars]; chars++)
    799       ;
    800 
    801     hb_buffer_clear_contents (b);
    802     hb_buffer_add_utf32 (b, test->utf32, u_len,  1, u_len - 2);
    803 
    804     glyphs = hb_buffer_get_glyph_infos (b, &len);
    805     g_assert_cmpint (len, ==, chars);
    806     for (j = 0; j < chars; j++)
    807       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    808   }
    809 
    810   hb_buffer_destroy (b);
    811 }
    812 
    813 
    814 static void
    815 test_empty (hb_buffer_t *b)
    816 {
    817   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    818   g_assert (!hb_buffer_get_glyph_infos (b, NULL));
    819   g_assert (!hb_buffer_get_glyph_positions (b, NULL));
    820 }
    821 
    822 static void
    823 test_buffer_empty (void)
    824 {
    825   hb_buffer_t *b = hb_buffer_get_empty ();
    826 
    827   g_assert (hb_buffer_get_empty ());
    828   g_assert (hb_buffer_get_empty () == b);
    829 
    830   g_assert (!hb_buffer_allocation_successful (b));
    831 
    832   test_empty (b);
    833 
    834   hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
    835 
    836   test_empty (b);
    837 
    838   hb_buffer_reverse (b);
    839   hb_buffer_reverse_clusters (b);
    840 
    841   g_assert (!hb_buffer_set_length (b, 10));
    842 
    843   test_empty (b);
    844 
    845   g_assert (hb_buffer_set_length (b, 0));
    846 
    847   test_empty (b);
    848 
    849   g_assert (!hb_buffer_allocation_successful (b));
    850 
    851   hb_buffer_reset (b);
    852 
    853   test_empty (b);
    854 
    855   g_assert (!hb_buffer_allocation_successful (b));
    856 }
    857 
    858 int
    859 main (int argc, char **argv)
    860 {
    861   unsigned int i;
    862 
    863   hb_test_init (&argc, &argv);
    864 
    865   for (i = 0; i < BUFFER_NUM_TYPES; i++)
    866   {
    867     const void *buffer_type = GINT_TO_POINTER (i);
    868     const char *buffer_name = buffer_names[i];
    869 
    870     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
    871     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
    872     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
    873   }
    874 
    875   hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
    876 
    877   hb_test_add (test_buffer_utf8_conversion);
    878   hb_test_add (test_buffer_utf8_validity);
    879   hb_test_add (test_buffer_utf16_conversion);
    880   hb_test_add (test_buffer_utf32_conversion);
    881   hb_test_add (test_buffer_empty);
    882 
    883   return hb_test_run();
    884 }
    885