/*
 * Copyright © 2011  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
     26 
     27 #include "hb-test.h"
     28 
     29 /* Unit tests for hb-buffer.h */
     30 
     31 
     32 static const char utf8[10] = "ab\360\240\200\200defg";
     33 static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
     34 static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
     35 
     36 
     37 typedef enum {
     38   BUFFER_EMPTY,
     39   BUFFER_ONE_BY_ONE,
     40   BUFFER_UTF32,
     41   BUFFER_UTF16,
     42   BUFFER_UTF8,
     43   BUFFER_NUM_TYPES,
     44 } buffer_type_t;
     45 
     46 static const char *buffer_names[] = {
     47   "empty",
     48   "one-by-one",
     49   "utf32",
     50   "utf16",
     51   "utf8"
     52 };
     53 
     54 typedef struct
     55 {
     56   hb_buffer_t *buffer;
     57 } fixture_t;
     58 
     59 static void
     60 fixture_init (fixture_t *fixture, gconstpointer user_data)
     61 {
     62   hb_buffer_t *b;
     63   unsigned int i;
     64 
     65   b = fixture->buffer = hb_buffer_create ();
     66 
     67   switch (GPOINTER_TO_INT (user_data))
     68   {
     69     case BUFFER_EMPTY:
     70       break;
     71 
     72     case BUFFER_ONE_BY_ONE:
     73       for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
     74 	hb_buffer_add (b, utf32[i], i);
     75       break;
     76 
     77     case BUFFER_UTF32:
     78       hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
     79       break;
     80 
     81     case BUFFER_UTF16:
     82       hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
     83       break;
     84 
     85     case BUFFER_UTF8:
     86       hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
     87       break;
     88 
     89     default:
     90       g_assert_not_reached ();
     91   }
     92 }
     93 
     94 static void
     95 fixture_finish (fixture_t *fixture, gconstpointer user_data)
     96 {
     97   hb_buffer_destroy (fixture->buffer);
     98 }
     99 
    100 
    101 static void
    102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data)
    103 {
    104   hb_buffer_t *b = fixture->buffer;
    105   hb_unicode_funcs_t *ufuncs;
    106 
    107   /* test default properties */
    108 
    109   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    110   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    111   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    112   g_assert (hb_buffer_get_language (b) == NULL);
    113   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_DEFAULT);
    114 
    115 
    116   /* test property changes are retained */
    117   ufuncs = hb_unicode_funcs_create (NULL);
    118   hb_buffer_set_unicode_funcs (b, ufuncs);
    119   hb_unicode_funcs_destroy (ufuncs);
    120   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    121 
    122   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    123   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    124 
    125   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    126   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    127 
    128   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    129   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    130 
    131   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    132   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    133 
    134 
    135 
    136   /* test clear clears all properties but unicode_funcs */
    137 
    138   hb_buffer_clear_contents (b);
    139 
    140   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    141   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    142   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    143   g_assert (hb_buffer_get_language (b) == NULL);
    144   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    145 
    146 
    147   /* test reset clears all properties */
    148 
    149   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    150   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    151 
    152   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    153   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    154 
    155   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    156   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    157 
    158   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    159   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    160 
    161   hb_buffer_reset (b);
    162 
    163   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    164   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    165   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    166   g_assert (hb_buffer_get_language (b) == NULL);
    167   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    168 }
    169 
    170 static void
    171 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
    172 {
    173   hb_buffer_t *b = fixture->buffer;
    174   unsigned int i, len, len2;
    175   buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
    176   hb_glyph_info_t *glyphs;
    177 
    178   if (buffer_type == BUFFER_EMPTY) {
    179     g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    180     return;
    181   }
    182 
    183   len = hb_buffer_get_length (b);
    184   hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
    185   glyphs = hb_buffer_get_glyph_infos (b, &len2);
    186   g_assert_cmpint (len, ==, len2);
    187   g_assert_cmpint (len, ==, 5);
    188 
    189   for (i = 0; i < len; i++) {
    190     g_assert_cmphex (glyphs[i].mask,      ==, 1);
    191     g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
    192     g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
    193   }
    194 
    195   for (i = 0; i < len; i++) {
    196     unsigned int cluster;
    197     cluster = 1+i;
    198     if (i >= 2) {
    199       if (buffer_type == BUFFER_UTF16)
    200 	cluster++;
    201       else if (buffer_type == BUFFER_UTF8)
    202         cluster += 3;
    203     }
    204     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    205     g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
    206   }
    207 
    208   /* reverse, test, and reverse back */
    209 
    210   hb_buffer_reverse (b);
    211   for (i = 0; i < len; i++)
    212     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    213 
    214   hb_buffer_reverse (b);
    215   for (i = 0; i < len; i++)
    216     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    217 
    218   /* reverse_clusters works same as reverse for now since each codepoint is
    219    * in its own cluster */
    220 
    221   hb_buffer_reverse_clusters (b);
    222   for (i = 0; i < len; i++)
    223     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    224 
    225   hb_buffer_reverse_clusters (b);
    226   for (i = 0; i < len; i++)
    227     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    228 
    229   /* now form a cluster and test again */
    230   glyphs[2].cluster = glyphs[1].cluster;
    231 
    232   /* reverse, test, and reverse back */
    233 
    234   hb_buffer_reverse (b);
    235   for (i = 0; i < len; i++)
    236     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    237 
    238   hb_buffer_reverse (b);
    239   for (i = 0; i < len; i++)
    240     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    241 
    242   /* reverse_clusters twice still should return the original string,
    243    * but when applied once, the 1-2 cluster should be retained. */
    244 
    245   hb_buffer_reverse_clusters (b);
    246   for (i = 0; i < len; i++) {
    247     unsigned int j = len-1-i;
    248     if (j == 1)
    249       j = 2;
    250     else if (j == 2)
    251       j = 1;
    252     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
    253   }
    254 
    255   hb_buffer_reverse_clusters (b);
    256   for (i = 0; i < len; i++)
    257     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    258 
    259 
    260   /* test setting length */
    261 
    262   /* enlarge */
    263   g_assert (hb_buffer_set_length (b, 10));
    264   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    265   g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
    266   for (i = 0; i < 5; i++)
    267     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    268   for (i = 5; i < 10; i++)
    269     g_assert_cmphex (glyphs[i].codepoint, ==, 0);
    270   /* shrink */
    271   g_assert (hb_buffer_set_length (b, 3));
    272   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    273   g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
    274   for (i = 0; i < 3; i++)
    275     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    276 
    277 
    278   g_assert (hb_buffer_allocation_successful (b));
    279 
    280 
    281   /* test reset clears content */
    282 
    283   hb_buffer_reset (b);
    284   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    285 }
    286 
    287 static void
    288 test_buffer_positions (fixture_t *fixture, gconstpointer user_data)
    289 {
    290   hb_buffer_t *b = fixture->buffer;
    291   unsigned int i, len, len2;
    292   hb_glyph_position_t *positions;
    293 
    294   /* Without shaping, positions should all be zero */
    295   len = hb_buffer_get_length (b);
    296   hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
    297   positions = hb_buffer_get_glyph_positions (b, &len2);
    298   g_assert_cmpint (len, ==, len2);
    299   for (i = 0; i < len; i++) {
    300     g_assert_cmpint (0, ==, positions[i].x_advance);
    301     g_assert_cmpint (0, ==, positions[i].y_advance);
    302     g_assert_cmpint (0, ==, positions[i].x_offset);
    303     g_assert_cmpint (0, ==, positions[i].y_offset);
    304     g_assert_cmpint (0, ==, positions[i].var.i32);
    305   }
    306 
    307   /* test reset clears content */
    308   hb_buffer_reset (b);
    309   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    310 }
    311 
    312 static void
    313 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data)
    314 {
    315   hb_buffer_t *b = fixture->buffer;
    316 
    317   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    318 
    319   g_assert (hb_buffer_pre_allocate (b, 100));
    320   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    321   g_assert (hb_buffer_allocation_successful (b));
    322 
    323   /* lets try a huge allocation, make sure it fails */
    324   g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
    325   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    326   g_assert (!hb_buffer_allocation_successful (b));
    327 
    328   /* small one again */
    329   g_assert (hb_buffer_pre_allocate (b, 50));
    330   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    331   g_assert (!hb_buffer_allocation_successful (b));
    332 
    333   hb_buffer_reset (b);
    334   g_assert (hb_buffer_allocation_successful (b));
    335 
    336   /* all allocation and size  */
    337   g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
    338   g_assert (!hb_buffer_allocation_successful (b));
    339 
    340   hb_buffer_reset (b);
    341   g_assert (hb_buffer_allocation_successful (b));
    342 
    343   /* technically, this one can actually pass on 64bit machines, but
    344    * I'm doubtful that any malloc allows 4GB allocations at a time.
    345    * But let's only enable it on a 32-bit machine. */
    346   if (sizeof (long) == 4) {
    347     g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
    348     g_assert (!hb_buffer_allocation_successful (b));
    349   }
    350 
    351   hb_buffer_reset (b);
    352   g_assert (hb_buffer_allocation_successful (b));
    353 }
    354 
    355 
    356 typedef struct {
    357   const char utf8[8];
    358   const uint32_t codepoints[8];
    359 } utf8_conversion_test_t;
    360 
    361 /* note: we skip the first and last byte when adding to buffer */
    362 static const utf8_conversion_test_t utf8_conversion_tests[] = {
    363   {"a\303\207", {-1}},
    364   {"a\303\207b", {0xC7}},
    365   {"ab\303cd", {'b', -1, 'c'}},
    366   {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}
    367 };
    368 
    369 static void
    370 test_buffer_utf8_conversion (void)
    371 {
    372   hb_buffer_t *b;
    373   hb_glyph_info_t *glyphs;
    374   unsigned int bytes, chars, i, j, len;
    375 
    376   b = hb_buffer_create ();
    377   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    378 
    379   for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
    380   {
    381     const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
    382     char *escaped;
    383 
    384     escaped = g_strescape (test->utf8, NULL);
    385     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    386     g_free (escaped);
    387 
    388     bytes = strlen (test->utf8);
    389     for (chars = 0; test->codepoints[chars]; chars++)
    390       ;
    391 
    392     hb_buffer_clear_contents (b);
    393     hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
    394 
    395     glyphs = hb_buffer_get_glyph_infos (b, &len);
    396     g_assert_cmpint (len, ==, chars);
    397     for (j = 0; j < chars; j++)
    398       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    399   }
    400 
    401   hb_buffer_destroy (b);
    402 }
    403 
    404 
    405 
    406 /* Following test table is adapted from glib/glib/tests/utf8-validate.c
    407  * with relicensing permission from Matthias Clasen. */
    408 
    409 typedef struct {
    410   const char *utf8;
    411   int max_len;
    412   unsigned int offset;
    413   gboolean valid;
    414 } utf8_validity_test_t;
    415 
    416 static const utf8_validity_test_t utf8_validity_tests[] = {
    417   /* some tests to check max_len handling */
    418   /* length 1 */
    419   { "abcde", -1, 5, TRUE },
    420   { "abcde", 3, 3, TRUE },
    421   { "abcde", 5, 5, TRUE },
    422   /* length 2 */
    423   { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
    424   { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
    425   { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
    426   { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
    427   { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
    428   { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
    429   { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
    430   /* length 3 */
    431   { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    432   { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    433   { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    434   { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    435   { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    436   { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    437   { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    438 
    439   /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    440   /* greek 'kosme' */
    441   { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    442   /* first sequence of each length */
    443   { "\x00", -1, 0, TRUE },
    444   { "\xc2\x80", -1, 2, TRUE },
    445   { "\xe0\xa0\x80", -1, 3, TRUE },
    446   { "\xf0\x90\x80\x80", -1, 4, TRUE },
    447   { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    448   { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    449   /* last sequence of each length */
    450   { "\x7f", -1, 1, TRUE },
    451   { "\xdf\xbf", -1, 2, TRUE },
    452   { "\xef\xbf\xbf", -1, 0, TRUE },
    453   { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
    454   { "\xf4\x90\xbf\xbf", -1, 0, FALSE },
    455   { "\xf7\xbf\xbf\xbf", -1, 0, FALSE },
    456   { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    457   { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    458   /* other boundary conditions */
    459   { "\xed\x9f\xbf", -1, 3, TRUE },
    460   { "\xed\xa0\x80", -1, 0, FALSE },
    461   { "\xed\xbf\xbf", -1, 0, FALSE },
    462   { "\xee\x80\x80", -1, 3, TRUE },
    463   { "\xef\xbf\xbd", -1, 3, TRUE },
    464   { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
    465   /* malformed sequences */
    466   /* continuation bytes */
    467   { "\x80", -1, 0, FALSE },
    468   { "\xbf", -1, 0, FALSE },
    469   { "\x80\xbf", -1, 0, FALSE },
    470   { "\x80\xbf\x80", -1, 0, FALSE },
    471   { "\x80\xbf\x80\xbf", -1, 0, FALSE },
    472   { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    473   { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
    474   { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    475 
    476   /* all possible continuation byte */
    477   { "\x80", -1, 0, FALSE },
    478   { "\x81", -1, 0, FALSE },
    479   { "\x82", -1, 0, FALSE },
    480   { "\x83", -1, 0, FALSE },
    481   { "\x84", -1, 0, FALSE },
    482   { "\x85", -1, 0, FALSE },
    483   { "\x86", -1, 0, FALSE },
    484   { "\x87", -1, 0, FALSE },
    485   { "\x88", -1, 0, FALSE },
    486   { "\x89", -1, 0, FALSE },
    487   { "\x8a", -1, 0, FALSE },
    488   { "\x8b", -1, 0, FALSE },
    489   { "\x8c", -1, 0, FALSE },
    490   { "\x8d", -1, 0, FALSE },
    491   { "\x8e", -1, 0, FALSE },
    492   { "\x8f", -1, 0, FALSE },
    493   { "\x90", -1, 0, FALSE },
    494   { "\x91", -1, 0, FALSE },
    495   { "\x92", -1, 0, FALSE },
    496   { "\x93", -1, 0, FALSE },
    497   { "\x94", -1, 0, FALSE },
    498   { "\x95", -1, 0, FALSE },
    499   { "\x96", -1, 0, FALSE },
    500   { "\x97", -1, 0, FALSE },
    501   { "\x98", -1, 0, FALSE },
    502   { "\x99", -1, 0, FALSE },
    503   { "\x9a", -1, 0, FALSE },
    504   { "\x9b", -1, 0, FALSE },
    505   { "\x9c", -1, 0, FALSE },
    506   { "\x9d", -1, 0, FALSE },
    507   { "\x9e", -1, 0, FALSE },
    508   { "\x9f", -1, 0, FALSE },
    509   { "\xa0", -1, 0, FALSE },
    510   { "\xa1", -1, 0, FALSE },
    511   { "\xa2", -1, 0, FALSE },
    512   { "\xa3", -1, 0, FALSE },
    513   { "\xa4", -1, 0, FALSE },
    514   { "\xa5", -1, 0, FALSE },
    515   { "\xa6", -1, 0, FALSE },
    516   { "\xa7", -1, 0, FALSE },
    517   { "\xa8", -1, 0, FALSE },
    518   { "\xa9", -1, 0, FALSE },
    519   { "\xaa", -1, 0, FALSE },
    520   { "\xab", -1, 0, FALSE },
    521   { "\xac", -1, 0, FALSE },
    522   { "\xad", -1, 0, FALSE },
    523   { "\xae", -1, 0, FALSE },
    524   { "\xaf", -1, 0, FALSE },
    525   { "\xb0", -1, 0, FALSE },
    526   { "\xb1", -1, 0, FALSE },
    527   { "\xb2", -1, 0, FALSE },
    528   { "\xb3", -1, 0, FALSE },
    529   { "\xb4", -1, 0, FALSE },
    530   { "\xb5", -1, 0, FALSE },
    531   { "\xb6", -1, 0, FALSE },
    532   { "\xb7", -1, 0, FALSE },
    533   { "\xb8", -1, 0, FALSE },
    534   { "\xb9", -1, 0, FALSE },
    535   { "\xba", -1, 0, FALSE },
    536   { "\xbb", -1, 0, FALSE },
    537   { "\xbc", -1, 0, FALSE },
    538   { "\xbd", -1, 0, FALSE },
    539   { "\xbe", -1, 0, FALSE },
    540   { "\xbf", -1, 0, FALSE },
    541   /* lone start characters */
    542   { "\xc0\x20", -1, 0, FALSE },
    543   { "\xc1\x20", -1, 0, FALSE },
    544   { "\xc2\x20", -1, 0, FALSE },
    545   { "\xc3\x20", -1, 0, FALSE },
    546   { "\xc4\x20", -1, 0, FALSE },
    547   { "\xc5\x20", -1, 0, FALSE },
    548   { "\xc6\x20", -1, 0, FALSE },
    549   { "\xc7\x20", -1, 0, FALSE },
    550   { "\xc8\x20", -1, 0, FALSE },
    551   { "\xc9\x20", -1, 0, FALSE },
    552   { "\xca\x20", -1, 0, FALSE },
    553   { "\xcb\x20", -1, 0, FALSE },
    554   { "\xcc\x20", -1, 0, FALSE },
    555   { "\xcd\x20", -1, 0, FALSE },
    556   { "\xce\x20", -1, 0, FALSE },
    557   { "\xcf\x20", -1, 0, FALSE },
    558   { "\xd0\x20", -1, 0, FALSE },
    559   { "\xd1\x20", -1, 0, FALSE },
    560   { "\xd2\x20", -1, 0, FALSE },
    561   { "\xd3\x20", -1, 0, FALSE },
    562   { "\xd4\x20", -1, 0, FALSE },
    563   { "\xd5\x20", -1, 0, FALSE },
    564   { "\xd6\x20", -1, 0, FALSE },
    565   { "\xd7\x20", -1, 0, FALSE },
    566   { "\xd8\x20", -1, 0, FALSE },
    567   { "\xd9\x20", -1, 0, FALSE },
    568   { "\xda\x20", -1, 0, FALSE },
    569   { "\xdb\x20", -1, 0, FALSE },
    570   { "\xdc\x20", -1, 0, FALSE },
    571   { "\xdd\x20", -1, 0, FALSE },
    572   { "\xde\x20", -1, 0, FALSE },
    573   { "\xdf\x20", -1, 0, FALSE },
    574   { "\xe0\x20", -1, 0, FALSE },
    575   { "\xe1\x20", -1, 0, FALSE },
    576   { "\xe2\x20", -1, 0, FALSE },
    577   { "\xe3\x20", -1, 0, FALSE },
    578   { "\xe4\x20", -1, 0, FALSE },
    579   { "\xe5\x20", -1, 0, FALSE },
    580   { "\xe6\x20", -1, 0, FALSE },
    581   { "\xe7\x20", -1, 0, FALSE },
    582   { "\xe8\x20", -1, 0, FALSE },
    583   { "\xe9\x20", -1, 0, FALSE },
    584   { "\xea\x20", -1, 0, FALSE },
    585   { "\xeb\x20", -1, 0, FALSE },
    586   { "\xec\x20", -1, 0, FALSE },
    587   { "\xed\x20", -1, 0, FALSE },
    588   { "\xee\x20", -1, 0, FALSE },
    589   { "\xef\x20", -1, 0, FALSE },
    590   { "\xf0\x20", -1, 0, FALSE },
    591   { "\xf1\x20", -1, 0, FALSE },
    592   { "\xf2\x20", -1, 0, FALSE },
    593   { "\xf3\x20", -1, 0, FALSE },
    594   { "\xf4\x20", -1, 0, FALSE },
    595   { "\xf5\x20", -1, 0, FALSE },
    596   { "\xf6\x20", -1, 0, FALSE },
    597   { "\xf7\x20", -1, 0, FALSE },
    598   { "\xf8\x20", -1, 0, FALSE },
    599   { "\xf9\x20", -1, 0, FALSE },
    600   { "\xfa\x20", -1, 0, FALSE },
    601   { "\xfb\x20", -1, 0, FALSE },
    602   { "\xfc\x20", -1, 0, FALSE },
    603   { "\xfd\x20", -1, 0, FALSE },
    604   /* missing continuation bytes */
    605   { "\x20\xc0", -1, 1, FALSE },
    606   { "\x20\xe0\x80", -1, 1, FALSE },
    607   { "\x20\xf0\x80\x80", -1, 1, FALSE },
    608   { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
    609   { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
    610   { "\x20\xdf", -1, 1, FALSE },
    611   { "\x20\xef\xbf", -1, 1, FALSE },
    612   { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
    613   { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
    614   { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
    615   /* impossible bytes */
    616   { "\x20\xfe\x20", -1, 1, FALSE },
    617   { "\x20\xff\x20", -1, 1, FALSE },
    618   /* overlong sequences */
    619   { "\x20\xc0\xaf\x20", -1, 1, FALSE },
    620   { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
    621   { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
    622   { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    623   { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    624   { "\x20\xc1\xbf\x20", -1, 1, FALSE },
    625   { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
    626   { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
    627   { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
    628   { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
    629   { "\x20\xc0\x80\x20", -1, 1, FALSE },
    630   { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
    631   { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
    632   { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
    633   { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
    634   /* illegal code positions */
    635   { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
    636   { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
    637   { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
    638   { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
    639   { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
    640   { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
    641   { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
    642   { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    643   { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    644   { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    645   { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    646   { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    647   { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    648   { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    649   { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    650 #if 0 /* We don't consider U+FFFE / U+FFFF and similar invalid. */
    651   { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
    652   { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
    653 #endif
    654   { "", -1, 0, TRUE }
    655 };
    656 
    657 static void
    658 test_buffer_utf8_validity (void)
    659 {
    660   hb_buffer_t *b;
    661   unsigned int i;
    662 
    663   b = hb_buffer_create ();
    664   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    665 
    666   for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
    667   {
    668     const utf8_validity_test_t *test = &utf8_validity_tests[i];
    669     unsigned int text_bytes, segment_bytes, j, len;
    670     hb_glyph_info_t *glyphs;
    671     char *escaped;
    672 
    673     escaped = g_strescape (test->utf8, NULL);
    674     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    675     g_free (escaped);
    676 
    677     text_bytes = strlen (test->utf8);
    678     if (test->max_len == -1)
    679       segment_bytes = text_bytes;
    680     else
    681       segment_bytes = test->max_len;
    682 
    683     hb_buffer_clear_contents (b);
    684     hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
    685 
    686     glyphs = hb_buffer_get_glyph_infos (b, &len);
    687     for (j = 0; j < len; j++)
    688       if (glyphs[j].codepoint == (hb_codepoint_t) -1)
    689 	break;
    690 
    691     g_assert (test->valid ? j == len : j < len);
    692     if (!test->valid)
    693       g_assert (glyphs[j].cluster == test->offset);
    694   }
    695 
    696   hb_buffer_destroy (b);
    697 }
    698 
    699 
    700 typedef struct {
    701   const uint16_t utf16[8];
    702   const uint32_t codepoints[8];
    703 } utf16_conversion_test_t;
    704 
    705 /* note: we skip the first and last item from utf16 when adding to buffer */
    706 static const utf16_conversion_test_t utf16_conversion_tests[] = {
    707   {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
    708   {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
    709   {{0x41, 0xD800, 0xDF02}, {-1}},
    710   {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
    711   {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
    712   {{0x41, 0xDF00, 0x61}, {-1}},
    713   {{0x41, 0x61}, {0}}
    714 };
    715 
    716 static void
    717 test_buffer_utf16_conversion (void)
    718 {
    719   hb_buffer_t *b;
    720   unsigned int i;
    721 
    722   b = hb_buffer_create ();
    723   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -1);
    724 
    725   for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
    726   {
    727     const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
    728     unsigned int u_len, chars, j, len;
    729     hb_glyph_info_t *glyphs;
    730 
    731     g_test_message ("UTF-16 test #%d", i);
    732 
    733     for (u_len = 0; test->utf16[u_len]; u_len++)
    734       ;
    735     for (chars = 0; test->codepoints[chars]; chars++)
    736       ;
    737 
    738     hb_buffer_clear_contents (b);
    739     hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
    740 
    741     glyphs = hb_buffer_get_glyph_infos (b, &len);
    742     g_assert_cmpint (len, ==, chars);
    743     for (j = 0; j < chars; j++)
    744       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    745   }
    746 
    747   hb_buffer_destroy (b);
    748 }
    749 
    750 
    751 typedef struct {
    752   const uint32_t utf32[8];
    753   const uint32_t codepoints[8];
    754 } utf32_conversion_test_t;
    755 
    756 /* note: we skip the first and last item from utf32 when adding to buffer */
    757 static const utf32_conversion_test_t utf32_conversion_tests[] = {
    758   {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, -3, -3}},
    759   {{0x41, 0x004D, 0x0430, 0x4E8C, 0x10302, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
    760   {{0x41, 0xD800, 0xDF02, 0x61}, {-3, -3}},
    761   {{0x41, 0xD800, 0xDF02}, {-3}},
    762   {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -3}},
    763   {{0x41, 0xD800, 0x61, 0xDF02}, {-3, 0x61}},
    764   {{0x41, 0xDF00, 0x61}, {-3}},
    765   {{0x41, 0x10FFFF, 0x61}, {0x10FFFF}},
    766   {{0x41, 0x110000, 0x61}, {-3}},
    767   {{0x41, 0x61}, {0}}
    768 };
    769 
    770 static void
    771 test_buffer_utf32_conversion (void)
    772 {
    773   hb_buffer_t *b;
    774   unsigned int i;
    775 
    776   b = hb_buffer_create ();
    777   hb_buffer_set_replacement_codepoint (b, (hb_codepoint_t) -3);
    778 
    779   for (i = 0; i < G_N_ELEMENTS (utf32_conversion_tests); i++)
    780   {
    781     const utf32_conversion_test_t *test = &utf32_conversion_tests[i];
    782     unsigned int u_len, chars, j, len;
    783     hb_glyph_info_t *glyphs;
    784 
    785     g_test_message ("UTF-32 test #%d", i);
    786 
    787     for (u_len = 0; test->utf32[u_len]; u_len++)
    788       ;
    789     for (chars = 0; test->codepoints[chars]; chars++)
    790       ;
    791 
    792     hb_buffer_clear_contents (b);
    793     hb_buffer_add_utf32 (b, test->utf32, u_len,  1, u_len - 2);
    794 
    795     glyphs = hb_buffer_get_glyph_infos (b, &len);
    796     g_assert_cmpint (len, ==, chars);
    797     for (j = 0; j < chars; j++)
    798       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    799   }
    800 
    801   hb_buffer_destroy (b);
    802 }
    803 
    804 
    805 static void
    806 test_empty (hb_buffer_t *b)
    807 {
    808   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    809   g_assert (!hb_buffer_get_glyph_infos (b, NULL));
    810   g_assert (!hb_buffer_get_glyph_positions (b, NULL));
    811 }
    812 
    813 static void
    814 test_buffer_empty (void)
    815 {
    816   hb_buffer_t *b = hb_buffer_get_empty ();
    817 
    818   g_assert (hb_buffer_get_empty ());
    819   g_assert (hb_buffer_get_empty () == b);
    820 
    821   g_assert (!hb_buffer_allocation_successful (b));
    822 
    823   test_empty (b);
    824 
    825   hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
    826 
    827   test_empty (b);
    828 
    829   hb_buffer_reverse (b);
    830   hb_buffer_reverse_clusters (b);
    831 
    832   g_assert (!hb_buffer_set_length (b, 10));
    833 
    834   test_empty (b);
    835 
    836   g_assert (hb_buffer_set_length (b, 0));
    837 
    838   test_empty (b);
    839 
    840   g_assert (!hb_buffer_allocation_successful (b));
    841 
    842   hb_buffer_reset (b);
    843 
    844   test_empty (b);
    845 
    846   g_assert (!hb_buffer_allocation_successful (b));
    847 }
    848 
    849 int
    850 main (int argc, char **argv)
    851 {
    852   unsigned int i;
    853 
    854   hb_test_init (&argc, &argv);
    855 
    856   for (i = 0; i < BUFFER_NUM_TYPES; i++)
    857   {
    858     const void *buffer_type = GINT_TO_POINTER (i);
    859     const char *buffer_name = buffer_names[i];
    860 
    861     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
    862     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
    863     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
    864   }
    865 
    866   hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
    867 
    868   hb_test_add (test_buffer_utf8_conversion);
    869   hb_test_add (test_buffer_utf8_validity);
    870   hb_test_add (test_buffer_utf16_conversion);
    871   hb_test_add (test_buffer_utf32_conversion);
    872   hb_test_add (test_buffer_empty);
    873 
    874   return hb_test_run();
    875 }
    876