/*
 * Copyright © 2011  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#include "hb-test.h"

/* Unit tests for hb-buffer.h */


     32 static const char utf8[10] = "ab\360\240\200\200defg";
     33 static const uint16_t utf16[8] = {'a', 'b', 0xD840, 0xDC00, 'd', 'e', 'f', 'g'};
     34 static const uint32_t utf32[7] = {'a', 'b', 0x20000, 'd', 'e', 'f', 'g'};
     35 
     36 
     37 typedef enum {
     38   BUFFER_EMPTY,
     39   BUFFER_ONE_BY_ONE,
     40   BUFFER_UTF32,
     41   BUFFER_UTF16,
     42   BUFFER_UTF8,
     43   BUFFER_NUM_TYPES,
     44 } buffer_type_t;
     45 
     46 static const char *buffer_names[] = {
     47   "empty",
     48   "one-by-one",
     49   "utf32",
     50   "utf16",
     51   "utf8"
     52 };
     53 
     54 typedef struct
     55 {
     56   hb_buffer_t *buffer;
     57 } fixture_t;
     58 
     59 static void
     60 fixture_init (fixture_t *fixture, gconstpointer user_data)
     61 {
     62   hb_buffer_t *b;
     63   unsigned int i;
     64 
     65   b = fixture->buffer = hb_buffer_create ();
     66 
     67   switch (GPOINTER_TO_INT (user_data))
     68   {
     69     case BUFFER_EMPTY:
     70       break;
     71 
     72     case BUFFER_ONE_BY_ONE:
     73       for (i = 1; i < G_N_ELEMENTS (utf32) - 1; i++)
     74 	hb_buffer_add (b, utf32[i], i);
     75       break;
     76 
     77     case BUFFER_UTF32:
     78       hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
     79       break;
     80 
     81     case BUFFER_UTF16:
     82       hb_buffer_add_utf16 (b, utf16, G_N_ELEMENTS (utf16), 1, G_N_ELEMENTS (utf16) - 2);
     83       break;
     84 
     85     case BUFFER_UTF8:
     86       hb_buffer_add_utf8  (b, utf8,  G_N_ELEMENTS (utf8),  1, G_N_ELEMENTS (utf8)  - 2);
     87       break;
     88 
     89     default:
     90       g_assert_not_reached ();
     91   }
     92 }
     93 
     94 static void
     95 fixture_finish (fixture_t *fixture, gconstpointer user_data)
     96 {
     97   hb_buffer_destroy (fixture->buffer);
     98 }
     99 
    100 
    101 static void
    102 test_buffer_properties (fixture_t *fixture, gconstpointer user_data)
    103 {
    104   hb_buffer_t *b = fixture->buffer;
    105   hb_unicode_funcs_t *ufuncs;
    106 
    107   /* test default properties */
    108 
    109   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    110   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    111   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    112   g_assert (hb_buffer_get_language (b) == NULL);
    113   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    114 
    115 
    116   /* test property changes are retained */
    117   ufuncs = hb_unicode_funcs_create (NULL);
    118   hb_buffer_set_unicode_funcs (b, ufuncs);
    119   hb_unicode_funcs_destroy (ufuncs);
    120   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    121 
    122   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    123   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    124 
    125   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    126   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    127 
    128   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    129   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    130 
    131   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    132   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    133 
    134 
    135 
    136   /* test clear clears all properties but unicode_funcs */
    137 
    138   hb_buffer_clear_contents (b);
    139 
    140   g_assert (hb_buffer_get_unicode_funcs (b) == ufuncs);
    141   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    142   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    143   g_assert (hb_buffer_get_language (b) == NULL);
    144   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    145 
    146 
    147   /* test reset clears all properties */
    148 
    149   hb_buffer_set_direction (b, HB_DIRECTION_RTL);
    150   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_RTL);
    151 
    152   hb_buffer_set_script (b, HB_SCRIPT_ARABIC);
    153   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_ARABIC);
    154 
    155   hb_buffer_set_language (b, hb_language_from_string ("fa", -1));
    156   g_assert (hb_buffer_get_language (b) == hb_language_from_string ("Fa", -1));
    157 
    158   hb_buffer_set_flags (b, HB_BUFFER_FLAG_BOT);
    159   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAG_BOT);
    160 
    161   hb_buffer_reset (b);
    162 
    163   g_assert (hb_buffer_get_unicode_funcs (b) == hb_unicode_funcs_get_default ());
    164   g_assert (hb_buffer_get_direction (b) == HB_DIRECTION_INVALID);
    165   g_assert (hb_buffer_get_script (b) == HB_SCRIPT_INVALID);
    166   g_assert (hb_buffer_get_language (b) == NULL);
    167   g_assert (hb_buffer_get_flags (b) == HB_BUFFER_FLAGS_DEFAULT);
    168 }
    169 
    170 static void
    171 test_buffer_contents (fixture_t *fixture, gconstpointer user_data)
    172 {
    173   hb_buffer_t *b = fixture->buffer;
    174   unsigned int i, len, len2;
    175   buffer_type_t buffer_type = GPOINTER_TO_INT (user_data);
    176   hb_glyph_info_t *glyphs;
    177 
    178   if (buffer_type == BUFFER_EMPTY) {
    179     g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    180     return;
    181   }
    182 
    183   len = hb_buffer_get_length (b);
    184   hb_buffer_get_glyph_infos (b, NULL); /* test NULL */
    185   glyphs = hb_buffer_get_glyph_infos (b, &len2);
    186   g_assert_cmpint (len, ==, len2);
    187   g_assert_cmpint (len, ==, 5);
    188 
    189   for (i = 0; i < len; i++) {
    190     g_assert_cmphex (glyphs[i].mask,      ==, 1);
    191     g_assert_cmphex (glyphs[i].var1.u32,  ==, 0);
    192     g_assert_cmphex (glyphs[i].var2.u32,  ==, 0);
    193   }
    194 
    195   for (i = 0; i < len; i++) {
    196     unsigned int cluster;
    197     cluster = 1+i;
    198     if (i >= 2) {
    199       if (buffer_type == BUFFER_UTF16)
    200 	cluster++;
    201       else if (buffer_type == BUFFER_UTF8)
    202         cluster += 3;
    203     }
    204     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    205     g_assert_cmphex (glyphs[i].cluster,   ==, cluster);
    206   }
    207 
    208   /* reverse, test, and reverse back */
    209 
    210   hb_buffer_reverse (b);
    211   for (i = 0; i < len; i++)
    212     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    213 
    214   hb_buffer_reverse (b);
    215   for (i = 0; i < len; i++)
    216     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    217 
    218   /* reverse_clusters works same as reverse for now since each codepoint is
    219    * in its own cluster */
    220 
    221   hb_buffer_reverse_clusters (b);
    222   for (i = 0; i < len; i++)
    223     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    224 
    225   hb_buffer_reverse_clusters (b);
    226   for (i = 0; i < len; i++)
    227     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    228 
    229   /* now form a cluster and test again */
    230   glyphs[2].cluster = glyphs[1].cluster;
    231 
    232   /* reverse, test, and reverse back */
    233 
    234   hb_buffer_reverse (b);
    235   for (i = 0; i < len; i++)
    236     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[len-i]);
    237 
    238   hb_buffer_reverse (b);
    239   for (i = 0; i < len; i++)
    240     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    241 
    242   /* reverse_clusters twice still should return the original string,
    243    * but when applied once, the 1-2 cluster should be retained. */
    244 
    245   hb_buffer_reverse_clusters (b);
    246   for (i = 0; i < len; i++) {
    247     unsigned int j = len-1-i;
    248     if (j == 1)
    249       j = 2;
    250     else if (j == 2)
    251       j = 1;
    252     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+j]);
    253   }
    254 
    255   hb_buffer_reverse_clusters (b);
    256   for (i = 0; i < len; i++)
    257     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    258 
    259 
    260   /* test setting length */
    261 
    262   /* enlarge */
    263   g_assert (hb_buffer_set_length (b, 10));
    264   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    265   g_assert_cmpint (hb_buffer_get_length (b), ==, 10);
    266   for (i = 0; i < 5; i++)
    267     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    268   for (i = 5; i < 10; i++)
    269     g_assert_cmphex (glyphs[i].codepoint, ==, 0);
    270   /* shrink */
    271   g_assert (hb_buffer_set_length (b, 3));
    272   glyphs = hb_buffer_get_glyph_infos (b, NULL);
    273   g_assert_cmpint (hb_buffer_get_length (b), ==, 3);
    274   for (i = 0; i < 3; i++)
    275     g_assert_cmphex (glyphs[i].codepoint, ==, utf32[1+i]);
    276 
    277 
    278   g_assert (hb_buffer_allocation_successful (b));
    279 
    280 
    281   /* test reset clears content */
    282 
    283   hb_buffer_reset (b);
    284   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    285 }
    286 
    287 static void
    288 test_buffer_positions (fixture_t *fixture, gconstpointer user_data)
    289 {
    290   hb_buffer_t *b = fixture->buffer;
    291   unsigned int i, len, len2;
    292   hb_glyph_position_t *positions;
    293 
    294   /* Without shaping, positions should all be zero */
    295   len = hb_buffer_get_length (b);
    296   hb_buffer_get_glyph_positions (b, NULL); /* test NULL */
    297   positions = hb_buffer_get_glyph_positions (b, &len2);
    298   g_assert_cmpint (len, ==, len2);
    299   for (i = 0; i < len; i++) {
    300     g_assert_cmpint (0, ==, positions[i].x_advance);
    301     g_assert_cmpint (0, ==, positions[i].y_advance);
    302     g_assert_cmpint (0, ==, positions[i].x_offset);
    303     g_assert_cmpint (0, ==, positions[i].y_offset);
    304     g_assert_cmpint (0, ==, positions[i].var.i32);
    305   }
    306 
    307   /* test reset clears content */
    308   hb_buffer_reset (b);
    309   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    310 }
    311 
    312 static void
    313 test_buffer_allocation (fixture_t *fixture, gconstpointer user_data)
    314 {
    315   hb_buffer_t *b = fixture->buffer;
    316 
    317   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    318 
    319   g_assert (hb_buffer_pre_allocate (b, 100));
    320   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    321   g_assert (hb_buffer_allocation_successful (b));
    322 
    323   /* lets try a huge allocation, make sure it fails */
    324   g_assert (!hb_buffer_pre_allocate (b, (unsigned int) -1));
    325   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    326   g_assert (!hb_buffer_allocation_successful (b));
    327 
    328   /* small one again */
    329   g_assert (hb_buffer_pre_allocate (b, 50));
    330   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    331   g_assert (!hb_buffer_allocation_successful (b));
    332 
    333   hb_buffer_reset (b);
    334   g_assert (hb_buffer_allocation_successful (b));
    335 
    336   /* all allocation and size  */
    337   g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 + 1));
    338   g_assert (!hb_buffer_allocation_successful (b));
    339 
    340   hb_buffer_reset (b);
    341   g_assert (hb_buffer_allocation_successful (b));
    342 
    343   /* technically, this one can actually pass on 64bit machines, but
    344    * I'm doubtful that any malloc allows 4GB allocations at a time.
    345    * But let's only enable it on a 32-bit machine. */
    346   if (sizeof (long) == 4) {
    347     g_assert (!hb_buffer_pre_allocate (b, ((unsigned int) -1) / 20 - 1));
    348     g_assert (!hb_buffer_allocation_successful (b));
    349   }
    350 
    351   hb_buffer_reset (b);
    352   g_assert (hb_buffer_allocation_successful (b));
    353 }
    354 
    355 
    356 typedef struct {
    357   const char utf8[8];
    358   const uint32_t codepoints[8];
    359 } utf8_conversion_test_t;
    360 
    361 /* note: we skip the first and last byte when adding to buffer */
    362 static const utf8_conversion_test_t utf8_conversion_tests[] = {
    363   {"a\303\207", {-1}},
    364   {"a\303\207b", {0xC7}},
    365   {"ab\303cd", {'b', -1, 'c'}},
    366   {"ab\303\302\301cd", {'b', -1, -1, -1, 'c'}}
    367 };
    368 
    369 static void
    370 test_buffer_utf8_conversion (void)
    371 {
    372   hb_buffer_t *b;
    373   hb_glyph_info_t *glyphs;
    374   unsigned int bytes, chars, i, j, len;
    375 
    376   b = hb_buffer_create ();
    377 
    378   for (i = 0; i < G_N_ELEMENTS (utf8_conversion_tests); i++)
    379   {
    380     const utf8_conversion_test_t *test = &utf8_conversion_tests[i];
    381     char *escaped;
    382 
    383     escaped = g_strescape (test->utf8, NULL);
    384     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    385     g_free (escaped);
    386 
    387     bytes = strlen (test->utf8);
    388     for (chars = 0; test->codepoints[chars]; chars++)
    389       ;
    390 
    391     hb_buffer_reset (b);
    392     hb_buffer_add_utf8 (b, test->utf8, bytes,  1, bytes - 2);
    393 
    394     glyphs = hb_buffer_get_glyph_infos (b, &len);
    395     g_assert_cmpint (len, ==, chars);
    396     for (j = 0; j < chars; j++)
    397       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    398   }
    399 
    400   hb_buffer_destroy (b);
    401 }
    402 
    403 
    404 
/* The following test table is adapted from glib/glib/tests/utf8-validate.c
 * with relicensing permission from Matthias Clasen. */

    408 typedef struct {
    409   const char *utf8;
    410   int max_len;
    411   unsigned int offset;
    412   gboolean valid;
    413 } utf8_validity_test_t;
    414 
    415 static const utf8_validity_test_t utf8_validity_tests[] = {
    416   /* some tests to check max_len handling */
    417   /* length 1 */
    418   { "abcde", -1, 5, TRUE },
    419   { "abcde", 3, 3, TRUE },
    420   { "abcde", 5, 5, TRUE },
    421   /* length 2 */
    422   { "\xc2\xa9\xc2\xa9\xc2\xa9", -1, 6, TRUE },
    423   { "\xc2\xa9\xc2\xa9\xc2\xa9",  1, 0, FALSE },
    424   { "\xc2\xa9\xc2\xa9\xc2\xa9",  2, 2, TRUE },
    425   { "\xc2\xa9\xc2\xa9\xc2\xa9",  3, 2, FALSE },
    426   { "\xc2\xa9\xc2\xa9\xc2\xa9",  4, 4, TRUE },
    427   { "\xc2\xa9\xc2\xa9\xc2\xa9",  5, 4, FALSE },
    428   { "\xc2\xa9\xc2\xa9\xc2\xa9",  6, 6, TRUE },
    429   /* length 3 */
    430   { "\xe2\x89\xa0\xe2\x89\xa0", -1, 6, TRUE },
    431   { "\xe2\x89\xa0\xe2\x89\xa0",  1, 0, FALSE },
    432   { "\xe2\x89\xa0\xe2\x89\xa0",  2, 0, FALSE },
    433   { "\xe2\x89\xa0\xe2\x89\xa0",  3, 3, TRUE },
    434   { "\xe2\x89\xa0\xe2\x89\xa0",  4, 3, FALSE },
    435   { "\xe2\x89\xa0\xe2\x89\xa0",  5, 3, FALSE },
    436   { "\xe2\x89\xa0\xe2\x89\xa0",  6, 6, TRUE },
    437 
    438   /* examples from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */
    439   /* greek 'kosme' */
    440   { "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5", -1, 11, TRUE },
    441   /* first sequence of each length */
    442   { "\x00", -1, 0, TRUE },
    443   { "\xc2\x80", -1, 2, TRUE },
    444   { "\xe0\xa0\x80", -1, 3, TRUE },
    445   { "\xf0\x90\x80\x80", -1, 4, TRUE },
    446   { "\xf8\x88\x80\x80\x80", -1, 0, FALSE },
    447   { "\xfc\x84\x80\x80\x80\x80", -1, 0, FALSE },
    448   /* last sequence of each length */
    449   { "\x7f", -1, 1, TRUE },
    450   { "\xdf\xbf", -1, 2, TRUE },
    451   { "\xef\xbf\xbf", -1, 0, TRUE },
    452   { "\xf7\xbf\xbf\xbf", -1, 0, TRUE },
    453   { "\xfb\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    454   { "\xfd\xbf\xbf\xbf\xbf\xbf", -1, 0, FALSE },
    455   /* other boundary conditions */
    456   { "\xed\x9f\xbf", -1, 3, TRUE },
    457   { "\xee\x80\x80", -1, 3, TRUE },
    458   { "\xef\xbf\xbd", -1, 3, TRUE },
    459   { "\xf4\x8f\xbf\xbf", -1, 0, TRUE },
    460   /* malformed sequences */
    461   /* continuation bytes */
    462   { "\x80", -1, 0, FALSE },
    463   { "\xbf", -1, 0, FALSE },
    464   { "\x80\xbf", -1, 0, FALSE },
    465   { "\x80\xbf\x80", -1, 0, FALSE },
    466   { "\x80\xbf\x80\xbf", -1, 0, FALSE },
    467   { "\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    468   { "\x80\xbf\x80\xbf\x80\xbf", -1, 0, FALSE },
    469   { "\x80\xbf\x80\xbf\x80\xbf\x80", -1, 0, FALSE },
    470 
    471   /* all possible continuation byte */
    472   { "\x80", -1, 0, FALSE },
    473   { "\x81", -1, 0, FALSE },
    474   { "\x82", -1, 0, FALSE },
    475   { "\x83", -1, 0, FALSE },
    476   { "\x84", -1, 0, FALSE },
    477   { "\x85", -1, 0, FALSE },
    478   { "\x86", -1, 0, FALSE },
    479   { "\x87", -1, 0, FALSE },
    480   { "\x88", -1, 0, FALSE },
    481   { "\x89", -1, 0, FALSE },
    482   { "\x8a", -1, 0, FALSE },
    483   { "\x8b", -1, 0, FALSE },
    484   { "\x8c", -1, 0, FALSE },
    485   { "\x8d", -1, 0, FALSE },
    486   { "\x8e", -1, 0, FALSE },
    487   { "\x8f", -1, 0, FALSE },
    488   { "\x90", -1, 0, FALSE },
    489   { "\x91", -1, 0, FALSE },
    490   { "\x92", -1, 0, FALSE },
    491   { "\x93", -1, 0, FALSE },
    492   { "\x94", -1, 0, FALSE },
    493   { "\x95", -1, 0, FALSE },
    494   { "\x96", -1, 0, FALSE },
    495   { "\x97", -1, 0, FALSE },
    496   { "\x98", -1, 0, FALSE },
    497   { "\x99", -1, 0, FALSE },
    498   { "\x9a", -1, 0, FALSE },
    499   { "\x9b", -1, 0, FALSE },
    500   { "\x9c", -1, 0, FALSE },
    501   { "\x9d", -1, 0, FALSE },
    502   { "\x9e", -1, 0, FALSE },
    503   { "\x9f", -1, 0, FALSE },
    504   { "\xa0", -1, 0, FALSE },
    505   { "\xa1", -1, 0, FALSE },
    506   { "\xa2", -1, 0, FALSE },
    507   { "\xa3", -1, 0, FALSE },
    508   { "\xa4", -1, 0, FALSE },
    509   { "\xa5", -1, 0, FALSE },
    510   { "\xa6", -1, 0, FALSE },
    511   { "\xa7", -1, 0, FALSE },
    512   { "\xa8", -1, 0, FALSE },
    513   { "\xa9", -1, 0, FALSE },
    514   { "\xaa", -1, 0, FALSE },
    515   { "\xab", -1, 0, FALSE },
    516   { "\xac", -1, 0, FALSE },
    517   { "\xad", -1, 0, FALSE },
    518   { "\xae", -1, 0, FALSE },
    519   { "\xaf", -1, 0, FALSE },
    520   { "\xb0", -1, 0, FALSE },
    521   { "\xb1", -1, 0, FALSE },
    522   { "\xb2", -1, 0, FALSE },
    523   { "\xb3", -1, 0, FALSE },
    524   { "\xb4", -1, 0, FALSE },
    525   { "\xb5", -1, 0, FALSE },
    526   { "\xb6", -1, 0, FALSE },
    527   { "\xb7", -1, 0, FALSE },
    528   { "\xb8", -1, 0, FALSE },
    529   { "\xb9", -1, 0, FALSE },
    530   { "\xba", -1, 0, FALSE },
    531   { "\xbb", -1, 0, FALSE },
    532   { "\xbc", -1, 0, FALSE },
    533   { "\xbd", -1, 0, FALSE },
    534   { "\xbe", -1, 0, FALSE },
    535   { "\xbf", -1, 0, FALSE },
    536   /* lone start characters */
    537   { "\xc0\x20", -1, 0, FALSE },
    538   { "\xc1\x20", -1, 0, FALSE },
    539   { "\xc2\x20", -1, 0, FALSE },
    540   { "\xc3\x20", -1, 0, FALSE },
    541   { "\xc4\x20", -1, 0, FALSE },
    542   { "\xc5\x20", -1, 0, FALSE },
    543   { "\xc6\x20", -1, 0, FALSE },
    544   { "\xc7\x20", -1, 0, FALSE },
    545   { "\xc8\x20", -1, 0, FALSE },
    546   { "\xc9\x20", -1, 0, FALSE },
    547   { "\xca\x20", -1, 0, FALSE },
    548   { "\xcb\x20", -1, 0, FALSE },
    549   { "\xcc\x20", -1, 0, FALSE },
    550   { "\xcd\x20", -1, 0, FALSE },
    551   { "\xce\x20", -1, 0, FALSE },
    552   { "\xcf\x20", -1, 0, FALSE },
    553   { "\xd0\x20", -1, 0, FALSE },
    554   { "\xd1\x20", -1, 0, FALSE },
    555   { "\xd2\x20", -1, 0, FALSE },
    556   { "\xd3\x20", -1, 0, FALSE },
    557   { "\xd4\x20", -1, 0, FALSE },
    558   { "\xd5\x20", -1, 0, FALSE },
    559   { "\xd6\x20", -1, 0, FALSE },
    560   { "\xd7\x20", -1, 0, FALSE },
    561   { "\xd8\x20", -1, 0, FALSE },
    562   { "\xd9\x20", -1, 0, FALSE },
    563   { "\xda\x20", -1, 0, FALSE },
    564   { "\xdb\x20", -1, 0, FALSE },
    565   { "\xdc\x20", -1, 0, FALSE },
    566   { "\xdd\x20", -1, 0, FALSE },
    567   { "\xde\x20", -1, 0, FALSE },
    568   { "\xdf\x20", -1, 0, FALSE },
    569   { "\xe0\x20", -1, 0, FALSE },
    570   { "\xe1\x20", -1, 0, FALSE },
    571   { "\xe2\x20", -1, 0, FALSE },
    572   { "\xe3\x20", -1, 0, FALSE },
    573   { "\xe4\x20", -1, 0, FALSE },
    574   { "\xe5\x20", -1, 0, FALSE },
    575   { "\xe6\x20", -1, 0, FALSE },
    576   { "\xe7\x20", -1, 0, FALSE },
    577   { "\xe8\x20", -1, 0, FALSE },
    578   { "\xe9\x20", -1, 0, FALSE },
    579   { "\xea\x20", -1, 0, FALSE },
    580   { "\xeb\x20", -1, 0, FALSE },
    581   { "\xec\x20", -1, 0, FALSE },
    582   { "\xed\x20", -1, 0, FALSE },
    583   { "\xee\x20", -1, 0, FALSE },
    584   { "\xef\x20", -1, 0, FALSE },
    585   { "\xf0\x20", -1, 0, FALSE },
    586   { "\xf1\x20", -1, 0, FALSE },
    587   { "\xf2\x20", -1, 0, FALSE },
    588   { "\xf3\x20", -1, 0, FALSE },
    589   { "\xf4\x20", -1, 0, FALSE },
    590   { "\xf5\x20", -1, 0, FALSE },
    591   { "\xf6\x20", -1, 0, FALSE },
    592   { "\xf7\x20", -1, 0, FALSE },
    593   { "\xf8\x20", -1, 0, FALSE },
    594   { "\xf9\x20", -1, 0, FALSE },
    595   { "\xfa\x20", -1, 0, FALSE },
    596   { "\xfb\x20", -1, 0, FALSE },
    597   { "\xfc\x20", -1, 0, FALSE },
    598   { "\xfd\x20", -1, 0, FALSE },
    599   /* missing continuation bytes */
    600   { "\x20\xc0", -1, 1, FALSE },
    601   { "\x20\xe0\x80", -1, 1, FALSE },
    602   { "\x20\xf0\x80\x80", -1, 1, FALSE },
    603   { "\x20\xf8\x80\x80\x80", -1, 1, FALSE },
    604   { "\x20\xfc\x80\x80\x80\x80", -1, 1, FALSE },
    605   { "\x20\xdf", -1, 1, FALSE },
    606   { "\x20\xef\xbf", -1, 1, FALSE },
    607   { "\x20\xf7\xbf\xbf", -1, 1, FALSE },
    608   { "\x20\xfb\xbf\xbf\xbf", -1, 1, FALSE },
    609   { "\x20\xfd\xbf\xbf\xbf\xbf", -1, 1, FALSE },
    610   /* impossible bytes */
    611   { "\x20\xfe\x20", -1, 1, FALSE },
    612   { "\x20\xff\x20", -1, 1, FALSE },
    613 #if 0
    614   /* XXX fix these, or document that we don't detect them? */
    615   /* overlong sequences */
    616   { "\x20\xc0\xaf\x20", -1, 1, FALSE },
    617   { "\x20\xe0\x80\xaf\x20", -1, 1, FALSE },
    618   { "\x20\xf0\x80\x80\xaf\x20", -1, 1, FALSE },
    619   { "\x20\xf8\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    620   { "\x20\xfc\x80\x80\x80\x80\xaf\x20", -1, 1, FALSE },
    621   { "\x20\xc1\xbf\x20", -1, 1, FALSE },
    622   { "\x20\xe0\x9f\xbf\x20", -1, 1, FALSE },
    623   { "\x20\xf0\x8f\xbf\xbf\x20", -1, 1, FALSE },
    624   { "\x20\xf8\x87\xbf\xbf\xbf\x20", -1, 1, FALSE },
    625   { "\x20\xfc\x83\xbf\xbf\xbf\xbf\x20", -1, 1, FALSE },
    626   { "\x20\xc0\x80\x20", -1, 1, FALSE },
    627   { "\x20\xe0\x80\x80\x20", -1, 1, FALSE },
    628   { "\x20\xf0\x80\x80\x80\x20", -1, 1, FALSE },
    629   { "\x20\xf8\x80\x80\x80\x80\x20", -1, 1, FALSE },
    630   { "\x20\xfc\x80\x80\x80\x80\x80\x20", -1, 1, FALSE },
    631   /* illegal code positions */
    632   { "\x20\xed\xa0\x80\x20", -1, 1, FALSE },
    633   { "\x20\xed\xad\xbf\x20", -1, 1, FALSE },
    634   { "\x20\xed\xae\x80\x20", -1, 1, FALSE },
    635   { "\x20\xed\xaf\xbf\x20", -1, 1, FALSE },
    636   { "\x20\xed\xb0\x80\x20", -1, 1, FALSE },
    637   { "\x20\xed\xbe\x80\x20", -1, 1, FALSE },
    638   { "\x20\xed\xbf\xbf\x20", -1, 1, FALSE },
    639   { "\x20\xed\xa0\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    640   { "\x20\xed\xa0\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    641   { "\x20\xed\xad\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    642   { "\x20\xed\xad\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    643   { "\x20\xed\xae\x80\xed\xb0\x80\x20", -1, 1, FALSE },
    644   { "\x20\xed\xae\x80\xed\xbf\xbf\x20", -1, 1, FALSE },
    645   { "\x20\xed\xaf\xbf\xed\xb0\x80\x20", -1, 1, FALSE },
    646   { "\x20\xed\xaf\xbf\xed\xbf\xbf\x20", -1, 1, FALSE },
    647   { "\x20\xef\xbf\xbe\x20", -1, 1, FALSE },
    648   { "\x20\xef\xbf\xbf\x20", -1, 1, FALSE },
    649 #endif
    650   { "", -1, 0, TRUE }
    651 };
    652 
    653 static void
    654 test_buffer_utf8_validity (void)
    655 {
    656   hb_buffer_t *b;
    657   unsigned int i;
    658 
    659   b = hb_buffer_create ();
    660 
    661   for (i = 0; i < G_N_ELEMENTS (utf8_validity_tests); i++)
    662   {
    663     const utf8_validity_test_t *test = &utf8_validity_tests[i];
    664     unsigned int text_bytes, segment_bytes, j, len;
    665     hb_glyph_info_t *glyphs;
    666     char *escaped;
    667 
    668     escaped = g_strescape (test->utf8, NULL);
    669     g_test_message ("UTF-8 test #%d: %s", i, escaped);
    670     g_free (escaped);
    671 
    672     text_bytes = strlen (test->utf8);
    673     if (test->max_len == -1)
    674       segment_bytes = text_bytes;
    675     else
    676       segment_bytes = test->max_len;
    677 
    678     hb_buffer_reset (b);
    679     hb_buffer_add_utf8 (b, test->utf8, text_bytes,  0, segment_bytes);
    680 
    681     glyphs = hb_buffer_get_glyph_infos (b, &len);
    682     for (j = 0; j < len; j++)
    683       if (glyphs[j].codepoint == (hb_codepoint_t) -1)
    684 	break;
    685 
    686     g_assert (test->valid ? j == len : j < len);
    687     if (!test->valid)
    688       g_assert (glyphs[j].cluster == test->offset);
    689   }
    690 
    691   hb_buffer_destroy (b);
    692 }
    693 
    694 
    695 typedef struct {
    696   const uint16_t utf16[8];
    697   const uint32_t codepoints[8];
    698 } utf16_conversion_test_t;
    699 
    700 /* note: we skip the first and last item from utf16 when adding to buffer */
    701 static const utf16_conversion_test_t utf16_conversion_tests[] = {
    702   {{0x41, 0x004D, 0x0430, 0x4E8C, 0xD800, 0xDF02, 0x61} , {0x004D, 0x0430, 0x4E8C, 0x10302}},
    703   {{0x41, 0xD800, 0xDF02, 0x61}, {0x10302}},
    704   {{0x41, 0xD800, 0xDF02}, {-1}},
    705   {{0x41, 0x61, 0xD800, 0xDF02}, {0x61, -1}},
    706   {{0x41, 0xD800, 0x61, 0xDF02}, {-1, 0x61}},
    707   {{0x41, 0x61}, {}}
    708 };
    709 
    710 static void
    711 test_buffer_utf16_conversion (void)
    712 {
    713   hb_buffer_t *b;
    714   unsigned int i;
    715 
    716   b = hb_buffer_create ();
    717 
    718   for (i = 0; i < G_N_ELEMENTS (utf16_conversion_tests); i++)
    719   {
    720     const utf16_conversion_test_t *test = &utf16_conversion_tests[i];
    721     unsigned int u_len, chars, j, len;
    722     hb_glyph_info_t *glyphs;
    723 
    724     g_test_message ("UTF-16 test #%d", i);
    725 
    726     for (u_len = 0; test->utf16[u_len]; u_len++)
    727       ;
    728     for (chars = 0; test->codepoints[chars]; chars++)
    729       ;
    730 
    731     hb_buffer_reset (b);
    732     hb_buffer_add_utf16 (b, test->utf16, u_len,  1, u_len - 2);
    733 
    734     glyphs = hb_buffer_get_glyph_infos (b, &len);
    735     g_assert_cmpint (len, ==, chars);
    736     for (j = 0; j < chars; j++)
    737       g_assert_cmphex (glyphs[j].codepoint, ==, test->codepoints[j]);
    738   }
    739 
    740   hb_buffer_destroy (b);
    741 }
    742 
    743 static void
    744 test_empty (hb_buffer_t *b)
    745 {
    746   g_assert_cmpint (hb_buffer_get_length (b), ==, 0);
    747   g_assert (!hb_buffer_get_glyph_infos (b, NULL));
    748   g_assert (!hb_buffer_get_glyph_positions (b, NULL));
    749 }
    750 
    751 static void
    752 test_buffer_empty (void)
    753 {
    754   hb_buffer_t *b = hb_buffer_get_empty ();
    755 
    756   g_assert (hb_buffer_get_empty ());
    757   g_assert (hb_buffer_get_empty () == b);
    758 
    759   g_assert (!hb_buffer_allocation_successful (b));
    760 
    761   test_empty (b);
    762 
    763   hb_buffer_add_utf32 (b, utf32, G_N_ELEMENTS (utf32), 1, G_N_ELEMENTS (utf32) - 2);
    764 
    765   test_empty (b);
    766 
    767   hb_buffer_reverse (b);
    768   hb_buffer_reverse_clusters (b);
    769 
    770   g_assert (!hb_buffer_set_length (b, 10));
    771 
    772   test_empty (b);
    773 
    774   g_assert (hb_buffer_set_length (b, 0));
    775 
    776   test_empty (b);
    777 
    778   g_assert (!hb_buffer_allocation_successful (b));
    779 
    780   hb_buffer_reset (b);
    781 
    782   test_empty (b);
    783 
    784   g_assert (!hb_buffer_allocation_successful (b));
    785 }
    786 
    787 int
    788 main (int argc, char **argv)
    789 {
    790   unsigned int i;
    791 
    792   hb_test_init (&argc, &argv);
    793 
    794   for (i = 0; i < BUFFER_NUM_TYPES; i++)
    795   {
    796     const void *buffer_type = GINT_TO_POINTER (i);
    797     const char *buffer_name = buffer_names[i];
    798 
    799     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_properties);
    800     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_contents);
    801     hb_test_add_fixture_flavor (fixture, buffer_type, buffer_name, test_buffer_positions);
    802   }
    803 
    804   hb_test_add_fixture (fixture, GINT_TO_POINTER (BUFFER_EMPTY), test_buffer_allocation);
    805 
    806   hb_test_add (test_buffer_utf8_conversion);
    807   hb_test_add (test_buffer_utf8_validity);
    808   hb_test_add (test_buffer_utf16_conversion);
    809   hb_test_add (test_buffer_empty);
    810 
    811   return hb_test_run();
    812 }
    813