Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2012,2017  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_SET_HH
     28 #define HB_SET_HH
     29 
     30 #include "hb.hh"
     31 
     32 
     33 /*
     34  * hb_set_t
     35  */
     36 
     37 /* TODO Keep a free-list so we can free pages that are completely zeroed.  At that
     38  * point maybe also use a sentinel value for "all-1" pages? */
     39 
     40 struct hb_set_t
     41 {
     42   HB_NO_COPY_ASSIGN (hb_set_t);
     43   hb_set_t ()  { init (); }
     44   ~hb_set_t () { fini (); }
     45 
     46   struct page_map_t
     47   {
     48     int cmp (const page_map_t &o) const { return (int) o.major - (int) major; }
     49 
     50     uint32_t major;
     51     uint32_t index;
     52   };
     53 
     54   struct page_t
     55   {
     56     void init0 () { v.clear (); }
     57     void init1 () { v.clear (0xFF); }
     58 
     59     unsigned int len () const
     60     { return ARRAY_LENGTH_CONST (v); }
     61 
     62     bool is_empty () const
     63     {
     64       for (unsigned int i = 0; i < len (); i++)
     65         if (v[i])
     66 	  return false;
     67       return true;
     68     }
     69 
     70     void add (hb_codepoint_t g) { elt (g) |= mask (g); }
     71     void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
     72     bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }
     73 
     74     void add_range (hb_codepoint_t a, hb_codepoint_t b)
     75     {
     76       elt_t *la = &elt (a);
     77       elt_t *lb = &elt (b);
     78       if (la == lb)
     79         *la |= (mask (b) << 1) - mask(a);
     80       else
     81       {
     82 	*la |= ~(mask (a) - 1);
     83 	la++;
     84 
     85 	memset (la, 0xff, (char *) lb - (char *) la);
     86 
     87 	*lb |= ((mask (b) << 1) - 1);
     88       }
     89     }
     90 
     91     bool is_equal (const page_t *other) const
     92     {
     93       return 0 == hb_memcmp (&v, &other->v, sizeof (v));
     94     }
     95 
     96     unsigned int get_population () const
     97     {
     98       unsigned int pop = 0;
     99       for (unsigned int i = 0; i < len (); i++)
    100         pop += hb_popcount (v[i]);
    101       return pop;
    102     }
    103 
    104     bool next (hb_codepoint_t *codepoint) const
    105     {
    106       unsigned int m = (*codepoint + 1) & MASK;
    107       if (!m)
    108       {
    109 	*codepoint = INVALID;
    110 	return false;
    111       }
    112       unsigned int i = m / ELT_BITS;
    113       unsigned int j = m & ELT_MASK;
    114 
    115       const elt_t vv = v[i] & ~((elt_t (1) << j) - 1);
    116       for (const elt_t *p = &vv; i < len (); p = &v[++i])
    117 	if (*p)
    118 	{
    119 	  *codepoint = i * ELT_BITS + elt_get_min (*p);
    120 	  return true;
    121 	}
    122 
    123       *codepoint = INVALID;
    124       return false;
    125     }
    126     bool previous (hb_codepoint_t *codepoint) const
    127     {
    128       unsigned int m = (*codepoint - 1) & MASK;
    129       if (m == MASK)
    130       {
    131 	*codepoint = INVALID;
    132 	return false;
    133       }
    134       unsigned int i = m / ELT_BITS;
    135       unsigned int j = m & ELT_MASK;
    136 
    137       const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1);
    138       for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i])
    139 	if (*p)
    140 	{
    141 	  *codepoint = i * ELT_BITS + elt_get_max (*p);
    142 	  return true;
    143 	}
    144 
    145       *codepoint = INVALID;
    146       return false;
    147     }
    148     hb_codepoint_t get_min () const
    149     {
    150       for (unsigned int i = 0; i < len (); i++)
    151         if (v[i])
    152 	  return i * ELT_BITS + elt_get_min (v[i]);
    153       return INVALID;
    154     }
    155     hb_codepoint_t get_max () const
    156     {
    157       for (int i = len () - 1; i >= 0; i--)
    158         if (v[i])
    159 	  return i * ELT_BITS + elt_get_max (v[i]);
    160       return 0;
    161     }
    162 
    163     typedef unsigned long long elt_t;
    164     enum { PAGE_BITS = 512 };
    165     static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
    166 
    167     static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
    168     static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
    169 
    170     typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
    171 
    172     enum { ELT_BITS = sizeof (elt_t) * 8 };
    173     enum { ELT_MASK = ELT_BITS - 1 };
    174     enum { BITS = sizeof (vector_t) * 8 };
    175     enum { MASK = BITS - 1 };
    176     static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");
    177 
    178     elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
    179     elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
    180     elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
    181 
    182     vector_t v;
    183   };
    184   static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
    185 
    186   hb_object_header_t header;
    187   bool successful; /* Allocations successful */
    188   mutable unsigned int population;
    189   hb_vector_t<page_map_t, 1> page_map;
    190   hb_vector_t<page_t, 1> pages;
    191 
    192   void init_shallow ()
    193   {
    194     successful = true;
    195     population = 0;
    196     page_map.init ();
    197     pages.init ();
    198   }
    199   void init ()
    200   {
    201     hb_object_init (this);
    202     init_shallow ();
    203   }
    204   void fini_shallow ()
    205   {
    206     population = 0;
    207     page_map.fini ();
    208     pages.fini ();
    209   }
    210   void fini ()
    211   {
    212     hb_object_fini (this);
    213     fini_shallow ();
    214   }
    215 
    216   bool in_error () const { return !successful; }
    217 
    218   bool resize (unsigned int count)
    219   {
    220     if (unlikely (!successful)) return false;
    221     if (!pages.resize (count) || !page_map.resize (count))
    222     {
    223       pages.resize (page_map.len);
    224       successful = false;
    225       return false;
    226     }
    227     return true;
    228   }
    229 
    230   void clear ()
    231   {
    232     if (unlikely (hb_object_is_immutable (this)))
    233       return;
    234     successful = true;
    235     population = 0;
    236     page_map.resize (0);
    237     pages.resize (0);
    238   }
    239   bool is_empty () const
    240   {
    241     unsigned int count = pages.len;
    242     for (unsigned int i = 0; i < count; i++)
    243       if (!pages[i].is_empty ())
    244         return false;
    245     return true;
    246   }
    247 
    248   void dirty () { population = (unsigned int) -1; }
    249 
    250   void add (hb_codepoint_t g)
    251   {
    252     if (unlikely (!successful)) return;
    253     if (unlikely (g == INVALID)) return;
    254     dirty ();
    255     page_t *page = page_for_insert (g); if (unlikely (!page)) return;
    256     page->add (g);
    257   }
    258   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
    259   {
    260     if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
    261     if (unlikely (a > b || a == INVALID || b == INVALID)) return false;
    262     dirty ();
    263     unsigned int ma = get_major (a);
    264     unsigned int mb = get_major (b);
    265     if (ma == mb)
    266     {
    267       page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
    268       page->add_range (a, b);
    269     }
    270     else
    271     {
    272       page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
    273       page->add_range (a, major_start (ma + 1) - 1);
    274 
    275       for (unsigned int m = ma + 1; m < mb; m++)
    276       {
    277 	page = page_for_insert (major_start (m)); if (unlikely (!page)) return false;
    278 	page->init1 ();
    279       }
    280 
    281       page = page_for_insert (b); if (unlikely (!page)) return false;
    282       page->add_range (major_start (mb), b);
    283     }
    284     return true;
    285   }
    286 
    287   template <typename T>
    288   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    289   {
    290     if (unlikely (!successful)) return;
    291     if (!count) return;
    292     dirty ();
    293     hb_codepoint_t g = *array;
    294     while (count)
    295     {
    296       unsigned int m = get_major (g);
    297       page_t *page = page_for_insert (g); if (unlikely (!page)) return;
    298       unsigned int start = major_start (m);
    299       unsigned int end = major_start (m + 1);
    300       do
    301       {
    302 	page->add (g);
    303 
    304 	array = (const T *) ((const char *) array + stride);
    305 	count--;
    306       }
    307       while (count && (g = *array, start <= g && g < end));
    308     }
    309   }
    310 
    311   /* Might return false if array looks unsorted.
    312    * Used for faster rejection of corrupt data. */
    313   template <typename T>
    314   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    315   {
    316     if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
    317     if (!count) return true;
    318     dirty ();
    319     hb_codepoint_t g = *array;
    320     hb_codepoint_t last_g = g;
    321     while (count)
    322     {
    323       unsigned int m = get_major (g);
    324       page_t *page = page_for_insert (g); if (unlikely (!page)) return false;
    325       unsigned int end = major_start (m + 1);
    326       do
    327       {
    328         /* If we try harder we can change the following comparison to <=;
    329 	 * Not sure if it's worth it. */
    330         if (g < last_g) return false;
    331 	last_g = g;
    332 	page->add (g);
    333 
    334 	array = (const T *) ((const char *) array + stride);
    335 	count--;
    336       }
    337       while (count && (g = *array, g < end));
    338     }
    339     return true;
    340   }
    341 
    342   void del (hb_codepoint_t g)
    343   {
    344     /* TODO perform op even if !successful. */
    345     if (unlikely (!successful)) return;
    346     page_t *page = page_for (g);
    347     if (!page)
    348       return;
    349     dirty ();
    350     page->del (g);
    351   }
    352   void del_range (hb_codepoint_t a, hb_codepoint_t b)
    353   {
    354     /* TODO perform op even if !successful. */
    355     /* TODO Optimize, like add_range(). */
    356     if (unlikely (!successful)) return;
    357     for (unsigned int i = a; i < b + 1; i++)
    358       del (i);
    359   }
    360   bool has (hb_codepoint_t g) const
    361   {
    362     const page_t *page = page_for (g);
    363     if (!page)
    364       return false;
    365     return page->has (g);
    366   }
    367   bool intersects (hb_codepoint_t first,
    368 			  hb_codepoint_t last) const
    369   {
    370     hb_codepoint_t c = first - 1;
    371     return next (&c) && c <= last;
    372   }
    373   void set (const hb_set_t *other)
    374   {
    375     if (unlikely (!successful)) return;
    376     unsigned int count = other->pages.len;
    377     if (!resize (count))
    378       return;
    379     population = other->population;
    380     memcpy ((void *) pages, (const void *) other->pages, count * pages.item_size);
    381     memcpy ((void *) page_map, (const void *) other->page_map, count * page_map.item_size);
    382   }
    383 
    384   bool is_equal (const hb_set_t *other) const
    385   {
    386     if (get_population () != other->get_population ())
    387       return false;
    388 
    389     unsigned int na = pages.len;
    390     unsigned int nb = other->pages.len;
    391 
    392     unsigned int a = 0, b = 0;
    393     for (; a < na && b < nb; )
    394     {
    395       if (page_at (a).is_empty ()) { a++; continue; }
    396       if (other->page_at (b).is_empty ()) { b++; continue; }
    397       if (page_map[a].major != other->page_map[b].major ||
    398 	  !page_at (a).is_equal (&other->page_at (b)))
    399         return false;
    400       a++;
    401       b++;
    402     }
    403     for (; a < na; a++)
    404       if (!page_at (a).is_empty ()) { return false; }
    405     for (; b < nb; b++)
    406       if (!other->page_at (b).is_empty ()) { return false; }
    407 
    408     return true;
    409   }
    410 
    411   bool is_subset (const hb_set_t *larger_set) const
    412   {
    413     if (get_population () > larger_set->get_population ())
    414       return false;
    415 
    416     /* TODO Optimize to use pages. */
    417     hb_codepoint_t c = INVALID;
    418     while (next (&c))
    419       if (!larger_set->has (c))
    420         return false;
    421 
    422     return true;
    423   }
    424 
    425   template <class Op>
    426   void process (const hb_set_t *other)
    427   {
    428     if (unlikely (!successful)) return;
    429 
    430     dirty ();
    431 
    432     unsigned int na = pages.len;
    433     unsigned int nb = other->pages.len;
    434     unsigned int next_page = na;
    435 
    436     unsigned int count = 0, newCount = 0;
    437     unsigned int a = 0, b = 0;
    438     for (; a < na && b < nb; )
    439     {
    440       if (page_map[a].major == other->page_map[b].major)
    441       {
    442         count++;
    443 	a++;
    444 	b++;
    445       }
    446       else if (page_map[a].major < other->page_map[b].major)
    447       {
    448         if (Op::passthru_left)
    449 	  count++;
    450         a++;
    451       }
    452       else
    453       {
    454         if (Op::passthru_right)
    455 	  count++;
    456         b++;
    457       }
    458     }
    459     if (Op::passthru_left)
    460       count += na - a;
    461     if (Op::passthru_right)
    462       count += nb - b;
    463 
    464     if (count > pages.len)
    465       if (!resize (count))
    466         return;
    467     newCount = count;
    468 
    469     /* Process in-place backward. */
    470     a = na;
    471     b = nb;
    472     for (; a && b; )
    473     {
    474       if (page_map[a - 1].major == other->page_map[b - 1].major)
    475       {
    476 	a--;
    477 	b--;
    478 	count--;
    479 	page_map[count] = page_map[a];
    480 	Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v);
    481       }
    482       else if (page_map[a - 1].major > other->page_map[b - 1].major)
    483       {
    484 	a--;
    485 	if (Op::passthru_left)
    486 	{
    487 	  count--;
    488 	  page_map[count] = page_map[a];
    489 	}
    490       }
    491       else
    492       {
    493 	b--;
    494 	if (Op::passthru_right)
    495 	{
    496 	  count--;
    497 	  page_map[count].major = other->page_map[b].major;
    498 	  page_map[count].index = next_page++;
    499 	  page_at (count).v = other->page_at (b).v;
    500 	}
    501       }
    502     }
    503     if (Op::passthru_left)
    504       while (a)
    505       {
    506 	a--;
    507 	count--;
    508 	page_map[count] = page_map [a];
    509       }
    510     if (Op::passthru_right)
    511       while (b)
    512       {
    513 	b--;
    514 	count--;
    515 	page_map[count].major = other->page_map[b].major;
    516 	page_map[count].index = next_page++;
    517 	page_at (count).v = other->page_at (b).v;
    518       }
    519     assert (!count);
    520     if (pages.len > newCount)
    521       resize (newCount);
    522   }
    523 
    524   void union_ (const hb_set_t *other)
    525   {
    526     process<HbOpOr> (other);
    527   }
    528   void intersect (const hb_set_t *other)
    529   {
    530     process<HbOpAnd> (other);
    531   }
    532   void subtract (const hb_set_t *other)
    533   {
    534     process<HbOpMinus> (other);
    535   }
    536   void symmetric_difference (const hb_set_t *other)
    537   {
    538     process<HbOpXor> (other);
    539   }
    540   bool next (hb_codepoint_t *codepoint) const
    541   {
    542     if (unlikely (*codepoint == INVALID)) {
    543       *codepoint = get_min ();
    544       return *codepoint != INVALID;
    545     }
    546 
    547     page_map_t map = {get_major (*codepoint), 0};
    548     unsigned int i;
    549     page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
    550     if (i < page_map.len && page_map[i].major == map.major)
    551     {
    552       if (pages[page_map[i].index].next (codepoint))
    553       {
    554 	*codepoint += page_map[i].major * page_t::PAGE_BITS;
    555 	return true;
    556       }
    557       i++;
    558     }
    559     for (; i < page_map.len; i++)
    560     {
    561       hb_codepoint_t m = pages[page_map[i].index].get_min ();
    562       if (m != INVALID)
    563       {
    564 	*codepoint = page_map[i].major * page_t::PAGE_BITS + m;
    565 	return true;
    566       }
    567     }
    568     *codepoint = INVALID;
    569     return false;
    570   }
    571   bool previous (hb_codepoint_t *codepoint) const
    572   {
    573     if (unlikely (*codepoint == INVALID)) {
    574       *codepoint = get_max ();
    575       return *codepoint != INVALID;
    576     }
    577 
    578     page_map_t map = {get_major (*codepoint), 0};
    579     unsigned int i;
    580     page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
    581     if (i < page_map.len && page_map[i].major == map.major)
    582     {
    583       if (pages[page_map[i].index].previous (codepoint))
    584       {
    585 	*codepoint += page_map[i].major * page_t::PAGE_BITS;
    586 	return true;
    587       }
    588     }
    589     i--;
    590     for (; (int) i >= 0; i--)
    591     {
    592       hb_codepoint_t m = pages[page_map[i].index].get_max ();
    593       if (m != INVALID)
    594       {
    595 	*codepoint = page_map[i].major * page_t::PAGE_BITS + m;
    596 	return true;
    597       }
    598     }
    599     *codepoint = INVALID;
    600     return false;
    601   }
    602   bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
    603   {
    604     hb_codepoint_t i;
    605 
    606     i = *last;
    607     if (!next (&i))
    608     {
    609       *last = *first = INVALID;
    610       return false;
    611     }
    612 
    613     /* TODO Speed up. */
    614     *last = *first = i;
    615     while (next (&i) && i == *last + 1)
    616       (*last)++;
    617 
    618     return true;
    619   }
    620   bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const
    621   {
    622     hb_codepoint_t i;
    623 
    624     i = *first;
    625     if (!previous (&i))
    626     {
    627       *last = *first = INVALID;
    628       return false;
    629     }
    630 
    631     /* TODO Speed up. */
    632     *last = *first = i;
    633     while (previous (&i) && i == *first - 1)
    634       (*first)--;
    635 
    636     return true;
    637   }
    638 
    639   unsigned int get_population () const
    640   {
    641     if (population != (unsigned int) -1)
    642       return population;
    643 
    644     unsigned int pop = 0;
    645     unsigned int count = pages.len;
    646     for (unsigned int i = 0; i < count; i++)
    647       pop += pages[i].get_population ();
    648 
    649     population = pop;
    650     return pop;
    651   }
    652   hb_codepoint_t get_min () const
    653   {
    654     unsigned int count = pages.len;
    655     for (unsigned int i = 0; i < count; i++)
    656       if (!page_at (i).is_empty ())
    657         return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min ();
    658     return INVALID;
    659   }
    660   hb_codepoint_t get_max () const
    661   {
    662     unsigned int count = pages.len;
    663     for (int i = count - 1; i >= 0; i++)
    664       if (!page_at (i).is_empty ())
    665         return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max ();
    666     return INVALID;
    667   }
    668 
    669   static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
    670 
    671   page_t *page_for_insert (hb_codepoint_t g)
    672   {
    673     page_map_t map = {get_major (g), pages.len};
    674     unsigned int i;
    675     if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST))
    676     {
    677       if (!resize (pages.len + 1))
    678 	return nullptr;
    679 
    680       pages[map.index].init0 ();
    681       memmove (page_map + i + 1,
    682 	       page_map + i,
    683 	       (page_map.len - 1 - i) * page_map.item_size);
    684       page_map[i] = map;
    685     }
    686     return &pages[page_map[i].index];
    687   }
    688   page_t *page_for (hb_codepoint_t g)
    689   {
    690     page_map_t key = {get_major (g)};
    691     const page_map_t *found = page_map.bsearch (key);
    692     if (found)
    693       return &pages[found->index];
    694     return nullptr;
    695   }
    696   const page_t *page_for (hb_codepoint_t g) const
    697   {
    698     page_map_t key = {get_major (g)};
    699     const page_map_t *found = page_map.bsearch (key);
    700     if (found)
    701       return &pages[found->index];
    702     return nullptr;
    703   }
    704   page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
    705   const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
    706   unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
    707   hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; }
    708 };
    709 
    710 
    711 #endif /* HB_SET_HH */
    712