Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2012  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_SET_PRIVATE_HH
     28 #define HB_SET_PRIVATE_HH
     29 
     30 #include "hb-private.hh"
     31 #include "hb-set.h"
     32 #include "hb-object-private.hh"
     33 
     34 
     35 /*
     36  * The set digests here implement various "filters" that support
     37  * "approximate member query".  Conceptually these are like Bloom
     38  * Filter and Quotient Filter, however, much smaller, faster, and
     39  * designed to fit the requirements of our uses for glyph coverage
     40  * queries.  As a result, our filters have much higher.
     41  */
     42 
     43 template <typename mask_t, unsigned int shift>
     44 struct hb_set_digest_lowest_bits_t
     45 {
     46   ASSERT_POD ();
     47 
     48   static const unsigned int mask_bytes = sizeof (mask_t);
     49   static const unsigned int mask_bits = sizeof (mask_t) * 8;
     50   static const unsigned int num_bits = 0
     51 				     + (mask_bytes >= 1 ? 3 : 0)
     52 				     + (mask_bytes >= 2 ? 1 : 0)
     53 				     + (mask_bytes >= 4 ? 1 : 0)
     54 				     + (mask_bytes >= 8 ? 1 : 0)
     55 				     + (mask_bytes >= 16? 1 : 0)
     56 				     + 0;
     57 
     58   ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
     59   ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
     60 
     61   inline void init (void) {
     62     mask = 0;
     63   }
     64 
     65   inline void add (hb_codepoint_t g) {
     66     mask |= mask_for (g);
     67   }
     68 
     69   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
     70     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
     71       mask = (mask_t) -1;
     72     else {
     73       mask_t ma = mask_for (a);
     74       mask_t mb = mask_for (b);
     75       mask |= mb + (mb - ma) - (mb < ma);
     76     }
     77   }
     78 
     79   inline bool may_have (hb_codepoint_t g) const {
     80     return !!(mask & mask_for (g));
     81   }
     82 
     83   private:
     84 
     85   static inline mask_t mask_for (hb_codepoint_t g) {
     86     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
     87   }
     88   mask_t mask;
     89 };
     90 
     91 template <typename head_t, typename tail_t>
     92 struct hb_set_digest_combiner_t
     93 {
     94   ASSERT_POD ();
     95 
     96   inline void init (void) {
     97     head.init ();
     98     tail.init ();
     99   }
    100 
    101   inline void add (hb_codepoint_t g) {
    102     head.add (g);
    103     tail.add (g);
    104   }
    105 
    106   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    107     head.add_range (a, b);
    108     tail.add_range (a, b);
    109   }
    110 
    111   inline bool may_have (hb_codepoint_t g) const {
    112     return head.may_have (g) && tail.may_have (g);
    113   }
    114 
    115   private:
    116   head_t head;
    117   tail_t tail;
    118 };
    119 
    120 
    121 /*
    122  * hb_set_digest_t
    123  *
    124  * This is a combination of digests that performs "best".
    125  * There is not much science to this: it's a result of intuition
    126  * and testing.
    127  */
    128 typedef hb_set_digest_combiner_t
    129 <
    130   hb_set_digest_lowest_bits_t<unsigned long, 4>,
    131   hb_set_digest_combiner_t
    132   <
    133     hb_set_digest_lowest_bits_t<unsigned long, 0>,
    134     hb_set_digest_lowest_bits_t<unsigned long, 9>
    135   >
    136 > hb_set_digest_t;
    137 
    138 
    139 
    140 /*
    141  * hb_set_t
    142  */
    143 
    144 
    145 /* TODO Make this faster and memmory efficient. */
    146 
    147 struct hb_set_t
    148 {
    149   hb_object_header_t header;
    150   ASSERT_POD ();
    151   bool in_error;
    152 
    153   inline void init (void) {
    154     header.init ();
    155     clear ();
    156   }
    157   inline void fini (void) {
    158   }
    159   inline void clear (void) {
    160     if (unlikely (hb_object_is_inert (this)))
    161       return;
    162     in_error = false;
    163     memset (elts, 0, sizeof elts);
    164   }
    165   inline bool is_empty (void) const {
    166     for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
    167       if (elts[i])
    168         return false;
    169     return true;
    170   }
    171   inline void add (hb_codepoint_t g)
    172   {
    173     if (unlikely (in_error)) return;
    174     if (unlikely (g == SENTINEL)) return;
    175     if (unlikely (g > MAX_G)) return;
    176     elt (g) |= mask (g);
    177   }
    178   inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
    179   {
    180     if (unlikely (in_error)) return;
    181     /* TODO Speedup */
    182     for (unsigned int i = a; i < b + 1; i++)
    183       add (i);
    184   }
    185   inline void del (hb_codepoint_t g)
    186   {
    187     if (unlikely (in_error)) return;
    188     if (unlikely (g > MAX_G)) return;
    189     elt (g) &= ~mask (g);
    190   }
    191   inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
    192   {
    193     if (unlikely (in_error)) return;
    194     /* TODO Speedup */
    195     for (unsigned int i = a; i < b + 1; i++)
    196       del (i);
    197   }
    198   inline bool has (hb_codepoint_t g) const
    199   {
    200     if (unlikely (g > MAX_G)) return false;
    201     return !!(elt (g) & mask (g));
    202   }
    203   inline bool intersects (hb_codepoint_t first,
    204 			  hb_codepoint_t last) const
    205   {
    206     if (unlikely (first > MAX_G)) return false;
    207     if (unlikely (last  > MAX_G)) last = MAX_G;
    208     unsigned int end = last + 1;
    209     for (hb_codepoint_t i = first; i < end; i++)
    210       if (has (i))
    211         return true;
    212     return false;
    213   }
    214   inline bool is_equal (const hb_set_t *other) const
    215   {
    216     for (unsigned int i = 0; i < ELTS; i++)
    217       if (elts[i] != other->elts[i])
    218         return false;
    219     return true;
    220   }
    221   inline void set (const hb_set_t *other)
    222   {
    223     if (unlikely (in_error)) return;
    224     for (unsigned int i = 0; i < ELTS; i++)
    225       elts[i] = other->elts[i];
    226   }
    227   inline void union_ (const hb_set_t *other)
    228   {
    229     if (unlikely (in_error)) return;
    230     for (unsigned int i = 0; i < ELTS; i++)
    231       elts[i] |= other->elts[i];
    232   }
    233   inline void intersect (const hb_set_t *other)
    234   {
    235     if (unlikely (in_error)) return;
    236     for (unsigned int i = 0; i < ELTS; i++)
    237       elts[i] &= other->elts[i];
    238   }
    239   inline void subtract (const hb_set_t *other)
    240   {
    241     if (unlikely (in_error)) return;
    242     for (unsigned int i = 0; i < ELTS; i++)
    243       elts[i] &= ~other->elts[i];
    244   }
    245   inline void symmetric_difference (const hb_set_t *other)
    246   {
    247     if (unlikely (in_error)) return;
    248     for (unsigned int i = 0; i < ELTS; i++)
    249       elts[i] ^= other->elts[i];
    250   }
    251   inline void invert (void)
    252   {
    253     if (unlikely (in_error)) return;
    254     for (unsigned int i = 0; i < ELTS; i++)
    255       elts[i] = ~elts[i];
    256   }
    257   inline bool next (hb_codepoint_t *codepoint) const
    258   {
    259     if (unlikely (*codepoint == SENTINEL)) {
    260       hb_codepoint_t i = get_min ();
    261       if (i != SENTINEL) {
    262         *codepoint = i;
    263 	return true;
    264       } else
    265         return false;
    266     }
    267     for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
    268       if (has (i)) {
    269         *codepoint = i;
    270 	return true;
    271       }
    272     return false;
    273   }
    274   inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
    275   {
    276     hb_codepoint_t i;
    277 
    278     i = *last;
    279     if (!next (&i))
    280       return false;
    281 
    282     *last = *first = i;
    283     while (next (&i) && i == *last + 1)
    284       (*last)++;
    285 
    286     return true;
    287   }
    288 
    289   inline unsigned int get_population (void) const
    290   {
    291     unsigned int count = 0;
    292     for (unsigned int i = 0; i < ELTS; i++)
    293       count += _hb_popcount32 (elts[i]);
    294     return count;
    295   }
    296   inline hb_codepoint_t get_min (void) const
    297   {
    298     for (unsigned int i = 0; i < ELTS; i++)
    299       if (elts[i])
    300 	for (unsigned int j = 0; j < BITS; j++)
    301 	  if (elts[i] & (1 << j))
    302 	    return i * BITS + j;
    303     return SENTINEL;
    304   }
    305   inline hb_codepoint_t get_max (void) const
    306   {
    307     for (unsigned int i = ELTS; i; i--)
    308       if (elts[i - 1])
    309 	for (unsigned int j = BITS; j; j--)
    310 	  if (elts[i - 1] & (1 << (j - 1)))
    311 	    return (i - 1) * BITS + (j - 1);
    312     return SENTINEL;
    313   }
    314 
    315   typedef uint32_t elt_t;
    316   static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
    317   static const unsigned int SHIFT = 5;
    318   static const unsigned int BITS = (1 << SHIFT);
    319   static const unsigned int MASK = BITS - 1;
    320   static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
    321   static  const hb_codepoint_t SENTINEL = (hb_codepoint_t) -1;
    322 
    323   elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
    324   elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
    325   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
    326 
    327   elt_t elts[ELTS]; /* XXX 8kb */
    328 
    329   ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
    330   ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
    331 };
    332 
    333 
    334 
    335 #endif /* HB_SET_PRIVATE_HH */
    336