Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2012  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_SET_PRIVATE_HH
     28 #define HB_SET_PRIVATE_HH
     29 
     30 #include "hb-private.hh"
     31 #include "hb-object-private.hh"
     32 
     33 
     34 /*
     35  * The set digests here implement various "filters" that support
     36  * "approximate member query".  Conceptually these are like Bloom
     37  * Filter and Quotient Filter, however, much smaller, faster, and
     38  * designed to fit the requirements of our uses for glyph coverage
     39  * queries.
     40  *
     41  * Our filters are highly accurate if the lookup covers fairly local
     42  * set of glyphs, but fully flooded and ineffective if coverage is
     43  * all over the place.
     44  *
     45  * The frozen-set can be used instead of a digest, to trade more
     46  * memory for 100% accuracy, but in practice, that doesn't look like
     47  * an attractive trade-off.
     48  */
     49 
     50 template <typename mask_t, unsigned int shift>
     51 struct hb_set_digest_lowest_bits_t
     52 {
     53   ASSERT_POD ();
     54 
     55   static const unsigned int mask_bytes = sizeof (mask_t);
     56   static const unsigned int mask_bits = sizeof (mask_t) * 8;
     57   static const unsigned int num_bits = 0
     58 				     + (mask_bytes >= 1 ? 3 : 0)
     59 				     + (mask_bytes >= 2 ? 1 : 0)
     60 				     + (mask_bytes >= 4 ? 1 : 0)
     61 				     + (mask_bytes >= 8 ? 1 : 0)
     62 				     + (mask_bytes >= 16? 1 : 0)
     63 				     + 0;
     64 
     65   ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
     66   ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
     67 
     68   inline void init (void) {
     69     mask = 0;
     70   }
     71 
     72   inline void add (hb_codepoint_t g) {
     73     mask |= mask_for (g);
     74   }
     75 
     76   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
     77     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
     78       mask = (mask_t) -1;
     79     else {
     80       mask_t ma = mask_for (a);
     81       mask_t mb = mask_for (b);
     82       mask |= mb + (mb - ma) - (mb < ma);
     83     }
     84   }
     85 
     86   inline bool may_have (hb_codepoint_t g) const {
     87     return !!(mask & mask_for (g));
     88   }
     89 
     90   private:
     91 
     92   static inline mask_t mask_for (hb_codepoint_t g) {
     93     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
     94   }
     95   mask_t mask;
     96 };
     97 
     98 template <typename head_t, typename tail_t>
     99 struct hb_set_digest_combiner_t
    100 {
    101   ASSERT_POD ();
    102 
    103   inline void init (void) {
    104     head.init ();
    105     tail.init ();
    106   }
    107 
    108   inline void add (hb_codepoint_t g) {
    109     head.add (g);
    110     tail.add (g);
    111   }
    112 
    113   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    114     head.add_range (a, b);
    115     tail.add_range (a, b);
    116   }
    117 
    118   inline bool may_have (hb_codepoint_t g) const {
    119     return head.may_have (g) && tail.may_have (g);
    120   }
    121 
    122   private:
    123   head_t head;
    124   tail_t tail;
    125 };
    126 
    127 
    128 /*
    129  * hb_set_digest_t
    130  *
    131  * This is a combination of digests that performs "best".
    132  * There is not much science to this: it's a result of intuition
    133  * and testing.
    134  */
    135 typedef hb_set_digest_combiner_t
    136 <
    137   hb_set_digest_lowest_bits_t<unsigned long, 4>,
    138   hb_set_digest_combiner_t
    139   <
    140     hb_set_digest_lowest_bits_t<unsigned long, 0>,
    141     hb_set_digest_lowest_bits_t<unsigned long, 9>
    142   >
    143 > hb_set_digest_t;
    144 
    145 
    146 
    147 /*
    148  * hb_set_t
    149  */
    150 
    151 
    152 /* TODO Make this faster and memmory efficient. */
    153 
    154 struct hb_set_t
    155 {
    156   friend struct hb_frozen_set_t;
    157 
    158   hb_object_header_t header;
    159   ASSERT_POD ();
    160   bool in_error;
    161 
    162   inline void init (void) {
    163     hb_object_init (this);
    164     clear ();
    165   }
    166   inline void fini (void) {
    167   }
    168   inline void clear (void) {
    169     if (unlikely (hb_object_is_inert (this)))
    170       return;
    171     in_error = false;
    172     memset (elts, 0, sizeof elts);
    173   }
    174   inline bool is_empty (void) const {
    175     for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
    176       if (elts[i])
    177         return false;
    178     return true;
    179   }
    180   inline void add (hb_codepoint_t g)
    181   {
    182     if (unlikely (in_error)) return;
    183     if (unlikely (g == INVALID)) return;
    184     if (unlikely (g > MAX_G)) return;
    185     elt (g) |= mask (g);
    186   }
    187   inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
    188   {
    189     if (unlikely (in_error)) return;
    190     /* TODO Speedup */
    191     for (unsigned int i = a; i < b + 1; i++)
    192       add (i);
    193   }
    194   inline void del (hb_codepoint_t g)
    195   {
    196     if (unlikely (in_error)) return;
    197     if (unlikely (g > MAX_G)) return;
    198     elt (g) &= ~mask (g);
    199   }
    200   inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
    201   {
    202     if (unlikely (in_error)) return;
    203     /* TODO Speedup */
    204     for (unsigned int i = a; i < b + 1; i++)
    205       del (i);
    206   }
    207   inline bool has (hb_codepoint_t g) const
    208   {
    209     if (unlikely (g > MAX_G)) return false;
    210     return !!(elt (g) & mask (g));
    211   }
    212   inline bool intersects (hb_codepoint_t first,
    213 			  hb_codepoint_t last) const
    214   {
    215     if (unlikely (first > MAX_G)) return false;
    216     if (unlikely (last  > MAX_G)) last = MAX_G;
    217     unsigned int end = last + 1;
    218     for (hb_codepoint_t i = first; i < end; i++)
    219       if (has (i))
    220         return true;
    221     return false;
    222   }
    223   inline bool is_equal (const hb_set_t *other) const
    224   {
    225     for (unsigned int i = 0; i < ELTS; i++)
    226       if (elts[i] != other->elts[i])
    227         return false;
    228     return true;
    229   }
    230   inline void set (const hb_set_t *other)
    231   {
    232     if (unlikely (in_error)) return;
    233     for (unsigned int i = 0; i < ELTS; i++)
    234       elts[i] = other->elts[i];
    235   }
    236   inline void union_ (const hb_set_t *other)
    237   {
    238     if (unlikely (in_error)) return;
    239     for (unsigned int i = 0; i < ELTS; i++)
    240       elts[i] |= other->elts[i];
    241   }
    242   inline void intersect (const hb_set_t *other)
    243   {
    244     if (unlikely (in_error)) return;
    245     for (unsigned int i = 0; i < ELTS; i++)
    246       elts[i] &= other->elts[i];
    247   }
    248   inline void subtract (const hb_set_t *other)
    249   {
    250     if (unlikely (in_error)) return;
    251     for (unsigned int i = 0; i < ELTS; i++)
    252       elts[i] &= ~other->elts[i];
    253   }
    254   inline void symmetric_difference (const hb_set_t *other)
    255   {
    256     if (unlikely (in_error)) return;
    257     for (unsigned int i = 0; i < ELTS; i++)
    258       elts[i] ^= other->elts[i];
    259   }
    260   inline void invert (void)
    261   {
    262     if (unlikely (in_error)) return;
    263     for (unsigned int i = 0; i < ELTS; i++)
    264       elts[i] = ~elts[i];
    265   }
    266   inline bool next (hb_codepoint_t *codepoint) const
    267   {
    268     if (unlikely (*codepoint == INVALID)) {
    269       hb_codepoint_t i = get_min ();
    270       if (i != INVALID) {
    271         *codepoint = i;
    272 	return true;
    273       } else {
    274 	*codepoint = INVALID;
    275         return false;
    276       }
    277     }
    278     for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
    279       if (has (i)) {
    280         *codepoint = i;
    281 	return true;
    282       }
    283     *codepoint = INVALID;
    284     return false;
    285   }
    286   inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
    287   {
    288     hb_codepoint_t i;
    289 
    290     i = *last;
    291     if (!next (&i))
    292     {
    293       *last = *first = INVALID;
    294       return false;
    295     }
    296 
    297     *last = *first = i;
    298     while (next (&i) && i == *last + 1)
    299       (*last)++;
    300 
    301     return true;
    302   }
    303 
    304   inline unsigned int get_population (void) const
    305   {
    306     unsigned int count = 0;
    307     for (unsigned int i = 0; i < ELTS; i++)
    308       count += _hb_popcount32 (elts[i]);
    309     return count;
    310   }
    311   inline hb_codepoint_t get_min (void) const
    312   {
    313     for (unsigned int i = 0; i < ELTS; i++)
    314       if (elts[i])
    315 	for (unsigned int j = 0; j < BITS; j++)
    316 	  if (elts[i] & (1u << j))
    317 	    return i * BITS + j;
    318     return INVALID;
    319   }
    320   inline hb_codepoint_t get_max (void) const
    321   {
    322     for (unsigned int i = ELTS; i; i--)
    323       if (elts[i - 1])
    324 	for (unsigned int j = BITS; j; j--)
    325 	  if (elts[i - 1] & (1u << (j - 1)))
    326 	    return (i - 1) * BITS + (j - 1);
    327     return INVALID;
    328   }
    329 
    330   typedef uint32_t elt_t;
    331   static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
    332   static const unsigned int SHIFT = 5;
    333   static const unsigned int BITS = (1 << SHIFT);
    334   static const unsigned int MASK = BITS - 1;
    335   static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
    336   static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
    337 
    338   elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
    339   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
    340   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
    341 
    342   elt_t elts[ELTS]; /* XXX 8kb */
    343 
    344   ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
    345   ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
    346 };
    347 
    348 struct hb_frozen_set_t
    349 {
    350   static const unsigned int SHIFT = hb_set_t::SHIFT;
    351   static const unsigned int BITS = hb_set_t::BITS;
    352   static const unsigned int MASK = hb_set_t::MASK;
    353   typedef hb_set_t::elt_t elt_t;
    354 
    355   inline void init (const hb_set_t &set)
    356   {
    357     start = count = 0;
    358     elts = NULL;
    359 
    360     unsigned int max = set.get_max ();
    361     if (max == set.INVALID)
    362       return;
    363     unsigned int min = set.get_min ();
    364     const elt_t &min_elt = set.elt (min);
    365 
    366     start = min & ~MASK;
    367     count = max - start + 1;
    368     unsigned int num_elts = (count + BITS - 1) / BITS;
    369     unsigned int elts_size = num_elts * sizeof (elt_t);
    370     elts = (elt_t *) malloc (elts_size);
    371     if (unlikely (!elts))
    372     {
    373       start = count = 0;
    374       return;
    375     }
    376     memcpy (elts, &min_elt, elts_size);
    377   }
    378 
    379   inline void fini (void)
    380   {
    381     if (elts)
    382       free (elts);
    383   }
    384 
    385   inline bool has (hb_codepoint_t g) const
    386   {
    387     /* hb_codepoint_t is unsigned. */
    388     g -= start;
    389     if (unlikely (g > count)) return false;
    390     return !!(elt (g) & mask (g));
    391   }
    392 
    393   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
    394   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
    395 
    396   private:
    397   hb_codepoint_t start, count;
    398   elt_t *elts;
    399 };
    400 
    401 
    402 #endif /* HB_SET_PRIVATE_HH */
    403