Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2012  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_SET_PRIVATE_HH
     28 #define HB_SET_PRIVATE_HH
     29 
     30 #include "hb-private.hh"
     31 #include "hb-object-private.hh"
     32 
     33 
     34 /*
     35  * The set digests here implement various "filters" that support
     36  * "approximate member query".  Conceptually these are like Bloom
     37  * Filter and Quotient Filter, however, much smaller, faster, and
     38  * designed to fit the requirements of our uses for glyph coverage
     39  * queries.  As a result, our filters have a much higher false-positive rate.
     40  */
     41 
     42 template <typename mask_t, unsigned int shift>
     43 struct hb_set_digest_lowest_bits_t
     44 {
     45   ASSERT_POD ();
     46 
     47   static const unsigned int mask_bytes = sizeof (mask_t);
     48   static const unsigned int mask_bits = sizeof (mask_t) * 8;
     49   static const unsigned int num_bits = 0
     50 				     + (mask_bytes >= 1 ? 3 : 0)
     51 				     + (mask_bytes >= 2 ? 1 : 0)
     52 				     + (mask_bytes >= 4 ? 1 : 0)
     53 				     + (mask_bytes >= 8 ? 1 : 0)
     54 				     + (mask_bytes >= 16? 1 : 0)
     55 				     + 0;
     56 
     57   ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
     58   ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
     59 
     60   inline void init (void) {
     61     mask = 0;
     62   }
     63 
     64   inline void add (hb_codepoint_t g) {
     65     mask |= mask_for (g);
     66   }
     67 
     68   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
     69     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
     70       mask = (mask_t) -1;
     71     else {
     72       mask_t ma = mask_for (a);
     73       mask_t mb = mask_for (b);
     74       mask |= mb + (mb - ma) - (mb < ma);
     75     }
     76   }
     77 
     78   inline bool may_have (hb_codepoint_t g) const {
     79     return !!(mask & mask_for (g));
     80   }
     81 
     82   private:
     83 
     84   static inline mask_t mask_for (hb_codepoint_t g) {
     85     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
     86   }
     87   mask_t mask;
     88 };
     89 
     90 template <typename head_t, typename tail_t>
     91 struct hb_set_digest_combiner_t
     92 {
     93   ASSERT_POD ();
     94 
     95   inline void init (void) {
     96     head.init ();
     97     tail.init ();
     98   }
     99 
    100   inline void add (hb_codepoint_t g) {
    101     head.add (g);
    102     tail.add (g);
    103   }
    104 
    105   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    106     head.add_range (a, b);
    107     tail.add_range (a, b);
    108   }
    109 
    110   inline bool may_have (hb_codepoint_t g) const {
    111     return head.may_have (g) && tail.may_have (g);
    112   }
    113 
    114   private:
    115   head_t head;
    116   tail_t tail;
    117 };
    118 
    119 
    120 /*
    121  * hb_set_digest_t
    122  *
    123  * This is a combination of digests that performs "best".
    124  * There is not much science to this: it's a result of intuition
    125  * and testing.
    126  */
    127 typedef hb_set_digest_combiner_t
    128 <
    129   hb_set_digest_lowest_bits_t<unsigned long, 4>,
    130   hb_set_digest_combiner_t
    131   <
    132     hb_set_digest_lowest_bits_t<unsigned long, 0>,
    133     hb_set_digest_lowest_bits_t<unsigned long, 9>
    134   >
    135 > hb_set_digest_t;
    136 
    137 
    138 
    139 /*
    140  * hb_set_t
    141  */
    142 
    143 
    144 /* TODO Make this faster and memory efficient. */
    145 
    146 struct hb_set_t
    147 {
    148   hb_object_header_t header;
    149   ASSERT_POD ();
    150   bool in_error;
    151 
    152   inline void init (void) {
    153     hb_object_init (this);
    154     clear ();
    155   }
    156   inline void fini (void) {
    157   }
    158   inline void clear (void) {
    159     if (unlikely (hb_object_is_inert (this)))
    160       return;
    161     in_error = false;
    162     memset (elts, 0, sizeof elts);
    163   }
    164   inline bool is_empty (void) const {
    165     for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
    166       if (elts[i])
    167         return false;
    168     return true;
    169   }
    170   inline void add (hb_codepoint_t g)
    171   {
    172     if (unlikely (in_error)) return;
    173     if (unlikely (g == INVALID)) return;
    174     if (unlikely (g > MAX_G)) return;
    175     elt (g) |= mask (g);
    176   }
    177   inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
    178   {
    179     if (unlikely (in_error)) return;
    180     /* TODO Speedup */
    181     for (unsigned int i = a; i < b + 1; i++)
    182       add (i);
    183   }
    184   inline void del (hb_codepoint_t g)
    185   {
    186     if (unlikely (in_error)) return;
    187     if (unlikely (g > MAX_G)) return;
    188     elt (g) &= ~mask (g);
    189   }
    190   inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
    191   {
    192     if (unlikely (in_error)) return;
    193     /* TODO Speedup */
    194     for (unsigned int i = a; i < b + 1; i++)
    195       del (i);
    196   }
    197   inline bool has (hb_codepoint_t g) const
    198   {
    199     if (unlikely (g > MAX_G)) return false;
    200     return !!(elt (g) & mask (g));
    201   }
    202   inline bool intersects (hb_codepoint_t first,
    203 			  hb_codepoint_t last) const
    204   {
    205     if (unlikely (first > MAX_G)) return false;
    206     if (unlikely (last  > MAX_G)) last = MAX_G;
    207     unsigned int end = last + 1;
    208     for (hb_codepoint_t i = first; i < end; i++)
    209       if (has (i))
    210         return true;
    211     return false;
    212   }
    213   inline bool is_equal (const hb_set_t *other) const
    214   {
    215     for (unsigned int i = 0; i < ELTS; i++)
    216       if (elts[i] != other->elts[i])
    217         return false;
    218     return true;
    219   }
    220   inline void set (const hb_set_t *other)
    221   {
    222     if (unlikely (in_error)) return;
    223     for (unsigned int i = 0; i < ELTS; i++)
    224       elts[i] = other->elts[i];
    225   }
    226   inline void union_ (const hb_set_t *other)
    227   {
    228     if (unlikely (in_error)) return;
    229     for (unsigned int i = 0; i < ELTS; i++)
    230       elts[i] |= other->elts[i];
    231   }
    232   inline void intersect (const hb_set_t *other)
    233   {
    234     if (unlikely (in_error)) return;
    235     for (unsigned int i = 0; i < ELTS; i++)
    236       elts[i] &= other->elts[i];
    237   }
    238   inline void subtract (const hb_set_t *other)
    239   {
    240     if (unlikely (in_error)) return;
    241     for (unsigned int i = 0; i < ELTS; i++)
    242       elts[i] &= ~other->elts[i];
    243   }
    244   inline void symmetric_difference (const hb_set_t *other)
    245   {
    246     if (unlikely (in_error)) return;
    247     for (unsigned int i = 0; i < ELTS; i++)
    248       elts[i] ^= other->elts[i];
    249   }
    250   inline void invert (void)
    251   {
    252     if (unlikely (in_error)) return;
    253     for (unsigned int i = 0; i < ELTS; i++)
    254       elts[i] = ~elts[i];
    255   }
    256   inline bool next (hb_codepoint_t *codepoint) const
    257   {
    258     if (unlikely (*codepoint == INVALID)) {
    259       hb_codepoint_t i = get_min ();
    260       if (i != INVALID) {
    261         *codepoint = i;
    262 	return true;
    263       } else {
    264 	*codepoint = INVALID;
    265         return false;
    266       }
    267     }
    268     for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
    269       if (has (i)) {
    270         *codepoint = i;
    271 	return true;
    272       }
    273     *codepoint = INVALID;
    274     return false;
    275   }
    276   inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
    277   {
    278     hb_codepoint_t i;
    279 
    280     i = *last;
    281     if (!next (&i))
    282     {
    283       *last = *first = INVALID;
    284       return false;
    285     }
    286 
    287     *last = *first = i;
    288     while (next (&i) && i == *last + 1)
    289       (*last)++;
    290 
    291     return true;
    292   }
    293 
    294   inline unsigned int get_population (void) const
    295   {
    296     unsigned int count = 0;
    297     for (unsigned int i = 0; i < ELTS; i++)
    298       count += _hb_popcount32 (elts[i]);
    299     return count;
    300   }
    301   inline hb_codepoint_t get_min (void) const
    302   {
    303     for (unsigned int i = 0; i < ELTS; i++)
    304       if (elts[i])
    305 	for (unsigned int j = 0; j < BITS; j++)
    306 	  if (elts[i] & (1 << j))
    307 	    return i * BITS + j;
    308     return INVALID;
    309   }
    310   inline hb_codepoint_t get_max (void) const
    311   {
    312     for (unsigned int i = ELTS; i; i--)
    313       if (elts[i - 1])
    314 	for (unsigned int j = BITS; j; j--)
    315 	  if (elts[i - 1] & (1 << (j - 1)))
    316 	    return (i - 1) * BITS + (j - 1);
    317     return INVALID;
    318   }
    319 
    320   typedef uint32_t elt_t;
    321   static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
    322   static const unsigned int SHIFT = 5;
    323   static const unsigned int BITS = (1 << SHIFT);
    324   static const unsigned int MASK = BITS - 1;
    325   static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
    326   static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
    327 
    328   elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
    329   elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
    330   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
    331 
    332   elt_t elts[ELTS]; /* XXX 8kb */
    333 
    334   ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
    335   ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
    336 };
    337 
    338 
    339 
    340 #endif /* HB_SET_PRIVATE_HH */
    341