Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2012  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_SET_DIGEST_PRIVATE_HH
     28 #define HB_SET_DIGEST_PRIVATE_HH
     29 
     30 #include "hb-private.hh"
     31 
     32 /*
     33  * The set digests here implement various "filters" that support
     34  * "approximate member query".  Conceptually these are like Bloom
     35  * Filter and Quotient Filter, however, much smaller, faster, and
     36  * designed to fit the requirements of our uses for glyph coverage
     37  * queries.
     38  *
     39  * Our filters are highly accurate if the lookup covers fairly local
     40  * set of glyphs, but fully flooded and ineffective if coverage is
     41  * all over the place.
     42  *
     43  * The frozen-set can be used instead of a digest, to trade more
     44  * memory for 100% accuracy, but in practice, that doesn't look like
     45  * an attractive trade-off.
     46  */
     47 
     48 template <typename mask_t, unsigned int shift>
     49 struct hb_set_digest_lowest_bits_t
     50 {
     51   ASSERT_POD ();
     52 
     53   static const unsigned int mask_bytes = sizeof (mask_t);
     54   static const unsigned int mask_bits = sizeof (mask_t) * 8;
     55   static const unsigned int num_bits = 0
     56 				     + (mask_bytes >= 1 ? 3 : 0)
     57 				     + (mask_bytes >= 2 ? 1 : 0)
     58 				     + (mask_bytes >= 4 ? 1 : 0)
     59 				     + (mask_bytes >= 8 ? 1 : 0)
     60 				     + (mask_bytes >= 16? 1 : 0)
     61 				     + 0;
     62 
     63   static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
     64   static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
     65 
     66   inline void init (void) {
     67     mask = 0;
     68   }
     69 
     70   inline void add (hb_codepoint_t g) {
     71     mask |= mask_for (g);
     72   }
     73 
     74   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
     75     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
     76       mask = (mask_t) -1;
     77     else {
     78       mask_t ma = mask_for (a);
     79       mask_t mb = mask_for (b);
     80       mask |= mb + (mb - ma) - (mb < ma);
     81     }
     82     return true;
     83   }
     84 
     85   template <typename T>
     86   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
     87   {
     88     for (unsigned int i = 0; i < count; i++)
     89     {
     90       add (*array);
     91       array = (const T *) (stride + (const char *) array);
     92     }
     93   }
     94   template <typename T>
     95   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
     96   {
     97     for (unsigned int i = 0; i < count; i++)
     98     {
     99       add (*array);
    100       array = (const T *) (stride + (const char *) array);
    101     }
    102     return true;
    103   }
    104 
    105   inline bool may_have (hb_codepoint_t g) const {
    106     return !!(mask & mask_for (g));
    107   }
    108 
    109   private:
    110 
    111   static inline mask_t mask_for (hb_codepoint_t g) {
    112     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
    113   }
    114   mask_t mask;
    115 };
    116 
    117 template <typename head_t, typename tail_t>
    118 struct hb_set_digest_combiner_t
    119 {
    120   ASSERT_POD ();
    121 
    122   inline void init (void) {
    123     head.init ();
    124     tail.init ();
    125   }
    126 
    127   inline void add (hb_codepoint_t g) {
    128     head.add (g);
    129     tail.add (g);
    130   }
    131 
    132   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
    133     head.add_range (a, b);
    134     tail.add_range (a, b);
    135     return true;
    136   }
    137   template <typename T>
    138   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    139   {
    140     head.add_array (array, count, stride);
    141     tail.add_array (array, count, stride);
    142   }
    143   template <typename T>
    144   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
    145   {
    146     head.add_sorted_array (array, count, stride);
    147     tail.add_sorted_array (array, count, stride);
    148     return true;
    149   }
    150 
    151   inline bool may_have (hb_codepoint_t g) const {
    152     return head.may_have (g) && tail.may_have (g);
    153   }
    154 
    155   private:
    156   head_t head;
    157   tail_t tail;
    158 };
    159 
    160 
    161 /*
    162  * hb_set_digest_t
    163  *
    164  * This is a combination of digests that performs "best".
    165  * There is not much science to this: it's a result of intuition
    166  * and testing.
    167  */
    168 typedef hb_set_digest_combiner_t
    169 <
    170   hb_set_digest_lowest_bits_t<unsigned long, 4>,
    171   hb_set_digest_combiner_t
    172   <
    173     hb_set_digest_lowest_bits_t<unsigned long, 0>,
    174     hb_set_digest_lowest_bits_t<unsigned long, 9>
    175   >
    176 > hb_set_digest_t;
    177 
    178 
    179 #endif /* HB_SET_DIGEST_PRIVATE_HH */
    180