1 /* 2 * Copyright 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_PRIVATE_HH 28 #define HB_SET_PRIVATE_HH 29 30 #include "hb-private.hh" 31 #include "hb-set.h" 32 #include "hb-object-private.hh" 33 34 35 /* 36 * The set digests here implement various "filters" that support 37 * "approximate member query". Conceptually these are like Bloom 38 * Filter and Quotient Filter, however, much smaller, faster, and 39 * designed to fit the requirements of our uses for glyph coverage 40 * queries. As a result, our filters have much higher. 41 */ 42 43 template <typename mask_t, unsigned int shift> 44 struct hb_set_digest_lowest_bits_t 45 { 46 ASSERT_POD (); 47 48 static const unsigned int mask_bytes = sizeof (mask_t); 49 static const unsigned int mask_bits = sizeof (mask_t) * 8; 50 static const unsigned int num_bits = 0 51 + (mask_bytes >= 1 ? 3 : 0) 52 + (mask_bytes >= 2 ? 1 : 0) 53 + (mask_bytes >= 4 ? 1 : 0) 54 + (mask_bytes >= 8 ? 1 : 0) 55 + (mask_bytes >= 16? 1 : 0) 56 + 0; 57 58 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8); 59 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8); 60 61 inline void init (void) { 62 mask = 0; 63 } 64 65 inline void add (hb_codepoint_t g) { 66 mask |= mask_for (g); 67 } 68 69 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 70 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 71 mask = (mask_t) -1; 72 else { 73 mask_t ma = mask_for (a); 74 mask_t mb = mask_for (b); 75 mask |= mb + (mb - ma) - (mb < ma); 76 } 77 } 78 79 inline bool may_have (hb_codepoint_t g) const { 80 return !!(mask & mask_for (g)); 81 } 82 83 private: 84 85 static inline mask_t mask_for (hb_codepoint_t g) { 86 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); 87 } 88 mask_t mask; 89 }; 90 91 template <typename head_t, typename tail_t> 92 struct hb_set_digest_combiner_t 93 { 94 ASSERT_POD (); 95 96 inline void init (void) { 97 head.init (); 98 tail.init (); 99 } 100 101 inline void add (hb_codepoint_t g) { 102 head.add (g); 103 tail.add (g); 104 } 105 106 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 107 head.add_range (a, b); 108 tail.add_range (a, b); 109 } 110 111 inline bool may_have (hb_codepoint_t g) const { 112 return head.may_have (g) && tail.may_have (g); 113 } 114 115 private: 116 head_t head; 117 tail_t tail; 118 }; 119 120 121 /* 122 * hb_set_digest_t 123 * 124 * This is a combination of digests that performs "best". 125 * There is not much science to this: it's a result of intuition 126 * and testing. 127 */ 128 typedef hb_set_digest_combiner_t 129 < 130 hb_set_digest_lowest_bits_t<unsigned long, 4>, 131 hb_set_digest_combiner_t 132 < 133 hb_set_digest_lowest_bits_t<unsigned long, 0>, 134 hb_set_digest_lowest_bits_t<unsigned long, 9> 135 > 136 > hb_set_digest_t; 137 138 139 140 /* 141 * hb_set_t 142 */ 143 144 145 /* TODO Make this faster and memmory efficient. */ 146 147 struct hb_set_t 148 { 149 hb_object_header_t header; 150 ASSERT_POD (); 151 bool in_error; 152 153 inline void init (void) { 154 header.init (); 155 clear (); 156 } 157 inline void fini (void) { 158 } 159 inline void clear (void) { 160 if (unlikely (hb_object_is_inert (this))) 161 return; 162 in_error = false; 163 memset (elts, 0, sizeof elts); 164 } 165 inline bool is_empty (void) const { 166 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++) 167 if (elts[i]) 168 return false; 169 return true; 170 } 171 inline void add (hb_codepoint_t g) 172 { 173 if (unlikely (in_error)) return; 174 if (unlikely (g == SENTINEL)) return; 175 if (unlikely (g > MAX_G)) return; 176 elt (g) |= mask (g); 177 } 178 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 179 { 180 if (unlikely (in_error)) return; 181 /* TODO Speedup */ 182 for (unsigned int i = a; i < b + 1; i++) 183 add (i); 184 } 185 inline void del (hb_codepoint_t g) 186 { 187 if (unlikely (in_error)) return; 188 if (unlikely (g > MAX_G)) return; 189 elt (g) &= ~mask (g); 190 } 191 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 192 { 193 if (unlikely (in_error)) return; 194 /* TODO Speedup */ 195 for (unsigned int i = a; i < b + 1; i++) 196 del (i); 197 } 198 inline bool has (hb_codepoint_t g) const 199 { 200 if (unlikely (g > MAX_G)) return false; 201 return !!(elt (g) & mask (g)); 202 } 203 inline bool intersects (hb_codepoint_t first, 204 hb_codepoint_t last) const 205 { 206 if (unlikely (first > MAX_G)) return false; 207 if (unlikely (last > MAX_G)) last = MAX_G; 208 unsigned int end = last + 1; 209 for (hb_codepoint_t i = first; i < end; i++) 210 if (has (i)) 211 return true; 212 return false; 213 } 214 inline bool is_equal (const hb_set_t *other) const 215 { 216 for (unsigned int i = 0; i < ELTS; i++) 217 if (elts[i] != other->elts[i]) 218 return false; 219 return true; 220 } 221 inline void set (const hb_set_t *other) 222 { 223 if (unlikely (in_error)) return; 224 for (unsigned int i = 0; i < ELTS; i++) 225 elts[i] = other->elts[i]; 226 } 227 inline void union_ (const hb_set_t *other) 228 { 229 if (unlikely (in_error)) return; 230 for (unsigned int i = 0; i < ELTS; i++) 231 elts[i] |= other->elts[i]; 232 } 233 inline void intersect (const hb_set_t *other) 234 { 235 if (unlikely (in_error)) return; 236 for (unsigned int i = 0; i < ELTS; i++) 237 elts[i] &= other->elts[i]; 238 } 239 inline void subtract (const hb_set_t *other) 240 { 241 if (unlikely (in_error)) return; 242 for (unsigned int i = 0; i < ELTS; i++) 243 elts[i] &= ~other->elts[i]; 244 } 245 inline void symmetric_difference (const hb_set_t *other) 246 { 247 if (unlikely (in_error)) return; 248 for (unsigned int i = 0; i < ELTS; i++) 249 elts[i] ^= other->elts[i]; 250 } 251 inline void invert (void) 252 { 253 if (unlikely (in_error)) return; 254 for (unsigned int i = 0; i < ELTS; i++) 255 elts[i] = ~elts[i]; 256 } 257 inline bool next (hb_codepoint_t *codepoint) const 258 { 259 if (unlikely (*codepoint == SENTINEL)) { 260 hb_codepoint_t i = get_min (); 261 if (i != SENTINEL) { 262 *codepoint = i; 263 return true; 264 } else 265 return false; 266 } 267 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++) 268 if (has (i)) { 269 *codepoint = i; 270 return true; 271 } 272 return false; 273 } 274 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 275 { 276 hb_codepoint_t i; 277 278 i = *last; 279 if (!next (&i)) 280 return false; 281 282 *last = *first = i; 283 while (next (&i) && i == *last + 1) 284 (*last)++; 285 286 return true; 287 } 288 289 inline unsigned int get_population (void) const 290 { 291 unsigned int count = 0; 292 for (unsigned int i = 0; i < ELTS; i++) 293 count += _hb_popcount32 (elts[i]); 294 return count; 295 } 296 inline hb_codepoint_t get_min (void) const 297 { 298 for (unsigned int i = 0; i < ELTS; i++) 299 if (elts[i]) 300 for (unsigned int j = 0; j < BITS; j++) 301 if (elts[i] & (1 << j)) 302 return i * BITS + j; 303 return SENTINEL; 304 } 305 inline hb_codepoint_t get_max (void) const 306 { 307 for (unsigned int i = ELTS; i; i--) 308 if (elts[i - 1]) 309 for (unsigned int j = BITS; j; j--) 310 if (elts[i - 1] & (1 << (j - 1))) 311 return (i - 1) * BITS + (j - 1); 312 return SENTINEL; 313 } 314 315 typedef uint32_t elt_t; 316 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */ 317 static const unsigned int SHIFT = 5; 318 static const unsigned int BITS = (1 << SHIFT); 319 static const unsigned int MASK = BITS - 1; 320 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS; 321 static const hb_codepoint_t SENTINEL = (hb_codepoint_t) -1; 322 323 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; } 324 elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } 325 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 326 327 elt_t elts[ELTS]; /* XXX 8kb */ 328 329 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS); 330 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G); 331 }; 332 333 334 335 #endif /* HB_SET_PRIVATE_HH */ 336