1 #include "bitvector.h" 2 #include "popcount.h" 3 4 namespace marisa { 5 namespace { 6 7 const UInt8 SelectTable[8][256] = { 8 { 9 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 10 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 11 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 12 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 13 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 14 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 15 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 16 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 17 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 18 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 19 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 20 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 21 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 22 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 23 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 24 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 25 }, 26 { 27 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 28 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 29 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 30 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 31 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 32 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 33 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 34 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 35 7, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 36 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 37 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 38 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 39 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 40 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 41 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 42 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1 43 }, 44 { 45 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2, 46 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 47 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 48 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 49 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 50 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 51 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 52 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 53 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 3, 7, 3, 3, 2, 54 7, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 55 7, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 56 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 57 7, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 58 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 59 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 60 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2 61 }, 62 { 63 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 64 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3, 65 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3, 66 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 67 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3, 68 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3, 69 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 70 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 71 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 72 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 4, 4, 3, 73 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 3, 74 7, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 75 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 3, 76 7, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3, 77 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 78 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3 79 }, 80 { 81 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 82 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 83 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 84 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4, 85 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 86 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4, 87 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5, 88 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 89 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 91 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 92 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 5, 7, 5, 5, 4, 93 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 94 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 4, 95 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5, 96 7, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4 97 }, 98 { 99 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 100 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 101 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 102 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 103 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 104 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 105 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 106 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5, 107 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 108 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 109 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 110 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 111 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 112 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 113 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 114 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 6, 6, 5 115 }, 116 { 117 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 118 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 119 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 120 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 121 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 122 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 123 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 124 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 125 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 126 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 127 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 128 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 129 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 130 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 131 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 132 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6 133 }, 134 { 135 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 136 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 137 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 138 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 139 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 140 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 141 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 142 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 143 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 144 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 145 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 146 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 147 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 148 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 149 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 150 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 151 } 152 }; 153 154 } // namespace 155 156 BitVector::BitVector() 157 : blocks_(), size_(0), ranks_(), select0s_(), select1s_() {} 158 159 void BitVector::build() { 160 Vector<Rank> ranks; 161 const UInt32 num_blocks = (size_ / 512) + (((size_ % 512) != 0) ? 1 : 0); 162 ranks.resize(num_blocks + 1); 163 Vector<UInt32> select0s; 164 Vector<UInt32> select1s; 165 UInt32 num_0s = 0; 166 UInt32 num_1s = 0; 167 for (UInt32 i = 0; i < size_; ++i) { 168 if ((i % 64) == 0) { 169 UInt32 rank_id = i / 512; 170 switch ((i / 64) % 8) { 171 case 0: { 172 ranks[rank_id].set_abs(num_1s); 173 break; 174 } 175 case 1: { 176 ranks[rank_id].set_rel1(num_1s - ranks[rank_id].abs()); 177 break; 178 } 179 case 2: { 180 ranks[rank_id].set_rel2(num_1s - ranks[rank_id].abs()); 181 break; 182 } 183 case 3: { 184 ranks[rank_id].set_rel3(num_1s - ranks[rank_id].abs()); 185 break; 186 } 187 case 4: { 188 ranks[rank_id].set_rel4(num_1s - ranks[rank_id].abs()); 189 break; 190 } 191 case 5: { 192 ranks[rank_id].set_rel5(num_1s - ranks[rank_id].abs()); 193 break; 194 } 195 case 6: { 196 ranks[rank_id].set_rel6(num_1s - ranks[rank_id].abs()); 197 break; 198 } 199 case 7: { 200 ranks[rank_id].set_rel7(num_1s - ranks[rank_id].abs()); 201 break; 202 } 203 } 204 } 205 if ((*this)[i]) { 206 if ((num_1s % 512) == 0) { 207 select1s.push_back(i); 208 } 209 ++num_1s; 210 } else { 211 if ((num_0s % 512) == 0) { 212 select0s.push_back(i); 213 } 214 ++num_0s; 215 } 216 } 217 if ((size_ % 512) != 0) { 218 UInt32 rank_id = (size_ - 1) / 512; 219 switch (((size_ - 1) / 64) % 8) { 220 case 0: { 221 ranks[rank_id].set_rel1(num_1s - ranks[rank_id].abs()); 222 } 223 case 1: { 224 ranks[rank_id].set_rel2(num_1s - ranks[rank_id].abs()); 225 } 226 case 2: { 227 ranks[rank_id].set_rel3(num_1s - ranks[rank_id].abs()); 228 } 229 case 3: { 230 ranks[rank_id].set_rel4(num_1s - ranks[rank_id].abs()); 231 } 232 case 4: { 233 ranks[rank_id].set_rel5(num_1s - ranks[rank_id].abs()); 234 } 235 case 5: { 236 ranks[rank_id].set_rel6(num_1s - ranks[rank_id].abs()); 237 } 238 case 6: { 239 ranks[rank_id].set_rel7(num_1s - ranks[rank_id].abs()); 240 break; 241 } 242 } 243 } 244 ranks.back().set_abs(num_1s); 245 select0s.push_back(size_); 246 select1s.push_back(size_); 247 select0s.shrink(); 248 select1s.shrink(); 249 250 blocks_.shrink(); 251 ranks_.swap(&ranks); 252 select0s_.swap(&select0s); 253 select1s_.swap(&select1s); 254 } 255 256 void BitVector::mmap(Mapper *mapper, const char *filename, 257 long offset, int whence) { 258 MARISA_THROW_IF(mapper == NULL, MARISA_PARAM_ERROR); 259 Mapper temp_mapper; 260 temp_mapper.open(filename, offset, whence); 261 map(temp_mapper); 262 temp_mapper.swap(mapper); 263 } 264 265 void BitVector::map(const void *ptr, std::size_t size) { 266 Mapper mapper(ptr, size); 267 map(mapper); 268 } 269 270 void BitVector::map(Mapper &mapper) { 271 BitVector temp; 272 temp.blocks_.map(mapper); 273 mapper.map(&temp.size_); 274 temp.ranks_.map(mapper); 275 temp.select0s_.map(mapper); 276 temp.select1s_.map(mapper); 277 temp.swap(this); 278 } 279 280 void BitVector::load(const char *filename, 281 long offset, int whence) { 282 Reader reader; 283 reader.open(filename, offset, whence); 284 read(reader); 285 } 286 287 void BitVector::fread(std::FILE *file) { 288 Reader reader(file); 289 read(reader); 290 } 291 292 void BitVector::read(int fd) { 293 Reader reader(fd); 294 read(reader); 295 } 296 297 void BitVector::read(std::istream &stream) { 298 Reader reader(&stream); 299 read(reader); 300 } 301 302 void BitVector::read(Reader &reader) { 303 BitVector temp; 304 temp.blocks_.read(reader); 305 reader.read(&temp.size_); 306 temp.ranks_.read(reader); 307 temp.select0s_.read(reader); 308 temp.select1s_.read(reader); 309 temp.swap(this); 310 } 311 312 void BitVector::save(const char *filename, bool trunc_flag, 313 long offset, int whence) const { 314 Writer writer; 315 writer.open(filename, trunc_flag, offset, whence); 316 write(writer); 317 } 318 319 void BitVector::fwrite(std::FILE *file) const { 320 Writer writer(file); 321 write(writer); 322 } 323 324 void BitVector::write(int fd) const { 325 Writer writer(fd); 326 write(writer); 327 } 328 329 void BitVector::write(std::ostream &stream) const { 330 Writer writer(&stream); 331 write(writer); 332 } 333 334 void BitVector::write(Writer &writer) const { 335 blocks_.write(writer); 336 writer.write(size_); 337 ranks_.write(writer); 338 select0s_.write(writer); 339 select1s_.write(writer); 340 } 341 342 UInt32 BitVector::rank1(UInt32 i) const { 343 MARISA_DEBUG_IF(i > size_, MARISA_PARAM_ERROR); 344 const Rank &rank = ranks_[i / 512]; 345 UInt32 offset = rank.abs(); 346 switch ((i / 64) % 8) { 347 case 1: { 348 offset += rank.rel1(); 349 break; 350 } 351 case 2: { 352 offset += rank.rel2(); 353 break; 354 } 355 case 3: { 356 offset += rank.rel3(); 357 break; 358 } 359 case 4: { 360 offset += rank.rel4(); 361 break; 362 } 363 case 5: { 364 offset += rank.rel5(); 365 break; 366 } 367 case 6: { 368 offset += rank.rel6(); 369 break; 370 } 371 case 7: { 372 offset += rank.rel7(); 373 break; 374 } 375 } 376 switch ((i / 32) % 2) { 377 case 1: { 378 offset += PopCount(blocks_[(i / 32) - 1]).lo32(); 379 } 380 case 0: { 381 offset += PopCount(blocks_[i / 32] & ((1U << (i % 32)) - 1)).lo32(); 382 break; 383 } 384 } 385 return offset; 386 } 387 388 UInt32 BitVector::select0(UInt32 i) const { 389 UInt32 select_id = i / 512; 390 MARISA_DEBUG_IF((select_id + 1) >= select0s_.size(), MARISA_PARAM_ERROR); 391 if ((i % 512) == 0) { 392 return select0s_[select_id]; 393 } 394 UInt32 begin = select0s_[select_id] / 512; 395 UInt32 end = (select0s_[select_id + 1] + 511) / 512; 396 if (begin + 10 >= end) { 397 while (i >= ((begin + 1) * 512) - ranks_[begin + 1].abs()) { 398 ++begin; 399 } 400 } else { 401 while (begin + 1 < end) { 402 UInt32 middle = (begin + end) / 2; 403 if (i < (middle * 512) - ranks_[middle].abs()) { 404 end = middle; 405 } else { 406 begin = middle; 407 } 408 } 409 } 410 const UInt32 rank_id = begin; 411 i -= (rank_id * 512) - ranks_[rank_id].abs(); 412 413 const Rank &rank = ranks_[rank_id]; 414 UInt32 block_id = rank_id * 16; 415 if (i < (256U - rank.rel4())) { 416 if (i < (128U - rank.rel2())) { 417 if (i >= (64U - rank.rel1())) { 418 block_id += 2; 419 i -= 64 - rank.rel1(); 420 } 421 } else if (i < (192U - rank.rel3())) { 422 block_id += 4; 423 i -= 128 - rank.rel2(); 424 } else { 425 block_id += 6; 426 i -= 192 - rank.rel3(); 427 } 428 } else if (i < (384U - rank.rel6())) { 429 if (i < (320U - rank.rel5())) { 430 block_id += 8; 431 i -= 256 - rank.rel4(); 432 } else { 433 block_id += 10; 434 i -= 320 - rank.rel5(); 435 } 436 } else if (i < (448U - rank.rel7())) { 437 block_id += 12; 438 i -= 384 - rank.rel6(); 439 } else { 440 block_id += 14; 441 i -= 448 - rank.rel7(); 442 } 443 444 UInt32 block = ~blocks_[block_id]; 445 PopCount count(block); 446 if (i >= count.lo32()) { 447 ++block_id; 448 i -= count.lo32(); 449 block = ~blocks_[block_id]; 450 count = PopCount(block); 451 } 452 453 UInt32 bit_id = block_id * 32; 454 if (i < count.lo16()) { 455 if (i >= count.lo8()) { 456 bit_id += 8; 457 block >>= 8; 458 i -= count.lo8(); 459 } 460 } else if (i < count.lo24()) { 461 bit_id += 16; 462 block >>= 16; 463 i -= count.lo16(); 464 } else { 465 bit_id += 24; 466 block >>= 24; 467 i -= count.lo24(); 468 } 469 return bit_id + SelectTable[i][block & 0xFF]; 470 } 471 472 UInt32 BitVector::select1(UInt32 i) const { 473 UInt32 select_id = i / 512; 474 MARISA_DEBUG_IF((select_id + 1) >= select1s_.size(), MARISA_PARAM_ERROR); 475 if ((i % 512) == 0) { 476 return select1s_[select_id]; 477 } 478 UInt32 begin = select1s_[select_id] / 512; 479 UInt32 end = (select1s_[select_id + 1] + 511) / 512; 480 if (begin + 10 >= end) { 481 while (i >= ranks_[begin + 1].abs()) { 482 ++begin; 483 } 484 } else { 485 while (begin + 1 < end) { 486 UInt32 middle = (begin + end) / 2; 487 if (i < ranks_[middle].abs()) { 488 end = middle; 489 } else { 490 begin = middle; 491 } 492 } 493 } 494 const UInt32 rank_id = begin; 495 i -= ranks_[rank_id].abs(); 496 497 const Rank &rank = ranks_[rank_id]; 498 UInt32 block_id = rank_id * 16; 499 if (i < rank.rel4()) { 500 if (i < rank.rel2()) { 501 if (i >= rank.rel1()) { 502 block_id += 2; 503 i -= rank.rel1(); 504 } 505 } else if (i < rank.rel3()) { 506 block_id += 4; 507 i -= rank.rel2(); 508 } else { 509 block_id += 6; 510 i -= rank.rel3(); 511 } 512 } else if (i < rank.rel6()) { 513 if (i < rank.rel5()) { 514 block_id += 8; 515 i -= rank.rel4(); 516 } else { 517 block_id += 10; 518 i -= rank.rel5(); 519 } 520 } else if (i < rank.rel7()) { 521 block_id += 12; 522 i -= rank.rel6(); 523 } else { 524 block_id += 14; 525 i -= rank.rel7(); 526 } 527 528 UInt32 block = blocks_[block_id]; 529 PopCount count(block); 530 if (i >= count.lo32()) { 531 ++block_id; 532 i -= count.lo32(); 533 block = blocks_[block_id]; 534 count = PopCount(block); 535 } 536 537 UInt32 bit_id = block_id * 32; 538 if (i < count.lo16()) { 539 if (i >= count.lo8()) { 540 bit_id += 8; 541 block >>= 8; 542 i -= count.lo8(); 543 } 544 } else if (i < count.lo24()) { 545 bit_id += 16; 546 block >>= 16; 547 i -= count.lo16(); 548 } else { 549 bit_id += 24; 550 block >>= 24; 551 i -= count.lo24(); 552 } 553 return bit_id + SelectTable[i][block & 0xFF]; 554 } 555 556 void BitVector::clear() { 557 BitVector().swap(this); 558 } 559 560 void BitVector::swap(BitVector *rhs) { 561 MARISA_THROW_IF(rhs == NULL, MARISA_PARAM_ERROR); 562 blocks_.swap(&rhs->blocks_); 563 Swap(&size_, &rhs->size_); 564 ranks_.swap(&rhs->ranks_); 565 select0s_.swap(&rhs->select0s_); 566 select1s_.swap(&rhs->select1s_); 567 } 568 569 } // namespace marisa 570