Home | History | Annotate | Download | only in marisa_alpha
      1 #include <algorithm>
      2 #include <functional>
      3 #include <utility>
      4 
      5 #include "tail.h"
      6 
      7 namespace marisa_alpha {
      8 
      9 Tail::Tail() : buf_() {}
     10 
     11 void Tail::build(const Vector<String> &keys,
     12     Vector<UInt32> *offsets, int mode) {
     13   switch (mode) {
     14     case MARISA_ALPHA_BINARY_TAIL: {
     15       build_binary_tail(keys, offsets);
     16       return;
     17     }
     18     case MARISA_ALPHA_TEXT_TAIL: {
     19       if (!build_text_tail(keys, offsets)) {
     20         build_binary_tail(keys, offsets);
     21       }
     22       return;
     23     }
     24     default: {
     25       MARISA_ALPHA_THROW(MARISA_ALPHA_PARAM_ERROR);
     26     }
     27   }
     28 }
     29 
     30 void Tail::mmap(Mapper *mapper, const char *filename,
     31     long offset, int whence) {
     32   if (mapper == NULL) {
     33     MARISA_ALPHA_THROW(MARISA_ALPHA_PARAM_ERROR);
     34   }
     35   Mapper temp_mapper;
     36   temp_mapper.open(filename, offset, whence);
     37   map(temp_mapper);
     38   temp_mapper.swap(mapper);
     39 }
     40 
     41 void Tail::map(const void *ptr, std::size_t size) {
     42   Mapper mapper(ptr, size);
     43   map(mapper);
     44 }
     45 
     46 void Tail::map(Mapper &mapper) {
     47   Tail temp;
     48   temp.buf_.map(mapper);
     49   temp.swap(this);
     50 }
     51 
     52 void Tail::load(const char *filename, long offset, int whence) {
     53   Reader reader;
     54   reader.open(filename, offset, whence);
     55   read(reader);
     56 }
     57 
     58 void Tail::fread(::FILE *file) {
     59   Reader reader(file);
     60   read(reader);
     61 }
     62 
     63 void Tail::read(int fd) {
     64   Reader reader(fd);
     65   read(reader);
     66 }
     67 
     68 void Tail::read(std::istream &stream) {
     69   Reader reader(&stream);
     70   read(reader);
     71 }
     72 
     73 void Tail::read(Reader &reader) {
     74   Tail temp;
     75   temp.buf_.read(reader);
     76   temp.swap(this);
     77 }
     78 
     79 void Tail::save(const char *filename, bool trunc_flag,
     80     long offset, int whence) const {
     81   Writer writer;
     82   writer.open(filename, trunc_flag, offset, whence);
     83   write(writer);
     84 }
     85 
     86 void Tail::fwrite(::FILE *file) const {
     87   Writer writer(file);
     88   write(writer);
     89 }
     90 
     91 void Tail::write(int fd) const {
     92   Writer writer(fd);
     93   write(writer);
     94 }
     95 
     96 void Tail::write(std::ostream &stream) const {
     97   Writer writer(&stream);
     98   write(writer);
     99 }
    100 
    101 void Tail::write(Writer &writer) const {
    102   buf_.write(writer);
    103 }
    104 
    105 void Tail::clear() {
    106   Tail().swap(this);
    107 }
    108 
    109 void Tail::swap(Tail *rhs) {
    110   buf_.swap(&rhs->buf_);
    111 }
    112 
    113 void Tail::build_binary_tail(const Vector<String> &keys,
    114     Vector<UInt32> *offsets) {
    115   if (keys.empty()) {
    116     build_empty_tail(offsets);
    117     return;
    118   }
    119 
    120   Vector<UInt8> buf;
    121   buf.push_back('\0');
    122 
    123   Vector<UInt32> temp_offsets;
    124   temp_offsets.resize(keys.size() + 1);
    125 
    126   for (std::size_t i = 0; i < keys.size(); ++i) {
    127     temp_offsets[i] = (UInt32)buf.size();
    128     for (std::size_t j = 0; j < keys[i].length(); ++j) {
    129       buf.push_back(keys[i][j]);
    130     }
    131   }
    132   temp_offsets.back() = (UInt32)buf.size();
    133   buf.shrink();
    134 
    135   if (offsets != NULL) {
    136     temp_offsets.swap(offsets);
    137   }
    138   buf_.swap(&buf);
    139 }
    140 
    141 bool Tail::build_text_tail(const Vector<String> &keys,
    142     Vector<UInt32> *offsets) {
    143   if (keys.empty()) {
    144     build_empty_tail(offsets);
    145     return true;
    146   }
    147 
    148   typedef std::pair<RString, UInt32> KeyIdPair;
    149   Vector<KeyIdPair> pairs;
    150   pairs.resize(keys.size());
    151   for (std::size_t i = 0; i < keys.size(); ++i) {
    152     for (std::size_t j = 0; j < keys[i].length(); ++j) {
    153       if (keys[i][j] == '\0') {
    154         return false;
    155       }
    156     }
    157     pairs[i].first = RString(keys[i]);
    158     pairs[i].second = (UInt32)i;
    159   }
    160   std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>());
    161 
    162   Vector<UInt8> buf;
    163   buf.push_back('T');
    164 
    165   Vector<UInt32> temp_offsets;
    166   temp_offsets.resize(pairs.size(), 1);
    167 
    168   const KeyIdPair dummy_key;
    169   const KeyIdPair *last = &dummy_key;
    170   for (std::size_t i = 0; i < pairs.size(); ++i) {
    171     const KeyIdPair &cur = pairs[i];
    172     std::size_t match = 0;
    173     while ((match < cur.first.length()) && (match < last->first.length()) &&
    174         last->first[match] == cur.first[match]) {
    175       ++match;
    176     }
    177     if ((match == cur.first.length()) && (last->first.length() != 0)) {
    178       temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second]
    179           + (last->first.length() - match));
    180     } else {
    181       temp_offsets[cur.second] = (UInt32)buf.size();
    182       for (std::size_t j = 1; j <= cur.first.length(); ++j) {
    183         buf.push_back(cur.first[cur.first.length() - j]);
    184       }
    185       buf.push_back('\0');
    186     }
    187     last = &cur;
    188   }
    189   buf.shrink();
    190 
    191   if (offsets != NULL) {
    192     temp_offsets.swap(offsets);
    193   }
    194   buf_.swap(&buf);
    195   return true;
    196 }
    197 
    198 void Tail::build_empty_tail(Vector<UInt32> *offsets) {
    199   buf_.clear();
    200   if (offsets != NULL) {
    201     offsets->clear();
    202   }
    203 }
    204 
    205 }  // namespace marisa_alpha
    206