Home | History | Annotate | Download | only in libtextclassifier
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Functions to compress and decompress low entropy entries in the model.
     18 
     19 #ifndef LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
     20 #define LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
     21 
#include <memory>
#include <string>

#include "model_generated.h"
#include "util/utf8/unilib.h"
#include "zlib.h"
     27 
     28 namespace libtextclassifier2 {
     29 
     30 class ZlibDecompressor {
     31  public:
     32   static std::unique_ptr<ZlibDecompressor> Instance();
     33   ~ZlibDecompressor();
     34 
     35   bool Decompress(const CompressedBuffer* compressed_buffer, std::string* out);
     36 
     37  private:
     38   ZlibDecompressor();
     39   z_stream stream_;
     40   bool initialized_;
     41 };
     42 
     43 class ZlibCompressor {
     44  public:
     45   static std::unique_ptr<ZlibCompressor> Instance();
     46   ~ZlibCompressor();
     47 
     48   void Compress(const std::string& uncompressed_content,
     49                 CompressedBufferT* out);
     50 
     51  private:
     52   explicit ZlibCompressor(int level = Z_BEST_COMPRESSION,
     53                           // Tmp. buffer size was set based on the current set
     54                           // of patterns to be compressed.
     55                           int tmp_buffer_size = 64 * 1024);
     56   z_stream stream_;
     57   std::unique_ptr<Bytef[]> buffer_;
     58   unsigned int buffer_size_;
     59   bool initialized_;
     60 };
     61 
     62 // Compresses regex and datetime rules in the model in place.
     63 bool CompressModel(ModelT* model);
     64 
     65 // Decompresses regex and datetime rules in the model in place.
     66 bool DecompressModel(ModelT* model);
     67 
     68 // Compresses regex and datetime rules in the model.
     69 std::string CompressSerializedModel(const std::string& model);
     70 
     71 // Create and compile a regex pattern from optionally compressed pattern.
     72 std::unique_ptr<UniLib::RegexPattern> UncompressMakeRegexPattern(
     73     const UniLib& unilib, const flatbuffers::String* uncompressed_pattern,
     74     const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor,
     75     std::string* result_pattern_text = nullptr);
     76 
     77 }  // namespace libtextclassifier2
     78 
     79 #endif  // LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
     80