Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "utils/resources.h"
     18 #include "utils/base/logging.h"
     19 #include "utils/zlib/buffer_generated.h"
     20 #include "utils/zlib/zlib.h"
     21 
     22 namespace libtextclassifier3 {
     23 namespace {
     24 bool isWildcardMatch(const flatbuffers::String* left,
     25                      const std::string& right) {
     26   return (left == nullptr || right.empty());
     27 }
     28 
     29 bool isExactMatch(const flatbuffers::String* left, const std::string& right) {
     30   if (left == nullptr) {
     31     return right.empty();
     32   }
     33   return left->str() == right;
     34 }
     35 
     36 }  // namespace
     37 
     38 int Resources::LocaleMatch(const Locale& locale,
     39                            const LanguageTag* entry_locale) const {
     40   int match = LOCALE_NO_MATCH;
     41   if (isExactMatch(entry_locale->language(), locale.Language())) {
     42     match |= LOCALE_LANGUAGE_MATCH;
     43   } else if (isWildcardMatch(entry_locale->language(), locale.Language())) {
     44     match |= LOCALE_LANGUAGE_WILDCARD_MATCH;
     45   }
     46 
     47   if (isExactMatch(entry_locale->script(), locale.Script())) {
     48     match |= LOCALE_SCRIPT_MATCH;
     49   } else if (isWildcardMatch(entry_locale->script(), locale.Script())) {
     50     match |= LOCALE_SCRIPT_WILDCARD_MATCH;
     51   }
     52 
     53   if (isExactMatch(entry_locale->region(), locale.Region())) {
     54     match |= LOCALE_REGION_MATCH;
     55   } else if (isWildcardMatch(entry_locale->region(), locale.Region())) {
     56     match |= LOCALE_REGION_WILDCARD_MATCH;
     57   }
     58 
     59   return match;
     60 }
     61 
     62 const ResourceEntry* Resources::FindResource(
     63     const StringPiece resource_name) const {
     64   if (resources_ == nullptr || resources_->resource_entry() == nullptr) {
     65     TC3_LOG(ERROR) << "No resources defined.";
     66     return nullptr;
     67   }
     68   const ResourceEntry* entry =
     69       resources_->resource_entry()->LookupByKey(resource_name.data());
     70   if (entry == nullptr) {
     71     TC3_LOG(ERROR) << "Resource " << resource_name.ToString() << " not found";
     72     return nullptr;
     73   }
     74   return entry;
     75 }
     76 
     77 int Resources::BestResourceForLocales(
     78     const ResourceEntry* resource, const std::vector<Locale>& locales) const {
     79   // Find best match based on locale.
     80   int resource_id = -1;
     81   int locale_match = LOCALE_NO_MATCH;
     82   const auto* resources = resource->resource();
     83   for (int user_locale = 0; user_locale < locales.size(); user_locale++) {
     84     if (!locales[user_locale].IsValid()) {
     85       continue;
     86     }
     87     for (int i = 0; i < resources->size(); i++) {
     88       for (const int locale_id : *resources->Get(i)->locale()) {
     89         const int candidate_match = LocaleMatch(
     90             locales[user_locale], resources_->locale()->Get(locale_id));
     91 
     92         // Only consider if at least the language matches.
     93         if ((candidate_match & LOCALE_LANGUAGE_MATCH) == 0 &&
     94             (candidate_match & LOCALE_LANGUAGE_WILDCARD_MATCH) == 0) {
     95           continue;
     96         }
     97 
     98         if (candidate_match > locale_match) {
     99           locale_match = candidate_match;
    100           resource_id = i;
    101         }
    102       }
    103     }
    104 
    105     // If the language matches exactly, we are already finished.
    106     // We found an exact language match.
    107     if (locale_match & LOCALE_LANGUAGE_MATCH) {
    108       return resource_id;
    109     }
    110   }
    111   return resource_id;
    112 }
    113 
    114 bool Resources::GetResourceContent(const std::vector<Locale>& locales,
    115                                    const StringPiece resource_name,
    116                                    std::string* result) const {
    117   const ResourceEntry* entry = FindResource(resource_name);
    118   if (entry == nullptr || entry->resource() == nullptr) {
    119     return false;
    120   }
    121 
    122   int resource_id = BestResourceForLocales(entry, locales);
    123   if (resource_id < 0) {
    124     return false;
    125   }
    126   const auto* resource = entry->resource()->Get(resource_id);
    127   if (resource->content() != nullptr) {
    128     *result = resource->content()->str();
    129     return true;
    130   } else if (resource->compressed_content() != nullptr) {
    131     std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance(
    132         resources_->compression_dictionary()->data(),
    133         resources_->compression_dictionary()->size());
    134     if (decompressor != nullptr &&
    135         decompressor->MaybeDecompress(resource->compressed_content(), result)) {
    136       return true;
    137     }
    138   }
    139   return false;
    140 }
    141 
    142 bool CompressResources(ResourcePoolT* resources,
    143                        const bool build_compression_dictionary,
    144                        const int dictionary_sample_every) {
    145   std::vector<unsigned char> dictionary;
    146   if (build_compression_dictionary) {
    147     {
    148       // Build up a compression dictionary.
    149       std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
    150       int i = 0;
    151       for (auto& entry : resources->resource_entry) {
    152         for (auto& resource : entry->resource) {
    153           if (resource->content.empty()) {
    154             continue;
    155           }
    156           i++;
    157 
    158           // Use a sample of the entries to build up a custom compression
    159           // dictionary. Using all entries will generally not give a benefit
    160           // for small data sizes, so we subsample here.
    161           if (i % dictionary_sample_every != 0) {
    162             continue;
    163           }
    164           CompressedBufferT compressed_content;
    165           compressor->Compress(resource->content, &compressed_content);
    166         }
    167       }
    168       compressor->GetDictionary(&dictionary);
    169       resources->compression_dictionary.assign(
    170           dictionary.data(), dictionary.data() + dictionary.size());
    171     }
    172   }
    173 
    174   for (auto& entry : resources->resource_entry) {
    175     for (auto& resource : entry->resource) {
    176       if (resource->content.empty()) {
    177         continue;
    178       }
    179       // Try compressing the data.
    180       std::unique_ptr<ZlibCompressor> compressor =
    181           build_compression_dictionary
    182               ? ZlibCompressor::Instance(dictionary.data(), dictionary.size())
    183               : ZlibCompressor::Instance();
    184       if (!compressor) {
    185         TC3_LOG(ERROR) << "Cannot create zlib compressor.";
    186         return false;
    187       }
    188 
    189       CompressedBufferT compressed_content;
    190       compressor->Compress(resource->content, &compressed_content);
    191 
    192       // Only keep compressed version if smaller.
    193       if (compressed_content.uncompressed_size >
    194           compressed_content.buffer.size()) {
    195         resource->content.clear();
    196         resource->compressed_content.reset(new CompressedBufferT);
    197         *resource->compressed_content = compressed_content;
    198       }
    199     }
    200   }
    201   return true;
    202 }
    203 
    204 std::string CompressSerializedResources(const std::string& resources,
    205                                         const int dictionary_sample_every) {
    206   std::unique_ptr<ResourcePoolT> unpacked_resources(
    207       flatbuffers::GetRoot<ResourcePool>(resources.data())->UnPack());
    208   TC3_CHECK(unpacked_resources != nullptr);
    209   TC3_CHECK(
    210       CompressResources(unpacked_resources.get(), dictionary_sample_every));
    211   flatbuffers::FlatBufferBuilder builder;
    212   builder.Finish(ResourcePool::Pack(builder, unpacked_resources.get()));
    213   return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
    214                      builder.GetSize());
    215 }
    216 
    217 }  // namespace libtextclassifier3
    218