Home | History | Annotate | Download | only in compile
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "compile/PseudolocaleGenerator.h"
     18 
     19 #include <algorithm>
     20 
     21 #include "ResourceTable.h"
     22 #include "ResourceValues.h"
     23 #include "ValueVisitor.h"
     24 #include "compile/Pseudolocalizer.h"
     25 #include "util/Util.h"
     26 
     27 using ::android::ConfigDescription;
     28 using ::android::StringPiece;
     29 using ::android::StringPiece16;
     30 
     31 namespace aapt {
     32 
// The struct that represents both Span objects and UntranslatableSections.
// Both kinds are reduced to an inclusive [first_char, last_char] range so that they can be
// sorted and merged into a single sequence; whether `tag` holds a value is what tells them apart.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  Maybe<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  uint32_t first_char;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char;
};
     44 
     45 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
     46   if (left.first_char < right.first_char) {
     47     return true;
     48   } else if (left.first_char > right.first_char) {
     49     return false;
     50   } else if (left.last_char < right.last_char) {
     51     return true;
     52   }
     53   return false;
     54 }
     55 
     56 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
     57   return UnifiedSpan{*span.name, span.first_char, span.last_char};
     58 }
     59 
     60 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
     61   return UnifiedSpan{
     62       {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
     63 }
     64 
     65 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
     66 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
     67 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
     68   // Ensure the Spans are sorted and converted.
     69   std::vector<UnifiedSpan> sorted_spans;
     70   sorted_spans.reserve(string.value->spans.size());
     71   std::transform(string.value->spans.begin(), string.value->spans.end(),
     72                  std::back_inserter(sorted_spans), SpanToUnifiedSpan);
     73 
     74   // Stable sort to ensure tag sequences like "<b><i>" are preserved.
     75   std::stable_sort(sorted_spans.begin(), sorted_spans.end());
     76 
     77   // Ensure the UntranslatableSections are sorted and converted.
     78   std::vector<UnifiedSpan> sorted_untranslatable_sections;
     79   sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
     80   std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
     81                  std::back_inserter(sorted_untranslatable_sections),
     82                  UntranslatableSectionToUnifiedSpan);
     83   std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
     84 
     85   std::vector<UnifiedSpan> merged_spans;
     86   merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
     87   auto span_iter = sorted_spans.begin();
     88   auto untranslatable_iter = sorted_untranslatable_sections.begin();
     89   while (span_iter != sorted_spans.end() &&
     90          untranslatable_iter != sorted_untranslatable_sections.end()) {
     91     if (*span_iter < *untranslatable_iter) {
     92       merged_spans.push_back(std::move(*span_iter));
     93       ++span_iter;
     94     } else {
     95       merged_spans.push_back(std::move(*untranslatable_iter));
     96       ++untranslatable_iter;
     97     }
     98   }
     99 
    100   while (span_iter != sorted_spans.end()) {
    101     merged_spans.push_back(std::move(*span_iter));
    102     ++span_iter;
    103   }
    104 
    105   while (untranslatable_iter != sorted_untranslatable_sections.end()) {
    106     merged_spans.push_back(std::move(*untranslatable_iter));
    107     ++untranslatable_iter;
    108   }
    109   return merged_spans;
    110 }
    111 
    112 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
    113                                                          Pseudolocalizer::Method method,
    114                                                          StringPool* pool) {
    115   Pseudolocalizer localizer(method);
    116 
    117   // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
    118   // This will effectively subdivide the string into multiple sections that can be individually
    119   // pseudolocalized, while keeping the span indices synchronized.
    120   std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
    121 
    122   // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
    123   // runtime. So we will do all our processing in UTF-16, then convert back.
    124   const std::u16string text16 = util::Utf8ToUtf16(string->value->value);
    125 
    126   // Convenient wrapper around the text that allows us to work with StringPieces.
    127   const StringPiece16 text(text16);
    128 
    129   // The new string.
    130   std::string new_string = localizer.Start();
    131 
    132   // The stack that keeps track of what nested Span we're in.
    133   std::vector<size_t> span_stack;
    134 
    135   // The current position in the original text.
    136   uint32_t cursor = 0u;
    137 
    138   // The current position in the new text.
    139   uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
    140                                              new_string.size(), false);
    141 
    142   // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
    143   bool translatable = true;
    144   size_t span_idx = 0u;
    145   while (span_idx < merged_spans.size() || !span_stack.empty()) {
    146     UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
    147     UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
    148 
    149     if (span != nullptr) {
    150       if (parent_span == nullptr || parent_span->last_char > span->first_char) {
    151         // There is no parent, or this span is the child of the parent.
    152         // Pseudolocalize all the text until this span.
    153         const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
    154         cursor += substr.size();
    155 
    156         // Pseudolocalize the substring.
    157         std::string new_substr = util::Utf16ToUtf8(substr);
    158         if (translatable) {
    159           new_substr = localizer.Text(new_substr);
    160         }
    161         new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
    162                                            new_substr.size(), false);
    163         new_string += new_substr;
    164 
    165         // Rewrite the first_char.
    166         span->first_char = new_cursor;
    167         if (!span->tag) {
    168           // An untranslatable section has begun!
    169           translatable = false;
    170         }
    171         span_stack.push_back(span_idx);
    172         ++span_idx;
    173         continue;
    174       }
    175     }
    176 
    177     if (parent_span != nullptr) {
    178       // There is a parent, and either this span is not a child of it, or there are no more spans.
    179       // Pop this off the stack.
    180       const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
    181       cursor += substr.size();
    182 
    183       // Pseudolocalize the substring.
    184       std::string new_substr = util::Utf16ToUtf8(substr);
    185       if (translatable) {
    186         new_substr = localizer.Text(new_substr);
    187       }
    188       new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
    189                                          new_substr.size(), false);
    190       new_string += new_substr;
    191 
    192       parent_span->last_char = new_cursor - 1;
    193       if (parent_span->tag) {
    194         // An end to an untranslatable section.
    195         translatable = true;
    196       }
    197       span_stack.pop_back();
    198     }
    199   }
    200 
    201   // Finish the pseudolocalization at the end of the string.
    202   new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
    203   new_string += localizer.End();
    204 
    205   StyleString localized;
    206   localized.str = std::move(new_string);
    207 
    208   // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
    209   for (UnifiedSpan& span : merged_spans) {
    210     if (span.tag) {
    211       localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
    212     }
    213   }
    214   return util::make_unique<StyledString>(pool->MakeRef(localized));
    215 }
    216 
    217 namespace {
    218 
    219 class Visitor : public ValueVisitor {
    220  public:
    221   // Either value or item will be populated upon visiting the value.
    222   std::unique_ptr<Value> value;
    223   std::unique_ptr<Item> item;
    224 
    225   Visitor(StringPool* pool, Pseudolocalizer::Method method)
    226       : pool_(pool), method_(method), localizer_(method) {}
    227 
    228   void Visit(Plural* plural) override {
    229     std::unique_ptr<Plural> localized = util::make_unique<Plural>();
    230     for (size_t i = 0; i < plural->values.size(); i++) {
    231       Visitor sub_visitor(pool_, method_);
    232       if (plural->values[i]) {
    233         plural->values[i]->Accept(&sub_visitor);
    234         if (sub_visitor.value) {
    235           localized->values[i] = std::move(sub_visitor.item);
    236         } else {
    237           localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
    238         }
    239       }
    240     }
    241     localized->SetSource(plural->GetSource());
    242     localized->SetWeak(true);
    243     value = std::move(localized);
    244   }
    245 
    246   void Visit(String* string) override {
    247     const StringPiece original_string = *string->value;
    248     std::string result = localizer_.Start();
    249 
    250     // Pseudolocalize only the translatable sections.
    251     size_t start = 0u;
    252     for (const UntranslatableSection& section : string->untranslatable_sections) {
    253       // Pseudolocalize the content before the untranslatable section.
    254       const size_t len = section.start - start;
    255       if (len > 0u) {
    256         result += localizer_.Text(original_string.substr(start, len));
    257       }
    258 
    259       // Copy the untranslatable content.
    260       result += original_string.substr(section.start, section.end - section.start);
    261       start = section.end;
    262     }
    263 
    264     // Pseudolocalize the content after the last untranslatable section.
    265     if (start != original_string.size()) {
    266       const size_t len = original_string.size() - start;
    267       result += localizer_.Text(original_string.substr(start, len));
    268     }
    269     result += localizer_.End();
    270 
    271     std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
    272     localized->SetSource(string->GetSource());
    273     localized->SetWeak(true);
    274     item = std::move(localized);
    275   }
    276 
    277   void Visit(StyledString* string) override {
    278     item = PseudolocalizeStyledString(string, method_, pool_);
    279     item->SetSource(string->GetSource());
    280     item->SetWeak(true);
    281   }
    282 
    283  private:
    284   DISALLOW_COPY_AND_ASSIGN(Visitor);
    285 
    286   StringPool* pool_;
    287   Pseudolocalizer::Method method_;
    288   Pseudolocalizer localizer_;
    289 };
    290 
    291 ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
    292                                               Pseudolocalizer::Method m) {
    293   ConfigDescription modified = base;
    294   switch (m) {
    295     case Pseudolocalizer::Method::kAccent:
    296       modified.language[0] = 'e';
    297       modified.language[1] = 'n';
    298       modified.country[0] = 'X';
    299       modified.country[1] = 'A';
    300       break;
    301 
    302     case Pseudolocalizer::Method::kBidi:
    303       modified.language[0] = 'a';
    304       modified.language[1] = 'r';
    305       modified.country[0] = 'X';
    306       modified.country[1] = 'B';
    307       break;
    308     default:
    309       break;
    310   }
    311   return modified;
    312 }
    313 
    314 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
    315                             ResourceConfigValue* original_value,
    316                             StringPool* pool, ResourceEntry* entry) {
    317   Visitor visitor(pool, method);
    318   original_value->value->Accept(&visitor);
    319 
    320   std::unique_ptr<Value> localized_value;
    321   if (visitor.value) {
    322     localized_value = std::move(visitor.value);
    323   } else if (visitor.item) {
    324     localized_value = std::move(visitor.item);
    325   }
    326 
    327   if (!localized_value) {
    328     return;
    329   }
    330 
    331   ConfigDescription config_with_accent =
    332       ModifyConfigForPseudoLocale(original_value->config, method);
    333 
    334   ResourceConfigValue* new_config_value =
    335       entry->FindOrCreateValue(config_with_accent, original_value->product);
    336   if (!new_config_value->value) {
    337     // Only use auto-generated pseudo-localization if none is defined.
    338     new_config_value->value = std::move(localized_value);
    339   }
    340 }
    341 
    342 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
    343 // translatable.
    344 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
    345   const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
    346   if (diff & ConfigDescription::CONFIG_LOCALE) {
    347     return false;
    348   }
    349   return config_value->value->IsTranslatable();
    350 }
    351 
    352 }  // namespace
    353 
    354 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
    355   for (auto& package : table->packages) {
    356     for (auto& type : package->types) {
    357       for (auto& entry : type->entries) {
    358         std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
    359         for (ResourceConfigValue* value : values) {
    360           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
    361                                  entry.get());
    362           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
    363                                  entry.get());
    364         }
    365       }
    366     }
    367   }
    368   return true;
    369 }
    370 
    371 }  // namespace aapt
    372