Home | History | Annotate | Download | only in compile
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "compile/PseudolocaleGenerator.h"
     18 
     19 #include <algorithm>
     20 
     21 #include "ResourceTable.h"
     22 #include "ResourceValues.h"
     23 #include "ValueVisitor.h"
     24 #include "compile/Pseudolocalizer.h"
     25 #include "util/Util.h"
     26 
     27 using android::StringPiece;
     28 using android::StringPiece16;
     29 
     30 namespace aapt {
     31 
     32 // The struct that represents both Span objects and UntranslatableSections.
     33 struct UnifiedSpan {
     34   // Only present for Span objects. If not present, this was an UntranslatableSection.
     35   Maybe<std::string> tag;
     36 
     37   // The UTF-16 index into the string where this span starts.
     38   uint32_t first_char;
     39 
     40   // The UTF-16 index into the string where this span ends, inclusive.
     41   uint32_t last_char;
     42 };
     43 
     44 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
     45   if (left.first_char < right.first_char) {
     46     return true;
     47   } else if (left.first_char > right.first_char) {
     48     return false;
     49   } else if (left.last_char < right.last_char) {
     50     return true;
     51   }
     52   return false;
     53 }
     54 
     55 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
     56   return UnifiedSpan{*span.name, span.first_char, span.last_char};
     57 }
     58 
     59 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
     60   return UnifiedSpan{
     61       {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
     62 }
     63 
     64 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
     65 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
     66 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
     67   // Ensure the Spans are sorted and converted.
     68   std::vector<UnifiedSpan> sorted_spans;
     69   sorted_spans.reserve(string.value->spans.size());
     70   std::transform(string.value->spans.begin(), string.value->spans.end(),
     71                  std::back_inserter(sorted_spans), SpanToUnifiedSpan);
     72 
     73   // Stable sort to ensure tag sequences like "<b><i>" are preserved.
     74   std::stable_sort(sorted_spans.begin(), sorted_spans.end());
     75 
     76   // Ensure the UntranslatableSections are sorted and converted.
     77   std::vector<UnifiedSpan> sorted_untranslatable_sections;
     78   sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
     79   std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
     80                  std::back_inserter(sorted_untranslatable_sections),
     81                  UntranslatableSectionToUnifiedSpan);
     82   std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
     83 
     84   std::vector<UnifiedSpan> merged_spans;
     85   merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
     86   auto span_iter = sorted_spans.begin();
     87   auto untranslatable_iter = sorted_untranslatable_sections.begin();
     88   while (span_iter != sorted_spans.end() &&
     89          untranslatable_iter != sorted_untranslatable_sections.end()) {
     90     if (*span_iter < *untranslatable_iter) {
     91       merged_spans.push_back(std::move(*span_iter));
     92       ++span_iter;
     93     } else {
     94       merged_spans.push_back(std::move(*untranslatable_iter));
     95       ++untranslatable_iter;
     96     }
     97   }
     98 
     99   while (span_iter != sorted_spans.end()) {
    100     merged_spans.push_back(std::move(*span_iter));
    101     ++span_iter;
    102   }
    103 
    104   while (untranslatable_iter != sorted_untranslatable_sections.end()) {
    105     merged_spans.push_back(std::move(*untranslatable_iter));
    106     ++untranslatable_iter;
    107   }
    108   return merged_spans;
    109 }
    110 
    111 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
    112                                                          Pseudolocalizer::Method method,
    113                                                          StringPool* pool) {
    114   Pseudolocalizer localizer(method);
    115 
    116   // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
    117   // This will effectively subdivide the string into multiple sections that can be individually
    118   // pseudolocalized, while keeping the span indices synchronized.
    119   std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
    120 
    121   // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
    122   // runtime. So we will do all our processing in UTF-16, then convert back.
    123   const std::u16string text16 = util::Utf8ToUtf16(string->value->value);
    124 
    125   // Convenient wrapper around the text that allows us to work with StringPieces.
    126   const StringPiece16 text(text16);
    127 
    128   // The new string.
    129   std::string new_string = localizer.Start();
    130 
    131   // The stack that keeps track of what nested Span we're in.
    132   std::vector<size_t> span_stack;
    133 
    134   // The current position in the original text.
    135   uint32_t cursor = 0u;
    136 
    137   // The current position in the new text.
    138   uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
    139                                              new_string.size(), false);
    140 
    141   // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
    142   bool translatable = true;
    143   size_t span_idx = 0u;
    144   while (span_idx < merged_spans.size() || !span_stack.empty()) {
    145     UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
    146     UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
    147 
    148     if (span != nullptr) {
    149       if (parent_span == nullptr || parent_span->last_char > span->first_char) {
    150         // There is no parent, or this span is the child of the parent.
    151         // Pseudolocalize all the text until this span.
    152         const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
    153         cursor += substr.size();
    154 
    155         // Pseudolocalize the substring.
    156         std::string new_substr = util::Utf16ToUtf8(substr);
    157         if (translatable) {
    158           new_substr = localizer.Text(new_substr);
    159         }
    160         new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
    161                                            new_substr.size(), false);
    162         new_string += new_substr;
    163 
    164         // Rewrite the first_char.
    165         span->first_char = new_cursor;
    166         if (!span->tag) {
    167           // An untranslatable section has begun!
    168           translatable = false;
    169         }
    170         span_stack.push_back(span_idx);
    171         ++span_idx;
    172         continue;
    173       }
    174     }
    175 
    176     if (parent_span != nullptr) {
    177       // There is a parent, and either this span is not a child of it, or there are no more spans.
    178       // Pop this off the stack.
    179       const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
    180       cursor += substr.size();
    181 
    182       // Pseudolocalize the substring.
    183       std::string new_substr = util::Utf16ToUtf8(substr);
    184       if (translatable) {
    185         new_substr = localizer.Text(new_substr);
    186       }
    187       new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
    188                                          new_substr.size(), false);
    189       new_string += new_substr;
    190 
    191       parent_span->last_char = new_cursor - 1;
    192       if (parent_span->tag) {
    193         // An end to an untranslatable section.
    194         translatable = true;
    195       }
    196       span_stack.pop_back();
    197     }
    198   }
    199 
    200   // Finish the pseudolocalization at the end of the string.
    201   new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
    202   new_string += localizer.End();
    203 
    204   StyleString localized;
    205   localized.str = std::move(new_string);
    206 
    207   // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
    208   for (UnifiedSpan& span : merged_spans) {
    209     if (span.tag) {
    210       localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
    211     }
    212   }
    213   return util::make_unique<StyledString>(pool->MakeRef(localized));
    214 }
    215 
    216 namespace {
    217 
    218 class Visitor : public RawValueVisitor {
    219  public:
    220   // Either value or item will be populated upon visiting the value.
    221   std::unique_ptr<Value> value;
    222   std::unique_ptr<Item> item;
    223 
    224   Visitor(StringPool* pool, Pseudolocalizer::Method method)
    225       : pool_(pool), method_(method), localizer_(method) {}
    226 
    227   void Visit(Plural* plural) override {
    228     std::unique_ptr<Plural> localized = util::make_unique<Plural>();
    229     for (size_t i = 0; i < plural->values.size(); i++) {
    230       Visitor sub_visitor(pool_, method_);
    231       if (plural->values[i]) {
    232         plural->values[i]->Accept(&sub_visitor);
    233         if (sub_visitor.value) {
    234           localized->values[i] = std::move(sub_visitor.item);
    235         } else {
    236           localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
    237         }
    238       }
    239     }
    240     localized->SetSource(plural->GetSource());
    241     localized->SetWeak(true);
    242     value = std::move(localized);
    243   }
    244 
    245   void Visit(String* string) override {
    246     const StringPiece original_string = *string->value;
    247     std::string result = localizer_.Start();
    248 
    249     // Pseudolocalize only the translatable sections.
    250     size_t start = 0u;
    251     for (const UntranslatableSection& section : string->untranslatable_sections) {
    252       // Pseudolocalize the content before the untranslatable section.
    253       const size_t len = section.start - start;
    254       if (len > 0u) {
    255         result += localizer_.Text(original_string.substr(start, len));
    256       }
    257 
    258       // Copy the untranslatable content.
    259       result += original_string.substr(section.start, section.end - section.start);
    260       start = section.end;
    261     }
    262 
    263     // Pseudolocalize the content after the last untranslatable section.
    264     if (start != original_string.size()) {
    265       const size_t len = original_string.size() - start;
    266       result += localizer_.Text(original_string.substr(start, len));
    267     }
    268     result += localizer_.End();
    269 
    270     std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
    271     localized->SetSource(string->GetSource());
    272     localized->SetWeak(true);
    273     item = std::move(localized);
    274   }
    275 
    276   void Visit(StyledString* string) override {
    277     item = PseudolocalizeStyledString(string, method_, pool_);
    278     item->SetSource(string->GetSource());
    279     item->SetWeak(true);
    280   }
    281 
    282  private:
    283   DISALLOW_COPY_AND_ASSIGN(Visitor);
    284 
    285   StringPool* pool_;
    286   Pseudolocalizer::Method method_;
    287   Pseudolocalizer localizer_;
    288 };
    289 
    290 ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
    291                                               Pseudolocalizer::Method m) {
    292   ConfigDescription modified = base;
    293   switch (m) {
    294     case Pseudolocalizer::Method::kAccent:
    295       modified.language[0] = 'e';
    296       modified.language[1] = 'n';
    297       modified.country[0] = 'X';
    298       modified.country[1] = 'A';
    299       break;
    300 
    301     case Pseudolocalizer::Method::kBidi:
    302       modified.language[0] = 'a';
    303       modified.language[1] = 'r';
    304       modified.country[0] = 'X';
    305       modified.country[1] = 'B';
    306       break;
    307     default:
    308       break;
    309   }
    310   return modified;
    311 }
    312 
    313 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
    314                             ResourceConfigValue* original_value,
    315                             StringPool* pool, ResourceEntry* entry) {
    316   Visitor visitor(pool, method);
    317   original_value->value->Accept(&visitor);
    318 
    319   std::unique_ptr<Value> localized_value;
    320   if (visitor.value) {
    321     localized_value = std::move(visitor.value);
    322   } else if (visitor.item) {
    323     localized_value = std::move(visitor.item);
    324   }
    325 
    326   if (!localized_value) {
    327     return;
    328   }
    329 
    330   ConfigDescription config_with_accent =
    331       ModifyConfigForPseudoLocale(original_value->config, method);
    332 
    333   ResourceConfigValue* new_config_value =
    334       entry->FindOrCreateValue(config_with_accent, original_value->product);
    335   if (!new_config_value->value) {
    336     // Only use auto-generated pseudo-localization if none is defined.
    337     new_config_value->value = std::move(localized_value);
    338   }
    339 }
    340 
    341 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
    342 // translatable.
    343 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
    344   const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
    345   if (diff & ConfigDescription::CONFIG_LOCALE) {
    346     return false;
    347   }
    348   return config_value->value->IsTranslatable();
    349 }
    350 
    351 }  // namespace
    352 
    353 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
    354   for (auto& package : table->packages) {
    355     for (auto& type : package->types) {
    356       for (auto& entry : type->entries) {
    357         std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
    358         for (ResourceConfigValue* value : values) {
    359           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
    360                                  entry.get());
    361           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
    362                                  entry.get());
    363         }
    364       }
    365     }
    366   }
    367   return true;
    368 }
    369 
    370 }  // namespace aapt
    371