1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "compile/PseudolocaleGenerator.h" 18 19 #include <algorithm> 20 21 #include "ResourceTable.h" 22 #include "ResourceValues.h" 23 #include "ValueVisitor.h" 24 #include "compile/Pseudolocalizer.h" 25 #include "util/Util.h" 26 27 using android::StringPiece; 28 using android::StringPiece16; 29 30 namespace aapt { 31 32 // The struct that represents both Span objects and UntranslatableSections. 33 struct UnifiedSpan { 34 // Only present for Span objects. If not present, this was an UntranslatableSection. 35 Maybe<std::string> tag; 36 37 // The UTF-16 index into the string where this span starts. 38 uint32_t first_char; 39 40 // The UTF-16 index into the string where this span ends, inclusive. 41 uint32_t last_char; 42 }; 43 44 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { 45 if (left.first_char < right.first_char) { 46 return true; 47 } else if (left.first_char > right.first_char) { 48 return false; 49 } else if (left.last_char < right.last_char) { 50 return true; 51 } 52 return false; 53 } 54 55 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { 56 return UnifiedSpan{*span.name, span.first_char, span.last_char}; 57 } 58 59 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { 60 return UnifiedSpan{ 61 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1}; 62 } 63 64 // Merges the Span and UntranslatableSections of this StyledString into a single vector of 65 // UnifiedSpans. This will first check that the Spans are sorted in ascending order. 66 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) { 67 // Ensure the Spans are sorted and converted. 68 std::vector<UnifiedSpan> sorted_spans; 69 sorted_spans.reserve(string.value->spans.size()); 70 std::transform(string.value->spans.begin(), string.value->spans.end(), 71 std::back_inserter(sorted_spans), SpanToUnifiedSpan); 72 73 // Stable sort to ensure tag sequences like "<b><i>" are preserved. 74 std::stable_sort(sorted_spans.begin(), sorted_spans.end()); 75 76 // Ensure the UntranslatableSections are sorted and converted. 77 std::vector<UnifiedSpan> sorted_untranslatable_sections; 78 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); 79 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), 80 std::back_inserter(sorted_untranslatable_sections), 81 UntranslatableSectionToUnifiedSpan); 82 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); 83 84 std::vector<UnifiedSpan> merged_spans; 85 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); 86 auto span_iter = sorted_spans.begin(); 87 auto untranslatable_iter = sorted_untranslatable_sections.begin(); 88 while (span_iter != sorted_spans.end() && 89 untranslatable_iter != sorted_untranslatable_sections.end()) { 90 if (*span_iter < *untranslatable_iter) { 91 merged_spans.push_back(std::move(*span_iter)); 92 ++span_iter; 93 } else { 94 merged_spans.push_back(std::move(*untranslatable_iter)); 95 ++untranslatable_iter; 96 } 97 } 98 99 while (span_iter != sorted_spans.end()) { 100 merged_spans.push_back(std::move(*span_iter)); 101 ++span_iter; 102 } 103 104 while (untranslatable_iter != sorted_untranslatable_sections.end()) { 105 merged_spans.push_back(std::move(*untranslatable_iter)); 106 ++untranslatable_iter; 107 } 108 return merged_spans; 109 } 110 111 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string, 112 Pseudolocalizer::Method method, 113 StringPool* pool) { 114 Pseudolocalizer localizer(method); 115 116 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. 117 // This will effectively subdivide the string into multiple sections that can be individually 118 // pseudolocalized, while keeping the span indices synchronized. 119 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string); 120 121 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the 122 // runtime. So we will do all our processing in UTF-16, then convert back. 123 const std::u16string text16 = util::Utf8ToUtf16(string->value->value); 124 125 // Convenient wrapper around the text that allows us to work with StringPieces. 126 const StringPiece16 text(text16); 127 128 // The new string. 129 std::string new_string = localizer.Start(); 130 131 // The stack that keeps track of what nested Span we're in. 132 std::vector<size_t> span_stack; 133 134 // The current position in the original text. 135 uint32_t cursor = 0u; 136 137 // The current position in the new text. 138 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()), 139 new_string.size(), false); 140 141 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. 142 bool translatable = true; 143 size_t span_idx = 0u; 144 while (span_idx < merged_spans.size() || !span_stack.empty()) { 145 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; 146 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; 147 148 if (span != nullptr) { 149 if (parent_span == nullptr || parent_span->last_char > span->first_char) { 150 // There is no parent, or this span is the child of the parent. 151 // Pseudolocalize all the text until this span. 152 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); 153 cursor += substr.size(); 154 155 // Pseudolocalize the substring. 156 std::string new_substr = util::Utf16ToUtf8(substr); 157 if (translatable) { 158 new_substr = localizer.Text(new_substr); 159 } 160 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), 161 new_substr.size(), false); 162 new_string += new_substr; 163 164 // Rewrite the first_char. 165 span->first_char = new_cursor; 166 if (!span->tag) { 167 // An untranslatable section has begun! 168 translatable = false; 169 } 170 span_stack.push_back(span_idx); 171 ++span_idx; 172 continue; 173 } 174 } 175 176 if (parent_span != nullptr) { 177 // There is a parent, and either this span is not a child of it, or there are no more spans. 178 // Pop this off the stack. 179 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); 180 cursor += substr.size(); 181 182 // Pseudolocalize the substring. 183 std::string new_substr = util::Utf16ToUtf8(substr); 184 if (translatable) { 185 new_substr = localizer.Text(new_substr); 186 } 187 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), 188 new_substr.size(), false); 189 new_string += new_substr; 190 191 parent_span->last_char = new_cursor - 1; 192 if (parent_span->tag) { 193 // An end to an untranslatable section. 194 translatable = true; 195 } 196 span_stack.pop_back(); 197 } 198 } 199 200 // Finish the pseudolocalization at the end of the string. 201 new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); 202 new_string += localizer.End(); 203 204 StyleString localized; 205 localized.str = std::move(new_string); 206 207 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. 208 for (UnifiedSpan& span : merged_spans) { 209 if (span.tag) { 210 localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); 211 } 212 } 213 return util::make_unique<StyledString>(pool->MakeRef(localized)); 214 } 215 216 namespace { 217 218 class Visitor : public RawValueVisitor { 219 public: 220 // Either value or item will be populated upon visiting the value. 221 std::unique_ptr<Value> value; 222 std::unique_ptr<Item> item; 223 224 Visitor(StringPool* pool, Pseudolocalizer::Method method) 225 : pool_(pool), method_(method), localizer_(method) {} 226 227 void Visit(Plural* plural) override { 228 std::unique_ptr<Plural> localized = util::make_unique<Plural>(); 229 for (size_t i = 0; i < plural->values.size(); i++) { 230 Visitor sub_visitor(pool_, method_); 231 if (plural->values[i]) { 232 plural->values[i]->Accept(&sub_visitor); 233 if (sub_visitor.value) { 234 localized->values[i] = std::move(sub_visitor.item); 235 } else { 236 localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); 237 } 238 } 239 } 240 localized->SetSource(plural->GetSource()); 241 localized->SetWeak(true); 242 value = std::move(localized); 243 } 244 245 void Visit(String* string) override { 246 const StringPiece original_string = *string->value; 247 std::string result = localizer_.Start(); 248 249 // Pseudolocalize only the translatable sections. 250 size_t start = 0u; 251 for (const UntranslatableSection& section : string->untranslatable_sections) { 252 // Pseudolocalize the content before the untranslatable section. 253 const size_t len = section.start - start; 254 if (len > 0u) { 255 result += localizer_.Text(original_string.substr(start, len)); 256 } 257 258 // Copy the untranslatable content. 259 result += original_string.substr(section.start, section.end - section.start); 260 start = section.end; 261 } 262 263 // Pseudolocalize the content after the last untranslatable section. 264 if (start != original_string.size()) { 265 const size_t len = original_string.size() - start; 266 result += localizer_.Text(original_string.substr(start, len)); 267 } 268 result += localizer_.End(); 269 270 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); 271 localized->SetSource(string->GetSource()); 272 localized->SetWeak(true); 273 item = std::move(localized); 274 } 275 276 void Visit(StyledString* string) override { 277 item = PseudolocalizeStyledString(string, method_, pool_); 278 item->SetSource(string->GetSource()); 279 item->SetWeak(true); 280 } 281 282 private: 283 DISALLOW_COPY_AND_ASSIGN(Visitor); 284 285 StringPool* pool_; 286 Pseudolocalizer::Method method_; 287 Pseudolocalizer localizer_; 288 }; 289 290 ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base, 291 Pseudolocalizer::Method m) { 292 ConfigDescription modified = base; 293 switch (m) { 294 case Pseudolocalizer::Method::kAccent: 295 modified.language[0] = 'e'; 296 modified.language[1] = 'n'; 297 modified.country[0] = 'X'; 298 modified.country[1] = 'A'; 299 break; 300 301 case Pseudolocalizer::Method::kBidi: 302 modified.language[0] = 'a'; 303 modified.language[1] = 'r'; 304 modified.country[0] = 'X'; 305 modified.country[1] = 'B'; 306 break; 307 default: 308 break; 309 } 310 return modified; 311 } 312 313 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method, 314 ResourceConfigValue* original_value, 315 StringPool* pool, ResourceEntry* entry) { 316 Visitor visitor(pool, method); 317 original_value->value->Accept(&visitor); 318 319 std::unique_ptr<Value> localized_value; 320 if (visitor.value) { 321 localized_value = std::move(visitor.value); 322 } else if (visitor.item) { 323 localized_value = std::move(visitor.item); 324 } 325 326 if (!localized_value) { 327 return; 328 } 329 330 ConfigDescription config_with_accent = 331 ModifyConfigForPseudoLocale(original_value->config, method); 332 333 ResourceConfigValue* new_config_value = 334 entry->FindOrCreateValue(config_with_accent, original_value->product); 335 if (!new_config_value->value) { 336 // Only use auto-generated pseudo-localization if none is defined. 337 new_config_value->value = std::move(localized_value); 338 } 339 } 340 341 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is 342 // translatable. 343 static bool IsPseudolocalizable(ResourceConfigValue* config_value) { 344 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); 345 if (diff & ConfigDescription::CONFIG_LOCALE) { 346 return false; 347 } 348 return config_value->value->IsTranslatable(); 349 } 350 351 } // namespace 352 353 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { 354 for (auto& package : table->packages) { 355 for (auto& type : package->types) { 356 for (auto& entry : type->entries) { 357 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); 358 for (ResourceConfigValue* value : values) { 359 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, 360 entry.get()); 361 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, 362 entry.get()); 363 } 364 } 365 } 366 } 367 return true; 368 } 369 370 } // namespace aapt 371