1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "compile/PseudolocaleGenerator.h" 18 19 #include <algorithm> 20 21 #include "ResourceTable.h" 22 #include "ResourceValues.h" 23 #include "ValueVisitor.h" 24 #include "compile/Pseudolocalizer.h" 25 #include "util/Util.h" 26 27 using ::android::ConfigDescription; 28 using ::android::StringPiece; 29 using ::android::StringPiece16; 30 31 namespace aapt { 32 33 // The struct that represents both Span objects and UntranslatableSections. 34 struct UnifiedSpan { 35 // Only present for Span objects. If not present, this was an UntranslatableSection. 36 Maybe<std::string> tag; 37 38 // The UTF-16 index into the string where this span starts. 39 uint32_t first_char; 40 41 // The UTF-16 index into the string where this span ends, inclusive. 42 uint32_t last_char; 43 }; 44 45 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { 46 if (left.first_char < right.first_char) { 47 return true; 48 } else if (left.first_char > right.first_char) { 49 return false; 50 } else if (left.last_char < right.last_char) { 51 return true; 52 } 53 return false; 54 } 55 56 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { 57 return UnifiedSpan{*span.name, span.first_char, span.last_char}; 58 } 59 60 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { 61 return UnifiedSpan{ 62 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1}; 63 } 64 65 // Merges the Span and UntranslatableSections of this StyledString into a single vector of 66 // UnifiedSpans. This will first check that the Spans are sorted in ascending order. 67 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) { 68 // Ensure the Spans are sorted and converted. 69 std::vector<UnifiedSpan> sorted_spans; 70 sorted_spans.reserve(string.value->spans.size()); 71 std::transform(string.value->spans.begin(), string.value->spans.end(), 72 std::back_inserter(sorted_spans), SpanToUnifiedSpan); 73 74 // Stable sort to ensure tag sequences like "<b><i>" are preserved. 75 std::stable_sort(sorted_spans.begin(), sorted_spans.end()); 76 77 // Ensure the UntranslatableSections are sorted and converted. 78 std::vector<UnifiedSpan> sorted_untranslatable_sections; 79 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); 80 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), 81 std::back_inserter(sorted_untranslatable_sections), 82 UntranslatableSectionToUnifiedSpan); 83 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); 84 85 std::vector<UnifiedSpan> merged_spans; 86 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); 87 auto span_iter = sorted_spans.begin(); 88 auto untranslatable_iter = sorted_untranslatable_sections.begin(); 89 while (span_iter != sorted_spans.end() && 90 untranslatable_iter != sorted_untranslatable_sections.end()) { 91 if (*span_iter < *untranslatable_iter) { 92 merged_spans.push_back(std::move(*span_iter)); 93 ++span_iter; 94 } else { 95 merged_spans.push_back(std::move(*untranslatable_iter)); 96 ++untranslatable_iter; 97 } 98 } 99 100 while (span_iter != sorted_spans.end()) { 101 merged_spans.push_back(std::move(*span_iter)); 102 ++span_iter; 103 } 104 105 while (untranslatable_iter != sorted_untranslatable_sections.end()) { 106 merged_spans.push_back(std::move(*untranslatable_iter)); 107 ++untranslatable_iter; 108 } 109 return merged_spans; 110 } 111 112 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string, 113 Pseudolocalizer::Method method, 114 StringPool* pool) { 115 Pseudolocalizer localizer(method); 116 117 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. 118 // This will effectively subdivide the string into multiple sections that can be individually 119 // pseudolocalized, while keeping the span indices synchronized. 120 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string); 121 122 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the 123 // runtime. So we will do all our processing in UTF-16, then convert back. 124 const std::u16string text16 = util::Utf8ToUtf16(string->value->value); 125 126 // Convenient wrapper around the text that allows us to work with StringPieces. 127 const StringPiece16 text(text16); 128 129 // The new string. 130 std::string new_string = localizer.Start(); 131 132 // The stack that keeps track of what nested Span we're in. 133 std::vector<size_t> span_stack; 134 135 // The current position in the original text. 136 uint32_t cursor = 0u; 137 138 // The current position in the new text. 139 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()), 140 new_string.size(), false); 141 142 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. 143 bool translatable = true; 144 size_t span_idx = 0u; 145 while (span_idx < merged_spans.size() || !span_stack.empty()) { 146 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; 147 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; 148 149 if (span != nullptr) { 150 if (parent_span == nullptr || parent_span->last_char > span->first_char) { 151 // There is no parent, or this span is the child of the parent. 152 // Pseudolocalize all the text until this span. 153 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); 154 cursor += substr.size(); 155 156 // Pseudolocalize the substring. 157 std::string new_substr = util::Utf16ToUtf8(substr); 158 if (translatable) { 159 new_substr = localizer.Text(new_substr); 160 } 161 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), 162 new_substr.size(), false); 163 new_string += new_substr; 164 165 // Rewrite the first_char. 166 span->first_char = new_cursor; 167 if (!span->tag) { 168 // An untranslatable section has begun! 169 translatable = false; 170 } 171 span_stack.push_back(span_idx); 172 ++span_idx; 173 continue; 174 } 175 } 176 177 if (parent_span != nullptr) { 178 // There is a parent, and either this span is not a child of it, or there are no more spans. 179 // Pop this off the stack. 180 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); 181 cursor += substr.size(); 182 183 // Pseudolocalize the substring. 184 std::string new_substr = util::Utf16ToUtf8(substr); 185 if (translatable) { 186 new_substr = localizer.Text(new_substr); 187 } 188 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), 189 new_substr.size(), false); 190 new_string += new_substr; 191 192 parent_span->last_char = new_cursor - 1; 193 if (parent_span->tag) { 194 // An end to an untranslatable section. 195 translatable = true; 196 } 197 span_stack.pop_back(); 198 } 199 } 200 201 // Finish the pseudolocalization at the end of the string. 202 new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); 203 new_string += localizer.End(); 204 205 StyleString localized; 206 localized.str = std::move(new_string); 207 208 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. 209 for (UnifiedSpan& span : merged_spans) { 210 if (span.tag) { 211 localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); 212 } 213 } 214 return util::make_unique<StyledString>(pool->MakeRef(localized)); 215 } 216 217 namespace { 218 219 class Visitor : public ValueVisitor { 220 public: 221 // Either value or item will be populated upon visiting the value. 222 std::unique_ptr<Value> value; 223 std::unique_ptr<Item> item; 224 225 Visitor(StringPool* pool, Pseudolocalizer::Method method) 226 : pool_(pool), method_(method), localizer_(method) {} 227 228 void Visit(Plural* plural) override { 229 std::unique_ptr<Plural> localized = util::make_unique<Plural>(); 230 for (size_t i = 0; i < plural->values.size(); i++) { 231 Visitor sub_visitor(pool_, method_); 232 if (plural->values[i]) { 233 plural->values[i]->Accept(&sub_visitor); 234 if (sub_visitor.value) { 235 localized->values[i] = std::move(sub_visitor.item); 236 } else { 237 localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); 238 } 239 } 240 } 241 localized->SetSource(plural->GetSource()); 242 localized->SetWeak(true); 243 value = std::move(localized); 244 } 245 246 void Visit(String* string) override { 247 const StringPiece original_string = *string->value; 248 std::string result = localizer_.Start(); 249 250 // Pseudolocalize only the translatable sections. 251 size_t start = 0u; 252 for (const UntranslatableSection& section : string->untranslatable_sections) { 253 // Pseudolocalize the content before the untranslatable section. 254 const size_t len = section.start - start; 255 if (len > 0u) { 256 result += localizer_.Text(original_string.substr(start, len)); 257 } 258 259 // Copy the untranslatable content. 260 result += original_string.substr(section.start, section.end - section.start); 261 start = section.end; 262 } 263 264 // Pseudolocalize the content after the last untranslatable section. 265 if (start != original_string.size()) { 266 const size_t len = original_string.size() - start; 267 result += localizer_.Text(original_string.substr(start, len)); 268 } 269 result += localizer_.End(); 270 271 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); 272 localized->SetSource(string->GetSource()); 273 localized->SetWeak(true); 274 item = std::move(localized); 275 } 276 277 void Visit(StyledString* string) override { 278 item = PseudolocalizeStyledString(string, method_, pool_); 279 item->SetSource(string->GetSource()); 280 item->SetWeak(true); 281 } 282 283 private: 284 DISALLOW_COPY_AND_ASSIGN(Visitor); 285 286 StringPool* pool_; 287 Pseudolocalizer::Method method_; 288 Pseudolocalizer localizer_; 289 }; 290 291 ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base, 292 Pseudolocalizer::Method m) { 293 ConfigDescription modified = base; 294 switch (m) { 295 case Pseudolocalizer::Method::kAccent: 296 modified.language[0] = 'e'; 297 modified.language[1] = 'n'; 298 modified.country[0] = 'X'; 299 modified.country[1] = 'A'; 300 break; 301 302 case Pseudolocalizer::Method::kBidi: 303 modified.language[0] = 'a'; 304 modified.language[1] = 'r'; 305 modified.country[0] = 'X'; 306 modified.country[1] = 'B'; 307 break; 308 default: 309 break; 310 } 311 return modified; 312 } 313 314 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method, 315 ResourceConfigValue* original_value, 316 StringPool* pool, ResourceEntry* entry) { 317 Visitor visitor(pool, method); 318 original_value->value->Accept(&visitor); 319 320 std::unique_ptr<Value> localized_value; 321 if (visitor.value) { 322 localized_value = std::move(visitor.value); 323 } else if (visitor.item) { 324 localized_value = std::move(visitor.item); 325 } 326 327 if (!localized_value) { 328 return; 329 } 330 331 ConfigDescription config_with_accent = 332 ModifyConfigForPseudoLocale(original_value->config, method); 333 334 ResourceConfigValue* new_config_value = 335 entry->FindOrCreateValue(config_with_accent, original_value->product); 336 if (!new_config_value->value) { 337 // Only use auto-generated pseudo-localization if none is defined. 338 new_config_value->value = std::move(localized_value); 339 } 340 } 341 342 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is 343 // translatable. 344 static bool IsPseudolocalizable(ResourceConfigValue* config_value) { 345 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); 346 if (diff & ConfigDescription::CONFIG_LOCALE) { 347 return false; 348 } 349 return config_value->value->IsTranslatable(); 350 } 351 352 } // namespace 353 354 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { 355 for (auto& package : table->packages) { 356 for (auto& type : package->types) { 357 for (auto& entry : type->entries) { 358 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); 359 for (ResourceConfigValue* value : values) { 360 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, 361 entry.get()); 362 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, 363 entry.get()); 364 } 365 } 366 } 367 } 368 return true; 369 } 370 371 } // namespace aapt 372