1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "util/BigBuffer.h" 18 #include "util/Maybe.h" 19 #include "util/StringPiece.h" 20 #include "util/Util.h" 21 22 #include <algorithm> 23 #include <ostream> 24 #include <string> 25 #include <utils/Unicode.h> 26 #include <vector> 27 28 namespace aapt { 29 namespace util { 30 31 static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep, 32 const std::function<char(char)>& f) { 33 std::vector<std::string> parts; 34 const StringPiece::const_iterator end = std::end(str); 35 StringPiece::const_iterator start = std::begin(str); 36 StringPiece::const_iterator current; 37 do { 38 current = std::find(start, end, sep); 39 parts.emplace_back(str.substr(start, current).toString()); 40 if (f) { 41 std::string& part = parts.back(); 42 std::transform(part.begin(), part.end(), part.begin(), f); 43 } 44 start = current + 1; 45 } while (current != end); 46 return parts; 47 } 48 49 std::vector<std::string> split(const StringPiece& str, char sep) { 50 return splitAndTransform(str, sep, nullptr); 51 } 52 53 std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) { 54 return splitAndTransform(str, sep, ::tolower); 55 } 56 57 StringPiece16 trimWhitespace(const StringPiece16& str) { 58 if (str.size() == 0 || str.data() == nullptr) { 59 return str; 60 } 61 62 const char16_t* start = str.data(); 63 const char16_t* end = str.data() + str.length(); 64 65 while (start != end && util::isspace16(*start)) { 66 start++; 67 } 68 69 while (end != start && util::isspace16(*(end - 1))) { 70 end--; 71 } 72 73 return StringPiece16(start, end - start); 74 } 75 76 StringPiece trimWhitespace(const StringPiece& str) { 77 if (str.size() == 0 || str.data() == nullptr) { 78 return str; 79 } 80 81 const char* start = str.data(); 82 const char* end = str.data() + str.length(); 83 84 while (start != end && isspace(*start)) { 85 start++; 86 } 87 88 while (end != start && isspace(*(end - 1))) { 89 end--; 90 } 91 92 return StringPiece(start, end - start); 93 } 94 95 StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str, 96 const StringPiece16& allowedChars) { 97 const auto endIter = str.end(); 98 for (auto iter = str.begin(); iter != endIter; ++iter) { 99 char16_t c = *iter; 100 if ((c >= u'a' && c <= u'z') || 101 (c >= u'A' && c <= u'Z') || 102 (c >= u'0' && c <= u'9')) { 103 continue; 104 } 105 106 bool match = false; 107 for (char16_t i : allowedChars) { 108 if (c == i) { 109 match = true; 110 break; 111 } 112 } 113 114 if (!match) { 115 return iter; 116 } 117 } 118 return endIter; 119 } 120 121 bool isJavaClassName(const StringPiece16& str) { 122 size_t pieces = 0; 123 for (const StringPiece16& piece : tokenize(str, u'.')) { 124 pieces++; 125 if (piece.empty()) { 126 return false; 127 } 128 129 // Can't have starting or trailing $ character. 130 if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') { 131 return false; 132 } 133 134 if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) { 135 return false; 136 } 137 } 138 return pieces >= 2; 139 } 140 141 bool isJavaPackageName(const StringPiece16& str) { 142 if (str.empty()) { 143 return false; 144 } 145 146 size_t pieces = 0; 147 for (const StringPiece16& piece : tokenize(str, u'.')) { 148 pieces++; 149 if (piece.empty()) { 150 return false; 151 } 152 153 if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') { 154 return false; 155 } 156 157 if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) { 158 return false; 159 } 160 } 161 return pieces >= 1; 162 } 163 164 Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package, 165 const StringPiece16& className) { 166 if (className.empty()) { 167 return {}; 168 } 169 170 if (util::isJavaClassName(className)) { 171 return className.toString(); 172 } 173 174 if (package.empty()) { 175 return {}; 176 } 177 178 if (className.data()[0] != u'.') { 179 return {}; 180 } 181 182 std::u16string result(package.data(), package.size()); 183 result.append(className.data(), className.size()); 184 if (!isJavaClassName(result)) { 185 return {}; 186 } 187 return result; 188 } 189 190 static size_t consumeDigits(const char16_t* start, const char16_t* end) { 191 const char16_t* c = start; 192 for (; c != end && *c >= u'0' && *c <= u'9'; c++) {} 193 return static_cast<size_t>(c - start); 194 } 195 196 bool verifyJavaStringFormat(const StringPiece16& str) { 197 const char16_t* c = str.begin(); 198 const char16_t* const end = str.end(); 199 200 size_t argCount = 0; 201 bool nonpositional = false; 202 while (c != end) { 203 if (*c == u'%' && c + 1 < end) { 204 c++; 205 206 if (*c == u'%') { 207 c++; 208 continue; 209 } 210 211 argCount++; 212 213 size_t numDigits = consumeDigits(c, end); 214 if (numDigits > 0) { 215 c += numDigits; 216 if (c != end && *c != u'$') { 217 // The digits were a size, but not a positional argument. 218 nonpositional = true; 219 } 220 } else if (*c == u'<') { 221 // Reusing last argument, bad idea since positions can be moved around 222 // during translation. 223 nonpositional = true; 224 225 c++; 226 227 // Optionally we can have a $ after 228 if (c != end && *c == u'$') { 229 c++; 230 } 231 } else { 232 nonpositional = true; 233 } 234 235 // Ignore size, width, flags, etc. 236 while (c != end && (*c == u'-' || 237 *c == u'#' || 238 *c == u'+' || 239 *c == u' ' || 240 *c == u',' || 241 *c == u'(' || 242 (*c >= u'0' && *c <= '9'))) { 243 c++; 244 } 245 246 /* 247 * This is a shortcut to detect strings that are going to Time.format() 248 * instead of String.format() 249 * 250 * Comparison of String.format() and Time.format() args: 251 * 252 * String: ABC E GH ST X abcdefgh nost x 253 * Time: DEFGHKMS W Za d hkm s w yz 254 * 255 * Therefore we know it's definitely Time if we have: 256 * DFKMWZkmwyz 257 */ 258 if (c != end) { 259 switch (*c) { 260 case 'D': 261 case 'F': 262 case 'K': 263 case 'M': 264 case 'W': 265 case 'Z': 266 case 'k': 267 case 'm': 268 case 'w': 269 case 'y': 270 case 'z': 271 return true; 272 } 273 } 274 } 275 276 if (c != end) { 277 c++; 278 } 279 } 280 281 if (argCount > 1 && nonpositional) { 282 // Multiple arguments were specified, but some or all were non positional. Translated 283 // strings may rearrange the order of the arguments, which will break the string. 284 return false; 285 } 286 return true; 287 } 288 289 static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) { 290 char16_t code = 0; 291 for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) { 292 char16_t c = **start; 293 int a; 294 if (c >= '0' && c <= '9') { 295 a = c - '0'; 296 } else if (c >= 'a' && c <= 'f') { 297 a = c - 'a' + 10; 298 } else if (c >= 'A' && c <= 'F') { 299 a = c - 'A' + 10; 300 } else { 301 return make_nothing<char16_t>(); 302 } 303 code = (code << 4) | a; 304 } 305 return make_value(code); 306 } 307 308 StringBuilder& StringBuilder::append(const StringPiece16& str) { 309 if (!mError.empty()) { 310 return *this; 311 } 312 313 const char16_t* const end = str.end(); 314 const char16_t* start = str.begin(); 315 const char16_t* current = start; 316 while (current != end) { 317 if (mLastCharWasEscape) { 318 switch (*current) { 319 case u't': 320 mStr += u'\t'; 321 break; 322 case u'n': 323 mStr += u'\n'; 324 break; 325 case u'#': 326 mStr += u'#'; 327 break; 328 case u'@': 329 mStr += u'@'; 330 break; 331 case u'?': 332 mStr += u'?'; 333 break; 334 case u'"': 335 mStr += u'"'; 336 break; 337 case u'\'': 338 mStr += u'\''; 339 break; 340 case u'\\': 341 mStr += u'\\'; 342 break; 343 case u'u': { 344 current++; 345 Maybe<char16_t> c = parseUnicodeCodepoint(¤t, end); 346 if (!c) { 347 mError = "invalid unicode escape sequence"; 348 return *this; 349 } 350 mStr += c.value(); 351 current -= 1; 352 break; 353 } 354 355 default: 356 // Ignore. 357 break; 358 } 359 mLastCharWasEscape = false; 360 start = current + 1; 361 } else if (*current == u'"') { 362 if (!mQuote && mTrailingSpace) { 363 // We found an opening quote, and we have 364 // trailing space, so we should append that 365 // space now. 366 if (mTrailingSpace) { 367 // We had trailing whitespace, so 368 // replace with a single space. 369 if (!mStr.empty()) { 370 mStr += u' '; 371 } 372 mTrailingSpace = false; 373 } 374 } 375 mQuote = !mQuote; 376 mStr.append(start, current - start); 377 start = current + 1; 378 } else if (*current == u'\'' && !mQuote) { 379 // This should be escaped. 380 mError = "unescaped apostrophe"; 381 return *this; 382 } else if (*current == u'\\') { 383 // This is an escape sequence, convert to the real value. 384 if (!mQuote && mTrailingSpace) { 385 // We had trailing whitespace, so 386 // replace with a single space. 387 if (!mStr.empty()) { 388 mStr += u' '; 389 } 390 mTrailingSpace = false; 391 } 392 mStr.append(start, current - start); 393 start = current + 1; 394 mLastCharWasEscape = true; 395 } else if (!mQuote) { 396 // This is not quoted text, so look for whitespace. 397 if (isspace16(*current)) { 398 // We found whitespace, see if we have seen some 399 // before. 400 if (!mTrailingSpace) { 401 // We didn't see a previous adjacent space, 402 // so mark that we did. 403 mTrailingSpace = true; 404 mStr.append(start, current - start); 405 } 406 407 // Keep skipping whitespace. 408 start = current + 1; 409 } else if (mTrailingSpace) { 410 // We saw trailing space before, so replace all 411 // that trailing space with one space. 412 if (!mStr.empty()) { 413 mStr += u' '; 414 } 415 mTrailingSpace = false; 416 } 417 } 418 current++; 419 } 420 mStr.append(start, end - start); 421 return *this; 422 } 423 424 std::u16string utf8ToUtf16(const StringPiece& utf8) { 425 ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()), 426 utf8.length()); 427 if (utf16Length <= 0) { 428 return {}; 429 } 430 431 std::u16string utf16; 432 utf16.resize(utf16Length); 433 utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin()); 434 return utf16; 435 } 436 437 std::string utf16ToUtf8(const StringPiece16& utf16) { 438 ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length()); 439 if (utf8Length <= 0) { 440 return {}; 441 } 442 443 std::string utf8; 444 utf8.resize(utf8Length); 445 utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin()); 446 return utf8; 447 } 448 449 bool writeAll(std::ostream& out, const BigBuffer& buffer) { 450 for (const auto& b : buffer) { 451 if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) { 452 return false; 453 } 454 } 455 return true; 456 } 457 458 std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) { 459 std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]); 460 uint8_t* p = data.get(); 461 for (const auto& block : buffer) { 462 memcpy(p, block.buffer.get(), block.size); 463 p += block.size; 464 } 465 return data; 466 } 467 468 bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix, 469 StringPiece16* outEntry, StringPiece16* outSuffix) { 470 if (!stringStartsWith<char16_t>(path, u"res/")) { 471 return false; 472 } 473 474 StringPiece16::const_iterator lastOccurence = path.end(); 475 for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) { 476 if (*iter == u'/') { 477 lastOccurence = iter; 478 } 479 } 480 481 if (lastOccurence == path.end()) { 482 return false; 483 } 484 485 auto iter = std::find(lastOccurence, path.end(), u'.'); 486 *outSuffix = StringPiece16(iter, path.end() - iter); 487 *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1); 488 *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1); 489 return true; 490 } 491 492 } // namespace util 493 } // namespace aapt 494