Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "util/Util.h"
     18 
     19 #include <algorithm>
     20 #include <ostream>
     21 #include <string>
     22 #include <vector>
     23 
     24 #include "android-base/stringprintf.h"
     25 #include "androidfw/StringPiece.h"
     26 #include "build/version.h"
     27 
     28 #include "text/Unicode.h"
     29 #include "text/Utf8Iterator.h"
     30 #include "util/BigBuffer.h"
     31 #include "util/Maybe.h"
     32 #include "utils/Unicode.h"
     33 
     34 using ::aapt::text::Utf8Iterator;
     35 using ::android::StringPiece;
     36 using ::android::StringPiece16;
     37 
     38 namespace aapt {
     39 namespace util {
     40 
     41 static std::vector<std::string> SplitAndTransform(
     42     const StringPiece& str, char sep, const std::function<char(char)>& f) {
     43   std::vector<std::string> parts;
     44   const StringPiece::const_iterator end = std::end(str);
     45   StringPiece::const_iterator start = std::begin(str);
     46   StringPiece::const_iterator current;
     47   do {
     48     current = std::find(start, end, sep);
     49     parts.emplace_back(str.substr(start, current).to_string());
     50     if (f) {
     51       std::string& part = parts.back();
     52       std::transform(part.begin(), part.end(), part.begin(), f);
     53     }
     54     start = current + 1;
     55   } while (current != end);
     56   return parts;
     57 }
     58 
     59 std::vector<std::string> Split(const StringPiece& str, char sep) {
     60   return SplitAndTransform(str, sep, nullptr);
     61 }
     62 
     63 std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
     64   return SplitAndTransform(str, sep, ::tolower);
     65 }
     66 
     67 bool StartsWith(const StringPiece& str, const StringPiece& prefix) {
     68   if (str.size() < prefix.size()) {
     69     return false;
     70   }
     71   return str.substr(0, prefix.size()) == prefix;
     72 }
     73 
     74 bool EndsWith(const StringPiece& str, const StringPiece& suffix) {
     75   if (str.size() < suffix.size()) {
     76     return false;
     77   }
     78   return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
     79 }
     80 
     81 StringPiece TrimLeadingWhitespace(const StringPiece& str) {
     82   if (str.size() == 0 || str.data() == nullptr) {
     83     return str;
     84   }
     85 
     86   const char* start = str.data();
     87   const char* end = start + str.length();
     88 
     89   while (start != end && isspace(*start)) {
     90     start++;
     91   }
     92   return StringPiece(start, end - start);
     93 }
     94 
     95 StringPiece TrimTrailingWhitespace(const StringPiece& str) {
     96   if (str.size() == 0 || str.data() == nullptr) {
     97     return str;
     98   }
     99 
    100   const char* start = str.data();
    101   const char* end = start + str.length();
    102 
    103   while (end != start && isspace(*(end - 1))) {
    104     end--;
    105   }
    106   return StringPiece(start, end - start);
    107 }
    108 
    109 StringPiece TrimWhitespace(const StringPiece& str) {
    110   if (str.size() == 0 || str.data() == nullptr) {
    111     return str;
    112   }
    113 
    114   const char* start = str.data();
    115   const char* end = str.data() + str.length();
    116 
    117   while (start != end && isspace(*start)) {
    118     start++;
    119   }
    120 
    121   while (end != start && isspace(*(end - 1))) {
    122     end--;
    123   }
    124 
    125   return StringPiece(start, end - start);
    126 }
    127 
    128 static int IsJavaNameImpl(const StringPiece& str) {
    129   int pieces = 0;
    130   for (const StringPiece& piece : Tokenize(str, '.')) {
    131     pieces++;
    132     if (!text::IsJavaIdentifier(piece)) {
    133       return -1;
    134     }
    135   }
    136   return pieces;
    137 }
    138 
    139 bool IsJavaClassName(const StringPiece& str) {
    140   return IsJavaNameImpl(str) >= 2;
    141 }
    142 
    143 bool IsJavaPackageName(const StringPiece& str) {
    144   return IsJavaNameImpl(str) >= 1;
    145 }
    146 
    147 static int IsAndroidNameImpl(const StringPiece& str) {
    148   int pieces = 0;
    149   for (const StringPiece& piece : Tokenize(str, '.')) {
    150     if (piece.empty()) {
    151       return -1;
    152     }
    153 
    154     const char first_character = piece.data()[0];
    155     if (!::isalpha(first_character)) {
    156       return -1;
    157     }
    158 
    159     bool valid = std::all_of(piece.begin() + 1, piece.end(), [](const char c) -> bool {
    160       return ::isalnum(c) || c == '_';
    161     });
    162 
    163     if (!valid) {
    164       return -1;
    165     }
    166     pieces++;
    167   }
    168   return pieces;
    169 }
    170 
    171 bool IsAndroidPackageName(const StringPiece& str) {
    172   return IsAndroidNameImpl(str) > 1 || str == "android";
    173 }
    174 
    175 bool IsAndroidSplitName(const StringPiece& str) {
    176   return IsAndroidNameImpl(str) > 0;
    177 }
    178 
    179 Maybe<std::string> GetFullyQualifiedClassName(const StringPiece& package,
    180                                               const StringPiece& classname) {
    181   if (classname.empty()) {
    182     return {};
    183   }
    184 
    185   if (util::IsJavaClassName(classname)) {
    186     return classname.to_string();
    187   }
    188 
    189   if (package.empty()) {
    190     return {};
    191   }
    192 
    193   std::string result = package.to_string();
    194   if (classname.data()[0] != '.') {
    195     result += '.';
    196   }
    197 
    198   result.append(classname.data(), classname.size());
    199   if (!IsJavaClassName(result)) {
    200     return {};
    201   }
    202   return result;
    203 }
    204 
    // Returns the human-readable name of the tool as a pointer to static storage.
    const char* GetToolName() {
      static const char kToolName[] = "Android Asset Packaging Tool (aapt)";
      return kToolName;
    }
    209 
    210 std::string GetToolFingerprint() {
    211   // DO NOT UPDATE, this is more of a marketing version.
    212   static const char* const sMajorVersion = "2";
    213 
    214   // Update minor version whenever a feature or flag is added.
    215   static const char* const sMinorVersion = "19";
    216 
    217   // The build id of aapt2 binary.
    218   static const std::string sBuildId = android::build::GetBuildNumber();
    219 
    220   return android::base::StringPrintf("%s.%s-%s", sMajorVersion, sMinorVersion, sBuildId.c_str());
    221 }
    222 
    // Returns how many consecutive ASCII digits ('0'..'9') begin the range [start, end).
    static size_t ConsumeDigits(const char* start, const char* end) {
      const char* cursor = start;
      while (cursor != end) {
        const bool is_digit = *cursor >= '0' && *cursor <= '9';
        if (!is_digit) {
          break;
        }
        ++cursor;
      }
      return static_cast<size_t>(cursor - start);
    }
    229 
    230 bool VerifyJavaStringFormat(const StringPiece& str) {
    231   const char* c = str.begin();
    232   const char* const end = str.end();
    233 
    234   size_t arg_count = 0;
    235   bool nonpositional = false;
    236   while (c != end) {
    237     if (*c == '%' && c + 1 < end) {
    238       c++;
    239 
    240       if (*c == '%' || *c == 'n') {
    241         c++;
    242         continue;
    243       }
    244 
    245       arg_count++;
    246 
    247       size_t num_digits = ConsumeDigits(c, end);
    248       if (num_digits > 0) {
    249         c += num_digits;
    250         if (c != end && *c != '$') {
    251           // The digits were a size, but not a positional argument.
    252           nonpositional = true;
    253         }
    254       } else if (*c == '<') {
    255         // Reusing last argument, bad idea since positions can be moved around
    256         // during translation.
    257         nonpositional = true;
    258 
    259         c++;
    260 
    261         // Optionally we can have a $ after
    262         if (c != end && *c == '$') {
    263           c++;
    264         }
    265       } else {
    266         nonpositional = true;
    267       }
    268 
    269       // Ignore size, width, flags, etc.
    270       while (c != end && (*c == '-' || *c == '#' || *c == '+' || *c == ' ' ||
    271                           *c == ',' || *c == '(' || (*c >= '0' && *c <= '9'))) {
    272         c++;
    273       }
    274 
    275       /*
    276        * This is a shortcut to detect strings that are going to Time.format()
    277        * instead of String.format()
    278        *
    279        * Comparison of String.format() and Time.format() args:
    280        *
    281        * String: ABC E GH  ST X abcdefgh  nost x
    282        *   Time:    DEFGHKMS W Za  d   hkm  s w yz
    283        *
    284        * Therefore we know it's definitely Time if we have:
    285        *     DFKMWZkmwyz
    286        */
    287       if (c != end) {
    288         switch (*c) {
    289           case 'D':
    290           case 'F':
    291           case 'K':
    292           case 'M':
    293           case 'W':
    294           case 'Z':
    295           case 'k':
    296           case 'm':
    297           case 'w':
    298           case 'y':
    299           case 'z':
    300             return true;
    301         }
    302       }
    303     }
    304 
    305     if (c != end) {
    306       c++;
    307     }
    308   }
    309 
    310   if (arg_count > 1 && nonpositional) {
    311     // Multiple arguments were specified, but some or all were non positional.
    312     // Translated
    313     // strings may rearrange the order of the arguments, which will break the
    314     // string.
    315     return false;
    316   }
    317   return true;
    318 }
    319 
    320 std::string Utf8ToModifiedUtf8(const std::string& utf8) {
    321   // Java uses Modified UTF-8 which only supports the 1, 2, and 3 byte formats of UTF-8. To encode
    322   // 4 byte UTF-8 codepoints, Modified UTF-8 allows the use of surrogate pairs in the same format
    323   // of CESU-8 surrogate pairs. Calculate the size of the utf8 string with all 4 byte UTF-8
    324   // codepoints replaced with 2 3 byte surrogate pairs
    325   size_t modified_size = 0;
    326   const size_t size = utf8.size();
    327   for (size_t i = 0; i < size; i++) {
    328     if (((uint8_t) utf8[i] >> 4) == 0xF) {
    329       modified_size += 6;
    330       i += 3;
    331     } else {
    332       modified_size++;
    333     }
    334   }
    335 
    336   // Early out if no 4 byte codepoints are found
    337   if (size == modified_size) {
    338     return utf8;
    339   }
    340 
    341   std::string output;
    342   output.reserve(modified_size);
    343   for (size_t i = 0; i < size; i++) {
    344     if (((uint8_t) utf8[i] >> 4) == 0xF) {
    345       int32_t codepoint = utf32_from_utf8_at(utf8.data(), size, i, nullptr);
    346 
    347       // Calculate the high and low surrogates as UTF-16 would
    348       int32_t high = ((codepoint - 0x10000) / 0x400) + 0xD800;
    349       int32_t low = ((codepoint - 0x10000) % 0x400) + 0xDC00;
    350 
    351       // Encode each surrogate in UTF-8
    352       output.push_back((char) (0xE4 | ((high >> 12) & 0xF)));
    353       output.push_back((char) (0x80 | ((high >> 6) & 0x3F)));
    354       output.push_back((char) (0x80 | (high & 0x3F)));
    355       output.push_back((char) (0xE4 | ((low >> 12) & 0xF)));
    356       output.push_back((char) (0x80 | ((low >> 6) & 0x3F)));
    357       output.push_back((char) (0x80 | (low & 0x3F)));
    358       i += 3;
    359     } else {
    360       output.push_back(utf8[i]);
    361     }
    362   }
    363 
    364   return output;
    365 }
    366 
    367 std::string ModifiedUtf8ToUtf8(const std::string& modified_utf8) {
    368   // The UTF-8 representation will have a byte length less than or equal to the Modified UTF-8
    369   // representation.
    370   std::string output;
    371   output.reserve(modified_utf8.size());
    372 
    373   size_t index = 0;
    374   const size_t modified_size = modified_utf8.size();
    375   while (index < modified_size) {
    376     size_t next_index;
    377     int32_t high_surrogate = utf32_from_utf8_at(modified_utf8.data(), modified_size, index,
    378                                                 &next_index);
    379     if (high_surrogate < 0) {
    380       return {};
    381     }
    382 
    383     // Check that the first codepoint is within the high surrogate range
    384     if (high_surrogate >= 0xD800 && high_surrogate <= 0xDB7F) {
    385       int32_t low_surrogate = utf32_from_utf8_at(modified_utf8.data(), modified_size, next_index,
    386                                                  &next_index);
    387       if (low_surrogate < 0) {
    388         return {};
    389       }
    390 
    391       // Check that the second codepoint is within the low surrogate range
    392       if (low_surrogate >= 0xDC00 && low_surrogate <= 0xDFFF) {
    393         const char32_t codepoint = (char32_t) (((high_surrogate - 0xD800) * 0x400)
    394             + (low_surrogate - 0xDC00) + 0x10000);
    395 
    396         // The decoded codepoint should represent a 4 byte, UTF-8 character
    397         const size_t utf8_length = (size_t) utf32_to_utf8_length(&codepoint, 1);
    398         if (utf8_length != 4) {
    399           return {};
    400         }
    401 
    402         // Encode the UTF-8 representation of the codepoint into the string
    403         char* start = &output[output.size()];
    404         output.resize(output.size() + utf8_length);
    405         utf32_to_utf8((char32_t*) &codepoint, 1, start, utf8_length + 1);
    406 
    407         index = next_index;
    408         continue;
    409       }
    410     }
    411 
    412     // Append non-surrogate pairs to the output string
    413     for (size_t i = index; i < next_index; i++) {
    414       output.push_back(modified_utf8[i]);
    415     }
    416     index = next_index;
    417   }
    418   return output;
    419 }
    420 
    421 std::u16string Utf8ToUtf16(const StringPiece& utf8) {
    422   ssize_t utf16_length = utf8_to_utf16_length(
    423       reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
    424   if (utf16_length <= 0) {
    425     return {};
    426   }
    427 
    428   std::u16string utf16;
    429   utf16.resize(utf16_length);
    430   utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(),
    431                 &*utf16.begin(), utf16_length + 1);
    432   return utf16;
    433 }
    434 
    435 std::string Utf16ToUtf8(const StringPiece16& utf16) {
    436   ssize_t utf8_length = utf16_to_utf8_length(utf16.data(), utf16.length());
    437   if (utf8_length <= 0) {
    438     return {};
    439   }
    440 
    441   std::string utf8;
    442   utf8.resize(utf8_length);
    443   utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8_length + 1);
    444   return utf8;
    445 }
    446 
    447 bool WriteAll(std::ostream& out, const BigBuffer& buffer) {
    448   for (const auto& b : buffer) {
    449     if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
    450       return false;
    451     }
    452   }
    453   return true;
    454 }
    455 
    456 std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
    457   std::unique_ptr<uint8_t[]> data =
    458       std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
    459   uint8_t* p = data.get();
    460   for (const auto& block : buffer) {
    461     memcpy(p, block.buffer.get(), block.size);
    462     p += block.size;
    463   }
    464   return data;
    465 }
    466 
    467 typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
    468   const char* start = token_.end();
    469   const char* end = str_.end();
    470   if (start == end) {
    471     end_ = true;
    472     token_.assign(token_.end(), 0);
    473     return *this;
    474   }
    475 
    476   start += 1;
    477   const char* current = start;
    478   while (current != end) {
    479     if (*current == separator_) {
    480       token_.assign(start, current - start);
    481       return *this;
    482     }
    483     ++current;
    484   }
    485   token_.assign(start, end - start);
    486   return *this;
    487 }
    488 
    489 bool Tokenizer::iterator::operator==(const iterator& rhs) const {
    490   // We check equality here a bit differently.
    491   // We need to know that the addresses are the same.
    492   return token_.begin() == rhs.token_.begin() &&
    493          token_.end() == rhs.token_.end() && end_ == rhs.end_;
    494 }
    495 
    496 bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
    497   return !(*this == rhs);
    498 }
    499 
    500 Tokenizer::iterator::iterator(const StringPiece& s, char sep, const StringPiece& tok, bool end)
    501     : str_(s), separator_(sep), token_(tok), end_(end) {}
    502 
    503 Tokenizer::Tokenizer(const StringPiece& str, char sep)
    504     : begin_(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
    505       end_(str, sep, StringPiece(str.end(), 0), true) {}
    506 
    507 bool ExtractResFilePathParts(const StringPiece& path, StringPiece* out_prefix,
    508                              StringPiece* out_entry, StringPiece* out_suffix) {
    509   const StringPiece res_prefix("res/");
    510   if (!StartsWith(path, res_prefix)) {
    511     return false;
    512   }
    513 
    514   StringPiece::const_iterator last_occurence = path.end();
    515   for (auto iter = path.begin() + res_prefix.size(); iter != path.end();
    516        ++iter) {
    517     if (*iter == '/') {
    518       last_occurence = iter;
    519     }
    520   }
    521 
    522   if (last_occurence == path.end()) {
    523     return false;
    524   }
    525 
    526   auto iter = std::find(last_occurence, path.end(), '.');
    527   *out_suffix = StringPiece(iter, path.end() - iter);
    528   *out_entry = StringPiece(last_occurence + 1, iter - last_occurence - 1);
    529   *out_prefix = StringPiece(path.begin(), last_occurence - path.begin() + 1);
    530   return true;
    531 }
    532 
    533 StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
    534   size_t len;
    535   const char16_t* str = pool.stringAt(idx, &len);
    536   if (str != nullptr) {
    537     return StringPiece16(str, len);
    538   }
    539   return StringPiece16();
    540 }
    541 
    542 std::string GetString(const android::ResStringPool& pool, size_t idx) {
    543   size_t len;
    544   const char* str = pool.string8At(idx, &len);
    545   if (str != nullptr) {
    546     return ModifiedUtf8ToUtf8(std::string(str, len));
    547   }
    548   return Utf16ToUtf8(GetString16(pool, idx));
    549 }
    550 
    551 }  // namespace util
    552 }  // namespace aapt
    553