Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "util/BigBuffer.h"
     18 #include "util/Maybe.h"
     19 #include "util/StringPiece.h"
     20 #include "util/Util.h"
     21 
     22 #include <algorithm>
     23 #include <ostream>
     24 #include <string>
     25 #include <utils/Unicode.h>
     26 #include <vector>
     27 
     28 namespace aapt {
     29 namespace util {
     30 
     31 static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
     32         const std::function<char(char)>& f) {
     33     std::vector<std::string> parts;
     34     const StringPiece::const_iterator end = std::end(str);
     35     StringPiece::const_iterator start = std::begin(str);
     36     StringPiece::const_iterator current;
     37     do {
     38         current = std::find(start, end, sep);
     39         parts.emplace_back(str.substr(start, current).toString());
     40         if (f) {
     41             std::string& part = parts.back();
     42             std::transform(part.begin(), part.end(), part.begin(), f);
     43         }
     44         start = current + 1;
     45     } while (current != end);
     46     return parts;
     47 }
     48 
     49 std::vector<std::string> split(const StringPiece& str, char sep) {
     50     return splitAndTransform(str, sep, nullptr);
     51 }
     52 
     53 std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
     54     return splitAndTransform(str, sep, ::tolower);
     55 }
     56 
     57 StringPiece16 trimWhitespace(const StringPiece16& str) {
     58     if (str.size() == 0 || str.data() == nullptr) {
     59         return str;
     60     }
     61 
     62     const char16_t* start = str.data();
     63     const char16_t* end = str.data() + str.length();
     64 
     65     while (start != end && util::isspace16(*start)) {
     66         start++;
     67     }
     68 
     69     while (end != start && util::isspace16(*(end - 1))) {
     70         end--;
     71     }
     72 
     73     return StringPiece16(start, end - start);
     74 }
     75 
     76 StringPiece trimWhitespace(const StringPiece& str) {
     77     if (str.size() == 0 || str.data() == nullptr) {
     78         return str;
     79     }
     80 
     81     const char* start = str.data();
     82     const char* end = str.data() + str.length();
     83 
     84     while (start != end && isspace(*start)) {
     85         start++;
     86     }
     87 
     88     while (end != start && isspace(*(end - 1))) {
     89         end--;
     90     }
     91 
     92     return StringPiece(start, end - start);
     93 }
     94 
     95 StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
     96         const StringPiece16& allowedChars) {
     97     const auto endIter = str.end();
     98     for (auto iter = str.begin(); iter != endIter; ++iter) {
     99         char16_t c = *iter;
    100         if ((c >= u'a' && c <= u'z') ||
    101                 (c >= u'A' && c <= u'Z') ||
    102                 (c >= u'0' && c <= u'9')) {
    103             continue;
    104         }
    105 
    106         bool match = false;
    107         for (char16_t i : allowedChars) {
    108             if (c == i) {
    109                 match = true;
    110                 break;
    111             }
    112         }
    113 
    114         if (!match) {
    115             return iter;
    116         }
    117     }
    118     return endIter;
    119 }
    120 
    121 bool isJavaClassName(const StringPiece16& str) {
    122     size_t pieces = 0;
    123     for (const StringPiece16& piece : tokenize(str, u'.')) {
    124         pieces++;
    125         if (piece.empty()) {
    126             return false;
    127         }
    128 
    129         // Can't have starting or trailing $ character.
    130         if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
    131             return false;
    132         }
    133 
    134         if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
    135             return false;
    136         }
    137     }
    138     return pieces >= 2;
    139 }
    140 
    141 bool isJavaPackageName(const StringPiece16& str) {
    142     if (str.empty()) {
    143         return false;
    144     }
    145 
    146     size_t pieces = 0;
    147     for (const StringPiece16& piece : tokenize(str, u'.')) {
    148         pieces++;
    149         if (piece.empty()) {
    150             return false;
    151         }
    152 
    153         if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') {
    154             return false;
    155         }
    156 
    157         if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
    158             return false;
    159         }
    160     }
    161     return pieces >= 1;
    162 }
    163 
    164 Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
    165                                                  const StringPiece16& className) {
    166     if (className.empty()) {
    167         return {};
    168     }
    169 
    170     if (util::isJavaClassName(className)) {
    171         return className.toString();
    172     }
    173 
    174     if (package.empty()) {
    175         return {};
    176     }
    177 
    178     if (className.data()[0] != u'.') {
    179         return {};
    180     }
    181 
    182     std::u16string result(package.data(), package.size());
    183     result.append(className.data(), className.size());
    184     if (!isJavaClassName(result)) {
    185         return {};
    186     }
    187     return result;
    188 }
    189 
    190 static size_t consumeDigits(const char16_t* start, const char16_t* end) {
    191     const char16_t* c = start;
    192     for (; c != end && *c >= u'0' && *c <= u'9'; c++) {}
    193     return static_cast<size_t>(c - start);
    194 }
    195 
    196 bool verifyJavaStringFormat(const StringPiece16& str) {
    197     const char16_t* c = str.begin();
    198     const char16_t* const end = str.end();
    199 
    200     size_t argCount = 0;
    201     bool nonpositional = false;
    202     while (c != end) {
    203         if (*c == u'%' && c + 1 < end) {
    204             c++;
    205 
    206             if (*c == u'%') {
    207                 c++;
    208                 continue;
    209             }
    210 
    211             argCount++;
    212 
    213             size_t numDigits = consumeDigits(c, end);
    214             if (numDigits > 0) {
    215                 c += numDigits;
    216                 if (c != end && *c != u'$') {
    217                     // The digits were a size, but not a positional argument.
    218                     nonpositional = true;
    219                 }
    220             } else if (*c == u'<') {
    221                 // Reusing last argument, bad idea since positions can be moved around
    222                 // during translation.
    223                 nonpositional = true;
    224 
    225                 c++;
    226 
    227                 // Optionally we can have a $ after
    228                 if (c != end && *c == u'$') {
    229                     c++;
    230                 }
    231             } else {
    232                 nonpositional = true;
    233             }
    234 
    235             // Ignore size, width, flags, etc.
    236             while (c != end && (*c == u'-' ||
    237                     *c == u'#' ||
    238                     *c == u'+' ||
    239                     *c == u' ' ||
    240                     *c == u',' ||
    241                     *c == u'(' ||
    242                     (*c >= u'0' && *c <= '9'))) {
    243                 c++;
    244             }
    245 
    246             /*
    247              * This is a shortcut to detect strings that are going to Time.format()
    248              * instead of String.format()
    249              *
    250              * Comparison of String.format() and Time.format() args:
    251              *
    252              * String: ABC E GH  ST X abcdefgh  nost x
    253              *   Time:    DEFGHKMS W Za  d   hkm  s w yz
    254              *
    255              * Therefore we know it's definitely Time if we have:
    256              *     DFKMWZkmwyz
    257              */
    258             if (c != end) {
    259                 switch (*c) {
    260                 case 'D':
    261                 case 'F':
    262                 case 'K':
    263                 case 'M':
    264                 case 'W':
    265                 case 'Z':
    266                 case 'k':
    267                 case 'm':
    268                 case 'w':
    269                 case 'y':
    270                 case 'z':
    271                     return true;
    272                 }
    273             }
    274         }
    275 
    276         if (c != end) {
    277             c++;
    278         }
    279     }
    280 
    281     if (argCount > 1 && nonpositional) {
    282         // Multiple arguments were specified, but some or all were non positional. Translated
    283         // strings may rearrange the order of the arguments, which will break the string.
    284         return false;
    285     }
    286     return true;
    287 }
    288 
    289 static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
    290     char16_t code = 0;
    291     for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
    292         char16_t c = **start;
    293         int a;
    294         if (c >= '0' && c <= '9') {
    295             a = c - '0';
    296         } else if (c >= 'a' && c <= 'f') {
    297             a = c - 'a' + 10;
    298         } else if (c >= 'A' && c <= 'F') {
    299             a = c - 'A' + 10;
    300         } else {
    301             return make_nothing<char16_t>();
    302         }
    303         code = (code << 4) | a;
    304     }
    305     return make_value(code);
    306 }
    307 
    308 StringBuilder& StringBuilder::append(const StringPiece16& str) {
    309     if (!mError.empty()) {
    310         return *this;
    311     }
    312 
    313     const char16_t* const end = str.end();
    314     const char16_t* start = str.begin();
    315     const char16_t* current = start;
    316     while (current != end) {
    317         if (mLastCharWasEscape) {
    318             switch (*current) {
    319                 case u't':
    320                     mStr += u'\t';
    321                     break;
    322                 case u'n':
    323                     mStr += u'\n';
    324                     break;
    325                 case u'#':
    326                     mStr += u'#';
    327                     break;
    328                 case u'@':
    329                     mStr += u'@';
    330                     break;
    331                 case u'?':
    332                     mStr += u'?';
    333                     break;
    334                 case u'"':
    335                     mStr += u'"';
    336                     break;
    337                 case u'\'':
    338                     mStr += u'\'';
    339                     break;
    340                 case u'\\':
    341                     mStr += u'\\';
    342                     break;
    343                 case u'u': {
    344                     current++;
    345                     Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
    346                     if (!c) {
    347                         mError = "invalid unicode escape sequence";
    348                         return *this;
    349                     }
    350                     mStr += c.value();
    351                     current -= 1;
    352                     break;
    353                 }
    354 
    355                 default:
    356                     // Ignore.
    357                     break;
    358             }
    359             mLastCharWasEscape = false;
    360             start = current + 1;
    361         } else if (*current == u'"') {
    362             if (!mQuote && mTrailingSpace) {
    363                 // We found an opening quote, and we have
    364                 // trailing space, so we should append that
    365                 // space now.
    366                 if (mTrailingSpace) {
    367                     // We had trailing whitespace, so
    368                     // replace with a single space.
    369                     if (!mStr.empty()) {
    370                         mStr += u' ';
    371                     }
    372                     mTrailingSpace = false;
    373                 }
    374             }
    375             mQuote = !mQuote;
    376             mStr.append(start, current - start);
    377             start = current + 1;
    378         } else if (*current == u'\'' && !mQuote) {
    379             // This should be escaped.
    380             mError = "unescaped apostrophe";
    381             return *this;
    382         } else if (*current == u'\\') {
    383             // This is an escape sequence, convert to the real value.
    384             if (!mQuote && mTrailingSpace) {
    385                 // We had trailing whitespace, so
    386                 // replace with a single space.
    387                 if (!mStr.empty()) {
    388                     mStr += u' ';
    389                 }
    390                 mTrailingSpace = false;
    391             }
    392             mStr.append(start, current - start);
    393             start = current + 1;
    394             mLastCharWasEscape = true;
    395         } else if (!mQuote) {
    396             // This is not quoted text, so look for whitespace.
    397             if (isspace16(*current)) {
    398                 // We found whitespace, see if we have seen some
    399                 // before.
    400                 if (!mTrailingSpace) {
    401                     // We didn't see a previous adjacent space,
    402                     // so mark that we did.
    403                     mTrailingSpace = true;
    404                     mStr.append(start, current - start);
    405                 }
    406 
    407                 // Keep skipping whitespace.
    408                 start = current + 1;
    409             } else if (mTrailingSpace) {
    410                 // We saw trailing space before, so replace all
    411                 // that trailing space with one space.
    412                 if (!mStr.empty()) {
    413                     mStr += u' ';
    414                 }
    415                 mTrailingSpace = false;
    416             }
    417         }
    418         current++;
    419     }
    420     mStr.append(start, end - start);
    421     return *this;
    422 }
    423 
    424 std::u16string utf8ToUtf16(const StringPiece& utf8) {
    425     ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
    426             utf8.length());
    427     if (utf16Length <= 0) {
    428         return {};
    429     }
    430 
    431     std::u16string utf16;
    432     utf16.resize(utf16Length);
    433     utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin());
    434     return utf16;
    435 }
    436 
    437 std::string utf16ToUtf8(const StringPiece16& utf16) {
    438     ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
    439     if (utf8Length <= 0) {
    440         return {};
    441     }
    442 
    443     std::string utf8;
    444     // Make room for '\0' explicitly.
    445     utf8.resize(utf8Length + 1);
    446     utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8Length + 1);
    447     utf8.resize(utf8Length);
    448     return utf8;
    449 }
    450 
    451 bool writeAll(std::ostream& out, const BigBuffer& buffer) {
    452     for (const auto& b : buffer) {
    453         if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
    454             return false;
    455         }
    456     }
    457     return true;
    458 }
    459 
    460 std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
    461     std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
    462     uint8_t* p = data.get();
    463     for (const auto& block : buffer) {
    464         memcpy(p, block.buffer.get(), block.size);
    465         p += block.size;
    466     }
    467     return data;
    468 }
    469 
    470 bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
    471                              StringPiece16* outEntry, StringPiece16* outSuffix) {
    472     if (!stringStartsWith<char16_t>(path, u"res/")) {
    473         return false;
    474     }
    475 
    476     StringPiece16::const_iterator lastOccurence = path.end();
    477     for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
    478         if (*iter == u'/') {
    479             lastOccurence = iter;
    480         }
    481     }
    482 
    483     if (lastOccurence == path.end()) {
    484         return false;
    485     }
    486 
    487     auto iter = std::find(lastOccurence, path.end(), u'.');
    488     *outSuffix = StringPiece16(iter, path.end() - iter);
    489     *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
    490     *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
    491     return true;
    492 }
    493 
    494 } // namespace util
    495 } // namespace aapt
    496