1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "BigBuffer.h" 18 #include "Maybe.h" 19 #include "StringPiece.h" 20 #include "Util.h" 21 22 #include <algorithm> 23 #include <ostream> 24 #include <string> 25 #include <utils/Unicode.h> 26 #include <vector> 27 28 namespace aapt { 29 namespace util { 30 31 constexpr const char16_t* kSchemaAuto = u"http://schemas.android.com/apk/res-auto"; 32 constexpr const char16_t* kSchemaPrefix = u"http://schemas.android.com/apk/res/"; 33 34 static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep, 35 const std::function<char(char)>& f) { 36 std::vector<std::string> parts; 37 const StringPiece::const_iterator end = std::end(str); 38 StringPiece::const_iterator start = std::begin(str); 39 StringPiece::const_iterator current; 40 do { 41 current = std::find(start, end, sep); 42 parts.emplace_back(str.substr(start, current).toString()); 43 if (f) { 44 std::string& part = parts.back(); 45 std::transform(part.begin(), part.end(), part.begin(), f); 46 } 47 start = current + 1; 48 } while (current != end); 49 return parts; 50 } 51 52 std::vector<std::string> split(const StringPiece& str, char sep) { 53 return splitAndTransform(str, sep, nullptr); 54 } 55 56 std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) { 57 return splitAndTransform(str, sep, ::tolower); 58 } 59 60 StringPiece16 trimWhitespace(const StringPiece16& str) { 61 if (str.size() == 0 || str.data() == nullptr) { 62 return str; 63 } 64 65 const char16_t* start = str.data(); 66 const char16_t* end = str.data() + str.length(); 67 68 while (start != end && util::isspace16(*start)) { 69 start++; 70 } 71 72 while (end != start && util::isspace16(*(end - 1))) { 73 end--; 74 } 75 76 return StringPiece16(start, end - start); 77 } 78 79 StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str, 80 const StringPiece16& allowedChars) { 81 const auto endIter = str.end(); 82 for (auto iter = str.begin(); iter != endIter; ++iter) { 83 char16_t c = *iter; 84 if ((c >= u'a' && c <= u'z') || 85 (c >= u'A' && c <= u'Z') || 86 (c >= u'0' && c <= u'9')) { 87 continue; 88 } 89 90 bool match = false; 91 for (char16_t i : allowedChars) { 92 if (c == i) { 93 match = true; 94 break; 95 } 96 } 97 98 if (!match) { 99 return iter; 100 } 101 } 102 return endIter; 103 } 104 105 bool isJavaClassName(const StringPiece16& str) { 106 size_t pieces = 0; 107 for (const StringPiece16& piece : tokenize(str, u'.')) { 108 pieces++; 109 if (piece.empty()) { 110 return false; 111 } 112 113 // Can't have starting or trailing $ character. 114 if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') { 115 return false; 116 } 117 118 if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) { 119 return false; 120 } 121 } 122 return pieces >= 2; 123 } 124 125 Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package, 126 const StringPiece16& className) { 127 if (className.empty()) { 128 return {}; 129 } 130 131 if (util::isJavaClassName(className)) { 132 return className.toString(); 133 } 134 135 if (package.empty()) { 136 return {}; 137 } 138 139 std::u16string result(package.data(), package.size()); 140 if (className.data()[0] != u'.') { 141 result += u'.'; 142 } 143 result.append(className.data(), className.size()); 144 if (!isJavaClassName(result)) { 145 return {}; 146 } 147 return result; 148 } 149 150 static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) { 151 char16_t code = 0; 152 for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) { 153 char16_t c = **start; 154 int a; 155 if (c >= '0' && c <= '9') { 156 a = c - '0'; 157 } else if (c >= 'a' && c <= 'f') { 158 a = c - 'a' + 10; 159 } else if (c >= 'A' && c <= 'F') { 160 a = c - 'A' + 10; 161 } else { 162 return make_nothing<char16_t>(); 163 } 164 code = (code << 4) | a; 165 } 166 return make_value(code); 167 } 168 169 StringBuilder& StringBuilder::append(const StringPiece16& str) { 170 if (!mError.empty()) { 171 return *this; 172 } 173 174 const char16_t* const end = str.end(); 175 const char16_t* start = str.begin(); 176 const char16_t* current = start; 177 while (current != end) { 178 if (*current == u'"') { 179 if (!mQuote && mTrailingSpace) { 180 // We found an opening quote, and we have 181 // trailing space, so we should append that 182 // space now. 183 if (mTrailingSpace) { 184 // We had trailing whitespace, so 185 // replace with a single space. 186 if (!mStr.empty()) { 187 mStr += u' '; 188 } 189 mTrailingSpace = false; 190 } 191 } 192 mQuote = !mQuote; 193 mStr.append(start, current - start); 194 start = current + 1; 195 } else if (*current == u'\'' && !mQuote) { 196 // This should be escaped. 197 mError = "unescaped apostrophe"; 198 return *this; 199 } else if (*current == u'\\') { 200 // This is an escape sequence, convert to the real value. 201 if (!mQuote && mTrailingSpace) { 202 // We had trailing whitespace, so 203 // replace with a single space. 204 if (!mStr.empty()) { 205 mStr += u' '; 206 } 207 mTrailingSpace = false; 208 } 209 mStr.append(start, current - start); 210 start = current + 1; 211 212 current++; 213 if (current != end) { 214 switch (*current) { 215 case u't': 216 mStr += u'\t'; 217 break; 218 case u'n': 219 mStr += u'\n'; 220 break; 221 case u'#': 222 mStr += u'#'; 223 break; 224 case u'@': 225 mStr += u'@'; 226 break; 227 case u'?': 228 mStr += u'?'; 229 break; 230 case u'"': 231 mStr += u'"'; 232 break; 233 case u'\'': 234 mStr += u'\''; 235 break; 236 case u'\\': 237 mStr += u'\\'; 238 break; 239 case u'u': { 240 current++; 241 Maybe<char16_t> c = parseUnicodeCodepoint(¤t, end); 242 if (!c) { 243 mError = "invalid unicode escape sequence"; 244 return *this; 245 } 246 mStr += c.value(); 247 current -= 1; 248 break; 249 } 250 251 default: 252 // Ignore. 253 break; 254 } 255 start = current + 1; 256 } 257 } else if (!mQuote) { 258 // This is not quoted text, so look for whitespace. 259 if (isspace16(*current)) { 260 // We found whitespace, see if we have seen some 261 // before. 262 if (!mTrailingSpace) { 263 // We didn't see a previous adjacent space, 264 // so mark that we did. 265 mTrailingSpace = true; 266 mStr.append(start, current - start); 267 } 268 269 // Keep skipping whitespace. 270 start = current + 1; 271 } else if (mTrailingSpace) { 272 // We saw trailing space before, so replace all 273 // that trailing space with one space. 274 if (!mStr.empty()) { 275 mStr += u' '; 276 } 277 mTrailingSpace = false; 278 } 279 } 280 current++; 281 } 282 mStr.append(start, end - start); 283 return *this; 284 } 285 286 std::u16string utf8ToUtf16(const StringPiece& utf8) { 287 ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()), 288 utf8.length()); 289 if (utf16Length <= 0) { 290 return {}; 291 } 292 293 std::u16string utf16; 294 utf16.resize(utf16Length); 295 utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin()); 296 return utf16; 297 } 298 299 std::string utf16ToUtf8(const StringPiece16& utf16) { 300 ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length()); 301 if (utf8Length <= 0) { 302 return {}; 303 } 304 305 std::string utf8; 306 utf8.resize(utf8Length); 307 utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin()); 308 return utf8; 309 } 310 311 bool writeAll(std::ostream& out, const BigBuffer& buffer) { 312 for (const auto& b : buffer) { 313 if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) { 314 return false; 315 } 316 } 317 return true; 318 } 319 320 std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) { 321 std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]); 322 uint8_t* p = data.get(); 323 for (const auto& block : buffer) { 324 memcpy(p, block.buffer.get(), block.size); 325 p += block.size; 326 } 327 return data; 328 } 329 330 Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) { 331 if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) { 332 StringPiece16 schemaPrefix = kSchemaPrefix; 333 StringPiece16 package = namespaceUri; 334 return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size()) 335 .toString(); 336 } else if (namespaceUri == kSchemaAuto) { 337 return std::u16string(); 338 } 339 return {}; 340 } 341 342 } // namespace util 343 } // namespace aapt 344