1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "string-inl.h" 18 19 #include "arch/memcmp16.h" 20 #include "array.h" 21 #include "base/array_ref.h" 22 #include "base/stl_util.h" 23 #include "class-inl.h" 24 #include "gc/accounting/card_table-inl.h" 25 #include "gc_root-inl.h" 26 #include "handle_scope-inl.h" 27 #include "intern_table.h" 28 #include "object-inl.h" 29 #include "runtime.h" 30 #include "string-inl.h" 31 #include "thread.h" 32 #include "utf-inl.h" 33 34 namespace art { 35 namespace mirror { 36 37 // TODO: get global references for these 38 GcRoot<Class> String::java_lang_String_; 39 40 int32_t String::FastIndexOf(int32_t ch, int32_t start) { 41 int32_t count = GetLength(); 42 if (start < 0) { 43 start = 0; 44 } else if (start > count) { 45 start = count; 46 } 47 if (IsCompressed()) { 48 return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start); 49 } else { 50 return FastIndexOf<uint16_t>(GetValue(), ch, start); 51 } 52 } 53 54 void String::SetClass(ObjPtr<Class> java_lang_String) { 55 CHECK(java_lang_String_.IsNull()); 56 CHECK(java_lang_String != nullptr); 57 CHECK(java_lang_String->IsStringClass()); 58 java_lang_String_ = GcRoot<Class>(java_lang_String); 59 } 60 61 void String::ResetClass() { 62 CHECK(!java_lang_String_.IsNull()); 63 java_lang_String_ = GcRoot<Class>(nullptr); 64 } 65 66 int String::ComputeHashCode() { 67 int32_t hash_code = 0; 68 if (IsCompressed()) { 69 hash_code = ComputeUtf16Hash(GetValueCompressed(), GetLength()); 70 } else { 71 hash_code = ComputeUtf16Hash(GetValue(), GetLength()); 72 } 73 SetHashCode(hash_code); 74 return hash_code; 75 } 76 77 int32_t String::GetUtfLength() { 78 if (IsCompressed()) { 79 return GetLength(); 80 } else { 81 return CountUtf8Bytes(GetValue(), GetLength()); 82 } 83 } 84 85 inline bool String::AllASCIIExcept(const uint16_t* chars, int32_t length, uint16_t non_ascii) { 86 DCHECK(!IsASCII(non_ascii)); 87 for (int32_t i = 0; i < length; ++i) { 88 if (!IsASCII(chars[i]) && chars[i] != non_ascii) { 89 return false; 90 } 91 } 92 return true; 93 } 94 95 ObjPtr<String> String::DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c) { 96 int32_t length = src->GetLength(); 97 DCHECK(src->IsCompressed() 98 ? ContainsElement(ArrayRef<uint8_t>(src->value_compressed_, length), old_c) 99 : ContainsElement(ArrayRef<uint16_t>(src->value_, length), old_c)); 100 bool compressible = 101 kUseStringCompression && 102 IsASCII(new_c) && 103 (src->IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(src->value_, length, old_c))); 104 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); 105 const int32_t length_with_flag = String::GetFlaggedCount(length, compressible); 106 SetStringCountVisitor visitor(length_with_flag); 107 ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor); 108 if (UNLIKELY(string == nullptr)) { 109 return nullptr; 110 } 111 if (compressible) { 112 auto replace = [old_c, new_c](uint16_t c) { 113 return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c); 114 }; 115 uint8_t* out = string->value_compressed_; 116 if (LIKELY(src->IsCompressed())) { // LIKELY(compressible == src->IsCompressed()) 117 std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace); 118 } else { 119 std::transform(src->value_, src->value_ + length, out, replace); 120 } 121 DCHECK(kUseStringCompression && AllASCII(out, length)); 122 } else { 123 auto replace = [old_c, new_c](uint16_t c) { 124 return (old_c != c) ? c : new_c; 125 }; 126 uint16_t* out = string->value_; 127 if (UNLIKELY(src->IsCompressed())) { // LIKELY(compressible == src->IsCompressed()) 128 std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace); 129 } else { 130 std::transform(src->value_, src->value_ + length, out, replace); 131 } 132 DCHECK(!kUseStringCompression || !AllASCII(out, length)); 133 } 134 return string; 135 } 136 137 String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) { 138 int32_t length = string->GetLength(); 139 int32_t length2 = string2->GetLength(); 140 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); 141 const bool compressible = kUseStringCompression && 142 (string->IsCompressed() && string2->IsCompressed()); 143 const int32_t length_with_flag = String::GetFlaggedCount(length + length2, compressible); 144 145 SetStringCountVisitor visitor(length_with_flag); 146 ObjPtr<String> new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor); 147 if (UNLIKELY(new_string == nullptr)) { 148 return nullptr; 149 } 150 if (compressible) { 151 uint8_t* new_value = new_string->GetValueCompressed(); 152 memcpy(new_value, string->GetValueCompressed(), length * sizeof(uint8_t)); 153 memcpy(new_value + length, string2->GetValueCompressed(), length2 * sizeof(uint8_t)); 154 } else { 155 uint16_t* new_value = new_string->GetValue(); 156 if (string->IsCompressed()) { 157 for (int i = 0; i < length; ++i) { 158 new_value[i] = string->CharAt(i); 159 } 160 } else { 161 memcpy(new_value, string->GetValue(), length * sizeof(uint16_t)); 162 } 163 if (string2->IsCompressed()) { 164 for (int i = 0; i < length2; ++i) { 165 new_value[i+length] = string2->CharAt(i); 166 } 167 } else { 168 memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t)); 169 } 170 } 171 return new_string.Ptr(); 172 } 173 174 String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) { 175 CHECK(utf16_data_in != nullptr || utf16_length == 0); 176 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); 177 const bool compressible = kUseStringCompression && 178 String::AllASCII<uint16_t>(utf16_data_in, utf16_length); 179 int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible); 180 SetStringCountVisitor visitor(length_with_flag); 181 ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor); 182 if (UNLIKELY(string == nullptr)) { 183 return nullptr; 184 } 185 if (compressible) { 186 for (int i = 0; i < utf16_length; ++i) { 187 string->GetValueCompressed()[i] = static_cast<uint8_t>(utf16_data_in[i]); 188 } 189 } else { 190 uint16_t* array = string->GetValue(); 191 memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t)); 192 } 193 return string.Ptr(); 194 } 195 196 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) { 197 DCHECK(utf != nullptr); 198 size_t byte_count = strlen(utf); 199 size_t char_count = CountModifiedUtf8Chars(utf, byte_count); 200 return AllocFromModifiedUtf8(self, char_count, utf, byte_count); 201 } 202 203 String* String::AllocFromModifiedUtf8(Thread* self, 204 int32_t utf16_length, 205 const char* utf8_data_in) { 206 return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in)); 207 } 208 209 String* String::AllocFromModifiedUtf8(Thread* self, 210 int32_t utf16_length, 211 const char* utf8_data_in, 212 int32_t utf8_length) { 213 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); 214 const bool compressible = kUseStringCompression && (utf16_length == utf8_length); 215 const int32_t utf16_length_with_flag = String::GetFlaggedCount(utf16_length, compressible); 216 SetStringCountVisitor visitor(utf16_length_with_flag); 217 ObjPtr<String> string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor); 218 if (UNLIKELY(string == nullptr)) { 219 return nullptr; 220 } 221 if (compressible) { 222 memcpy(string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t)); 223 } else { 224 uint16_t* utf16_data_out = string->GetValue(); 225 ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); 226 } 227 return string.Ptr(); 228 } 229 230 bool String::Equals(ObjPtr<String> that) { 231 if (this == that) { 232 // Quick reference equality test 233 return true; 234 } else if (that == nullptr) { 235 // Null isn't an instanceof anything 236 return false; 237 } else if (this->GetLength() != that->GetLength()) { 238 // Quick length inequality test 239 return false; 240 } else { 241 // Note: don't short circuit on hash code as we're presumably here as the 242 // hash code was already equal 243 for (int32_t i = 0; i < that->GetLength(); ++i) { 244 if (this->CharAt(i) != that->CharAt(i)) { 245 return false; 246 } 247 } 248 return true; 249 } 250 } 251 252 bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) { 253 if (this->GetLength() != that_length) { 254 return false; 255 } else { 256 for (int32_t i = 0; i < that_length; ++i) { 257 if (this->CharAt(i) != that_chars[that_offset + i]) { 258 return false; 259 } 260 } 261 return true; 262 } 263 } 264 265 bool String::Equals(const char* modified_utf8) { 266 const int32_t length = GetLength(); 267 int32_t i = 0; 268 while (i < length) { 269 const uint32_t ch = GetUtf16FromUtf8(&modified_utf8); 270 if (ch == '\0') { 271 return false; 272 } 273 274 if (GetLeadingUtf16Char(ch) != CharAt(i++)) { 275 return false; 276 } 277 278 const uint16_t trailing = GetTrailingUtf16Char(ch); 279 if (trailing != 0) { 280 if (i == length) { 281 return false; 282 } 283 284 if (CharAt(i++) != trailing) { 285 return false; 286 } 287 } 288 } 289 return *modified_utf8 == '\0'; 290 } 291 292 bool String::Equals(const StringPiece& modified_utf8) { 293 const int32_t length = GetLength(); 294 const char* p = modified_utf8.data(); 295 for (int32_t i = 0; i < length; ++i) { 296 uint32_t ch = GetUtf16FromUtf8(&p); 297 298 if (GetLeadingUtf16Char(ch) != CharAt(i)) { 299 return false; 300 } 301 302 const uint16_t trailing = GetTrailingUtf16Char(ch); 303 if (trailing != 0) { 304 if (i == (length - 1)) { 305 return false; 306 } 307 308 if (CharAt(++i) != trailing) { 309 return false; 310 } 311 } 312 } 313 return true; 314 } 315 316 // Create a modified UTF-8 encoded std::string from a java/lang/String object. 317 std::string String::ToModifiedUtf8() { 318 size_t byte_count = GetUtfLength(); 319 std::string result(byte_count, static_cast<char>(0)); 320 if (IsCompressed()) { 321 for (size_t i = 0; i < byte_count; ++i) { 322 result[i] = static_cast<char>(CharAt(i)); 323 } 324 } else { 325 const uint16_t* chars = GetValue(); 326 ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); 327 } 328 return result; 329 } 330 331 int32_t String::CompareTo(ObjPtr<String> rhs) { 332 // Quick test for comparison of a string with itself. 333 ObjPtr<String> lhs = this; 334 if (lhs == rhs) { 335 return 0; 336 } 337 int32_t lhs_count = lhs->GetLength(); 338 int32_t rhs_count = rhs->GetLength(); 339 int32_t count_diff = lhs_count - rhs_count; 340 int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count; 341 if (lhs->IsCompressed() && rhs->IsCompressed()) { 342 const uint8_t* lhs_chars = lhs->GetValueCompressed(); 343 const uint8_t* rhs_chars = rhs->GetValueCompressed(); 344 for (int32_t i = 0; i < min_count; ++i) { 345 int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]); 346 if (char_diff != 0) { 347 return char_diff; 348 } 349 } 350 } else if (lhs->IsCompressed() || rhs->IsCompressed()) { 351 const uint8_t* compressed_chars = 352 lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed(); 353 const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue(); 354 for (int32_t i = 0; i < min_count; ++i) { 355 int32_t char_diff = 356 static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]); 357 if (char_diff != 0) { 358 return lhs->IsCompressed() ? char_diff : -char_diff; 359 } 360 } 361 } else { 362 const uint16_t* lhs_chars = lhs->GetValue(); 363 const uint16_t* rhs_chars = rhs->GetValue(); 364 // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch 365 // where memcmp() only guarantees that the returned value has the same sign. 366 int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count); 367 if (char_diff != 0) { 368 return char_diff; 369 } 370 } 371 return count_diff; 372 } 373 374 void String::VisitRoots(RootVisitor* visitor) { 375 java_lang_String_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass)); 376 } 377 378 CharArray* String::ToCharArray(Thread* self) { 379 StackHandleScope<1> hs(self); 380 Handle<String> string(hs.NewHandle(this)); 381 ObjPtr<CharArray> result = CharArray::Alloc(self, GetLength()); 382 if (result != nullptr) { 383 if (string->IsCompressed()) { 384 int32_t length = string->GetLength(); 385 for (int i = 0; i < length; ++i) { 386 result->GetData()[i] = string->CharAt(i); 387 } 388 } else { 389 memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t)); 390 } 391 } else { 392 self->AssertPendingOOMException(); 393 } 394 return result.Ptr(); 395 } 396 397 void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) { 398 uint16_t* data = array->GetData() + index; 399 if (IsCompressed()) { 400 for (int i = start; i < end; ++i) { 401 data[i-start] = CharAt(i); 402 } 403 } else { 404 uint16_t* value = GetValue() + start; 405 memcpy(data, value, (end - start) * sizeof(uint16_t)); 406 } 407 } 408 409 bool String::IsValueNull() { 410 return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr); 411 } 412 413 std::string String::PrettyStringDescriptor(ObjPtr<mirror::String> java_descriptor) { 414 if (java_descriptor == nullptr) { 415 return "null"; 416 } 417 return java_descriptor->PrettyStringDescriptor(); 418 } 419 420 std::string String::PrettyStringDescriptor() { 421 return PrettyDescriptor(ToModifiedUtf8().c_str()); 422 } 423 424 ObjPtr<String> String::Intern() { 425 return Runtime::Current()->GetInternTable()->InternWeak(this); 426 } 427 428 } // namespace mirror 429 } // namespace art 430