/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * UTF-8 and Unicode string manipulation, plus java/lang/String convenience
 * functions.
 *
 * In most cases we populate the fields in the String object directly,
 * rather than going through an instance field lookup.
 */
#include "Dalvik.h"
#include <stdlib.h>

/*
 * Initialize string globals.
 *
 * This isn't part of the VM init sequence because it's hard to get the
 * timing right -- we need it to happen after java/lang/String has been
 * loaded, but before anybody wants to use a string.  It's easiest to
 * just initialize it on first use.
 *
 * In some unusual circumstances (e.g. trying to throw an exception because
 * String implements java/lang/CharSequence, but CharSequence doesn't exist)
 * we can try to create an exception string internally before anything has
 * really tried to use String.  In that case we basically self-destruct.
 *
 * We're expecting to be essentially single-threaded at this point.
 * We employ atomics to ensure everything is observed correctly, and also
 * to guarantee that we do detect a problem if our assumption is wrong.
43 */ 44 static bool stringStartup() 45 { 46 if (gDvm.javaLangStringReady < 0) { 47 LOGE("ERROR: reentrant string initialization\n"); 48 assert(false); 49 return false; 50 } 51 52 if (android_atomic_acquire_cas(0, -1, &gDvm.javaLangStringReady) != 0) { 53 LOGE("ERROR: initial string-ready state not 0 (%d)\n", 54 gDvm.javaLangStringReady); 55 return false; 56 } 57 58 if (gDvm.classJavaLangString == NULL) 59 gDvm.classJavaLangString = 60 dvmFindSystemClassNoInit("Ljava/lang/String;"); 61 62 gDvm.offJavaLangString_value = 63 dvmFindFieldOffset(gDvm.classJavaLangString, "value", "[C"); 64 gDvm.offJavaLangString_count = 65 dvmFindFieldOffset(gDvm.classJavaLangString, "count", "I"); 66 gDvm.offJavaLangString_offset = 67 dvmFindFieldOffset(gDvm.classJavaLangString, "offset", "I"); 68 gDvm.offJavaLangString_hashCode = 69 dvmFindFieldOffset(gDvm.classJavaLangString, "hashCode", "I"); 70 71 if (gDvm.offJavaLangString_value < 0 || 72 gDvm.offJavaLangString_count < 0 || 73 gDvm.offJavaLangString_offset < 0 || 74 gDvm.offJavaLangString_hashCode < 0) 75 { 76 LOGE("VM-required field missing from java/lang/String\n"); 77 return false; 78 } 79 80 bool badValue = false; 81 if (gDvm.offJavaLangString_value != STRING_FIELDOFF_VALUE) { 82 LOGE("InlineNative: String.value offset = %d, expected %d\n", 83 gDvm.offJavaLangString_value, STRING_FIELDOFF_VALUE); 84 badValue = true; 85 } 86 if (gDvm.offJavaLangString_count != STRING_FIELDOFF_COUNT) { 87 LOGE("InlineNative: String.count offset = %d, expected %d\n", 88 gDvm.offJavaLangString_count, STRING_FIELDOFF_COUNT); 89 badValue = true; 90 } 91 if (gDvm.offJavaLangString_offset != STRING_FIELDOFF_OFFSET) { 92 LOGE("InlineNative: String.offset offset = %d, expected %d\n", 93 gDvm.offJavaLangString_offset, STRING_FIELDOFF_OFFSET); 94 badValue = true; 95 } 96 if (gDvm.offJavaLangString_hashCode != STRING_FIELDOFF_HASHCODE) { 97 LOGE("InlineNative: String.hashCode offset = %d, expected %d\n", 98 gDvm.offJavaLangString_hashCode, 
STRING_FIELDOFF_HASHCODE); 99 badValue = true; 100 } 101 if (badValue) 102 return false; 103 104 android_atomic_release_store(1, &gDvm.javaLangStringReady); 105 106 return true; 107 } 108 109 /* 110 * Discard heap-allocated storage. 111 */ 112 void dvmStringShutdown() 113 { 114 // currently unused 115 } 116 117 /* 118 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 119 * 120 * This may or may not yield the same results as the java/lang/String 121 * computeHashCode() function. (To make sure this doesn't get abused, 122 * I'm initializing the hash code to 1 so they *don't* match up.) 123 * 124 * It would be more correct to invoke dexGetUtf16FromUtf8() here and compute 125 * the hash with the result. That way, if something encoded the same 126 * character in two different ways, the hash value would be the same. For 127 * our purposes that isn't necessary. 128 */ 129 u4 dvmComputeUtf8Hash(const char* utf8Str) 130 { 131 u4 hash = 1; 132 133 while (*utf8Str != '\0') 134 hash = hash * 31 + *utf8Str++; 135 136 return hash; 137 } 138 139 /* 140 * Like "strlen", but for strings encoded with "modified" UTF-8. 141 * 142 * The value returned is the number of characters, which may or may not 143 * be the same as the number of bytes. 144 * 145 * (If this needs optimizing, try: mask against 0xa0, shift right 5, 146 * get increment {1-3} from table of 8 values.) 147 */ 148 int dvmUtf8Len(const char* utf8Str) 149 { 150 int ic, len = 0; 151 152 while ((ic = *utf8Str++) != '\0') { 153 len++; 154 if ((ic & 0x80) != 0) { 155 /* two- or three-byte encoding */ 156 utf8Str++; 157 if ((ic & 0x20) != 0) { 158 /* three-byte encoding */ 159 utf8Str++; 160 } 161 } 162 } 163 164 return len; 165 } 166 167 /* 168 * Convert a "modified" UTF-8 string to UTF-16. 
169 */ 170 void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str) 171 { 172 while (*utf8Str != '\0') 173 *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str); 174 } 175 176 /* 177 * Given a UTF-16 string, compute the length of the corresponding UTF-8 178 * string in bytes. 179 */ 180 static int utf16_utf8ByteLen(const u2* utf16Str, int len) 181 { 182 int utf8Len = 0; 183 184 while (len--) { 185 unsigned int uic = *utf16Str++; 186 187 /* 188 * The most common case is (uic > 0 && uic <= 0x7f). 189 */ 190 if (uic == 0 || uic > 0x7f) { 191 if (uic > 0x07ff) 192 utf8Len += 3; 193 else /*(uic > 0x7f || uic == 0) */ 194 utf8Len += 2; 195 } else 196 utf8Len++; 197 } 198 return utf8Len; 199 } 200 201 /* 202 * Convert a UTF-16 string to UTF-8. 203 * 204 * Make sure you allocate "utf8Str" with the result of utf16_utf8ByteLen(), 205 * not just "len". 206 */ 207 static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len) 208 { 209 assert(len >= 0); 210 211 while (len--) { 212 unsigned int uic = *utf16Str++; 213 214 /* 215 * The most common case is (uic > 0 && uic <= 0x7f). 216 */ 217 if (uic == 0 || uic > 0x7f) { 218 if (uic > 0x07ff) { 219 *utf8Str++ = (uic >> 12) | 0xe0; 220 *utf8Str++ = ((uic >> 6) & 0x3f) | 0x80; 221 *utf8Str++ = (uic & 0x3f) | 0x80; 222 } else /*(uic > 0x7f || uic == 0)*/ { 223 *utf8Str++ = (uic >> 6) | 0xc0; 224 *utf8Str++ = (uic & 0x3f) | 0x80; 225 } 226 } else { 227 *utf8Str++ = uic; 228 } 229 } 230 231 *utf8Str = '\0'; 232 } 233 234 /* 235 * Use the java/lang/String.computeHashCode() algorithm. 
236 */ 237 static inline u4 dvmComputeUtf16Hash(const u2* utf16Str, int len) 238 { 239 u4 hash = 0; 240 241 while (len--) 242 hash = hash * 31 + *utf16Str++; 243 244 return hash; 245 } 246 u4 dvmComputeStringHash(const StringObject* strObj) { 247 ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj, 248 STRING_FIELDOFF_VALUE); 249 int offset, len; 250 251 len = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_COUNT); 252 offset = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_OFFSET); 253 254 return dvmComputeUtf16Hash((u2*) chars->contents + offset, len); 255 } 256 257 /* 258 * Create a new java/lang/String object, using the string data in "utf8Str". 259 * 260 * The caller must call dvmReleaseTrackedAlloc() on the return value. 261 * 262 * Returns NULL and throws an exception on failure. 263 */ 264 StringObject* dvmCreateStringFromCstr(const char* utf8Str) 265 { 266 assert(utf8Str != NULL); 267 return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str)); 268 } 269 270 /* 271 * Create a java/lang/String from a C string, given its UTF-16 length 272 * (number of UTF-16 code points). 273 * 274 * The caller must call dvmReleaseTrackedAlloc() on the return value. 275 * 276 * Returns NULL and throws an exception on failure. 
277 */ 278 StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str, 279 u4 utf16Length) 280 { 281 StringObject* newObj; 282 ArrayObject* chars; 283 u4 hashCode = 0; 284 285 //LOGV("Creating String from '%s'\n", utf8Str); 286 assert(utf8Str != NULL); 287 288 if (gDvm.javaLangStringReady <= 0) { 289 if (!stringStartup()) 290 return NULL; 291 } 292 293 /* init before alloc */ 294 if (!dvmIsClassInitialized(gDvm.classJavaLangString) && 295 !dvmInitClass(gDvm.classJavaLangString)) 296 { 297 return NULL; 298 } 299 300 newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString, 301 ALLOC_DEFAULT); 302 if (newObj == NULL) 303 return NULL; 304 305 chars = dvmAllocPrimitiveArray('C', utf16Length, ALLOC_DEFAULT); 306 if (chars == NULL) { 307 dvmReleaseTrackedAlloc((Object*) newObj, NULL); 308 return NULL; 309 } 310 dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str); 311 hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length); 312 313 dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE, 314 (Object*)chars); 315 dvmReleaseTrackedAlloc((Object*) chars, NULL); 316 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, utf16Length); 317 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode); 318 /* leave offset set to zero */ 319 320 /* debugging stuff */ 321 //dvmDumpObject((Object*)newObj); 322 //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, utf16Length * 2, 323 // kHexDumpMem); 324 325 /* caller may need to dvmReleaseTrackedAlloc(newObj) */ 326 return newObj; 327 } 328 329 /* 330 * Create a new java/lang/String object, using the Unicode data. 
331 */ 332 StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len) 333 { 334 StringObject* newObj; 335 ArrayObject* chars; 336 u4 hashCode = 0; 337 338 /* we allow a null pointer if the length is zero */ 339 assert(len == 0 || unichars != NULL); 340 341 if (gDvm.javaLangStringReady <= 0) { 342 if (!stringStartup()) 343 return NULL; 344 } 345 346 /* init before alloc */ 347 if (!dvmIsClassInitialized(gDvm.classJavaLangString) && 348 !dvmInitClass(gDvm.classJavaLangString)) 349 { 350 return NULL; 351 } 352 353 newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString, 354 ALLOC_DEFAULT); 355 if (newObj == NULL) 356 return NULL; 357 358 chars = dvmAllocPrimitiveArray('C', len, ALLOC_DEFAULT); 359 if (chars == NULL) { 360 dvmReleaseTrackedAlloc((Object*) newObj, NULL); 361 return NULL; 362 } 363 if (len > 0) 364 memcpy(chars->contents, unichars, len * sizeof(u2)); 365 hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len); 366 367 dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE, 368 (Object*)chars); 369 dvmReleaseTrackedAlloc((Object*) chars, NULL); 370 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, len); 371 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode); 372 /* leave offset set to zero */ 373 374 /* debugging stuff */ 375 //dvmDumpObject((Object*)newObj); 376 //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, len*2, kHexDumpMem); 377 378 /* caller must dvmReleaseTrackedAlloc(newObj) */ 379 return newObj; 380 } 381 382 /* 383 * Create a new C string from a java/lang/String object. 384 * 385 * Returns NULL if the object is NULL. 
386 */ 387 char* dvmCreateCstrFromString(StringObject* jstr) 388 { 389 char* newStr; 390 ArrayObject* chars; 391 int len, byteLen, offset; 392 const u2* data; 393 394 assert(gDvm.javaLangStringReady > 0); 395 396 if (jstr == NULL) 397 return NULL; 398 399 len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); 400 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); 401 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, 402 STRING_FIELDOFF_VALUE); 403 data = (const u2*) chars->contents + offset; 404 assert(offset + len <= (int) chars->length); 405 406 byteLen = utf16_utf8ByteLen(data, len); 407 newStr = (char*) malloc(byteLen+1); 408 if (newStr == NULL) 409 return NULL; 410 convertUtf16ToUtf8(newStr, data, len); 411 412 return newStr; 413 } 414 415 /* 416 * Create a UTF-8 C string from a region of a java/lang/String. (Used by 417 * the JNI GetStringUTFRegion call.) 418 */ 419 void dvmCreateCstrFromStringRegion(StringObject* jstr, int start, int len, 420 char* buf) 421 { 422 const u2* data; 423 424 data = dvmStringChars(jstr) + start; 425 convertUtf16ToUtf8(buf, data, len); 426 } 427 428 /* 429 * Compute the length, in modified UTF-8, of a java/lang/String object. 430 * 431 * Does not include the terminating null byte. 432 */ 433 int dvmStringUtf8ByteLen(StringObject* jstr) 434 { 435 ArrayObject* chars; 436 int len, offset; 437 const u2* data; 438 439 assert(gDvm.javaLangStringReady > 0); 440 441 if (jstr == NULL) 442 return 0; // should we throw something? assert? 443 444 len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); 445 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); 446 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, 447 STRING_FIELDOFF_VALUE); 448 data = (const u2*) chars->contents + offset; 449 assert(offset + len <= (int) chars->length); 450 451 return utf16_utf8ByteLen(data, len); 452 } 453 454 /* 455 * Get the string's length. 
456 */ 457 int dvmStringLen(StringObject* jstr) 458 { 459 return dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); 460 } 461 462 /* 463 * Get the char[] object from the String. 464 */ 465 ArrayObject* dvmStringCharArray(StringObject* jstr) 466 { 467 return (ArrayObject*) dvmGetFieldObject((Object*) jstr, 468 STRING_FIELDOFF_VALUE); 469 } 470 471 /* 472 * Get the string's data. 473 */ 474 const u2* dvmStringChars(StringObject* jstr) 475 { 476 ArrayObject* chars; 477 int offset; 478 479 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); 480 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, 481 STRING_FIELDOFF_VALUE); 482 return (const u2*) chars->contents + offset; 483 } 484 485 486 /* 487 * Compare two String objects. 488 * 489 * This is a dvmHashTableLookup() callback. The function has already 490 * compared their hash values; we need to do a full compare to ensure 491 * that the strings really match. 492 */ 493 int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2) 494 { 495 const StringObject* strObj1 = (const StringObject*) vstrObj1; 496 const StringObject* strObj2 = (const StringObject*) vstrObj2; 497 ArrayObject* chars1; 498 ArrayObject* chars2; 499 int len1, len2, offset1, offset2; 500 501 assert(gDvm.javaLangStringReady > 0); 502 503 /* get offset and length into char array; all values are in 16-bit units */ 504 len1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_COUNT); 505 offset1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_OFFSET); 506 len2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_COUNT); 507 offset2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_OFFSET); 508 if (len1 != len2) 509 return len1 - len2; 510 511 chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1, 512 STRING_FIELDOFF_VALUE); 513 chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2, 514 STRING_FIELDOFF_VALUE); 515 516 /* damage here actually indicates a broken java/lang/String */ 517 assert(offset1 + len1 <= (int) 
chars1->length); 518 assert(offset2 + len2 <= (int) chars2->length); 519 520 return memcmp((const u2*) chars1->contents + offset1, 521 (const u2*) chars2->contents + offset2, 522 len1 * sizeof(u2)); 523 } 524