1 /* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <utils/String16.h> 18 19 #include <utils/Debug.h> 20 #include <utils/Log.h> 21 #include <utils/String8.h> 22 #include <utils/TextOutput.h> 23 #include <utils/threads.h> 24 25 #include <private/utils/Static.h> 26 27 #ifdef HAVE_WINSOCK 28 # undef nhtol 29 # undef htonl 30 # undef nhtos 31 # undef htons 32 33 # ifdef HAVE_LITTLE_ENDIAN 34 # define ntohl(x) ( ((x) << 24) | (((x) >> 24) & 255) | (((x) << 8) & 0xff0000) | (((x) >> 8) & 0xff00) ) 35 # define htonl(x) ntohl(x) 36 # define ntohs(x) ( (((x) << 8) & 0xff00) | (((x) >> 8) & 255) ) 37 # define htons(x) ntohs(x) 38 # else 39 # define ntohl(x) (x) 40 # define htonl(x) (x) 41 # define ntohs(x) (x) 42 # define htons(x) (x) 43 # endif 44 #else 45 # include <netinet/in.h> 46 #endif 47 48 #include <memory.h> 49 #include <stdio.h> 50 #include <ctype.h> 51 52 // --------------------------------------------------------------------------- 53 54 int strcmp16(const char16_t *s1, const char16_t *s2) 55 { 56 char16_t ch; 57 int d = 0; 58 59 while ( 1 ) { 60 d = (int)(ch = *s1++) - (int)*s2++; 61 if ( d || !ch ) 62 break; 63 } 64 65 return d; 66 } 67 68 int strncmp16(const char16_t *s1, const char16_t *s2, size_t n) 69 { 70 char16_t ch; 71 int d = 0; 72 73 while ( n-- ) { 74 d = (int)(ch = *s1++) - (int)*s2++; 75 if ( d || !ch ) 76 break; 77 } 78 79 return d; 80 } 81 82 char16_t *strcpy16(char16_t *dst, const char16_t *src) 83 { 84 char16_t *q = dst; 85 const char16_t *p = src; 86 char16_t ch; 87 88 do { 89 *q++ = ch = *p++; 90 } while ( ch ); 91 92 return dst; 93 } 94 95 size_t strlen16(const char16_t *s) 96 { 97 const char16_t *ss = s; 98 while ( *ss ) 99 ss++; 100 return ss-s; 101 } 102 103 104 char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n) 105 { 106 char16_t *q = dst; 107 const char16_t *p = src; 108 char ch; 109 110 while (n) { 111 n--; 112 *q++ = ch = *p++; 113 if ( !ch ) 114 break; 115 } 116 117 *q = 0; 118 119 return dst; 120 } 121 122 size_t strnlen16(const char16_t *s, size_t maxlen) 123 { 124 const char16_t *ss = s; 125 126 /* Important: the maxlen test must precede the reference through ss; 127 since the byte beyond the maximum may segfault */ 128 while ((maxlen > 0) && *ss) { 129 ss++; 130 maxlen--; 131 } 132 return ss-s; 133 } 134 135 int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2) 136 { 137 const char16_t* e1 = s1+n1; 138 const char16_t* e2 = s2+n2; 139 140 while (s1 < e1 && s2 < e2) { 141 const int d = (int)*s1++ - (int)*s2++; 142 if (d) { 143 return d; 144 } 145 } 146 147 return n1 < n2 148 ? (0 - (int)*s2) 149 : (n1 > n2 150 ? ((int)*s1 - 0) 151 : 0); 152 } 153 154 int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2) 155 { 156 const char16_t* e1 = s1H+n1; 157 const char16_t* e2 = s2N+n2; 158 159 while (s1H < e1 && s2N < e2) { 160 const char16_t c2 = ntohs(*s2N); 161 const int d = (int)*s1H++ - (int)c2; 162 s2N++; 163 if (d) { 164 return d; 165 } 166 } 167 168 return n1 < n2 169 ? (0 - (int)ntohs(*s2N)) 170 : (n1 > n2 171 ? ((int)*s1H - 0) 172 : 0); 173 } 174 175 static inline size_t 176 utf8_char_len(uint8_t ch) 177 { 178 return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1; 179 } 180 181 #define UTF8_SHIFT_AND_MASK(unicode, byte) (unicode)<<=6; (unicode) |= (0x3f & (byte)); 182 183 static inline uint32_t 184 utf8_to_utf32(const uint8_t *src, size_t length) 185 { 186 uint32_t unicode; 187 188 switch (length) 189 { 190 case 1: 191 return src[0]; 192 case 2: 193 unicode = src[0] & 0x1f; 194 UTF8_SHIFT_AND_MASK(unicode, src[1]) 195 return unicode; 196 case 3: 197 unicode = src[0] & 0x0f; 198 UTF8_SHIFT_AND_MASK(unicode, src[1]) 199 UTF8_SHIFT_AND_MASK(unicode, src[2]) 200 return unicode; 201 case 4: 202 unicode = src[0] & 0x07; 203 UTF8_SHIFT_AND_MASK(unicode, src[1]) 204 UTF8_SHIFT_AND_MASK(unicode, src[2]) 205 UTF8_SHIFT_AND_MASK(unicode, src[3]) 206 return unicode; 207 default: 208 return 0xffff; 209 } 210 211 //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result); 212 } 213 214 void 215 utf8_to_utf16(const uint8_t *src, size_t srcLen, 216 char16_t* dst, const size_t dstLen) 217 { 218 const uint8_t* const end = src + srcLen; 219 const char16_t* const dstEnd = dst + dstLen; 220 while (src < end && dst < dstEnd) { 221 size_t len = utf8_char_len(*src); 222 uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len); 223 224 // Convert the UTF32 codepoint to one or more UTF16 codepoints 225 if (codepoint <= 0xFFFF) { 226 // Single UTF16 character 227 *dst++ = (char16_t) codepoint; 228 } else { 229 // Multiple UTF16 characters with surrogates 230 codepoint = codepoint - 0x10000; 231 *dst++ = (char16_t) ((codepoint >> 10) + 0xD800); 232 *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00); 233 } 234 235 src += len; 236 } 237 if (dst < dstEnd) { 238 *dst = 0; 239 } 240 } 241 242 // --------------------------------------------------------------------------- 243 244 namespace android { 245 246 static SharedBuffer* gEmptyStringBuf = NULL; 247 static char16_t* gEmptyString = NULL; 248 249 static inline char16_t* getEmptyString() 250 { 251 gEmptyStringBuf->acquire(); 252 return gEmptyString; 253 } 254 255 void initialize_string16() 256 { 257 SharedBuffer* buf = SharedBuffer::alloc(sizeof(char16_t)); 258 char16_t* str = (char16_t*)buf->data(); 259 *str = 0; 260 gEmptyStringBuf = buf; 261 gEmptyString = str; 262 } 263 264 void terminate_string16() 265 { 266 SharedBuffer::bufferFromData(gEmptyString)->release(); 267 gEmptyStringBuf = NULL; 268 gEmptyString = NULL; 269 } 270 271 // --------------------------------------------------------------------------- 272 273 static char16_t* allocFromUTF8(const char* in, size_t len) 274 { 275 if (len == 0) return getEmptyString(); 276 277 size_t chars = 0; 278 const char* end = in+len; 279 const char* p = in; 280 281 while (p < end) { 282 chars++; 283 int utf8len = utf8_char_len(*p); 284 uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, utf8len); 285 if (codepoint > 0xFFFF) chars++; // this will be a surrogate pair in utf16 286 p += utf8len; 287 } 288 289 size_t bufSize = (chars+1)*sizeof(char16_t); 290 SharedBuffer* buf = SharedBuffer::alloc(bufSize); 291 if (buf) { 292 p = in; 293 char16_t* str = (char16_t*)buf->data(); 294 295 utf8_to_utf16((const uint8_t*)p, len, str, bufSize); 296 297 //printf("Created UTF-16 string from UTF-8 \"%s\":", in); 298 //printHexData(1, str, buf->size(), 16, 1); 299 //printf("\n"); 300 301 return str; 302 } 303 304 return getEmptyString(); 305 } 306 307 // --------------------------------------------------------------------------- 308 309 String16::String16() 310 : mString(getEmptyString()) 311 { 312 } 313 314 String16::String16(const String16& o) 315 : mString(o.mString) 316 { 317 SharedBuffer::bufferFromData(mString)->acquire(); 318 } 319 320 String16::String16(const String16& o, size_t len, size_t begin) 321 : mString(getEmptyString()) 322 { 323 setTo(o, len, begin); 324 } 325 326 String16::String16(const char16_t* o) 327 { 328 size_t len = strlen16(o); 329 SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t)); 330 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 331 if (buf) { 332 char16_t* str = (char16_t*)buf->data(); 333 strcpy16(str, o); 334 mString = str; 335 return; 336 } 337 338 mString = getEmptyString(); 339 } 340 341 String16::String16(const char16_t* o, size_t len) 342 { 343 SharedBuffer* buf = SharedBuffer::alloc((len+1)*sizeof(char16_t)); 344 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 345 if (buf) { 346 char16_t* str = (char16_t*)buf->data(); 347 memcpy(str, o, len*sizeof(char16_t)); 348 str[len] = 0; 349 mString = str; 350 return; 351 } 352 353 mString = getEmptyString(); 354 } 355 356 String16::String16(const String8& o) 357 : mString(allocFromUTF8(o.string(), o.size())) 358 { 359 } 360 361 String16::String16(const char* o) 362 : mString(allocFromUTF8(o, strlen(o))) 363 { 364 } 365 366 String16::String16(const char* o, size_t len) 367 : mString(allocFromUTF8(o, len)) 368 { 369 } 370 371 String16::~String16() 372 { 373 SharedBuffer::bufferFromData(mString)->release(); 374 } 375 376 void String16::setTo(const String16& other) 377 { 378 SharedBuffer::bufferFromData(other.mString)->acquire(); 379 SharedBuffer::bufferFromData(mString)->release(); 380 mString = other.mString; 381 } 382 383 status_t String16::setTo(const String16& other, size_t len, size_t begin) 384 { 385 const size_t N = other.size(); 386 if (begin >= N) { 387 SharedBuffer::bufferFromData(mString)->release(); 388 mString = getEmptyString(); 389 return NO_ERROR; 390 } 391 if ((begin+len) > N) len = N-begin; 392 if (begin == 0 && len == N) { 393 setTo(other); 394 return NO_ERROR; 395 } 396 397 if (&other == this) { 398 LOG_ALWAYS_FATAL("Not implemented"); 399 } 400 401 return setTo(other.string()+begin, len); 402 } 403 404 status_t String16::setTo(const char16_t* other) 405 { 406 return setTo(other, strlen16(other)); 407 } 408 409 status_t String16::setTo(const char16_t* other, size_t len) 410 { 411 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 412 ->editResize((len+1)*sizeof(char16_t)); 413 if (buf) { 414 char16_t* str = (char16_t*)buf->data(); 415 memmove(str, other, len*sizeof(char16_t)); 416 str[len] = 0; 417 mString = str; 418 return NO_ERROR; 419 } 420 return NO_MEMORY; 421 } 422 423 status_t String16::append(const String16& other) 424 { 425 const size_t myLen = size(); 426 const size_t otherLen = other.size(); 427 if (myLen == 0) { 428 setTo(other); 429 return NO_ERROR; 430 } else if (otherLen == 0) { 431 return NO_ERROR; 432 } 433 434 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 435 ->editResize((myLen+otherLen+1)*sizeof(char16_t)); 436 if (buf) { 437 char16_t* str = (char16_t*)buf->data(); 438 memcpy(str+myLen, other, (otherLen+1)*sizeof(char16_t)); 439 mString = str; 440 return NO_ERROR; 441 } 442 return NO_MEMORY; 443 } 444 445 status_t String16::append(const char16_t* chrs, size_t otherLen) 446 { 447 const size_t myLen = size(); 448 if (myLen == 0) { 449 setTo(chrs, otherLen); 450 return NO_ERROR; 451 } else if (otherLen == 0) { 452 return NO_ERROR; 453 } 454 455 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 456 ->editResize((myLen+otherLen+1)*sizeof(char16_t)); 457 if (buf) { 458 char16_t* str = (char16_t*)buf->data(); 459 memcpy(str+myLen, chrs, otherLen*sizeof(char16_t)); 460 str[myLen+otherLen] = 0; 461 mString = str; 462 return NO_ERROR; 463 } 464 return NO_MEMORY; 465 } 466 467 status_t String16::insert(size_t pos, const char16_t* chrs) 468 { 469 return insert(pos, chrs, strlen16(chrs)); 470 } 471 472 status_t String16::insert(size_t pos, const char16_t* chrs, size_t len) 473 { 474 const size_t myLen = size(); 475 if (myLen == 0) { 476 return setTo(chrs, len); 477 return NO_ERROR; 478 } else if (len == 0) { 479 return NO_ERROR; 480 } 481 482 if (pos > myLen) pos = myLen; 483 484 #if 0 485 printf("Insert in to %s: pos=%d, len=%d, myLen=%d, chrs=%s\n", 486 String8(*this).string(), pos, 487 len, myLen, String8(chrs, len).string()); 488 #endif 489 490 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 491 ->editResize((myLen+len+1)*sizeof(char16_t)); 492 if (buf) { 493 char16_t* str = (char16_t*)buf->data(); 494 if (pos < myLen) { 495 memmove(str+pos+len, str+pos, (myLen-pos)*sizeof(char16_t)); 496 } 497 memcpy(str+pos, chrs, len*sizeof(char16_t)); 498 str[myLen+len] = 0; 499 mString = str; 500 #if 0 501 printf("Result (%d chrs): %s\n", size(), String8(*this).string()); 502 #endif 503 return NO_ERROR; 504 } 505 return NO_MEMORY; 506 } 507 508 ssize_t String16::findFirst(char16_t c) const 509 { 510 const char16_t* str = string(); 511 const char16_t* p = str; 512 const char16_t* e = p + size(); 513 while (p < e) { 514 if (*p == c) { 515 return p-str; 516 } 517 p++; 518 } 519 return -1; 520 } 521 522 ssize_t String16::findLast(char16_t c) const 523 { 524 const char16_t* str = string(); 525 const char16_t* p = str; 526 const char16_t* e = p + size(); 527 while (p < e) { 528 e--; 529 if (*e == c) { 530 return e-str; 531 } 532 } 533 return -1; 534 } 535 536 bool String16::startsWith(const String16& prefix) const 537 { 538 const size_t ps = prefix.size(); 539 if (ps > size()) return false; 540 return strzcmp16(mString, ps, prefix.string(), ps) == 0; 541 } 542 543 bool String16::startsWith(const char16_t* prefix) const 544 { 545 const size_t ps = strlen16(prefix); 546 if (ps > size()) return false; 547 return strncmp16(mString, prefix, ps) == 0; 548 } 549 550 status_t String16::makeLower() 551 { 552 const size_t N = size(); 553 const char16_t* str = string(); 554 char16_t* edit = NULL; 555 for (size_t i=0; i<N; i++) { 556 const char16_t v = str[i]; 557 if (v >= 'A' && v <= 'Z') { 558 if (!edit) { 559 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit(); 560 if (!buf) { 561 return NO_MEMORY; 562 } 563 edit = (char16_t*)buf->data(); 564 mString = str = edit; 565 } 566 edit[i] = tolower((char)v); 567 } 568 } 569 return NO_ERROR; 570 } 571 572 status_t String16::replaceAll(char16_t replaceThis, char16_t withThis) 573 { 574 const size_t N = size(); 575 const char16_t* str = string(); 576 char16_t* edit = NULL; 577 for (size_t i=0; i<N; i++) { 578 if (str[i] == replaceThis) { 579 if (!edit) { 580 SharedBuffer* buf = SharedBuffer::bufferFromData(mString)->edit(); 581 if (!buf) { 582 return NO_MEMORY; 583 } 584 edit = (char16_t*)buf->data(); 585 mString = str = edit; 586 } 587 edit[i] = withThis; 588 } 589 } 590 return NO_ERROR; 591 } 592 593 status_t String16::remove(size_t len, size_t begin) 594 { 595 const size_t N = size(); 596 if (begin >= N) { 597 SharedBuffer::bufferFromData(mString)->release(); 598 mString = getEmptyString(); 599 return NO_ERROR; 600 } 601 if ((begin+len) > N) len = N-begin; 602 if (begin == 0 && len == N) { 603 return NO_ERROR; 604 } 605 606 if (begin > 0) { 607 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 608 ->editResize((N+1)*sizeof(char16_t)); 609 if (!buf) { 610 return NO_MEMORY; 611 } 612 char16_t* str = (char16_t*)buf->data(); 613 memmove(str, str+begin, (N-begin+1)*sizeof(char16_t)); 614 mString = str; 615 } 616 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 617 ->editResize((len+1)*sizeof(char16_t)); 618 if (buf) { 619 char16_t* str = (char16_t*)buf->data(); 620 str[len] = 0; 621 mString = str; 622 return NO_ERROR; 623 } 624 return NO_MEMORY; 625 } 626 627 TextOutput& operator<<(TextOutput& to, const String16& val) 628 { 629 to << String8(val).string(); 630 return to; 631 } 632 633 }; // namespace android 634