Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/utf_string_conversions.h"
      6 
      7 #include <stdint.h>
      8 
      9 #include "base/strings/string_piece.h"
     10 #include "base/strings/string_util.h"
     11 #include "base/strings/utf_string_conversion_utils.h"
     12 #include "build/build_config.h"
     13 
     14 namespace base {
     15 
     16 namespace {
     17 
     18 // Generalized Unicode converter -----------------------------------------------
     19 
     20 // Converts the given source Unicode character type to the given destination
     21 // Unicode character type as a STL string. The given input buffer and size
     22 // determine the source, and the given output STL string will be replaced by
     23 // the result.
     24 template<typename SRC_CHAR, typename DEST_STRING>
     25 bool ConvertUnicode(const SRC_CHAR* src,
     26                     size_t src_len,
     27                     DEST_STRING* output) {
     28   // ICU requires 32-bit numbers.
     29   bool success = true;
     30   int32_t src_len32 = static_cast<int32_t>(src_len);
     31   for (int32_t i = 0; i < src_len32; i++) {
     32     uint32_t code_point;
     33     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     34       WriteUnicodeCharacter(code_point, output);
     35     } else {
     36       WriteUnicodeCharacter(0xFFFD, output);
     37       success = false;
     38     }
     39   }
     40 
     41   return success;
     42 }
     43 
     44 }  // namespace
     45 
     46 // UTF-8 <-> Wide --------------------------------------------------------------
     47 
     48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
     49   if (IsStringASCII(std::wstring(src, src_len))) {
     50     output->assign(src, src + src_len);
     51     return true;
     52   } else {
     53     PrepareForUTF8Output(src, src_len, output);
     54     return ConvertUnicode(src, src_len, output);
     55   }
     56 }
     57 
     58 std::string WideToUTF8(const std::wstring& wide) {
     59   if (IsStringASCII(wide)) {
     60     return std::string(wide.data(), wide.data() + wide.length());
     61   }
     62 
     63   std::string ret;
     64   PrepareForUTF8Output(wide.data(), wide.length(), &ret);
     65   ConvertUnicode(wide.data(), wide.length(), &ret);
     66   return ret;
     67 }
     68 
     69 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
     70   if (IsStringASCII(StringPiece(src, src_len))) {
     71     output->assign(src, src + src_len);
     72     return true;
     73   } else {
     74     PrepareForUTF16Or32Output(src, src_len, output);
     75     return ConvertUnicode(src, src_len, output);
     76   }
     77 }
     78 
     79 std::wstring UTF8ToWide(StringPiece utf8) {
     80   if (IsStringASCII(utf8)) {
     81     return std::wstring(utf8.begin(), utf8.end());
     82   }
     83 
     84   std::wstring ret;
     85   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
     86   ConvertUnicode(utf8.data(), utf8.length(), &ret);
     87   return ret;
     88 }
     89 
     90 // UTF-16 <-> Wide -------------------------------------------------------------
     91 
     92 #if defined(WCHAR_T_IS_UTF16)
     93 
     94 // When wide == UTF-16, then conversions are a NOP.
     95 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     96   output->assign(src, src_len);
     97   return true;
     98 }
     99 
    100 string16 WideToUTF16(const std::wstring& wide) {
    101   return wide;
    102 }
    103 
    104 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
    105   output->assign(src, src_len);
    106   return true;
    107 }
    108 
    109 std::wstring UTF16ToWide(const string16& utf16) {
    110   return utf16;
    111 }
    112 
    113 #elif defined(WCHAR_T_IS_UTF32)
    114 
    115 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
    116   output->clear();
    117   // Assume that normally we won't have any non-BMP characters so the counts
    118   // will be the same.
    119   output->reserve(src_len);
    120   return ConvertUnicode(src, src_len, output);
    121 }
    122 
    123 string16 WideToUTF16(const std::wstring& wide) {
    124   string16 ret;
    125   WideToUTF16(wide.data(), wide.length(), &ret);
    126   return ret;
    127 }
    128 
    129 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
    130   output->clear();
    131   // Assume that normally we won't have any non-BMP characters so the counts
    132   // will be the same.
    133   output->reserve(src_len);
    134   return ConvertUnicode(src, src_len, output);
    135 }
    136 
    137 std::wstring UTF16ToWide(const string16& utf16) {
    138   std::wstring ret;
    139   UTF16ToWide(utf16.data(), utf16.length(), &ret);
    140   return ret;
    141 }
    142 
    143 #endif  // defined(WCHAR_T_IS_UTF32)
    144 
    145 // UTF16 <-> UTF8 --------------------------------------------------------------
    146 
    147 #if defined(WCHAR_T_IS_UTF32)
    148 
    149 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    150   if (IsStringASCII(StringPiece(src, src_len))) {
    151     output->assign(src, src + src_len);
    152     return true;
    153   } else {
    154     PrepareForUTF16Or32Output(src, src_len, output);
    155     return ConvertUnicode(src, src_len, output);
    156   }
    157 }
    158 
    159 string16 UTF8ToUTF16(StringPiece utf8) {
    160   if (IsStringASCII(utf8)) {
    161     return string16(utf8.begin(), utf8.end());
    162   }
    163 
    164   string16 ret;
    165   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
    166   // Ignore the success flag of this call, it will do the best it can for
    167   // invalid input, which is what we want here.
    168   ConvertUnicode(utf8.data(), utf8.length(), &ret);
    169   return ret;
    170 }
    171 
    172 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    173   if (IsStringASCII(StringPiece16(src, src_len))) {
    174     output->assign(src, src + src_len);
    175     return true;
    176   } else {
    177     PrepareForUTF8Output(src, src_len, output);
    178     return ConvertUnicode(src, src_len, output);
    179   }
    180 }
    181 
    182 std::string UTF16ToUTF8(StringPiece16 utf16) {
    183   std::string ret;
    184   // Ignore the success flag of this call, it will do the best it can for
    185   // invalid input, which is what we want here.
    186   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
    187   return ret;
    188 }
    189 
    190 #elif defined(WCHAR_T_IS_UTF16)
    191 // Easy case since we can use the "wide" versions we already wrote above.
    192 
    193 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    194   return UTF8ToWide(src, src_len, output);
    195 }
    196 
    197 string16 UTF8ToUTF16(StringPiece utf8) {
    198   return UTF8ToWide(utf8);
    199 }
    200 
    201 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    202   return WideToUTF8(src, src_len, output);
    203 }
    204 
    205 std::string UTF16ToUTF8(StringPiece16 utf16) {
    206   if (IsStringASCII(utf16))
    207     return std::string(utf16.data(), utf16.data() + utf16.length());
    208 
    209   std::string ret;
    210   PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
    211   ConvertUnicode(utf16.data(), utf16.length(), &ret);
    212   return ret;
    213 }
    214 
    215 #endif
    216 
    217 string16 ASCIIToUTF16(StringPiece ascii) {
    218   DCHECK(IsStringASCII(ascii)) << ascii;
    219   return string16(ascii.begin(), ascii.end());
    220 }
    221 
    222 std::string UTF16ToASCII(StringPiece16 utf16) {
    223   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
    224   return std::string(utf16.begin(), utf16.end());
    225 }
    226 
    227 }  // namespace base
    228