Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/utf_string_conversions.h"
      6 
      7 #include "base/strings/string_piece.h"
      8 #include "base/strings/string_util.h"
      9 #include "base/strings/utf_string_conversion_utils.h"
     10 
     11 namespace base {
     12 
     13 namespace {
     14 
     15 // Generalized Unicode converter -----------------------------------------------
     16 
     17 // Converts the given source Unicode character type to the given destination
     18 // Unicode character type as a STL string. The given input buffer and size
     19 // determine the source, and the given output STL string will be replaced by
     20 // the result.
     21 template<typename SRC_CHAR, typename DEST_STRING>
     22 bool ConvertUnicode(const SRC_CHAR* src,
     23                     size_t src_len,
     24                     DEST_STRING* output) {
     25   // ICU requires 32-bit numbers.
     26   bool success = true;
     27   int32 src_len32 = static_cast<int32>(src_len);
     28   for (int32 i = 0; i < src_len32; i++) {
     29     uint32 code_point;
     30     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     31       WriteUnicodeCharacter(code_point, output);
     32     } else {
     33       WriteUnicodeCharacter(0xFFFD, output);
     34       success = false;
     35     }
     36   }
     37 
     38   return success;
     39 }
     40 
     41 }  // namespace
     42 
     43 // UTF-8 <-> Wide --------------------------------------------------------------
     44 
     45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
     46   PrepareForUTF8Output(src, src_len, output);
     47   return ConvertUnicode(src, src_len, output);
     48 }
     49 
     50 std::string WideToUTF8(const std::wstring& wide) {
     51   std::string ret;
     52   // Ignore the success flag of this call, it will do the best it can for
     53   // invalid input, which is what we want here.
     54   WideToUTF8(wide.data(), wide.length(), &ret);
     55   return ret;
     56 }
     57 
     58 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
     59   PrepareForUTF16Or32Output(src, src_len, output);
     60   return ConvertUnicode(src, src_len, output);
     61 }
     62 
     63 std::wstring UTF8ToWide(const StringPiece& utf8) {
     64   std::wstring ret;
     65   UTF8ToWide(utf8.data(), utf8.length(), &ret);
     66   return ret;
     67 }
     68 
     69 // UTF-16 <-> Wide -------------------------------------------------------------
     70 
     71 #if defined(WCHAR_T_IS_UTF16)
     72 
     73 // When wide == UTF-16, then conversions are a NOP.
     74 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     75   output->assign(src, src_len);
     76   return true;
     77 }
     78 
     79 string16 WideToUTF16(const std::wstring& wide) {
     80   return wide;
     81 }
     82 
     83 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
     84   output->assign(src, src_len);
     85   return true;
     86 }
     87 
     88 std::wstring UTF16ToWide(const string16& utf16) {
     89   return utf16;
     90 }
     91 
     92 #elif defined(WCHAR_T_IS_UTF32)
     93 
     94 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     95   output->clear();
     96   // Assume that normally we won't have any non-BMP characters so the counts
     97   // will be the same.
     98   output->reserve(src_len);
     99   return ConvertUnicode(src, src_len, output);
    100 }
    101 
    102 string16 WideToUTF16(const std::wstring& wide) {
    103   string16 ret;
    104   WideToUTF16(wide.data(), wide.length(), &ret);
    105   return ret;
    106 }
    107 
    108 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
    109   output->clear();
    110   // Assume that normally we won't have any non-BMP characters so the counts
    111   // will be the same.
    112   output->reserve(src_len);
    113   return ConvertUnicode(src, src_len, output);
    114 }
    115 
    116 std::wstring UTF16ToWide(const string16& utf16) {
    117   std::wstring ret;
    118   UTF16ToWide(utf16.data(), utf16.length(), &ret);
    119   return ret;
    120 }
    121 
    122 #endif  // defined(WCHAR_T_IS_UTF32)
    123 
    124 // UTF16 <-> UTF8 --------------------------------------------------------------
    125 
    126 #if defined(WCHAR_T_IS_UTF32)
    127 
    128 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    129   PrepareForUTF16Or32Output(src, src_len, output);
    130   return ConvertUnicode(src, src_len, output);
    131 }
    132 
    133 string16 UTF8ToUTF16(const StringPiece& utf8) {
    134   string16 ret;
    135   // Ignore the success flag of this call, it will do the best it can for
    136   // invalid input, which is what we want here.
    137   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
    138   return ret;
    139 }
    140 
    141 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    142   PrepareForUTF8Output(src, src_len, output);
    143   return ConvertUnicode(src, src_len, output);
    144 }
    145 
    146 std::string UTF16ToUTF8(const string16& utf16) {
    147   std::string ret;
    148   // Ignore the success flag of this call, it will do the best it can for
    149   // invalid input, which is what we want here.
    150   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
    151   return ret;
    152 }
    153 
    154 #elif defined(WCHAR_T_IS_UTF16)
    155 // Easy case since we can use the "wide" versions we already wrote above.
    156 
    157 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    158   return UTF8ToWide(src, src_len, output);
    159 }
    160 
    161 string16 UTF8ToUTF16(const StringPiece& utf8) {
    162   return UTF8ToWide(utf8);
    163 }
    164 
    165 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    166   return WideToUTF8(src, src_len, output);
    167 }
    168 
    169 std::string UTF16ToUTF8(const string16& utf16) {
    170   return WideToUTF8(utf16);
    171 }
    172 
    173 #endif
    174 
    175 std::wstring ASCIIToWide(const StringPiece& ascii) {
    176   DCHECK(IsStringASCII(ascii)) << ascii;
    177   return std::wstring(ascii.begin(), ascii.end());
    178 }
    179 
    180 string16 ASCIIToUTF16(const StringPiece& ascii) {
    181   DCHECK(IsStringASCII(ascii)) << ascii;
    182   return string16(ascii.begin(), ascii.end());
    183 }
    184 
    185 std::string UTF16ToASCII(const string16& utf16) {
    186   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
    187   return std::string(utf16.begin(), utf16.end());
    188 }
    189 
    190 }  // namespace base
    191