Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/utf_string_conversions.h"
      6 
      7 #include "base/string_piece.h"
      8 #include "base/string_util.h"
      9 #include "base/utf_string_conversion_utils.h"
     10 
     11 using base::PrepareForUTF8Output;
     12 using base::PrepareForUTF16Or32Output;
     13 using base::ReadUnicodeCharacter;
     14 using base::WriteUnicodeCharacter;
     15 
     16 namespace {
     17 
     18 // Generalized Unicode converter -----------------------------------------------
     19 
     20 // Converts the given source Unicode character type to the given destination
     21 // Unicode character type as a STL string. The given input buffer and size
     22 // determine the source, and the given output STL string will be replaced by
     23 // the result.
     24 template<typename SRC_CHAR, typename DEST_STRING>
     25 bool ConvertUnicode(const SRC_CHAR* src,
     26                     size_t src_len,
     27                     DEST_STRING* output) {
     28   // ICU requires 32-bit numbers.
     29   bool success = true;
     30   int32 src_len32 = static_cast<int32>(src_len);
     31   for (int32 i = 0; i < src_len32; i++) {
     32     uint32 code_point;
     33     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     34       WriteUnicodeCharacter(code_point, output);
     35     } else {
     36       WriteUnicodeCharacter(0xFFFD, output);
     37       success = false;
     38     }
     39   }
     40 
     41   return success;
     42 }
     43 
     44 }  // namespace
     45 
     46 // UTF-8 <-> Wide --------------------------------------------------------------
     47 
     48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
     49   PrepareForUTF8Output(src, src_len, output);
     50   return ConvertUnicode(src, src_len, output);
     51 }
     52 
     53 std::string WideToUTF8(const std::wstring& wide) {
     54   std::string ret;
     55   // Ignore the success flag of this call, it will do the best it can for
     56   // invalid input, which is what we want here.
     57   WideToUTF8(wide.data(), wide.length(), &ret);
     58   return ret;
     59 }
     60 
     61 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
     62   PrepareForUTF16Or32Output(src, src_len, output);
     63   return ConvertUnicode(src, src_len, output);
     64 }
     65 
     66 std::wstring UTF8ToWide(const base::StringPiece& utf8) {
     67   std::wstring ret;
     68   UTF8ToWide(utf8.data(), utf8.length(), &ret);
     69   return ret;
     70 }
     71 
     72 // UTF-16 <-> Wide -------------------------------------------------------------
     73 
     74 #if defined(WCHAR_T_IS_UTF16)
     75 
     76 // When wide == UTF-16, then conversions are a NOP.
     77 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     78   output->assign(src, src_len);
     79   return true;
     80 }
     81 
     82 string16 WideToUTF16(const std::wstring& wide) {
     83   return wide;
     84 }
     85 
     86 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
     87   output->assign(src, src_len);
     88   return true;
     89 }
     90 
     91 std::wstring UTF16ToWide(const string16& utf16) {
     92   return utf16;
     93 }
     94 
     95 #elif defined(WCHAR_T_IS_UTF32)
     96 
     97 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     98   output->clear();
     99   // Assume that normally we won't have any non-BMP characters so the counts
    100   // will be the same.
    101   output->reserve(src_len);
    102   return ConvertUnicode(src, src_len, output);
    103 }
    104 
    105 string16 WideToUTF16(const std::wstring& wide) {
    106   string16 ret;
    107   WideToUTF16(wide.data(), wide.length(), &ret);
    108   return ret;
    109 }
    110 
    111 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
    112   output->clear();
    113   // Assume that normally we won't have any non-BMP characters so the counts
    114   // will be the same.
    115   output->reserve(src_len);
    116   return ConvertUnicode(src, src_len, output);
    117 }
    118 
    119 std::wstring UTF16ToWide(const string16& utf16) {
    120   std::wstring ret;
    121   UTF16ToWide(utf16.data(), utf16.length(), &ret);
    122   return ret;
    123 }
    124 
    125 #endif  // defined(WCHAR_T_IS_UTF32)
    126 
    127 // UTF16 <-> UTF8 --------------------------------------------------------------
    128 
    129 #if defined(WCHAR_T_IS_UTF32)
    130 
    131 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    132   PrepareForUTF16Or32Output(src, src_len, output);
    133   return ConvertUnicode(src, src_len, output);
    134 }
    135 
    136 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
    137   string16 ret;
    138   // Ignore the success flag of this call, it will do the best it can for
    139   // invalid input, which is what we want here.
    140   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
    141   return ret;
    142 }
    143 
    144 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    145   PrepareForUTF8Output(src, src_len, output);
    146   return ConvertUnicode(src, src_len, output);
    147 }
    148 
    149 std::string UTF16ToUTF8(const string16& utf16) {
    150   std::string ret;
    151   // Ignore the success flag of this call, it will do the best it can for
    152   // invalid input, which is what we want here.
    153   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
    154   return ret;
    155 }
    156 
    157 #elif defined(WCHAR_T_IS_UTF16)
    158 // Easy case since we can use the "wide" versions we already wrote above.
    159 
    160 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    161   return UTF8ToWide(src, src_len, output);
    162 }
    163 
    164 string16 UTF8ToUTF16(const base::StringPiece& utf8) {
    165   return UTF8ToWide(utf8);
    166 }
    167 
    168 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    169   return WideToUTF8(src, src_len, output);
    170 }
    171 
    172 std::string UTF16ToUTF8(const string16& utf16) {
    173   return WideToUTF8(utf16);
    174 }
    175 
    176 #endif
    177 
    178 std::wstring ASCIIToWide(const base::StringPiece& ascii) {
    179   DCHECK(IsStringASCII(ascii)) << ascii;
    180   return std::wstring(ascii.begin(), ascii.end());
    181 }
    182 
    183 string16 ASCIIToUTF16(const base::StringPiece& ascii) {
    184   DCHECK(IsStringASCII(ascii)) << ascii;
    185   return string16(ascii.begin(), ascii.end());
    186 }
    187