Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/utf_string_conversions.h"
      6 
      7 #include "base/string_piece.h"
      8 #include "base/utf_string_conversion_utils.h"
      9 
     10 using base::PrepareForUTF8Output;
     11 using base::PrepareForUTF16Or32Output;
     12 using base::ReadUnicodeCharacter;
     13 using base::WriteUnicodeCharacter;
     14 
     15 namespace {
     16 
     17 // Generalized Unicode converter -----------------------------------------------
     18 
     19 // Converts the given source Unicode character type to the given destination
     20 // Unicode character type as a STL string. The given input buffer and size
     21 // determine the source, and the given output STL string will be replaced by
     22 // the result.
     23 template<typename SRC_CHAR, typename DEST_STRING>
     24 bool ConvertUnicode(const SRC_CHAR* src,
     25                     size_t src_len,
     26                     DEST_STRING* output) {
     27   // ICU requires 32-bit numbers.
     28   bool success = true;
     29   int32 src_len32 = static_cast<int32>(src_len);
     30   for (int32 i = 0; i < src_len32; i++) {
     31     uint32 code_point;
     32     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     33       WriteUnicodeCharacter(code_point, output);
     34     } else {
     35       WriteUnicodeCharacter(0xFFFD, output);
     36       success = false;
     37     }
     38   }
     39 
     40   return success;
     41 }
     42 
     43 }  // namespace
     44 
     45 // UTF-8 <-> Wide --------------------------------------------------------------
     46 
     47 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
     48   PrepareForUTF8Output(src, src_len, output);
     49   return ConvertUnicode(src, src_len, output);
     50 }
     51 
     52 std::string WideToUTF8(const std::wstring& wide) {
     53   std::string ret;
     54   // Ignore the success flag of this call, it will do the best it can for
     55   // invalid input, which is what we want here.
     56   WideToUTF8(wide.data(), wide.length(), &ret);
     57   return ret;
     58 }
     59 
     60 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
     61   PrepareForUTF16Or32Output(src, src_len, output);
     62   return ConvertUnicode(src, src_len, output);
     63 }
     64 
     65 std::wstring UTF8ToWide(const base::StringPiece& utf8) {
     66   std::wstring ret;
     67   UTF8ToWide(utf8.data(), utf8.length(), &ret);
     68   return ret;
     69 }
     70 
     71 // UTF-16 <-> Wide -------------------------------------------------------------
     72 
     73 #if defined(WCHAR_T_IS_UTF16)
     74 
     75 // When wide == UTF-16, then conversions are a NOP.
     76 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     77   output->assign(src, src_len);
     78   return true;
     79 }
     80 
     81 string16 WideToUTF16(const std::wstring& wide) {
     82   return wide;
     83 }
     84 
     85 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
     86   output->assign(src, src_len);
     87   return true;
     88 }
     89 
     90 std::wstring UTF16ToWide(const string16& utf16) {
     91   return utf16;
     92 }
     93 
     94 #elif defined(WCHAR_T_IS_UTF32)
     95 
     96 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
     97   output->clear();
     98   // Assume that normally we won't have any non-BMP characters so the counts
     99   // will be the same.
    100   output->reserve(src_len);
    101   return ConvertUnicode(src, src_len, output);
    102 }
    103 
    104 string16 WideToUTF16(const std::wstring& wide) {
    105   string16 ret;
    106   WideToUTF16(wide.data(), wide.length(), &ret);
    107   return ret;
    108 }
    109 
    110 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
    111   output->clear();
    112   // Assume that normally we won't have any non-BMP characters so the counts
    113   // will be the same.
    114   output->reserve(src_len);
    115   return ConvertUnicode(src, src_len, output);
    116 }
    117 
    118 std::wstring UTF16ToWide(const string16& utf16) {
    119   std::wstring ret;
    120   UTF16ToWide(utf16.data(), utf16.length(), &ret);
    121   return ret;
    122 }
    123 
    124 #endif  // defined(WCHAR_T_IS_UTF32)
    125 
    126 // UTF16 <-> UTF8 --------------------------------------------------------------
    127 
    128 #if defined(WCHAR_T_IS_UTF32)
    129 
    130 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    131   PrepareForUTF16Or32Output(src, src_len, output);
    132   return ConvertUnicode(src, src_len, output);
    133 }
    134 
    135 string16 UTF8ToUTF16(const std::string& utf8) {
    136   string16 ret;
    137   // Ignore the success flag of this call, it will do the best it can for
    138   // invalid input, which is what we want here.
    139   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
    140   return ret;
    141 }
    142 
    143 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    144   PrepareForUTF8Output(src, src_len, output);
    145   return ConvertUnicode(src, src_len, output);
    146 }
    147 
    148 std::string UTF16ToUTF8(const string16& utf16) {
    149   std::string ret;
    150   // Ignore the success flag of this call, it will do the best it can for
    151   // invalid input, which is what we want here.
    152   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
    153   return ret;
    154 }
    155 
    156 #elif defined(WCHAR_T_IS_UTF16)
    157 // Easy case since we can use the "wide" versions we already wrote above.
    158 
    159 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
    160   return UTF8ToWide(src, src_len, output);
    161 }
    162 
    163 string16 UTF8ToUTF16(const std::string& utf8) {
    164   return UTF8ToWide(utf8);
    165 }
    166 
    167 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
    168   return WideToUTF8(src, src_len, output);
    169 }
    170 
    171 std::string UTF16ToUTF8(const string16& utf16) {
    172   return WideToUTF8(utf16);
    173 }
    174 
    175 #endif
    176