Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
#include "base/strings/sys_string_conversions.h"

#include <limits.h>
#include <string.h>
#include <wchar.h>

#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
     11 
     12 namespace base {
     13 
     14 std::string SysWideToUTF8(const std::wstring& wide) {
     15   // In theory this should be using the system-provided conversion rather
     16   // than our ICU, but this will do for now.
     17   return WideToUTF8(wide);
     18 }
     19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
     20   // In theory this should be using the system-provided conversion rather
     21   // than our ICU, but this will do for now.
     22   std::wstring out;
     23   UTF8ToWide(utf8.data(), utf8.size(), &out);
     24   return out;
     25 }
     26 
     27 #if defined(OS_CHROMEOS) || defined(OS_ANDROID)
     28 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
     29 // support and a better understanding of what calls these routines.
     30 
     31 // ChromeOS always runs in UTF-8 locale.
     32 std::string SysWideToNativeMB(const std::wstring& wide) {
     33   return WideToUTF8(wide);
     34 }
     35 
     36 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
     37   return SysUTF8ToWide(native_mb);
     38 }
     39 
     40 #else
     41 
     42 std::string SysWideToNativeMB(const std::wstring& wide) {
     43   mbstate_t ps;
     44 
     45   // Calculate the number of multi-byte characters.  We walk through the string
     46   // without writing the output, counting the number of multi-byte characters.
     47   size_t num_out_chars = 0;
     48   memset(&ps, 0, sizeof(ps));
     49   for (size_t i = 0; i < wide.size(); ++i) {
     50     const wchar_t src = wide[i];
     51     // Use a temp buffer since calling wcrtomb with an output of NULL does not
     52     // calculate the output length.
     53     char buf[16];
     54     // Skip NULLs to avoid wcrtomb's special handling of them.
     55     size_t res = src ? wcrtomb(buf, src, &ps) : 0;
     56     switch (res) {
     57       // Handle any errors and return an empty string.
     58       case static_cast<size_t>(-1):
     59         return std::string();
     60         break;
     61       case 0:
     62         // We hit an embedded null byte, keep going.
     63         ++num_out_chars;
     64         break;
     65       default:
     66         num_out_chars += res;
     67         break;
     68     }
     69   }
     70 
     71   if (num_out_chars == 0)
     72     return std::string();
     73 
     74   std::string out;
     75   out.resize(num_out_chars);
     76 
     77   // We walk the input string again, with |i| tracking the index of the
     78   // wide input, and |j| tracking the multi-byte output.
     79   memset(&ps, 0, sizeof(ps));
     80   for (size_t i = 0, j = 0; i < wide.size(); ++i) {
     81     const wchar_t src = wide[i];
     82     // We don't want wcrtomb to do its funkiness for embedded NULLs.
     83     size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
     84     switch (res) {
     85       // Handle any errors and return an empty string.
     86       case static_cast<size_t>(-1):
     87         return std::string();
     88         break;
     89       case 0:
     90         // We hit an embedded null byte, keep going.
     91         ++j;  // Output is already zeroed.
     92         break;
     93       default:
     94         j += res;
     95         break;
     96     }
     97   }
     98 
     99   return out;
    100 }
    101 
    102 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
    103   mbstate_t ps;
    104 
    105   // Calculate the number of wide characters.  We walk through the string
    106   // without writing the output, counting the number of wide characters.
    107   size_t num_out_chars = 0;
    108   memset(&ps, 0, sizeof(ps));
    109   for (size_t i = 0; i < native_mb.size(); ) {
    110     const char* src = native_mb.data() + i;
    111     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
    112     switch (res) {
    113       // Handle any errors and return an empty string.
    114       case static_cast<size_t>(-2):
    115       case static_cast<size_t>(-1):
    116         return std::wstring();
    117         break;
    118       case 0:
    119         // We hit an embedded null byte, keep going.
    120         i += 1;  // Fall through.
    121       default:
    122         i += res;
    123         ++num_out_chars;
    124         break;
    125     }
    126   }
    127 
    128   if (num_out_chars == 0)
    129     return std::wstring();
    130 
    131   std::wstring out;
    132   out.resize(num_out_chars);
    133 
    134   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
    135   // We walk the input string again, with |i| tracking the index of the
    136   // multi-byte input, and |j| tracking the wide output.
    137   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
    138     const char* src = native_mb.data() + i;
    139     wchar_t* dst = &out[j];
    140     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
    141     switch (res) {
    142       // Handle any errors and return an empty string.
    143       case static_cast<size_t>(-2):
    144       case static_cast<size_t>(-1):
    145         return std::wstring();
    146         break;
    147       case 0:
    148         i += 1;  // Skip null byte.
    149         break;
    150       default:
    151         i += res;
    152         break;
    153     }
    154   }
    155 
    156   return out;
    157 }
    158 
    159 #endif  // OS_CHROMEOS
    160 
    161 }  // namespace base
    162