Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/sys_string_conversions.h"
      6 
      7 #include <wchar.h>
      8 
      9 #include "base/string_piece.h"
     10 #include "base/utf_string_conversions.h"
     11 
     12 namespace base {
     13 
     14 std::string SysWideToUTF8(const std::wstring& wide) {
     15   // In theory this should be using the system-provided conversion rather
     16   // than our ICU, but this will do for now.
     17   return WideToUTF8(wide);
     18 }
     19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
     20   // In theory this should be using the system-provided conversion rather
     21   // than our ICU, but this will do for now.
     22   std::wstring out;
     23   UTF8ToWide(utf8.data(), utf8.size(), &out);
     24   return out;
     25 }
     26 
     27 #if defined(OS_CHROMEOS)
     28 
     29 // ChromeOS always runs in UTF-8 locale.
     30 std::string SysWideToNativeMB(const std::wstring& wide) {
     31   return WideToUTF8(wide);
     32 }
     33 
     34 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
     35   return SysUTF8ToWide(native_mb);
     36 }
     37 
     38 #else
     39 
     40 std::string SysWideToNativeMB(const std::wstring& wide) {
     41   mbstate_t ps;
     42 
     43   // Calculate the number of multi-byte characters.  We walk through the string
     44   // without writing the output, counting the number of multi-byte characters.
     45   size_t num_out_chars = 0;
     46   memset(&ps, 0, sizeof(ps));
     47   for (size_t i = 0; i < wide.size(); ++i) {
     48     const wchar_t src = wide[i];
     49     // Use a temp buffer since calling wcrtomb with an output of NULL does not
     50     // calculate the output length.
     51     char buf[16];
     52     // Skip NULLs to avoid wcrtomb's special handling of them.
     53     size_t res = src ? wcrtomb(buf, src, &ps) : 0;
     54     switch (res) {
     55       // Handle any errors and return an empty string.
     56       case static_cast<size_t>(-1):
     57         return std::string();
     58         break;
     59       case 0:
     60         // We hit an embedded null byte, keep going.
     61         ++num_out_chars;
     62         break;
     63       default:
     64         num_out_chars += res;
     65         break;
     66     }
     67   }
     68 
     69   if (num_out_chars == 0)
     70     return std::string();
     71 
     72   std::string out;
     73   out.resize(num_out_chars);
     74 
     75   // We walk the input string again, with |i| tracking the index of the
     76   // wide input, and |j| tracking the multi-byte output.
     77   memset(&ps, 0, sizeof(ps));
     78   for (size_t i = 0, j = 0; i < wide.size(); ++i) {
     79     const wchar_t src = wide[i];
     80     // We don't want wcrtomb to do it's funkiness for embedded NULLs.
     81     size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
     82     switch (res) {
     83       // Handle any errors and return an empty string.
     84       case static_cast<size_t>(-1):
     85         return std::string();
     86         break;
     87       case 0:
     88         // We hit an embedded null byte, keep going.
     89         ++j;  // Output is already zeroed.
     90         break;
     91       default:
     92         j += res;
     93         break;
     94     }
     95   }
     96 
     97   return out;
     98 }
     99 
    100 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
    101   mbstate_t ps;
    102 
    103   // Calculate the number of wide characters.  We walk through the string
    104   // without writing the output, counting the number of wide characters.
    105   size_t num_out_chars = 0;
    106   memset(&ps, 0, sizeof(ps));
    107   for (size_t i = 0; i < native_mb.size(); ) {
    108     const char* src = native_mb.data() + i;
    109     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
    110     switch (res) {
    111       // Handle any errors and return an empty string.
    112       case static_cast<size_t>(-2):
    113       case static_cast<size_t>(-1):
    114         return std::wstring();
    115         break;
    116       case 0:
    117         // We hit an embedded null byte, keep going.
    118         i += 1;  // Fall through.
    119       default:
    120         i += res;
    121         ++num_out_chars;
    122         break;
    123     }
    124   }
    125 
    126   if (num_out_chars == 0)
    127     return std::wstring();
    128 
    129   std::wstring out;
    130   out.resize(num_out_chars);
    131 
    132   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
    133   // We walk the input string again, with |i| tracking the index of the
    134   // multi-byte input, and |j| tracking the wide output.
    135   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
    136     const char* src = native_mb.data() + i;
    137     wchar_t* dst = &out[j];
    138     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
    139     switch (res) {
    140       // Handle any errors and return an empty string.
    141       case static_cast<size_t>(-2):
    142       case static_cast<size_t>(-1):
    143         return std::wstring();
    144         break;
    145       case 0:
    146         i += 1;  // Skip null byte.
    147         break;
    148       default:
    149         i += res;
    150         break;
    151     }
    152   }
    153 
    154   return out;
    155 }
    156 
    157 #endif  // OS_CHROMEOS
    158 
    159 }  // namespace base
    160