Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/sys_string_conversions.h"
      6 
#include <limits.h>
#include <stddef.h>
#include <string.h>
#include <wchar.h>
      9 
     10 #include "base/strings/string_piece.h"
     11 #include "base/strings/utf_string_conversions.h"
     12 #include "build/build_config.h"
     13 
     14 namespace base {
     15 
     16 std::string SysWideToUTF8(const std::wstring& wide) {
     17   // In theory this should be using the system-provided conversion rather
     18   // than our ICU, but this will do for now.
     19   return WideToUTF8(wide);
     20 }
     21 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
     22   // In theory this should be using the system-provided conversion rather
     23   // than our ICU, but this will do for now.
     24   std::wstring out;
     25   UTF8ToWide(utf8.data(), utf8.size(), &out);
     26   return out;
     27 }
     28 
     29 #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
     30 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
     31 // support and a better understanding of what calls these routines.
     32 
     33 std::string SysWideToNativeMB(const std::wstring& wide) {
     34   return WideToUTF8(wide);
     35 }
     36 
     37 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
     38   return SysUTF8ToWide(native_mb);
     39 }
     40 
     41 #else
     42 
     43 std::string SysWideToNativeMB(const std::wstring& wide) {
     44   mbstate_t ps;
     45 
     46   // Calculate the number of multi-byte characters.  We walk through the string
     47   // without writing the output, counting the number of multi-byte characters.
     48   size_t num_out_chars = 0;
     49   memset(&ps, 0, sizeof(ps));
     50   for (size_t i = 0; i < wide.size(); ++i) {
     51     const wchar_t src = wide[i];
     52     // Use a temp buffer since calling wcrtomb with an output of NULL does not
     53     // calculate the output length.
     54     char buf[16];
     55     // Skip NULLs to avoid wcrtomb's special handling of them.
     56     size_t res = src ? wcrtomb(buf, src, &ps) : 0;
     57     switch (res) {
     58       // Handle any errors and return an empty string.
     59       case static_cast<size_t>(-1):
     60         return std::string();
     61         break;
     62       case 0:
     63         // We hit an embedded null byte, keep going.
     64         ++num_out_chars;
     65         break;
     66       default:
     67         num_out_chars += res;
     68         break;
     69     }
     70   }
     71 
     72   if (num_out_chars == 0)
     73     return std::string();
     74 
     75   std::string out;
     76   out.resize(num_out_chars);
     77 
     78   // We walk the input string again, with |i| tracking the index of the
     79   // wide input, and |j| tracking the multi-byte output.
     80   memset(&ps, 0, sizeof(ps));
     81   for (size_t i = 0, j = 0; i < wide.size(); ++i) {
     82     const wchar_t src = wide[i];
     83     // We don't want wcrtomb to do its funkiness for embedded NULLs.
     84     size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
     85     switch (res) {
     86       // Handle any errors and return an empty string.
     87       case static_cast<size_t>(-1):
     88         return std::string();
     89         break;
     90       case 0:
     91         // We hit an embedded null byte, keep going.
     92         ++j;  // Output is already zeroed.
     93         break;
     94       default:
     95         j += res;
     96         break;
     97     }
     98   }
     99 
    100   return out;
    101 }
    102 
    103 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
    104   mbstate_t ps;
    105 
    106   // Calculate the number of wide characters.  We walk through the string
    107   // without writing the output, counting the number of wide characters.
    108   size_t num_out_chars = 0;
    109   memset(&ps, 0, sizeof(ps));
    110   for (size_t i = 0; i < native_mb.size(); ) {
    111     const char* src = native_mb.data() + i;
    112     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
    113     switch (res) {
    114       // Handle any errors and return an empty string.
    115       case static_cast<size_t>(-2):
    116       case static_cast<size_t>(-1):
    117         return std::wstring();
    118         break;
    119       case 0:
    120         // We hit an embedded null byte, keep going.
    121         i += 1;  // Fall through.
    122       default:
    123         i += res;
    124         ++num_out_chars;
    125         break;
    126     }
    127   }
    128 
    129   if (num_out_chars == 0)
    130     return std::wstring();
    131 
    132   std::wstring out;
    133   out.resize(num_out_chars);
    134 
    135   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
    136   // We walk the input string again, with |i| tracking the index of the
    137   // multi-byte input, and |j| tracking the wide output.
    138   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
    139     const char* src = native_mb.data() + i;
    140     wchar_t* dst = &out[j];
    141     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
    142     switch (res) {
    143       // Handle any errors and return an empty string.
    144       case static_cast<size_t>(-2):
    145       case static_cast<size_t>(-1):
    146         return std::wstring();
    147         break;
    148       case 0:
    149         i += 1;  // Skip null byte.
    150         break;
    151       default:
    152         i += res;
    153         break;
    154     }
    155   }
    156 
    157   return out;
    158 }
    159 
#endif  // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
    161 
    162 }  // namespace base
    163