Home | History | Annotate | Download | only in base
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <windows.h>
     18 
     19 #include "android-base/utf8.h"
     20 
     21 #include <fcntl.h>
     22 #include <stdio.h>
     23 
     24 #include <algorithm>
     25 #include <string>
     26 
     27 #include "android-base/logging.h"
     28 
     29 namespace android {
     30 namespace base {
     31 
     32 // Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar().
     33 static void SetErrnoFromLastError() {
     34   switch (GetLastError()) {
     35     case ERROR_NO_UNICODE_TRANSLATION:
     36       errno = EILSEQ;
     37       break;
     38     default:
     39       errno = EINVAL;
     40       break;
     41   }
     42 }
     43 
     44 bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
     45   utf8->clear();
     46 
     47   if (size == 0) {
     48     return true;
     49   }
     50 
     51   // TODO: Consider using std::wstring_convert once libcxx is supported on
     52   // Windows.
     53 
     54   // Only Vista or later has this flag that causes WideCharToMultiByte() to
     55   // return an error on invalid characters.
     56   const DWORD flags =
     57 #if (WINVER >= 0x0600)
     58     WC_ERR_INVALID_CHARS;
     59 #else
     60     0;
     61 #endif
     62 
     63   const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
     64                                                  NULL, 0, NULL, NULL);
     65   if (chars_required <= 0) {
     66     SetErrnoFromLastError();
     67     return false;
     68   }
     69 
     70   // This could potentially throw a std::bad_alloc exception.
     71   utf8->resize(chars_required);
     72 
     73   const int result = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
     74                                          &(*utf8)[0], chars_required, NULL,
     75                                          NULL);
     76   if (result != chars_required) {
     77     SetErrnoFromLastError();
     78     CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
     79         << " chars to buffer of " << chars_required << " chars";
     80     utf8->clear();
     81     return false;
     82   }
     83 
     84   return true;
     85 }
     86 
     87 bool WideToUTF8(const wchar_t* utf16, std::string* utf8) {
     88   // Compute string length of NULL-terminated string with wcslen().
     89   return WideToUTF8(utf16, wcslen(utf16), utf8);
     90 }
     91 
     92 bool WideToUTF8(const std::wstring& utf16, std::string* utf8) {
     93   // Use the stored length of the string which allows embedded NULL characters
     94   // to be converted.
     95   return WideToUTF8(utf16.c_str(), utf16.length(), utf8);
     96 }
     97 
     98 // Internal helper function that takes MultiByteToWideChar() flags.
     99 static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
    100                                 const DWORD flags) {
    101   utf16->clear();
    102 
    103   if (size == 0) {
    104     return true;
    105   }
    106 
    107   // TODO: Consider using std::wstring_convert once libcxx is supported on
    108   // Windows.
    109   const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
    110                                                  NULL, 0);
    111   if (chars_required <= 0) {
    112     SetErrnoFromLastError();
    113     return false;
    114   }
    115 
    116   // This could potentially throw a std::bad_alloc exception.
    117   utf16->resize(chars_required);
    118 
    119   const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
    120                                          &(*utf16)[0], chars_required);
    121   if (result != chars_required) {
    122     SetErrnoFromLastError();
    123     CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
    124         << " chars to buffer of " << chars_required << " chars";
    125     utf16->clear();
    126     return false;
    127   }
    128 
    129   return true;
    130 }
    131 
    132 bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
    133   // If strictly interpreting as UTF-8 succeeds, return success.
    134   if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
    135     return true;
    136   }
    137 
    138   const int saved_errno = errno;
    139 
    140   // Fallback to non-strict interpretation, allowing invalid characters and
    141   // converting as best as possible, and return false to signify a problem.
    142   (void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
    143   errno = saved_errno;
    144   return false;
    145 }
    146 
    147 bool UTF8ToWide(const char* utf8, std::wstring* utf16) {
    148   // Compute string length of NULL-terminated string with strlen().
    149   return UTF8ToWide(utf8, strlen(utf8), utf16);
    150 }
    151 
    152 bool UTF8ToWide(const std::string& utf8, std::wstring* utf16) {
    153   // Use the stored length of the string which allows embedded NULL characters
    154   // to be converted.
    155   return UTF8ToWide(utf8.c_str(), utf8.length(), utf16);
    156 }
    157 
    158 static bool isDriveLetter(wchar_t c) {
    159   return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z');
    160 }
    161 
    162 bool UTF8PathToWindowsLongPath(const char* utf8, std::wstring* utf16) {
    163   if (!UTF8ToWide(utf8, utf16)) {
    164     return false;
    165   }
    166   // Note: Although most Win32 File I/O API are limited to MAX_PATH (260
    167   //       characters), the CreateDirectory API is limited to 248 characters.
    168   if (utf16->length() >= 248) {
    169     // If path is of the form "x:\" or "x:/"
    170     if (isDriveLetter((*utf16)[0]) && (*utf16)[1] == L':' &&
    171         ((*utf16)[2] == L'\\' || (*utf16)[2] == L'/')) {
    172       // Append long path prefix, and make sure there are no unix-style
    173       // separators to ensure a fully compliant Win32 long path string.
    174       utf16->insert(0, LR"(\\?\)");
    175       std::replace(utf16->begin(), utf16->end(), L'/', L'\\');
    176     }
    177   }
    178   return true;
    179 }
    180 
    181 // Versions of standard library APIs that support UTF-8 strings.
    182 namespace utf8 {
    183 
    184 FILE* fopen(const char* name, const char* mode) {
    185   std::wstring name_utf16;
    186   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
    187     return nullptr;
    188   }
    189 
    190   std::wstring mode_utf16;
    191   if (!UTF8ToWide(mode, &mode_utf16)) {
    192     return nullptr;
    193   }
    194 
    195   return _wfopen(name_utf16.c_str(), mode_utf16.c_str());
    196 }
    197 
    198 int mkdir(const char* name, mode_t mode) {
    199   std::wstring name_utf16;
    200   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
    201     return -1;
    202   }
    203 
    204   return _wmkdir(name_utf16.c_str());
    205 }
    206 
    207 int open(const char* name, int flags, ...) {
    208   std::wstring name_utf16;
    209   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
    210     return -1;
    211   }
    212 
    213   int mode = 0;
    214   if ((flags & O_CREAT) != 0) {
    215     va_list args;
    216     va_start(args, flags);
    217     mode = va_arg(args, int);
    218     va_end(args);
    219   }
    220 
    221   return _wopen(name_utf16.c_str(), flags, mode);
    222 }
    223 
    224 int unlink(const char* name) {
    225   std::wstring name_utf16;
    226   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
    227     return -1;
    228   }
    229 
    230   return _wunlink(name_utf16.c_str());
    231 }
    232 
    233 }  // namespace utf8
    234 }  // namespace base
    235 }  // namespace android
    236