1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ppapi/shared_impl/private/ppb_char_set_shared.h" 6 7 #include <algorithm> 8 9 #include "base/i18n/icu_string_conversions.h" 10 #include "ppapi/c/dev/ppb_memory_dev.h" 11 #include "ppapi/thunk/thunk.h" 12 #include "third_party/icu/source/common/unicode/ucnv.h" 13 #include "third_party/icu/source/common/unicode/ucnv_cb.h" 14 #include "third_party/icu/source/common/unicode/ucnv_err.h" 15 #include "third_party/icu/source/common/unicode/ustring.h" 16 17 namespace ppapi { 18 19 namespace { 20 21 PP_CharSet_Trusted_ConversionError DeprecatedToConversionError( 22 PP_CharSet_ConversionError on_error) { 23 switch (on_error) { 24 case PP_CHARSET_CONVERSIONERROR_SKIP: 25 return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP; 26 case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: 27 return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE; 28 case PP_CHARSET_CONVERSIONERROR_FAIL: 29 default: 30 return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL; 31 } 32 } 33 34 // Converts the given PP error handling behavior to the version in base, 35 // placing the result in |*result| and returning true on success. Returns false 36 // if the enum is invalid. 37 bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error, 38 base::OnStringConversionError::Type* result) { 39 switch (on_error) { 40 case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL: 41 *result = base::OnStringConversionError::FAIL; 42 return true; 43 case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP: 44 *result = base::OnStringConversionError::SKIP; 45 return true; 46 case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE: 47 *result = base::OnStringConversionError::SUBSTITUTE; 48 return true; 49 default: 50 return false; 51 } 52 } 53 54 } // namespace 55 56 // static 57 // The "substitution" behavior of this function does not match the 58 // implementation in base, so we partially duplicate the code from 59 // icu_string_conversions.cc with the correct error handling setup required 60 // by the PPAPI interface. 61 char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated( 62 const uint16_t* utf16, 63 uint32_t utf16_len, 64 const char* output_char_set, 65 PP_CharSet_ConversionError deprecated_on_error, 66 uint32_t* output_length) { 67 *output_length = 0; 68 PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError( 69 deprecated_on_error); 70 71 // Compute required length. 72 uint32_t required_length = 0; 73 UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL, 74 &required_length); 75 76 // Our output is null terminated, so need one more byte. 77 char* ret_buf = static_cast<char*>( 78 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1)); 79 80 // Do the conversion into the buffer. 81 PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, 82 ret_buf, &required_length); 83 if (result == PP_FALSE) { 84 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf); 85 return NULL; 86 } 87 ret_buf[required_length] = 0; // Null terminate. 88 *output_length = required_length; 89 return ret_buf; 90 } 91 92 // static 93 PP_Bool PPB_CharSet_Shared::UTF16ToCharSet( 94 const uint16_t utf16[], 95 uint32_t utf16_len, 96 const char* output_char_set, 97 PP_CharSet_Trusted_ConversionError on_error, 98 char* output_buffer, 99 uint32_t* output_length) { 100 if (!utf16 || !output_char_set || !output_length) { 101 *output_length = 0; 102 return PP_FALSE; 103 } 104 105 UErrorCode status = U_ZERO_ERROR; 106 UConverter* converter = ucnv_open(output_char_set, &status); 107 if (!U_SUCCESS(status)) { 108 *output_length = 0; 109 return PP_FALSE; 110 } 111 112 // Setup our error handler. 113 switch (on_error) { 114 case PP_CHARSET_CONVERSIONERROR_FAIL: 115 ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0, 116 NULL, NULL, &status); 117 break; 118 case PP_CHARSET_CONVERSIONERROR_SKIP: 119 ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0, 120 NULL, NULL, &status); 121 break; 122 case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: { 123 // ICU sets the substitution char for some character sets (like latin1) 124 // to be the ASCII "substitution character" (26). We want to use '?' 125 // instead for backwards-compat with Windows behavior. 126 char subst_chars[32]; 127 int8_t subst_chars_len = 32; 128 ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status); 129 if (subst_chars_len == 1 && subst_chars[0] == 26) { 130 // Override to the question mark character if possible. When using 131 // setSubstString, the input is a Unicode character. The function will 132 // try to convert it to the destination character set and fail if that 133 // can not be converted to the destination character set. 134 // 135 // We just ignore any failure. If the dest char set has no 136 // representation for '?', then we'll just stick to the ICU default 137 // substitution character. 138 UErrorCode subst_status = U_ZERO_ERROR; 139 UChar question_mark = '?'; 140 ucnv_setSubstString(converter, &question_mark, 1, &subst_status); 141 } 142 143 ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 144 NULL, NULL, &status); 145 break; 146 } 147 default: 148 *output_length = 0; 149 ucnv_close(converter); 150 return PP_FALSE; 151 } 152 153 // ucnv_fromUChars returns required size not including terminating null. 154 *output_length = static_cast<uint32_t>(ucnv_fromUChars( 155 converter, output_buffer, output_buffer ? *output_length : 0, 156 reinterpret_cast<const UChar*>(utf16), utf16_len, &status)); 157 158 ucnv_close(converter); 159 if (status == U_BUFFER_OVERFLOW_ERROR) { 160 // Don't treat this as a fatal error since we need to return the string 161 // size. 162 return PP_TRUE; 163 } else if (!U_SUCCESS(status)) { 164 *output_length = 0; 165 return PP_FALSE; 166 } 167 return PP_TRUE; 168 } 169 170 // static 171 uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated( 172 const char* input, 173 uint32_t input_len, 174 const char* input_char_set, 175 PP_CharSet_ConversionError deprecated_on_error, 176 uint32_t* output_length) { 177 *output_length = 0; 178 PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError( 179 deprecated_on_error); 180 181 // Compute required length. 182 uint32_t required_length = 0; 183 CharSetToUTF16(input, input_len, input_char_set, on_error, NULL, 184 &required_length); 185 186 // Our output is null terminated, so need one more byte. 187 uint16_t* ret_buf = static_cast<uint16_t*>( 188 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc( 189 (required_length + 1) * sizeof(uint16_t))); 190 191 // Do the conversion into the buffer. 192 PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error, 193 ret_buf, &required_length); 194 if (result == PP_FALSE) { 195 thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf); 196 return NULL; 197 } 198 ret_buf[required_length] = 0; // Null terminate. 199 *output_length = required_length; 200 return ret_buf; 201 } 202 203 PP_Bool PPB_CharSet_Shared::CharSetToUTF16( 204 const char* input, 205 uint32_t input_len, 206 const char* input_char_set, 207 PP_CharSet_Trusted_ConversionError on_error, 208 uint16_t* output_buffer, 209 uint32_t* output_utf16_length) { 210 if (!input || !input_char_set || !output_utf16_length) { 211 *output_utf16_length = 0; 212 return PP_FALSE; 213 } 214 215 base::OnStringConversionError::Type base_on_error; 216 if (!PPToBaseConversionError(on_error, &base_on_error)) { 217 *output_utf16_length = 0; 218 return PP_FALSE; // Invalid enum value. 219 } 220 221 // We can convert this call to the implementation in base to avoid code 222 // duplication, although this does introduce an extra copy of the data. 223 string16 output; 224 if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set, 225 base_on_error, &output)) { 226 *output_utf16_length = 0; 227 return PP_FALSE; 228 } 229 230 if (output_buffer) { 231 memcpy(output_buffer, output.c_str(), 232 std::min(*output_utf16_length, static_cast<uint32_t>(output.size())) 233 * sizeof(uint16_t)); 234 } 235 *output_utf16_length = static_cast<uint32_t>(output.size()); 236 return PP_TRUE; 237 } 238 239 } // namespace ppapi 240