Home | History | Annotate | Download | only in private
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "ppapi/shared_impl/private/ppb_char_set_shared.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/i18n/icu_string_conversions.h"
     10 #include "ppapi/c/dev/ppb_memory_dev.h"
     11 #include "ppapi/thunk/thunk.h"
     12 #include "third_party/icu/source/common/unicode/ucnv.h"
     13 #include "third_party/icu/source/common/unicode/ucnv_cb.h"
     14 #include "third_party/icu/source/common/unicode/ucnv_err.h"
     15 #include "third_party/icu/source/common/unicode/ustring.h"
     16 
     17 namespace ppapi {
     18 
     19 namespace {
     20 
     21 PP_CharSet_Trusted_ConversionError DeprecatedToConversionError(
     22     PP_CharSet_ConversionError on_error) {
     23   switch (on_error) {
     24     case PP_CHARSET_CONVERSIONERROR_SKIP:
     25       return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP;
     26     case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE:
     27       return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE;
     28     case PP_CHARSET_CONVERSIONERROR_FAIL:
     29     default:
     30       return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL;
     31   }
     32 }
     33 
     34 // Converts the given PP error handling behavior to the version in base,
     35 // placing the result in |*result| and returning true on success. Returns false
     36 // if the enum is invalid.
     37 bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error,
     38                              base::OnStringConversionError::Type* result) {
     39   switch (on_error) {
     40     case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL:
     41       *result = base::OnStringConversionError::FAIL;
     42       return true;
     43     case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP:
     44       *result = base::OnStringConversionError::SKIP;
     45       return true;
     46     case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE:
     47       *result = base::OnStringConversionError::SUBSTITUTE;
     48       return true;
     49     default:
     50       return false;
     51   }
     52 }
     53 
     54 }  // namespace
     55 
     56 // static
     57 // The "substitution" behavior of this function does not match the
     58 // implementation in base, so we partially duplicate the code from
     59 // icu_string_conversions.cc with the correct error handling setup required
     60 // by the PPAPI interface.
     61 char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated(
     62     const uint16_t* utf16,
     63     uint32_t utf16_len,
     64     const char* output_char_set,
     65     PP_CharSet_ConversionError deprecated_on_error,
     66     uint32_t* output_length) {
     67   *output_length = 0;
     68   PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
     69       deprecated_on_error);
     70 
     71   // Compute required length.
     72   uint32_t required_length = 0;
     73   UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL,
     74                  &required_length);
     75 
     76   // Our output is null terminated, so need one more byte.
     77   char* ret_buf = static_cast<char*>(
     78       thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1));
     79 
     80   // Do the conversion into the buffer.
     81   PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error,
     82                                   ret_buf, &required_length);
     83   if (result == PP_FALSE) {
     84     thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
     85     return NULL;
     86   }
     87   ret_buf[required_length] = 0;  // Null terminate.
     88   *output_length = required_length;
     89   return ret_buf;
     90 }
     91 
     92 // static
     93 PP_Bool PPB_CharSet_Shared::UTF16ToCharSet(
     94     const uint16_t utf16[],
     95     uint32_t utf16_len,
     96     const char* output_char_set,
     97     PP_CharSet_Trusted_ConversionError on_error,
     98     char* output_buffer,
     99     uint32_t* output_length) {
    100   if (!utf16 || !output_char_set || !output_length) {
    101     *output_length = 0;
    102     return PP_FALSE;
    103   }
    104 
    105   UErrorCode status = U_ZERO_ERROR;
    106   UConverter* converter = ucnv_open(output_char_set, &status);
    107   if (!U_SUCCESS(status)) {
    108     *output_length = 0;
    109     return PP_FALSE;
    110   }
    111 
    112   // Setup our error handler.
    113   switch (on_error) {
    114     case PP_CHARSET_CONVERSIONERROR_FAIL:
    115       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
    116                             NULL, NULL, &status);
    117       break;
    118     case PP_CHARSET_CONVERSIONERROR_SKIP:
    119       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
    120                             NULL, NULL, &status);
    121       break;
    122     case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: {
    123       // ICU sets the substitution char for some character sets (like latin1)
    124       // to be the ASCII "substitution character" (26). We want to use '?'
    125       // instead for backwards-compat with Windows behavior.
    126       char subst_chars[32];
    127       int8_t subst_chars_len = 32;
    128       ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status);
    129       if (subst_chars_len == 1 && subst_chars[0] == 26) {
    130         // Override to the question mark character if possible. When using
    131         // setSubstString, the input is a Unicode character. The function will
    132         // try to convert it to the destination character set and fail if that
    133         // can not be converted to the destination character set.
    134         //
    135         // We just ignore any failure. If the dest char set has no
    136         // representation for '?', then we'll just stick to the ICU default
    137         // substitution character.
    138         UErrorCode subst_status = U_ZERO_ERROR;
    139         UChar question_mark = '?';
    140         ucnv_setSubstString(converter, &question_mark, 1, &subst_status);
    141       }
    142 
    143       ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
    144                             NULL, NULL, &status);
    145       break;
    146     }
    147     default:
    148       *output_length = 0;
    149       ucnv_close(converter);
    150       return PP_FALSE;
    151   }
    152 
    153   // ucnv_fromUChars returns required size not including terminating null.
    154   *output_length = static_cast<uint32_t>(ucnv_fromUChars(
    155       converter, output_buffer, output_buffer ? *output_length : 0,
    156       reinterpret_cast<const UChar*>(utf16), utf16_len, &status));
    157 
    158   ucnv_close(converter);
    159   if (status == U_BUFFER_OVERFLOW_ERROR) {
    160     // Don't treat this as a fatal error since we need to return the string
    161     // size.
    162     return PP_TRUE;
    163   } else if (!U_SUCCESS(status)) {
    164     *output_length = 0;
    165     return PP_FALSE;
    166   }
    167   return PP_TRUE;
    168 }
    169 
    170 // static
    171 uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated(
    172     const char* input,
    173     uint32_t input_len,
    174     const char* input_char_set,
    175     PP_CharSet_ConversionError deprecated_on_error,
    176     uint32_t* output_length) {
    177   *output_length = 0;
    178   PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError(
    179       deprecated_on_error);
    180 
    181   // Compute required length.
    182   uint32_t required_length = 0;
    183   CharSetToUTF16(input, input_len, input_char_set, on_error, NULL,
    184                  &required_length);
    185 
    186   // Our output is null terminated, so need one more byte.
    187   uint16_t* ret_buf = static_cast<uint16_t*>(
    188       thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(
    189           (required_length + 1) * sizeof(uint16_t)));
    190 
    191   // Do the conversion into the buffer.
    192   PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error,
    193                                   ret_buf, &required_length);
    194   if (result == PP_FALSE) {
    195     thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf);
    196     return NULL;
    197   }
    198   ret_buf[required_length] = 0;  // Null terminate.
    199   *output_length = required_length;
    200   return ret_buf;
    201 }
    202 
    203 PP_Bool PPB_CharSet_Shared::CharSetToUTF16(
    204     const char* input,
    205     uint32_t input_len,
    206     const char* input_char_set,
    207     PP_CharSet_Trusted_ConversionError on_error,
    208     uint16_t* output_buffer,
    209     uint32_t* output_utf16_length) {
    210   if (!input || !input_char_set || !output_utf16_length) {
    211     *output_utf16_length = 0;
    212     return PP_FALSE;
    213   }
    214 
    215   base::OnStringConversionError::Type base_on_error;
    216   if (!PPToBaseConversionError(on_error, &base_on_error)) {
    217     *output_utf16_length = 0;
    218     return PP_FALSE;  // Invalid enum value.
    219   }
    220 
    221   // We can convert this call to the implementation in base to avoid code
    222   // duplication, although this does introduce an extra copy of the data.
    223   string16 output;
    224   if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set,
    225                              base_on_error, &output)) {
    226     *output_utf16_length = 0;
    227     return PP_FALSE;
    228   }
    229 
    230   if (output_buffer) {
    231     memcpy(output_buffer, output.c_str(),
    232            std::min(*output_utf16_length, static_cast<uint32_t>(output.size()))
    233            * sizeof(uint16_t));
    234   }
    235   *output_utf16_length = static_cast<uint32_t>(output.size());
    236   return PP_TRUE;
    237 }
    238 
    239 }  // namespace ppapi
    240