1 // Copyright (c) 2006, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 #include <string.h> 31 32 #include "common/convert_UTF.h" 33 #include "common/scoped_ptr.h" 34 #include "common/string_conversion.h" 35 #include "common/using_std_string.h" 36 37 namespace google_breakpad { 38 39 using std::vector; 40 41 void UTF8ToUTF16(const char *in, vector<uint16_t> *out) { 42 size_t source_length = strlen(in); 43 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 44 const UTF8 *source_end_ptr = source_ptr + source_length; 45 // Erase the contents and zero fill to the expected size 46 out->clear(); 47 out->insert(out->begin(), source_length, 0); 48 uint16_t *target_ptr = &(*out)[0]; 49 uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 50 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 51 &target_ptr, target_end_ptr, 52 strictConversion); 53 54 // Resize to be the size of the # of converted characters + NULL 55 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 56 } 57 58 int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) { 59 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 60 const UTF8 *source_end_ptr = source_ptr + sizeof(char); 61 uint16_t *target_ptr = out; 62 uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 63 out[0] = out[1] = 0; 64 65 // Process one character at a time 66 while (1) { 67 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 68 &target_ptr, target_end_ptr, 69 strictConversion); 70 71 if (result == conversionOK) 72 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in)); 73 74 // Add another character to the input stream and try again 75 source_ptr = reinterpret_cast<const UTF8 *>(in); 76 ++source_end_ptr; 77 78 if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length) 79 break; 80 } 81 82 return 0; 83 } 84 85 void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) { 86 size_t source_length = wcslen(in); 87 const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in); 88 const UTF32 *source_end_ptr = source_ptr + source_length; 89 // Erase the contents and zero fill to the expected size 90 out->clear(); 91 out->insert(out->begin(), source_length, 0); 92 uint16_t *target_ptr = &(*out)[0]; 93 uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 94 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 95 &target_ptr, target_end_ptr, 96 strictConversion); 97 98 // Resize to be the size of the # of converted characters + NULL 99 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 100 } 101 102 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { 103 const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in); 104 const UTF32 *source_end_ptr = source_ptr + 1; 105 uint16_t *target_ptr = out; 106 uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 107 out[0] = out[1] = 0; 108 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 109 &target_ptr, target_end_ptr, 110 strictConversion); 111 112 if (result != conversionOK) { 113 out[0] = out[1] = 0; 114 } 115 } 116 117 static inline uint16_t Swap(uint16_t value) { 118 return (value >> 8) | static_cast<uint16_t>(value << 8); 119 } 120 121 string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) { 122 const UTF16 *source_ptr = &in[0]; 123 scoped_array<uint16_t> source_buffer; 124 125 // If we're to swap, we need to make a local copy and swap each byte pair 126 if (swap) { 127 int idx = 0; 128 source_buffer.reset(new uint16_t[in.size()]); 129 UTF16 *source_buffer_ptr = source_buffer.get(); 130 for (vector<uint16_t>::const_iterator it = in.begin(); 131 it != in.end(); ++it, ++idx) 132 source_buffer_ptr[idx] = Swap(*it); 133 134 source_ptr = source_buffer.get(); 135 } 136 137 // The maximum expansion would be 4x the size of the input string. 138 const UTF16 *source_end_ptr = source_ptr + in.size(); 139 size_t target_capacity = in.size() * 4; 140 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); 141 UTF8 *target_ptr = target_buffer.get(); 142 UTF8 *target_end_ptr = target_ptr + target_capacity; 143 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, 144 &target_ptr, target_end_ptr, 145 strictConversion); 146 147 if (result == conversionOK) { 148 const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get()); 149 return targetPtr; 150 } 151 152 return ""; 153 } 154 155 } // namespace google_breakpad 156