1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/utf_offset_string_conversions.h" 6 7 #include <algorithm> 8 9 #include "base/memory/scoped_ptr.h" 10 #include "base/strings/string_piece.h" 11 #include "base/strings/utf_string_conversion_utils.h" 12 13 namespace base { 14 15 // Converts the given source Unicode character type to the given destination 16 // Unicode character type as a STL string. The given input buffer and size 17 // determine the source, and the given output STL string will be replaced by 18 // the result. 19 template<typename SrcChar, typename DestStdString> 20 bool ConvertUnicode(const SrcChar* src, 21 size_t src_len, 22 DestStdString* output, 23 std::vector<size_t>* offsets_for_adjustment) { 24 if (offsets_for_adjustment) { 25 std::for_each(offsets_for_adjustment->begin(), 26 offsets_for_adjustment->end(), 27 LimitOffset<DestStdString>(src_len)); 28 } 29 30 // ICU requires 32-bit numbers. 31 bool success = true; 32 OffsetAdjuster offset_adjuster(offsets_for_adjustment); 33 int32 src_len32 = static_cast<int32>(src_len); 34 for (int32 i = 0; i < src_len32; i++) { 35 uint32 code_point; 36 size_t original_i = i; 37 size_t chars_written = 0; 38 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 39 chars_written = WriteUnicodeCharacter(code_point, output); 40 } else { 41 chars_written = WriteUnicodeCharacter(0xFFFD, output); 42 success = false; 43 } 44 if (offsets_for_adjustment) { 45 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last 46 // character read, not after it (so that incrementing it in the loop 47 // increment will place it at the right location), so we need to account 48 // for that in determining the amount that was read. 49 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, 50 i - original_i + 1, chars_written)); 51 } 52 } 53 return success; 54 } 55 56 bool UTF8ToUTF16AndAdjustOffset(const char* src, 57 size_t src_len, 58 string16* output, 59 size_t* offset_for_adjustment) { 60 std::vector<size_t> offsets; 61 if (offset_for_adjustment) 62 offsets.push_back(*offset_for_adjustment); 63 PrepareForUTF16Or32Output(src, src_len, output); 64 bool ret = ConvertUnicode(src, src_len, output, &offsets); 65 if (offset_for_adjustment) 66 *offset_for_adjustment = offsets[0]; 67 return ret; 68 } 69 70 bool UTF8ToUTF16AndAdjustOffsets(const char* src, 71 size_t src_len, 72 string16* output, 73 std::vector<size_t>* offsets_for_adjustment) { 74 PrepareForUTF16Or32Output(src, src_len, output); 75 return ConvertUnicode(src, src_len, output, offsets_for_adjustment); 76 } 77 78 string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8, 79 size_t* offset_for_adjustment) { 80 std::vector<size_t> offsets; 81 if (offset_for_adjustment) 82 offsets.push_back(*offset_for_adjustment); 83 string16 result; 84 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, 85 &offsets); 86 if (offset_for_adjustment) 87 *offset_for_adjustment = offsets[0]; 88 return result; 89 } 90 91 string16 UTF8ToUTF16AndAdjustOffsets( 92 const base::StringPiece& utf8, 93 std::vector<size_t>* offsets_for_adjustment) { 94 string16 result; 95 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, 96 offsets_for_adjustment); 97 return result; 98 } 99 100 std::string UTF16ToUTF8AndAdjustOffset( 101 const base::StringPiece16& utf16, 102 size_t* offset_for_adjustment) { 103 std::vector<size_t> offsets; 104 if (offset_for_adjustment) 105 offsets.push_back(*offset_for_adjustment); 106 std::string result = UTF16ToUTF8AndAdjustOffsets(utf16, &offsets); 107 if (offset_for_adjustment) 108 *offset_for_adjustment = offsets[0]; 109 return result; 110 } 111 112 std::string UTF16ToUTF8AndAdjustOffsets( 113 const base::StringPiece16& utf16, 114 std::vector<size_t>* offsets_for_adjustment) { 115 std::string result; 116 PrepareForUTF8Output(utf16.data(), utf16.length(), &result); 117 ConvertUnicode(utf16.data(), utf16.length(), &result, offsets_for_adjustment); 118 return result; 119 } 120 121 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset, 122 size_t original_length, 123 size_t output_length) 124 : original_offset(original_offset), 125 original_length(original_length), 126 output_length(output_length) { 127 } 128 129 OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment) 130 : offsets_for_adjustment_(offsets_for_adjustment) { 131 } 132 133 OffsetAdjuster::~OffsetAdjuster() { 134 if (!offsets_for_adjustment_ || adjustments_.empty()) 135 return; 136 for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin()); 137 i != offsets_for_adjustment_->end(); ++i) 138 AdjustOffset(i); 139 } 140 141 void OffsetAdjuster::Add(const Adjustment& adjustment) { 142 adjustments_.push_back(adjustment); 143 } 144 145 void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) { 146 if (*offset == string16::npos) 147 return; 148 size_t adjustment = 0; 149 for (std::vector<Adjustment>::const_iterator i = adjustments_.begin(); 150 i != adjustments_.end(); ++i) { 151 if (*offset <= i->original_offset) 152 break; 153 if (*offset < (i->original_offset + i->original_length)) { 154 *offset = string16::npos; 155 return; 156 } 157 adjustment += (i->original_length - i->output_length); 158 } 159 *offset -= adjustment; 160 } 161 162 } // namespace base 163