Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/utf_offset_string_conversions.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/memory/scoped_ptr.h"
     10 #include "base/strings/string_piece.h"
     11 #include "base/strings/utf_string_conversion_utils.h"
     12 
     13 namespace base {
     14 
     15 // Converts the given source Unicode character type to the given destination
     16 // Unicode character type as a STL string. The given input buffer and size
     17 // determine the source, and the given output STL string will be replaced by
     18 // the result.
     19 template<typename SrcChar, typename DestStdString>
     20 bool ConvertUnicode(const SrcChar* src,
     21                     size_t src_len,
     22                     DestStdString* output,
     23                     std::vector<size_t>* offsets_for_adjustment) {
     24   if (offsets_for_adjustment) {
     25     std::for_each(offsets_for_adjustment->begin(),
     26                   offsets_for_adjustment->end(),
     27                   LimitOffset<DestStdString>(src_len));
     28   }
     29 
     30   // ICU requires 32-bit numbers.
     31   bool success = true;
     32   OffsetAdjuster offset_adjuster(offsets_for_adjustment);
     33   int32 src_len32 = static_cast<int32>(src_len);
     34   for (int32 i = 0; i < src_len32; i++) {
     35     uint32 code_point;
     36     size_t original_i = i;
     37     size_t chars_written = 0;
     38     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     39       chars_written = WriteUnicodeCharacter(code_point, output);
     40     } else {
     41       chars_written = WriteUnicodeCharacter(0xFFFD, output);
     42       success = false;
     43     }
     44     if (offsets_for_adjustment) {
     45       // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
     46       // character read, not after it (so that incrementing it in the loop
     47       // increment will place it at the right location), so we need to account
     48       // for that in determining the amount that was read.
     49       offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
     50           i - original_i + 1, chars_written));
     51     }
     52   }
     53   return success;
     54 }
     55 
     56 bool UTF8ToUTF16AndAdjustOffset(const char* src,
     57                                 size_t src_len,
     58                                 string16* output,
     59                                 size_t* offset_for_adjustment) {
     60   std::vector<size_t> offsets;
     61   if (offset_for_adjustment)
     62     offsets.push_back(*offset_for_adjustment);
     63   PrepareForUTF16Or32Output(src, src_len, output);
     64   bool ret = ConvertUnicode(src, src_len, output, &offsets);
     65   if (offset_for_adjustment)
     66     *offset_for_adjustment = offsets[0];
     67   return ret;
     68 }
     69 
     70 bool UTF8ToUTF16AndAdjustOffsets(const char* src,
     71                                  size_t src_len,
     72                                  string16* output,
     73                                  std::vector<size_t>* offsets_for_adjustment) {
     74   PrepareForUTF16Or32Output(src, src_len, output);
     75   return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
     76 }
     77 
     78 string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
     79                                         size_t* offset_for_adjustment) {
     80   std::vector<size_t> offsets;
     81   if (offset_for_adjustment)
     82     offsets.push_back(*offset_for_adjustment);
     83   string16 result;
     84   UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
     85                               &offsets);
     86   if (offset_for_adjustment)
     87     *offset_for_adjustment = offsets[0];
     88   return result;
     89 }
     90 
     91 string16 UTF8ToUTF16AndAdjustOffsets(
     92     const base::StringPiece& utf8,
     93     std::vector<size_t>* offsets_for_adjustment) {
     94   string16 result;
     95   UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
     96                               offsets_for_adjustment);
     97   return result;
     98 }
     99 
    100 std::string UTF16ToUTF8AndAdjustOffset(
    101     const base::StringPiece16& utf16,
    102     size_t* offset_for_adjustment) {
    103   std::vector<size_t> offsets;
    104   if (offset_for_adjustment)
    105     offsets.push_back(*offset_for_adjustment);
    106   std::string result = UTF16ToUTF8AndAdjustOffsets(utf16, &offsets);
    107   if (offset_for_adjustment)
    108     *offset_for_adjustment = offsets[0];
    109   return result;
    110 }
    111 
    112 std::string UTF16ToUTF8AndAdjustOffsets(
    113     const base::StringPiece16& utf16,
    114     std::vector<size_t>* offsets_for_adjustment) {
    115   std::string result;
    116   PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
    117   ConvertUnicode(utf16.data(), utf16.length(), &result, offsets_for_adjustment);
    118   return result;
    119 }
    120 
    121 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
    122                                        size_t original_length,
    123                                        size_t output_length)
    124     : original_offset(original_offset),
    125       original_length(original_length),
    126       output_length(output_length) {
    127 }
    128 
    129 OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
    130     : offsets_for_adjustment_(offsets_for_adjustment) {
    131 }
    132 
    133 OffsetAdjuster::~OffsetAdjuster() {
    134   if (!offsets_for_adjustment_ || adjustments_.empty())
    135     return;
    136   for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin());
    137        i != offsets_for_adjustment_->end(); ++i)
    138     AdjustOffset(i);
    139 }
    140 
    141 void OffsetAdjuster::Add(const Adjustment& adjustment) {
    142   adjustments_.push_back(adjustment);
    143 }
    144 
    145 void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
    146   if (*offset == string16::npos)
    147     return;
    148   size_t adjustment = 0;
    149   for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
    150        i != adjustments_.end(); ++i) {
    151     if (*offset == i->original_offset && i->output_length == 0) {
    152       *offset = string16::npos;
    153       return;
    154     }
    155     if (*offset <= i->original_offset)
    156       break;
    157     if (*offset < (i->original_offset + i->original_length)) {
    158       *offset = string16::npos;
    159       return;
    160     }
    161     adjustment += (i->original_length - i->output_length);
    162   }
    163   *offset -= adjustment;
    164 }
    165 
    166 }  // namespace base
    167