Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/utf_offset_string_conversions.h"
      6 
      7 #include "base/string_piece.h"
      8 #include "base/utf_string_conversion_utils.h"
      9 
     10 using base::PrepareForUTF16Or32Output;
     11 using base::ReadUnicodeCharacter;
     12 using base::WriteUnicodeCharacter;
     13 
     14 // Generalized Unicode converter -----------------------------------------------
     15 
     16 // Converts the given source Unicode character type to the given destination
     17 // Unicode character type as a STL string. The given input buffer and size
     18 // determine the source, and the given output STL string will be replaced by
     19 // the result.
     20 template<typename SRC_CHAR>
     21 bool ConvertUnicode(const SRC_CHAR* src,
     22                     size_t src_len,
     23                     std::wstring* output,
     24                     size_t* offset_for_adjustment) {
     25   size_t output_offset =
     26       (offset_for_adjustment && *offset_for_adjustment < src_len) ?
     27           *offset_for_adjustment : std::wstring::npos;
     28 
     29   // ICU requires 32-bit numbers.
     30   bool success = true;
     31   int32 src_len32 = static_cast<int32>(src_len);
     32   for (int32 i = 0; i < src_len32; i++) {
     33     uint32 code_point;
     34     size_t original_i = i;
     35     size_t chars_written = 0;
     36     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
     37       chars_written = WriteUnicodeCharacter(code_point, output);
     38     } else {
     39       chars_written = WriteUnicodeCharacter(0xFFFD, output);
     40       success = false;
     41     }
     42     if ((output_offset != std::wstring::npos) &&
     43         (*offset_for_adjustment > original_i)) {
     44       // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
     45       // character read, not after it (so that incrementing it in the loop
     46       // increment will place it at the right location), so we need to account
     47       // for that in determining the amount that was read.
     48       if (*offset_for_adjustment <= static_cast<size_t>(i))
     49         output_offset = std::wstring::npos;
     50       else
     51         output_offset += chars_written - (i - original_i + 1);
     52     }
     53   }
     54 
     55   if (offset_for_adjustment)
     56     *offset_for_adjustment = output_offset;
     57   return success;
     58 }
     59 
     60 // UTF-8 <-> Wide --------------------------------------------------------------
     61 
     62 bool UTF8ToWideAndAdjustOffset(const char* src,
     63                                size_t src_len,
     64                                std::wstring* output,
     65                                size_t* offset_for_adjustment) {
     66   PrepareForUTF16Or32Output(src, src_len, output);
     67   return ConvertUnicode(src, src_len, output, offset_for_adjustment);
     68 }
     69 
     70 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
     71                                        size_t* offset_for_adjustment) {
     72   std::wstring ret;
     73   UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
     74                             offset_for_adjustment);
     75   return ret;
     76 }
     77 
     78 // UTF-16 <-> Wide -------------------------------------------------------------
     79 
     80 #if defined(WCHAR_T_IS_UTF16)
     81 
     82 // When wide == UTF-16, then conversions are a NOP.
     83 bool UTF16ToWideAndAdjustOffset(const char16* src,
     84                                 size_t src_len,
     85                                 std::wstring* output,
     86                                 size_t* offset_for_adjustment) {
     87   output->assign(src, src_len);
     88   if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
     89     *offset_for_adjustment = std::wstring::npos;
     90   return true;
     91 }
     92 
     93 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
     94                                         size_t* offset_for_adjustment) {
     95   if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
     96     *offset_for_adjustment = std::wstring::npos;
     97   return utf16;
     98 }
     99 
    100 #elif defined(WCHAR_T_IS_UTF32)
    101 
    102 bool UTF16ToWideAndAdjustOffset(const char16* src,
    103                                 size_t src_len,
    104                                 std::wstring* output,
    105                                 size_t* offset_for_adjustment) {
    106   output->clear();
    107   // Assume that normally we won't have any non-BMP characters so the counts
    108   // will be the same.
    109   output->reserve(src_len);
    110   return ConvertUnicode(src, src_len, output, offset_for_adjustment);
    111 }
    112 
    113 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
    114                                         size_t* offset_for_adjustment) {
    115   std::wstring ret;
    116   UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
    117                              offset_for_adjustment);
    118   return ret;
    119 }
    120 
    121 #endif  // defined(WCHAR_T_IS_UTF32)
    122