Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
      6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
      7 
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/base_export.h"
     12 #include "base/strings/string16.h"
     13 #include "base/strings/string_piece.h"
     14 
     15 namespace base {
     16 
     17 // A helper class and associated data structures to adjust offsets into a
     18 // string in response to various adjustments one might do to that string
     19 // (e.g., eliminating a range).  For details on offsets, see the comments by
     20 // the AdjustOffsets() function below.
     21 class BASE_EXPORT OffsetAdjuster {
     22  public:
     23   struct BASE_EXPORT Adjustment {
     24     Adjustment(size_t original_offset,
     25                size_t original_length,
     26                size_t output_length);
     27 
     28     size_t original_offset;
     29     size_t original_length;
     30     size_t output_length;
     31   };
     32   typedef std::vector<Adjustment> Adjustments;
     33 
     34   // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
     35   // recorded in |adjustments|.
     36   //
     37   // Offsets represents insertion/selection points between characters: if |src|
     38   // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
     39   // end of the string.  Valid input offsets range from 0 to |src_len|.  On
     40   // exit, each offset will have been modified to point at the same logical
     41   // position in the output string.  If an offset cannot be successfully
     42   // adjusted (e.g., because it points into the middle of a multibyte sequence),
     43   // it will be set to string16::npos.
     44   static void AdjustOffsets(const Adjustments& adjustments,
     45                             std::vector<size_t>* offsets_for_adjustment);
     46 
     47   // Adjusts the single |offset| to reflect the adjustments recorded in
     48   // |adjustments|.
     49   static void AdjustOffset(const Adjustments& adjustments,
     50                            size_t* offset);
     51 
     52   // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
     53   // of the adjustments recorded in |adjustments|.  In other words, the offsets
     54   // provided represent offsets into an adjusted string and the caller wants
     55   // to know the offsets they correspond to in the original string.  If an
     56   // offset cannot be successfully unadjusted (e.g., because it points into
     57   // the middle of a multibyte sequence), it will be set to string16::npos.
     58   static void UnadjustOffsets(const Adjustments& adjustments,
     59                               std::vector<size_t>* offsets_for_unadjustment);
     60 
     61   // Adjusts the single |offset| to reflect the reverse of the adjustments
     62   // recorded in |adjustments|.
     63   static void UnadjustOffset(const Adjustments& adjustments,
     64                              size_t* offset);
     65 
     66   // Combines two sequential sets of adjustments, storing the combined revised
     67   // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
     68   // string was altered in some way, with the alterations recorded as
     69   // adjustments in |first_adjustments|.  Then suppose the resulting string is
     70   // further altered, with the alterations recorded as adjustments scored in
     71   // |adjustments_on_adjusted_string|, with the offsets recorded in these
     72   // adjustments being with respect to the intermediate string.  This function
     73   // combines the two sets of adjustments into one, storing the result in
     74   // |adjustments_on_adjusted_string|, whose offsets are correct with respect
     75   // to the original string.
     76   //
     77   // Assumes both parameters are sorted by increasing offset.
     78   //
     79   // WARNING: Only supports |first_adjustments| that involve collapsing ranges
     80   // of text, not expanding ranges.
     81   static void MergeSequentialAdjustments(
     82       const Adjustments& first_adjustments,
     83       Adjustments* adjustments_on_adjusted_string);
     84 };
     85 
     86 // Like the conversions in utf_string_conversions.h, but also fills in an
     87 // |adjustments| parameter that reflects the alterations done to the string.
     88 // It may be NULL.
     89 BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
     90     const char* src,
     91     size_t src_len,
     92     string16* output,
     93     base::OffsetAdjuster::Adjustments* adjustments);
     94 BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
     95     const base::StringPiece& utf8,
     96     base::OffsetAdjuster::Adjustments* adjustments);
     97 // As above, but instead internally examines the adjustments and applies them
     98 // to |offsets_for_adjustment|.  See comments by AdjustOffsets().
     99 BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
    100     const base::StringPiece& utf8,
    101     std::vector<size_t>* offsets_for_adjustment);
    102 
    103 BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
    104     const base::StringPiece16& utf16,
    105     std::vector<size_t>* offsets_for_adjustment);
    106 
    107 // Limiting function callable by std::for_each which will replace any value
    108 // which is greater than |limit| with npos.  Typically this is called with a
    109 // string length to clamp offsets into the string to [0, length] (as opposed to
    110 // [0, length); see comments above).
    111 template <typename T>
    112 struct LimitOffset {
    113   explicit LimitOffset(size_t limit)
    114     : limit_(limit) {}
    115 
    116   void operator()(size_t& offset) {
    117     if (offset > limit_)
    118       offset = T::npos;
    119   }
    120 
    121   size_t limit_;
    122 };
    123 
    124 }  // namespace base
    125 
    126 #endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
    127