Home | History | Annotate | Download | only in text
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file implements utility functions for eliding and formatting UI text.
      6 //
      7 // Note that several of the functions declared in text_elider.h are implemented
      8 // in this file using helper classes in an unnamed namespace.
      9 
     10 #include "ui/base/text/text_elider.h"
     11 
     12 #include <string>
     13 #include <vector>
     14 
     15 #include "base/files/file_path.h"
     16 #include "base/i18n/break_iterator.h"
     17 #include "base/i18n/char_iterator.h"
     18 #include "base/i18n/rtl.h"
     19 #include "base/memory/scoped_ptr.h"
     20 #include "base/strings/string_split.h"
     21 #include "base/strings/string_util.h"
     22 #include "base/strings/sys_string_conversions.h"
     23 #include "base/strings/utf_string_conversions.h"
     24 #include "net/base/escape.h"
     25 #include "net/base/net_util.h"
     26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     27 #include "third_party/icu/source/common/unicode/rbbi.h"
     28 #include "third_party/icu/source/common/unicode/uloc.h"
     29 #include "ui/gfx/font.h"
     30 #include "url/gurl.h"
     31 
     32 namespace ui {
     33 
     34 // U+2026 (HORIZONTAL ELLIPSIS) encoded as a UTF-8 byte sequence.
     35 const char kEllipsis[] = "\xE2\x80\xA6";
        // The same ellipsis as a null-terminated UTF-16 string.
     36 const char16 kEllipsisUTF16[] = { 0x2026, 0 };
        // Separator used below when splitting and rebuilding URL paths.
     37 const char16 kForwardSlash = '/';
     38 
     39 namespace {
     40 
     41 // Helper class to split + elide text, while respecting UTF16 surrogate pairs.
     42 class StringSlicer {
     43  public:
     44   StringSlicer(const string16& text,
     45                const string16& ellipsis,
     46                bool elide_in_middle)
     47       : text_(text),
     48         ellipsis_(ellipsis),
     49         elide_in_middle_(elide_in_middle) {
     50   }
     51 
     52   // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true,
     53   // the middle of the string is removed to leave equal-length pieces from the
     54   // beginning and end of the string; otherwise, the end of the string is
     55   // removed and only the beginning remains. If |insert_ellipsis| is true,
     56   // then an ellipsis character will be inserted at the cut point.
     57   string16 CutString(size_t length, bool insert_ellipsis) {
     58     const string16 ellipsis_text = insert_ellipsis ? ellipsis_ : string16();
     59 
     60     if (!elide_in_middle_)
     61       return text_.substr(0, FindValidBoundaryBefore(length)) + ellipsis_text;
     62 
     63     // We put the extra character, if any, before the cut.
     64     const size_t half_length = length / 2;
     65     const size_t prefix_length = FindValidBoundaryBefore(length - half_length);
     66     const size_t suffix_start_guess = text_.length() - half_length;
     67     const size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess);
     68     const size_t suffix_length =
     69         half_length - (suffix_start_guess - suffix_start);
     70     return text_.substr(0, prefix_length) + ellipsis_text +
     71            text_.substr(suffix_start, suffix_length);
     72   }
     73 
     74  private:
     75   // Returns a valid cut boundary at or before |index|.
     76   size_t FindValidBoundaryBefore(size_t index) const {
     77     DCHECK_LE(index, text_.length());
     78     if (index != text_.length())
     79       U16_SET_CP_START(text_.data(), 0, index);
     80     return index;
     81   }
     82 
     83   // Returns a valid cut boundary at or after |index|.
     84   size_t FindValidBoundaryAfter(size_t index) const {
     85     DCHECK_LE(index, text_.length());
     86     if (index != text_.length())
     87       U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length());
     88     return index;
     89   }
     90 
     91   // The text to be sliced.
     92   const string16& text_;
     93 
     94   // Ellipsis string to use.
     95   const string16& ellipsis_;
     96 
     97   // If true, the middle of the string will be elided.
     98   bool elide_in_middle_;
     99 
    100   DISALLOW_COPY_AND_ASSIGN(StringSlicer);
    101 };
    102 
    103 // Build a path from the first |num_components| elements in |path_elements|.
    104 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
    105 string16 BuildPathFromComponents(const string16& path_prefix,
    106                                  const std::vector<string16>& path_elements,
    107                                  const string16& filename,
    108                                  size_t num_components) {
    109   // Add the initial elements of the path.
    110   string16 path = path_prefix;
    111 
    112   // Build path from first |num_components| elements.
    113   for (size_t j = 0; j < num_components; ++j)
    114     path += path_elements[j] + kForwardSlash;
    115 
    116   // Add |filename|, ellipsis if necessary.
    117   if (num_components != (path_elements.size() - 1))
    118     path += UTF8ToUTF16(kEllipsis) + kForwardSlash;
    119   path += filename;
    120 
    121   return path;
    122 }
    123 
    124 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path
    125 // components and elides if possible. Returns a string containing the longest
    126 // possible elided path, or an empty string if elision is not possible.
    127 string16 ElideComponentizedPath(const string16& url_path_prefix,
    128                                 const std::vector<string16>& url_path_elements,
    129                                 const string16& url_filename,
    130                                 const string16& url_query,
    131                                 const gfx::Font& font,
    132                                 int available_pixel_width) {
    133   const size_t url_path_number_of_elements = url_path_elements.size();
    134 
    135   CHECK(url_path_number_of_elements);
    136   for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
    137     string16 elided_path = BuildPathFromComponents(url_path_prefix,
    138         url_path_elements, url_filename, i);
    139     if (available_pixel_width >= font.GetStringWidth(elided_path))
    140       return ElideText(elided_path + url_query,
    141                        font, available_pixel_width, ELIDE_AT_END);
    142   }
    143 
    144   return string16();
    145 }
    146 
    147 }  // namespace
    148 
    149 string16 ElideEmail(const string16& email,
    150                     const gfx::Font& font,
    151                     int available_pixel_width) {
    152   if (font.GetStringWidth(email) <= available_pixel_width)
    153     return email;
    154 
    155   // Split the email into its local-part (username) and domain-part. The email
    156   // spec technically allows for @ symbols in the local-part (username) of the
    157   // email under some special requirements. It is guaranteed that there is no @
    158   // symbol in the domain part of the email however so splitting at the last @
    159   // symbol is safe.
    160   const size_t split_index = email.find_last_of('@');
    161   DCHECK_NE(split_index, string16::npos);
    162   string16 username = email.substr(0, split_index);
    163   string16 domain = email.substr(split_index + 1);
    164   DCHECK(!username.empty());
    165   DCHECK(!domain.empty());
    166 
    167   // Subtract the @ symbol from the available width as it is mandatory.
    168   const string16 kAtSignUTF16 = ASCIIToUTF16("@");
    169   available_pixel_width -= font.GetStringWidth(kAtSignUTF16);
    170 
    171   // Check whether eliding the domain is necessary: if eliding the username
    172   // is sufficient, the domain will not be elided.
    173   const int full_username_width = font.GetStringWidth(username);
    174   const int available_domain_width =
    175       available_pixel_width -
    176       std::min(full_username_width,
    177                font.GetStringWidth(username.substr(0, 1) + kEllipsisUTF16));
    178   if (font.GetStringWidth(domain) > available_domain_width) {
    179     // Elide the domain so that it only takes half of the available width.
    180     // Should the username not need all the width available in its half, the
    181     // domain will occupy the leftover width.
    182     // If |desired_domain_width| is greater than |available_domain_width|: the
    183     // minimal username elision allowed by the specifications will not fit; thus
    184     // |desired_domain_width| must be <= |available_domain_width| at all cost.
    185     const int desired_domain_width =
    186         std::min(available_domain_width,
    187                  std::max(available_pixel_width - full_username_width,
    188                           available_pixel_width / 2));
    189     domain = ElideText(domain, font, desired_domain_width, ELIDE_IN_MIDDLE);
    190     // Failing to elide the domain such that at least one character remains
    191     // (other than the ellipsis itself) remains: return a single ellipsis.
    192     if (domain.length() <= 1U)
    193       return string16(kEllipsisUTF16);
    194   }
    195 
    196   // Fit the username in the remaining width (at this point the elided username
    197   // is guaranteed to fit with at least one character remaining given all the
    198   // precautions taken earlier).
    199   username = ElideText(username,
    200                        font,
    201                        available_pixel_width - font.GetStringWidth(domain),
    202                        ELIDE_AT_END);
    203 
    204   return username + kAtSignUTF16 + domain;
    205 }
    206 
    207 // TODO(pkasting): http://crbug.com/77883 This whole function gets
    208 // kerning/ligatures/etc. issues potentially wrong by assuming that the width of
    209 // a rendered string is always the sum of the widths of its substrings.  Also I
    210 // suspect it could be made simpler.
        //
        // Formats |url| for display (net::FormatUrl() with |languages|) and shortens
        // the result so that it fits in |available_pixel_width| when rendered with
        // |font|. Successive passes are tried in order: the full string, dropping
        // the scheme, dropping the subdomain, eliding the query, replacing path
        // components with an ellipsis, and finally plain end-elision.
        // If |available_pixel_width| is not positive the formatted URL is returned
        // unelided.
    211 string16 ElideUrl(const GURL& url,
    212                   const gfx::Font& font,
    213                   int available_pixel_width,
    214                   const std::string& languages) {
    215   // Get a formatted string and corresponding parsing of the url.
    216   url_parse::Parsed parsed;
    217   const string16 url_string =
    218       net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
    219                      net::UnescapeRule::SPACES, &parsed, NULL, NULL);
          // No usable width: hand back the formatted string untouched.
    220   if (available_pixel_width <= 0)
    221     return url_string;
    222 
    223   // If non-standard, return plain eliding.
    224   if (!url.IsStandard())
    225     return ElideText(url_string, font, available_pixel_width, ELIDE_AT_END);
    226 
    227   // Now start eliding url_string to fit within available pixel width.
    228   // First pass - check to see whether entire url_string fits.
    229   const int pixel_width_url_string = font.GetStringWidth(url_string);
    230   if (available_pixel_width >= pixel_width_url_string)
    231     return url_string;
    232 
    233   // Get the path substring, including query and reference.
    234   const size_t path_start_index = parsed.path.begin;
    235   const size_t path_len = parsed.path.len;
    236   string16 url_path_query_etc = url_string.substr(path_start_index);
    237   string16 url_path = url_string.substr(path_start_index, path_len);
    238 
    239   // Return general elided text if url minus the query fits.
    240   const string16 url_minus_query =
    241       url_string.substr(0, path_start_index + path_len);
    242   if (available_pixel_width >= font.GetStringWidth(url_minus_query))
    243     return ElideText(url_string, font, available_pixel_width, ELIDE_AT_END);
    244 
    245   // Get Host.
    246   string16 url_host = UTF8ToUTF16(url.host());
    247 
    248   // Get domain and registry information from the URL.
    249   string16 url_domain = UTF8ToUTF16(
    250       net::registry_controlled_domains::GetDomainAndRegistry(
    251           url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
          // Fall back to the whole host when no domain could be determined.
    252   if (url_domain.empty())
    253     url_domain = url_host;
    254 
    255   // Add port if required.
    256   if (!url.port().empty()) {
    257     url_host += UTF8ToUTF16(":" + url.port());
    258     url_domain += UTF8ToUTF16(":" + url.port());
    259   }
    260 
    261   // Get sub domain: the part of the host that precedes the domain. A bare
          // "www." subdomain, and any subdomain of a file URL, is dropped.
    262   string16 url_subdomain;
    263   const size_t domain_start_index = url_host.find(url_domain);
    264   if (domain_start_index != string16::npos)
    265     url_subdomain = url_host.substr(0, domain_start_index);
    266   const string16 kWwwPrefix = UTF8ToUTF16("www.");
    267   if ((url_subdomain == kWwwPrefix || url_subdomain.empty() ||
    268       url.SchemeIsFile())) {
    269     url_subdomain.clear();
    270   }
    271 
    272   // If this is a file type, the path is now defined as everything after ":".
    273   // For example, for "C:/aa/bb/cc" the path is "/aa/bb/cc". Interestingly, the
    274   // domain is now "C:" - this is a nice hack for eliding to work pleasantly.
    275   if (url.SchemeIsFile()) {
    276     // Split the path string using ":"
    277     std::vector<string16> file_path_split;
    278     base::SplitString(url_path, ':', &file_path_split);
    279     if (file_path_split.size() > 1) {  // File is of type "file:///C:/.."
    280       url_host.clear();
    281       url_domain.clear();
    282       url_subdomain.clear();
    283 
    284       const string16 kColon = UTF8ToUTF16(":");
            // substr(1) skips the first character of the piece before the colon;
            // only the first two pieces of the split are used.
    285       url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
    286       url_path_query_etc = url_path = file_path_split.at(1);
    287     }
    288   }
    289 
    290   // Second Pass - remove scheme - the rest fits.
    291   const int pixel_width_url_host = font.GetStringWidth(url_host);
    292   const int pixel_width_url_path = font.GetStringWidth(url_path_query_etc);
    293   if (available_pixel_width >=
    294       pixel_width_url_host + pixel_width_url_path)
    295     return url_host + url_path_query_etc;
    296 
    297   // Third Pass: Subdomain, domain and entire path fits.
    298   const int pixel_width_url_domain = font.GetStringWidth(url_domain);
    299   const int pixel_width_url_subdomain = font.GetStringWidth(url_subdomain);
    300   if (available_pixel_width >=
    301       pixel_width_url_subdomain + pixel_width_url_domain +
    302       pixel_width_url_path)
    303     return url_subdomain + url_domain + url_path_query_etc;
    304 
    305   // Query element.
    306   string16 url_query;
    307   const int kPixelWidthDotsTrailer =
    308       font.GetStringWidth(UTF8ToUTF16(kEllipsis));
    309   if (parsed.query.is_nonempty()) {
            // substr() without a length runs to the end of |url_string|, so
            // |url_query| also carries whatever follows the query.
    310     url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
            // If everything except the query fits, end-elide the whole string so
            // that only the query gets cut.
    311     if (available_pixel_width >= (pixel_width_url_subdomain +
    312         pixel_width_url_domain + pixel_width_url_path -
    313         font.GetStringWidth(url_query))) {
    314       return ElideText(url_subdomain + url_domain + url_path_query_etc,
    315                        font, available_pixel_width, ELIDE_AT_END);
    316     }
    317   }
    318 
    319   // Parse url_path using '/'.
    320   std::vector<string16> url_path_elements;
    321   base::SplitString(url_path, kForwardSlash, &url_path_elements);
    322 
    323   // Get filename - note that for a path ending with /
    324   // such as www.google.com/intl/ads/, the file name is ads/.
    325   size_t url_path_number_of_elements = url_path_elements.size();
    326   DCHECK(url_path_number_of_elements != 0);
    327   string16 url_filename;
    328   if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
    329     url_filename = *(url_path_elements.end() - 1);
    330   } else if (url_path_number_of_elements > 1) {  // Path ends with a '/'.
    331     url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
    332         kForwardSlash;
    333     url_path_number_of_elements--;
    334   }
    335   DCHECK(url_path_number_of_elements != 0);
    336 
    337   const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
    338   if (url_path_number_of_elements <= 1 ||
    339       url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
    340     // No path to elide, or too long of a path (could overflow in loop below)
    341     // Just elide this as a text string.
    342     return ElideText(url_subdomain + url_domain + url_path_query_etc, font,
    343                      available_pixel_width, ELIDE_AT_END);
    344   }
    345 
    346   // Start eliding the path and replacing elements by ".../".
    347   const string16 kEllipsisAndSlash = UTF8ToUTF16(kEllipsis) + kForwardSlash;
    348   const int pixel_width_ellipsis_slash = font.GetStringWidth(kEllipsisAndSlash);
    349 
    350   // Check with both subdomain and domain.
    351   string16 elided_path =
    352       ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
    353                              url_filename, url_query, font,
    354                              available_pixel_width);
    355   if (!elided_path.empty())
    356     return elided_path;
    357 
    358   // Check with only domain.
    359   // If a subdomain is present, add an ellipsis before domain.
    360   // This is added only if the subdomain pixel width is larger than
    361   // the pixel width of kEllipsis. Otherwise, subdomain remains,
    362   // which means that this case has been resolved earlier.
    363   string16 url_elided_domain = url_subdomain + url_domain;
    364   if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
            // kEllipsisAndSlash[0] is the ellipsis character on its own.
    365     if (!url_subdomain.empty())
    366       url_elided_domain = kEllipsisAndSlash[0] + url_domain;
    367     else
    368       url_elided_domain = url_domain;
    369 
    370     elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
    371                                          url_filename, url_query, font,
    372                                          available_pixel_width);
    373 
    374     if (!elided_path.empty())
    375       return elided_path;
    376   }
    377 
    378   // Return elided domain/.../filename anyway.
    379   string16 final_elided_url_string(url_elided_domain);
    380   const int url_elided_domain_width = font.GetStringWidth(url_elided_domain);
    381 
    382   // A hack to prevent trailing ".../...".
          // NOTE(review): "UV" looks like a stand-in for the width of two ordinary
          // characters - confirm.
    383   if ((available_pixel_width - url_elided_domain_width) >
    384       pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
    385       font.GetStringWidth(ASCIIToUTF16("UV"))) {
    386     final_elided_url_string += BuildPathFromComponents(string16(),
    387         url_path_elements, url_filename, 1);
    388   } else {
    389     final_elided_url_string += url_path;
    390   }
    391 
    392   return ElideText(final_elided_url_string, font, available_pixel_width,
    393                    ELIDE_AT_END);
    394 }
    395 
    396 string16 ElideFilename(const base::FilePath& filename,
    397                        const gfx::Font& font,
    398                        int available_pixel_width) {
    399 #if defined(OS_WIN)
    400   string16 filename_utf16 = filename.value();
    401   string16 extension = filename.Extension();
    402   string16 rootname = filename.BaseName().RemoveExtension().value();
    403 #elif defined(OS_POSIX)
    404   string16 filename_utf16 = WideToUTF16(base::SysNativeMBToWide(
    405       filename.value()));
    406   string16 extension = WideToUTF16(base::SysNativeMBToWide(
    407       filename.Extension()));
    408   string16 rootname = WideToUTF16(base::SysNativeMBToWide(
    409       filename.BaseName().RemoveExtension().value()));
    410 #endif
    411 
    412   const int full_width = font.GetStringWidth(filename_utf16);
    413   if (full_width <= available_pixel_width)
    414     return base::i18n::GetDisplayStringInLTRDirectionality(filename_utf16);
    415 
    416   if (rootname.empty() || extension.empty()) {
    417     const string16 elided_name = ElideText(filename_utf16, font,
    418                                            available_pixel_width, ELIDE_AT_END);
    419     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    420   }
    421 
    422   const int ext_width = font.GetStringWidth(extension);
    423   const int root_width = font.GetStringWidth(rootname);
    424 
    425   // We may have trimmed the path.
    426   if (root_width + ext_width <= available_pixel_width) {
    427     const string16 elided_name = rootname + extension;
    428     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    429   }
    430 
    431   if (ext_width >= available_pixel_width) {
    432     const string16 elided_name = ElideText(rootname + extension, font,
    433                                            available_pixel_width,
    434                                            ELIDE_IN_MIDDLE);
    435     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    436   }
    437 
    438   int available_root_width = available_pixel_width - ext_width;
    439   string16 elided_name =
    440       ElideText(rootname, font, available_root_width, ELIDE_AT_END);
    441   elided_name += extension;
    442   return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    443 }
    444 
    445 string16 ElideText(const string16& text,
    446                    const gfx::Font& font,
    447                    int available_pixel_width,
    448                    ElideBehavior elide_behavior) {
    449   if (text.empty())
    450     return text;
    451 
    452   const int current_text_pixel_width = font.GetStringWidth(text);
    453   const bool elide_in_middle = (elide_behavior == ELIDE_IN_MIDDLE);
    454   const bool insert_ellipsis = (elide_behavior != TRUNCATE_AT_END);
    455 
    456   const string16 ellipsis = string16(kEllipsisUTF16);
    457   StringSlicer slicer(text, ellipsis, elide_in_middle);
    458 
    459   // Pango will return 0 width for absurdly long strings. Cut the string in
    460   // half and try again.
    461   // This is caused by an int overflow in Pango (specifically, in
    462   // pango_glyph_string_extents_range). It's actually more subtle than just
    463   // returning 0, since on super absurdly long strings, the int can wrap and
    464   // return positive numbers again. Detecting that is probably not worth it
    465   // (eliding way too much from a ridiculous string is probably still
    466   // ridiculous), but we should check other widths for bogus values as well.
    467   if (current_text_pixel_width <= 0 && !text.empty()) {
    468     const string16 cut = slicer.CutString(text.length() / 2, false);
    469     return ElideText(cut, font, available_pixel_width, elide_behavior);
    470   }
    471 
    472   if (current_text_pixel_width <= available_pixel_width)
    473     return text;
    474 
    475   if (insert_ellipsis && font.GetStringWidth(ellipsis) > available_pixel_width)
    476     return string16();
    477 
    478   // Use binary search to compute the elided text.
    479   size_t lo = 0;
    480   size_t hi = text.length() - 1;
    481   size_t guess;
    482   for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) {
    483     // We check the length of the whole desired string at once to ensure we
    484     // handle kerning/ligatures/etc. correctly.
    485     const string16 cut = slicer.CutString(guess, insert_ellipsis);
    486     const int guess_length = font.GetStringWidth(cut);
    487     // Check again that we didn't hit a Pango width overflow. If so, cut the
    488     // current string in half and start over.
    489     if (guess_length <= 0) {
    490       return ElideText(slicer.CutString(guess / 2, false),
    491                        font, available_pixel_width, elide_behavior);
    492     }
    493     if (guess_length > available_pixel_width)
    494       hi = guess - 1;
    495     else
    496       lo = guess + 1;
    497   }
    498 
    499   return slicer.CutString(guess, insert_ellipsis);
    500 }
    501 
    502 SortedDisplayURL::SortedDisplayURL(const GURL& url,
    503                                    const std::string& languages) {
    504   net::AppendFormattedHost(url, languages, &sort_host_);
    505   string16 host_minus_www = net::StripWWW(sort_host_);
    506   url_parse::Parsed parsed;
    507   display_url_ =
    508       net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
    509                      net::UnescapeRule::SPACES, &parsed, &prefix_end_, NULL);
    510   if (sort_host_.length() > host_minus_www.length()) {
    511     prefix_end_ += sort_host_.length() - host_minus_www.length();
    512     sort_host_.swap(host_minus_www);
    513   }
    514 }
    515 
    516 SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) {
    517 }
    518 
    519 SortedDisplayURL::~SortedDisplayURL() {
    520 }
    521 
    522 int SortedDisplayURL::Compare(const SortedDisplayURL& other,
    523                               icu::Collator* collator) const {
    524   // Compare on hosts first. The host won't contain 'www.'.
    525   UErrorCode compare_status = U_ZERO_ERROR;
    526   UCollationResult host_compare_result = collator->compare(
    527       static_cast<const UChar*>(sort_host_.c_str()),
    528       static_cast<int>(sort_host_.length()),
    529       static_cast<const UChar*>(other.sort_host_.c_str()),
    530       static_cast<int>(other.sort_host_.length()),
    531       compare_status);
    532   DCHECK(U_SUCCESS(compare_status));
    533   if (host_compare_result != 0)
    534     return host_compare_result;
    535 
    536   // Hosts match, compare on the portion of the url after the host.
    537   string16 path = this->AfterHost();
    538   string16 o_path = other.AfterHost();
    539   compare_status = U_ZERO_ERROR;
    540   UCollationResult path_compare_result = collator->compare(
    541       static_cast<const UChar*>(path.c_str()),
    542       static_cast<int>(path.length()),
    543       static_cast<const UChar*>(o_path.c_str()),
    544       static_cast<int>(o_path.length()),
    545       compare_status);
    546   DCHECK(U_SUCCESS(compare_status));
    547   if (path_compare_result != 0)
    548     return path_compare_result;
    549 
    550   // Hosts and paths match, compare on the complete url. This'll push the www.
    551   // ones to the end.
    552   compare_status = U_ZERO_ERROR;
    553   UCollationResult display_url_compare_result = collator->compare(
    554       static_cast<const UChar*>(display_url_.c_str()),
    555       static_cast<int>(display_url_.length()),
    556       static_cast<const UChar*>(other.display_url_.c_str()),
    557       static_cast<int>(other.display_url_.length()),
    558       compare_status);
    559   DCHECK(U_SUCCESS(compare_status));
    560   return display_url_compare_result;
    561 }
    562 
    563 string16 SortedDisplayURL::AfterHost() const {
    564   const size_t slash_index = display_url_.find(sort_host_, prefix_end_);
    565   if (slash_index == string16::npos) {
    566     NOTREACHED();
    567     return string16();
    568   }
    569   return display_url_.substr(slash_index + sort_host_.length());
    570 }
    571 
    572 bool ElideString(const string16& input, int max_len, string16* output) {
    573   DCHECK_GE(max_len, 0);
    574   if (static_cast<int>(input.length()) <= max_len) {
    575     output->assign(input);
    576     return false;
    577   }
    578 
    579   switch (max_len) {
    580     case 0:
    581       output->clear();
    582       break;
    583     case 1:
    584       output->assign(input.substr(0, 1));
    585       break;
    586     case 2:
    587       output->assign(input.substr(0, 2));
    588       break;
    589     case 3:
    590       output->assign(input.substr(0, 1) + ASCIIToUTF16(".") +
    591                      input.substr(input.length() - 1));
    592       break;
    593     case 4:
    594       output->assign(input.substr(0, 1) + ASCIIToUTF16("..") +
    595                      input.substr(input.length() - 1));
    596       break;
    597     default: {
    598       int rstr_len = (max_len - 3) / 2;
    599       int lstr_len = rstr_len + ((max_len - 3) % 2);
    600       output->assign(input.substr(0, lstr_len) + ASCIIToUTF16("...") +
    601                      input.substr(input.length() - rstr_len));
    602       break;
    603     }
    604   }
    605 
    606   return true;
    607 }
    608 
    609 }  // namespace ui
    610 
    611 namespace {
    612 
    613 // Internal class used to track progress of a rectangular string elide
    614 // operation.  Exists so the top-level ElideRectangleString() function
    615 // can be broken into smaller methods sharing this state.
    616 class RectangleString {
    617  public:
          // Stores the limits and the destination string. |output| is kept as a
          // raw pointer and is only cleared later, by Init().
    618   RectangleString(size_t max_rows, size_t max_cols,
    619                   bool strict, string16 *output)
    620       : max_rows_(max_rows),
    621         max_cols_(max_cols),
    622         current_row_(0),
    623         current_col_(0),
    624         strict_(strict),
    625         suppressed_(false),
    626         output_(output) {}
    627 
    628   // Perform deferred initializations following creation.  Must be called
    629   // before any input can be added via AddString().  Currently this only
          // clears the output string.
    630   void Init() { output_->clear(); }
    631 
    632   // Add an input string, reformatting to fit the desired dimensions.
    633   // AddString() may be called multiple times to concatenate together
    634   // multiple strings into the region (the current caller doesn't do
    635   // this, however).
    636   void AddString(const string16& input);
    637 
    638   // Perform any deferred output processing.  Must be called after the
    639   // last AddString() call has occurred.  Returns true (after appending
          // "..." to the output) if some input was suppressed, false otherwise.
    640   bool Finalize();
    641 
    642  private:
    643   // Add a line to the rectangular region at the current position,
    644   // either by itself or by breaking it into words.
    645   void AddLine(const string16& line);
    646 
    647   // Add a word to the rectangular region at the current position,
    648   // either by itself or by breaking it into characters.
    649   void AddWord(const string16& word);
    650 
    651   // Add text to the output string if the rectangular boundaries
    652   // have not been exceeded, advancing the current position.
    653   void Append(const string16& string);
    654 
    655   // Set the current position to the beginning of the next line.  If
    656   // |output| is true, add a newline to the output string if the rectangular
    657   // boundaries have not been exceeded.  If |output| is false, we assume
    658   // some other mechanism will (likely) do similar breaking after the fact.
    659   void NewLine(bool output);
    660 
    661   // Maximum number of rows allowed in the output string.
    662   size_t max_rows_;
    663 
    664   // Maximum number of characters allowed per row of the output string;
          // lines and words are measured against this limit.
    665   size_t max_cols_;
    666 
    667   // Current row position, always incremented and may exceed max_rows_
    668   // when the input can not fit in the region.  We stop appending to
    669   // the output string, however, when this condition occurs.  In the
    670   // future, we may want to expose this value to allow the caller to
    671   // determine how many rows would actually be required to hold the
    672   // formatted string.
    673   size_t current_row_;
    674 
    675   // Current character position, should never exceed max_cols_.
    676   size_t current_col_;
    677 
    678   // True when we do whitespace to newline conversions ourselves.
    679   bool strict_;
    680 
    681   // True when some of the input has been truncated.
    682   bool suppressed_;
    683 
    684   // String onto which the output is accumulated.
    685   string16* output_;
    686 
    687   DISALLOW_COPY_AND_ASSIGN(RectangleString);
    688 };
    689 
    690 void RectangleString::AddString(const string16& input) {
    691   base::i18n::BreakIterator lines(input,
    692                                   base::i18n::BreakIterator::BREAK_NEWLINE);
    693   if (lines.Init()) {
    694     while (lines.Advance())
    695       AddLine(lines.GetString());
    696   } else {
    697     NOTREACHED() << "BreakIterator (lines) init failed";
    698   }
    699 }
    700 
    701 bool RectangleString::Finalize() {
    702   if (suppressed_) {
    703     output_->append(ASCIIToUTF16("..."));
    704     return true;
    705   }
    706   return false;
    707 }
    708 
    709 void RectangleString::AddLine(const string16& line) {
    710   if (line.length() < max_cols_) {
    711     Append(line);
    712   } else {
    713     base::i18n::BreakIterator words(line,
    714                                     base::i18n::BreakIterator::BREAK_SPACE);
    715     if (words.Init()) {
    716       while (words.Advance())
    717         AddWord(words.GetString());
    718     } else {
    719       NOTREACHED() << "BreakIterator (words) init failed";
    720     }
    721   }
    722   // Account for naturally-occuring newlines.
    723   ++current_row_;
    724   current_col_ = 0;
    725 }
    726 
    727 void RectangleString::AddWord(const string16& word) {
    728   if (word.length() < max_cols_) {
    729     // Word can be made to fit, no need to fragment it.
    730     if (current_col_ + word.length() >= max_cols_)
    731       NewLine(strict_);
    732     Append(word);
    733   } else {
    734     // Word is so big that it must be fragmented.
    735     int array_start = 0;
    736     int char_start = 0;
    737     base::i18n::UTF16CharIterator chars(&word);
    738     while (!chars.end()) {
    739       // When boundary is hit, add as much as will fit on this line.
    740       if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) {
    741         Append(word.substr(array_start, chars.array_pos() - array_start));
    742         NewLine(true);
    743         array_start = chars.array_pos();
    744         char_start = chars.char_pos();
    745       }
    746       chars.Advance();
    747     }
    748     // Add the last remaining fragment, if any.
    749     if (array_start != chars.array_pos())
    750       Append(word.substr(array_start, chars.array_pos() - array_start));
    751   }
    752 }
    753 
    754 void RectangleString::Append(const string16& string) {
    755   if (current_row_ < max_rows_)
    756     output_->append(string);
    757   else
    758     suppressed_ = true;
    759   current_col_ += string.length();
    760 }
    761 
    762 void RectangleString::NewLine(bool output) {
    763   if (current_row_ < max_rows_) {
    764     if (output)
    765       output_->append(ASCIIToUTF16("\n"));
    766   } else {
    767     suppressed_ = true;
    768   }
    769   ++current_row_;
    770   current_col_ = 0;
    771 }
    772 
    773 // Internal class used to track progress of a rectangular text elide
    774 // operation.  Exists so the top-level ElideRectangleText() function
    775 // can be broken into smaller methods sharing this state.
    776 class RectangleText {
    777  public:
    778   RectangleText(const gfx::Font& font,
    779                 int available_pixel_width,
    780                 int available_pixel_height,
    781                 ui::WordWrapBehavior wrap_behavior,
    782                 std::vector<string16>* lines)
    783       : font_(font),
    784         line_height_(font.GetHeight()),
    785         available_pixel_width_(available_pixel_width),
    786         available_pixel_height_(available_pixel_height),
    787         wrap_behavior_(wrap_behavior),
    788         current_width_(0),
    789         current_height_(0),
    790         last_line_ended_in_lf_(false),
    791         lines_(lines),
    792         insufficient_width_(false),
    793         insufficient_height_(false) {}
    794 
    795   // Perform deferred initializions following creation.  Must be called
    796   // before any input can be added via AddString().
    797   void Init() { lines_->clear(); }
    798 
    799   // Add an input string, reformatting to fit the desired dimensions.
    800   // AddString() may be called multiple times to concatenate together
    801   // multiple strings into the region (the current caller doesn't do
    802   // this, however).
    803   void AddString(const string16& input);
    804 
    805   // Perform any deferred output processing.  Must be called after the last
    806   // AddString() call has occured. Returns a combination of
    807   // |ReformattingResultFlags| indicating whether the given width or height was
    808   // insufficient, leading to elision or truncation.
    809   int Finalize();
    810 
    811  private:
    812   // Add a line to the rectangular region at the current position,
    813   // either by itself or by breaking it into words.
    814   void AddLine(const string16& line);
    815 
    816   // Wrap the specified word across multiple lines.
    817   int WrapWord(const string16& word);
    818 
    819   // Add a long word - wrapping, eliding or truncating per the wrap behavior.
    820   int AddWordOverflow(const string16& word);
    821 
    822   // Add a word to the rectangluar region at the current position.
    823   int AddWord(const string16& word);
    824 
    825   // Append the specified |text| to the current output line, incrementing the
    826   // running width by the specified amount. This is an optimization over
    827   // |AddToCurrentLine()| when |text_width| is already known.
    828   void AddToCurrentLineWithWidth(const string16& text, int text_width);
    829 
    830   // Append the specified |text| to the current output line.
    831   void AddToCurrentLine(const string16& text);
    832 
    833   // Set the current position to the beginning of the next line.
    834   bool NewLine();
    835 
    836   // The font used for measuring text width.
    837   const gfx::Font& font_;
    838 
    839   // The height of each line of text.
    840   const int line_height_;
    841 
    842   // The number of pixels of available width in the rectangle.
    843   const int available_pixel_width_;
    844 
    845   // The number of pixels of available height in the rectangle.
    846   const int available_pixel_height_;
    847 
    848   // The wrap behavior for words that are too long to fit on a single line.
    849   const ui::WordWrapBehavior wrap_behavior_;
    850 
    851   // The current running width.
    852   int current_width_;
    853 
    854   // The current running height.
    855   int current_height_;
    856 
    857   // The current line of text.
    858   string16 current_line_;
    859 
    860   // Indicates whether the last line ended with \n.
    861   bool last_line_ended_in_lf_;
    862 
    863   // The output vector of lines.
    864   std::vector<string16>* lines_;
    865 
    866   // Indicates whether a word was so long that it had to be truncated or elided
    867   // to fit the available width.
    868   bool insufficient_width_;
    869 
    870   // Indicates whether there were too many lines for the available height.
    871   bool insufficient_height_;
    872 
    873   DISALLOW_COPY_AND_ASSIGN(RectangleText);
    874 };
    875 
    876 void RectangleText::AddString(const string16& input) {
    877   base::i18n::BreakIterator lines(input,
    878                                   base::i18n::BreakIterator::BREAK_NEWLINE);
    879   if (lines.Init()) {
    880     while (!insufficient_height_ && lines.Advance()) {
    881       string16 line = lines.GetString();
    882       // The BREAK_NEWLINE iterator will keep the trailing newline character,
    883       // except in the case of the last line, which may not have one.  Remove
    884       // the newline character, if it exists.
    885       last_line_ended_in_lf_ = !line.empty() && line[line.length() - 1] == '\n';
    886       if (last_line_ended_in_lf_)
    887         line.resize(line.length() - 1);
    888       AddLine(line);
    889     }
    890   } else {
    891     NOTREACHED() << "BreakIterator (lines) init failed";
    892   }
    893 }
    894 
    895 int RectangleText::Finalize() {
    896   // Remove trailing whitespace from the last line or remove the last line
    897   // completely, if it's just whitespace.
    898   if (!insufficient_height_ && !lines_->empty()) {
    899     TrimWhitespace(lines_->back(), TRIM_TRAILING, &lines_->back());
    900     if (lines_->back().empty() && !last_line_ended_in_lf_)
    901       lines_->pop_back();
    902   }
    903   if (last_line_ended_in_lf_)
    904     lines_->push_back(string16());
    905   return (insufficient_width_ ? ui::INSUFFICIENT_SPACE_HORIZONTAL : 0) |
    906          (insufficient_height_ ? ui::INSUFFICIENT_SPACE_VERTICAL : 0);
    907 }
    908 
    909 void RectangleText::AddLine(const string16& line) {
    910   const int line_width = font_.GetStringWidth(line);
    911   if (line_width <= available_pixel_width_) {
    912     AddToCurrentLineWithWidth(line, line_width);
    913   } else {
    914     // Iterate over positions that are valid to break the line at. In general,
    915     // these are word boundaries but after any punctuation following the word.
    916     base::i18n::BreakIterator words(line,
    917                                     base::i18n::BreakIterator::BREAK_LINE);
    918     if (words.Init()) {
    919       while (words.Advance()) {
    920         const bool truncate = !current_line_.empty();
    921         const string16& word = words.GetString();
    922         const int lines_added = AddWord(word);
    923         if (lines_added) {
    924           if (truncate) {
    925             // Trim trailing whitespace from the line that was added.
    926             const int line = lines_->size() - lines_added;
    927             TrimWhitespace(lines_->at(line), TRIM_TRAILING, &lines_->at(line));
    928           }
    929           if (ContainsOnlyWhitespace(word)) {
    930             // Skip the first space if the previous line was carried over.
    931             current_width_ = 0;
    932             current_line_.clear();
    933           }
    934         }
    935       }
    936     } else {
    937       NOTREACHED() << "BreakIterator (words) init failed";
    938     }
    939   }
    940   // Account for naturally-occuring newlines.
    941   NewLine();
    942 }
    943 
    944 int RectangleText::WrapWord(const string16& word) {
    945   // Word is so wide that it must be fragmented.
    946   string16 text = word;
    947   int lines_added = 0;
    948   bool first_fragment = true;
    949   while (!insufficient_height_ && !text.empty()) {
    950     string16 fragment =
    951         ui::ElideText(text, font_, available_pixel_width_, ui::TRUNCATE_AT_END);
    952     // At least one character has to be added at every line, even if the
    953     // available space is too small.
    954     if(fragment.empty())
    955       fragment = text.substr(0, 1);
    956     if (!first_fragment && NewLine())
    957       lines_added++;
    958     AddToCurrentLine(fragment);
    959     text = text.substr(fragment.length());
    960     first_fragment = false;
    961   }
    962   return lines_added;
    963 }
    964 
    965 int RectangleText::AddWordOverflow(const string16& word) {
    966   int lines_added = 0;
    967 
    968   // Unless this is the very first word, put it on a new line.
    969   if (!current_line_.empty()) {
    970     if (!NewLine())
    971       return 0;
    972     lines_added++;
    973   }
    974 
    975   if (wrap_behavior_ == ui::IGNORE_LONG_WORDS) {
    976     current_line_ = word;
    977     current_width_ = available_pixel_width_;
    978   } else if (wrap_behavior_ == ui::WRAP_LONG_WORDS) {
    979     lines_added += WrapWord(word);
    980   } else {
    981     const ui::ElideBehavior elide_behavior =
    982         (wrap_behavior_ == ui::ELIDE_LONG_WORDS ? ui::ELIDE_AT_END :
    983                                                   ui::TRUNCATE_AT_END);
    984     const string16 elided_word =
    985         ui::ElideText(word, font_, available_pixel_width_, elide_behavior);
    986     AddToCurrentLine(elided_word);
    987     insufficient_width_ = true;
    988   }
    989 
    990   return lines_added;
    991 }
    992 
    993 int RectangleText::AddWord(const string16& word) {
    994   int lines_added = 0;
    995   string16 trimmed;
    996   TrimWhitespace(word, TRIM_TRAILING, &trimmed);
    997   const int trimmed_width = font_.GetStringWidth(trimmed);
    998   if (trimmed_width <= available_pixel_width_) {
    999     // Word can be made to fit, no need to fragment it.
   1000     if ((current_width_ + trimmed_width > available_pixel_width_) && NewLine())
   1001       lines_added++;
   1002     // Append the non-trimmed word, in case more words are added after.
   1003     AddToCurrentLine(word);
   1004   } else {
   1005     lines_added = AddWordOverflow(wrap_behavior_ == ui::IGNORE_LONG_WORDS ?
   1006                                   trimmed : word);
   1007   }
   1008   return lines_added;
   1009 }
   1010 
   1011 void RectangleText::AddToCurrentLine(const string16& text) {
   1012   AddToCurrentLineWithWidth(text, font_.GetStringWidth(text));
   1013 }
   1014 
   1015 void RectangleText::AddToCurrentLineWithWidth(const string16& text,
   1016                                               int text_width) {
   1017   if (current_height_ >= available_pixel_height_) {
   1018     insufficient_height_ = true;
   1019     return;
   1020   }
   1021   current_line_.append(text);
   1022   current_width_ += text_width;
   1023 }
   1024 
   1025 bool RectangleText::NewLine() {
   1026   bool line_added = false;
   1027   if (current_height_ < available_pixel_height_) {
   1028     lines_->push_back(current_line_);
   1029     current_line_.clear();
   1030     line_added = true;
   1031   } else {
   1032     insufficient_height_ = true;
   1033   }
   1034   current_height_ += line_height_;
   1035   current_width_ = 0;
   1036   return line_added;
   1037 }
   1038 
   1039 }  // namespace
   1040 
   1041 namespace ui {
   1042 
   1043 bool ElideRectangleString(const string16& input, size_t max_rows,
   1044                           size_t max_cols, bool strict, string16* output) {
   1045   RectangleString rect(max_rows, max_cols, strict, output);
   1046   rect.Init();
   1047   rect.AddString(input);
   1048   return rect.Finalize();
   1049 }
   1050 
   1051 int ElideRectangleText(const string16& input,
   1052                         const gfx::Font& font,
   1053                         int available_pixel_width,
   1054                         int available_pixel_height,
   1055                         WordWrapBehavior wrap_behavior,
   1056                         std::vector<string16>* lines) {
   1057   RectangleText rect(font,
   1058                      available_pixel_width,
   1059                      available_pixel_height,
   1060                      wrap_behavior,
   1061                      lines);
   1062   rect.Init();
   1063   rect.AddString(input);
   1064   return rect.Finalize();
   1065 }
   1066 
   1067 string16 TruncateString(const string16& string, size_t length) {
   1068   if (string.size() <= length)
   1069     // String fits, return it.
   1070     return string;
   1071 
   1072   if (length == 0)
   1073     // No room for the elide string, return an empty string.
   1074     return string16();
   1075 
   1076   size_t max = length - 1;
   1077 
   1078   // Added to the end of strings that are too big.
   1079   static const char16 kElideString[] = { 0x2026, 0 };
   1080 
   1081   if (max == 0)
   1082     // Just enough room for the elide string.
   1083     return kElideString;
   1084 
   1085   // Use a line iterator to find the first boundary.
   1086   UErrorCode status = U_ZERO_ERROR;
   1087   scoped_ptr<icu::RuleBasedBreakIterator> bi(
   1088       static_cast<icu::RuleBasedBreakIterator*>(
   1089           icu::RuleBasedBreakIterator::createLineInstance(
   1090               icu::Locale::getDefault(), status)));
   1091   if (U_FAILURE(status))
   1092     return string.substr(0, max) + kElideString;
   1093   bi->setText(string.c_str());
   1094   int32_t index = bi->preceding(static_cast<int32_t>(max));
   1095   if (index == icu::BreakIterator::DONE) {
   1096     index = static_cast<int32_t>(max);
   1097   } else {
   1098     // Found a valid break (may be the beginning of the string). Now use
   1099     // a character iterator to find the previous non-whitespace character.
   1100     icu::StringCharacterIterator char_iterator(string.c_str());
   1101     if (index == 0) {
   1102       // No valid line breaks. Start at the end again. This ensures we break
   1103       // on a valid character boundary.
   1104       index = static_cast<int32_t>(max);
   1105     }
   1106     char_iterator.setIndex(index);
   1107     while (char_iterator.hasPrevious()) {
   1108       char_iterator.previous();
   1109       if (!(u_isspace(char_iterator.current()) ||
   1110             u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
   1111             u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
   1112         // Not a whitespace character. Advance the iterator so that we
   1113         // include the current character in the truncated string.
   1114         char_iterator.next();
   1115         break;
   1116       }
   1117     }
   1118     if (char_iterator.hasPrevious()) {
   1119       // Found a valid break point.
   1120       index = char_iterator.getIndex();
   1121     } else {
   1122       // String has leading whitespace, return the elide string.
   1123       return kElideString;
   1124     }
   1125   }
   1126   return string.substr(0, index) + kElideString;
   1127 }
   1128 
   1129 }  // namespace ui
   1130