Home | History | Annotate | Download | only in gfx
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file implements utility functions for eliding and formatting UI text.
      6 //
      7 // Note that several of the functions declared in text_elider.h are implemented
      8 // in this file using helper classes in an unnamed namespace.
      9 
     10 #include "ui/gfx/text_elider.h"
     11 
     12 #include <string>
     13 #include <vector>
     14 
     15 #include "base/files/file_path.h"
     16 #include "base/i18n/break_iterator.h"
     17 #include "base/i18n/char_iterator.h"
     18 #include "base/i18n/rtl.h"
     19 #include "base/memory/scoped_ptr.h"
     20 #include "base/strings/string_split.h"
     21 #include "base/strings/string_util.h"
     22 #include "base/strings/sys_string_conversions.h"
     23 #include "base/strings/utf_string_conversions.h"
     24 #include "net/base/escape.h"
     25 #include "net/base/net_util.h"
     26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     27 #include "third_party/icu/source/common/unicode/rbbi.h"
     28 #include "third_party/icu/source/common/unicode/uloc.h"
     29 #include "ui/gfx/font_list.h"
     30 #include "ui/gfx/text_utils.h"
     31 #include "url/gurl.h"
     32 
     33 namespace gfx {
     34 
// U+2026 (horizontal ellipsis) encoded in UTF-8, as a NUL-terminated string.
const char kEllipsis[] = "\xE2\x80\xA6";
// The same U+2026 character as a NUL-terminated UTF-16 string.
const base::char16 kEllipsisUTF16[] = { 0x2026, 0 };
// Separator used in this file when splitting and rebuilding URL paths.
const base::char16 kForwardSlash = '/';
     39 
     40 namespace {
     41 
     42 // Helper class to split + elide text, while respecting UTF16 surrogate pairs.
     43 class StringSlicer {
     44  public:
     45   StringSlicer(const base::string16& text,
     46                const base::string16& ellipsis,
     47                bool elide_in_middle)
     48       : text_(text),
     49         ellipsis_(ellipsis),
     50         elide_in_middle_(elide_in_middle) {
     51   }
     52 
     53   // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true,
     54   // the middle of the string is removed to leave equal-length pieces from the
     55   // beginning and end of the string; otherwise, the end of the string is
     56   // removed and only the beginning remains. If |insert_ellipsis| is true,
     57   // then an ellipsis character will be inserted at the cut point.
     58   base::string16 CutString(size_t length, bool insert_ellipsis) {
     59     const base::string16 ellipsis_text = insert_ellipsis ? ellipsis_
     60                                                          : base::string16();
     61 
     62     if (!elide_in_middle_)
     63       return text_.substr(0, FindValidBoundaryBefore(length)) + ellipsis_text;
     64 
     65     // We put the extra character, if any, before the cut.
     66     const size_t half_length = length / 2;
     67     const size_t prefix_length = FindValidBoundaryBefore(length - half_length);
     68     const size_t suffix_start_guess = text_.length() - half_length;
     69     const size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess);
     70     const size_t suffix_length =
     71         half_length - (suffix_start_guess - suffix_start);
     72     return text_.substr(0, prefix_length) + ellipsis_text +
     73            text_.substr(suffix_start, suffix_length);
     74   }
     75 
     76  private:
     77   // Returns a valid cut boundary at or before |index|.
     78   size_t FindValidBoundaryBefore(size_t index) const {
     79     DCHECK_LE(index, text_.length());
     80     if (index != text_.length())
     81       U16_SET_CP_START(text_.data(), 0, index);
     82     return index;
     83   }
     84 
     85   // Returns a valid cut boundary at or after |index|.
     86   size_t FindValidBoundaryAfter(size_t index) const {
     87     DCHECK_LE(index, text_.length());
     88     if (index != text_.length())
     89       U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length());
     90     return index;
     91   }
     92 
     93   // The text to be sliced.
     94   const base::string16& text_;
     95 
     96   // Ellipsis string to use.
     97   const base::string16& ellipsis_;
     98 
     99   // If true, the middle of the string will be elided.
    100   bool elide_in_middle_;
    101 
    102   DISALLOW_COPY_AND_ASSIGN(StringSlicer);
    103 };
    104 
    105 // Build a path from the first |num_components| elements in |path_elements|.
    106 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
    107 base::string16 BuildPathFromComponents(
    108     const base::string16& path_prefix,
    109     const std::vector<base::string16>& path_elements,
    110     const base::string16& filename,
    111     size_t num_components) {
    112   // Add the initial elements of the path.
    113   base::string16 path = path_prefix;
    114 
    115   // Build path from first |num_components| elements.
    116   for (size_t j = 0; j < num_components; ++j)
    117     path += path_elements[j] + kForwardSlash;
    118 
    119   // Add |filename|, ellipsis if necessary.
    120   if (num_components != (path_elements.size() - 1))
    121     path += base::string16(kEllipsisUTF16) + kForwardSlash;
    122   path += filename;
    123 
    124   return path;
    125 }
    126 
    127 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path
    128 // components and elides if possible. Returns a string containing the longest
    129 // possible elided path, or an empty string if elision is not possible.
    130 base::string16 ElideComponentizedPath(
    131     const base::string16& url_path_prefix,
    132     const std::vector<base::string16>& url_path_elements,
    133     const base::string16& url_filename,
    134     const base::string16& url_query,
    135     const FontList& font_list,
    136     float available_pixel_width) {
    137   const size_t url_path_number_of_elements = url_path_elements.size();
    138 
    139   CHECK(url_path_number_of_elements);
    140   for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
    141     base::string16 elided_path = BuildPathFromComponents(url_path_prefix,
    142         url_path_elements, url_filename, i);
    143     if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
    144       return ElideText(elided_path + url_query, font_list,
    145                        available_pixel_width, ELIDE_AT_END);
    146   }
    147 
    148   return base::string16();
    149 }
    150 
    151 }  // namespace
    152 
    153 base::string16 ElideEmail(const base::string16& email,
    154                           const FontList& font_list,
    155                           float available_pixel_width) {
    156   if (GetStringWidthF(email, font_list) <= available_pixel_width)
    157     return email;
    158 
    159   // Split the email into its local-part (username) and domain-part. The email
    160   // spec technically allows for @ symbols in the local-part (username) of the
    161   // email under some special requirements. It is guaranteed that there is no @
    162   // symbol in the domain part of the email however so splitting at the last @
    163   // symbol is safe.
    164   const size_t split_index = email.find_last_of('@');
    165   DCHECK_NE(split_index, base::string16::npos);
    166   base::string16 username = email.substr(0, split_index);
    167   base::string16 domain = email.substr(split_index + 1);
    168   DCHECK(!username.empty());
    169   DCHECK(!domain.empty());
    170 
    171   // Subtract the @ symbol from the available width as it is mandatory.
    172   const base::string16 kAtSignUTF16 = ASCIIToUTF16("@");
    173   available_pixel_width -= GetStringWidthF(kAtSignUTF16, font_list);
    174 
    175   // Check whether eliding the domain is necessary: if eliding the username
    176   // is sufficient, the domain will not be elided.
    177   const float full_username_width = GetStringWidthF(username, font_list);
    178   const float available_domain_width =
    179       available_pixel_width -
    180       std::min(full_username_width,
    181                GetStringWidthF(username.substr(0, 1) + kEllipsisUTF16,
    182                                font_list));
    183   if (GetStringWidthF(domain, font_list) > available_domain_width) {
    184     // Elide the domain so that it only takes half of the available width.
    185     // Should the username not need all the width available in its half, the
    186     // domain will occupy the leftover width.
    187     // If |desired_domain_width| is greater than |available_domain_width|: the
    188     // minimal username elision allowed by the specifications will not fit; thus
    189     // |desired_domain_width| must be <= |available_domain_width| at all cost.
    190     const float desired_domain_width =
    191         std::min(available_domain_width,
    192                  std::max(available_pixel_width - full_username_width,
    193                           available_pixel_width / 2));
    194     domain = ElideText(domain, font_list, desired_domain_width,
    195                        ELIDE_IN_MIDDLE);
    196     // Failing to elide the domain such that at least one character remains
    197     // (other than the ellipsis itself) remains: return a single ellipsis.
    198     if (domain.length() <= 1U)
    199       return base::string16(kEllipsisUTF16);
    200   }
    201 
    202   // Fit the username in the remaining width (at this point the elided username
    203   // is guaranteed to fit with at least one character remaining given all the
    204   // precautions taken earlier).
    205   available_pixel_width -= GetStringWidthF(domain, font_list);
    206   username = ElideText(username, font_list, available_pixel_width,
    207                        ELIDE_AT_END);
    208 
    209   return username + kAtSignUTF16 + domain;
    210 }
    211 
// Elides |url| for display in |available_pixel_width| pixels using |font_list|.
// The URL is first formatted with net::FormatUrl() (|languages| is forwarded to
// it), then a sequence of progressively more aggressive passes is tried: the
// whole string, the string without scheme, with/without subdomain, with the
// query trimmed, with path components replaced by an ellipsis, and finally
// plain end-elision of whatever is left.
//
// TODO(pkasting): http://crbug.com/77883 This whole function gets
// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
// a rendered string is always the sum of the widths of its substrings.  Also I
// suspect it could be made simpler.
base::string16 ElideUrl(const GURL& url,
                        const FontList& font_list,
                        float available_pixel_width,
                        const std::string& languages) {
  // Get a formatted string and corresponding parsing of the url.
  url_parse::Parsed parsed;
  const base::string16 url_string =
      net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
                     net::UnescapeRule::SPACES, &parsed, NULL, NULL);
  // A non-positive width means no eliding is attempted at all: the full
  // formatted string is returned.
  if (available_pixel_width <= 0)
    return url_string;

  // If non-standard, return plain eliding.
  if (!url.IsStandard())
    return ElideText(url_string, font_list, available_pixel_width,
                     ELIDE_AT_END);

  // Now start eliding url_string to fit within available pixel width.
  // First pass - check to see whether entire url_string fits.
  const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
  if (available_pixel_width >= pixel_width_url_string)
    return url_string;

  // Get the path substring, including query and reference.
  const size_t path_start_index = parsed.path.begin;
  const size_t path_len = parsed.path.len;
  base::string16 url_path_query_etc = url_string.substr(path_start_index);
  base::string16 url_path = url_string.substr(path_start_index, path_len);

  // Return general elided text if url minus the query fits.
  const base::string16 url_minus_query =
      url_string.substr(0, path_start_index + path_len);
  if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
    return ElideText(url_string, font_list, available_pixel_width,
                     ELIDE_AT_END);

  // Get Host.
  base::string16 url_host = UTF8ToUTF16(url.host());

  // Get domain and registry information from the URL. Fall back to the full
  // host when no domain could be determined.
  base::string16 url_domain = UTF8ToUTF16(
      net::registry_controlled_domains::GetDomainAndRegistry(
          url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
  if (url_domain.empty())
    url_domain = url_host;

  // Add port if required.
  if (!url.port().empty()) {
    url_host += UTF8ToUTF16(":" + url.port());
    url_domain += UTF8ToUTF16(":" + url.port());
  }

  // Get sub domain: everything in the host that precedes the domain. A bare
  // "www." subdomain is dropped, and file URLs never keep one.
  base::string16 url_subdomain;
  const size_t domain_start_index = url_host.find(url_domain);
  if (domain_start_index != base::string16::npos)
    url_subdomain = url_host.substr(0, domain_start_index);
  const base::string16 kWwwPrefix = UTF8ToUTF16("www.");
  if ((url_subdomain == kWwwPrefix || url_subdomain.empty() ||
      url.SchemeIsFile())) {
    url_subdomain.clear();
  }

  // If this is a file type, the path is now defined as everything after ":".
  // For example, for a path of "/C:/aa/bb/cc" the path becomes "/aa/bb/cc" and
  // the "domain" becomes "C:" - this is a nice hack for eliding to work
  // pleasantly.
  if (url.SchemeIsFile()) {
    // Split the path string using ":"
    std::vector<base::string16> file_path_split;
    base::SplitString(url_path, ':', &file_path_split);
    if (file_path_split.size() > 1) {  // File is of type "file:///C:/.."
      url_host.clear();
      url_domain.clear();
      url_subdomain.clear();

      // substr(1) skips the first character of the piece before the colon
      // (the leading "/" in the example above).
      const base::string16 kColon = UTF8ToUTF16(":");
      url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
      url_path_query_etc = url_path = file_path_split.at(1);
    }
  }

  // Second Pass - remove scheme - the rest fits.
  const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
  const float pixel_width_url_path = GetStringWidthF(url_path_query_etc,
                                                     font_list);
  if (available_pixel_width >=
      pixel_width_url_host + pixel_width_url_path)
    return url_host + url_path_query_etc;

  // Third Pass: Subdomain, domain and entire path fits.
  const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
  const float pixel_width_url_subdomain =
      GetStringWidthF(url_subdomain, font_list);
  if (available_pixel_width >=
      pixel_width_url_subdomain + pixel_width_url_domain +
      pixel_width_url_path)
    return url_subdomain + url_domain + url_path_query_etc;

  // Query element. If everything except the query fits, keep the path intact
  // and let end-elision trim the query.
  base::string16 url_query;
  const float kPixelWidthDotsTrailer = GetStringWidthF(
      base::string16(kEllipsisUTF16), font_list);
  if (parsed.query.is_nonempty()) {
    url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
    if (available_pixel_width >=
        (pixel_width_url_subdomain + pixel_width_url_domain +
         pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
      return ElideText(url_subdomain + url_domain + url_path_query_etc,
                       font_list, available_pixel_width, ELIDE_AT_END);
    }
  }

  // Parse url_path using '/'.
  std::vector<base::string16> url_path_elements;
  base::SplitString(url_path, kForwardSlash, &url_path_elements);

  // Get filename - note that for a path ending with /
  // such as www.google.com/intl/ads/, the file name is ads/.
  // NOTE(review): in the trailing-slash case only the local element count is
  // decremented; |url_path_elements| itself still holds the trailing empty
  // element (and the element reused for the filename) when it is handed to
  // ElideComponentizedPath()/BuildPathFromComponents() below - confirm that
  // this is intended.
  size_t url_path_number_of_elements = url_path_elements.size();
  DCHECK(url_path_number_of_elements != 0);
  base::string16 url_filename;
  if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
    url_filename = *(url_path_elements.end() - 1);
  } else if (url_path_number_of_elements > 1) {  // Path ends with a '/'.
    url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
        kForwardSlash;
    url_path_number_of_elements--;
  }
  DCHECK(url_path_number_of_elements != 0);

  const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
  if (url_path_number_of_elements <= 1 ||
      url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
    // No path to elide, or too long of a path (could overflow in loop below)
    // Just elide this as a text string.
    return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
                     available_pixel_width, ELIDE_AT_END);
  }

  // Start eliding the path and replacing elements by ".../".
  const base::string16 kEllipsisAndSlash =
      base::string16(kEllipsisUTF16) + kForwardSlash;
  const float pixel_width_ellipsis_slash =
      GetStringWidthF(kEllipsisAndSlash, font_list);

  // Check with both subdomain and domain.
  base::string16 elided_path =
      ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
                             url_filename, url_query, font_list,
                             available_pixel_width);
  if (!elided_path.empty())
    return elided_path;

  // Check with only domain.
  // If a subdomain is present, add an ellipsis before domain.
  // This is added only if the subdomain pixel width is larger than
  // the pixel width of kEllipsis. Otherwise, subdomain remains,
  // which means that this case has been resolved earlier.
  base::string16 url_elided_domain = url_subdomain + url_domain;
  if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
    // kEllipsisAndSlash[0] is the ellipsis character alone.
    if (!url_subdomain.empty())
      url_elided_domain = kEllipsisAndSlash[0] + url_domain;
    else
      url_elided_domain = url_domain;

    elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
                                         url_filename, url_query, font_list,
                                         available_pixel_width);

    if (!elided_path.empty())
      return elided_path;
  }

  // Return elided domain/.../filename anyway.
  base::string16 final_elided_url_string(url_elided_domain);
  const float url_elided_domain_width = GetStringWidthF(url_elided_domain,
                                                        font_list);

  // A hack to prevent trailing ".../...": the componentized form is only used
  // when, after the domain, there is room for ".../", a trailing ellipsis and
  // two more characters (measured with "UV"); otherwise the raw path is
  // appended and end-elided.
  if ((available_pixel_width - url_elided_domain_width) >
      pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
      GetStringWidthF(ASCIIToUTF16("UV"), font_list)) {
    final_elided_url_string += BuildPathFromComponents(base::string16(),
        url_path_elements, url_filename, 1);
  } else {
    final_elided_url_string += url_path;
  }

  return ElideText(final_elided_url_string, font_list, available_pixel_width,
                   ELIDE_AT_END);
}
    407 
    408 base::string16 ElideFilename(const base::FilePath& filename,
    409                              const FontList& font_list,
    410                              float available_pixel_width) {
    411 #if defined(OS_WIN)
    412   base::string16 filename_utf16 = filename.value();
    413   base::string16 extension = filename.Extension();
    414   base::string16 rootname = filename.BaseName().RemoveExtension().value();
    415 #elif defined(OS_POSIX)
    416   base::string16 filename_utf16 = WideToUTF16(base::SysNativeMBToWide(
    417       filename.value()));
    418   base::string16 extension = WideToUTF16(base::SysNativeMBToWide(
    419       filename.Extension()));
    420   base::string16 rootname = WideToUTF16(base::SysNativeMBToWide(
    421       filename.BaseName().RemoveExtension().value()));
    422 #endif
    423 
    424   const float full_width = GetStringWidthF(filename_utf16, font_list);
    425   if (full_width <= available_pixel_width)
    426     return base::i18n::GetDisplayStringInLTRDirectionality(filename_utf16);
    427 
    428   if (rootname.empty() || extension.empty()) {
    429     const base::string16 elided_name = ElideText(filename_utf16, font_list,
    430                                            available_pixel_width, ELIDE_AT_END);
    431     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    432   }
    433 
    434   const float ext_width = GetStringWidthF(extension, font_list);
    435   const float root_width = GetStringWidthF(rootname, font_list);
    436 
    437   // We may have trimmed the path.
    438   if (root_width + ext_width <= available_pixel_width) {
    439     const base::string16 elided_name = rootname + extension;
    440     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    441   }
    442 
    443   if (ext_width >= available_pixel_width) {
    444     const base::string16 elided_name = ElideText(
    445         rootname + extension, font_list, available_pixel_width,
    446         ELIDE_IN_MIDDLE);
    447     return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    448   }
    449 
    450   float available_root_width = available_pixel_width - ext_width;
    451   base::string16 elided_name =
    452       ElideText(rootname, font_list, available_root_width, ELIDE_AT_END);
    453   elided_name += extension;
    454   return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
    455 }
    456 
    457 base::string16 ElideText(const base::string16& text,
    458                          const FontList& font_list,
    459                          float available_pixel_width,
    460                          ElideBehavior elide_behavior) {
    461   if (text.empty())
    462     return text;
    463 
    464   const float current_text_pixel_width = GetStringWidthF(text, font_list);
    465   const bool elide_in_middle = (elide_behavior == ELIDE_IN_MIDDLE);
    466   const bool insert_ellipsis = (elide_behavior != TRUNCATE_AT_END);
    467 
    468   const base::string16 ellipsis = base::string16(kEllipsisUTF16);
    469   StringSlicer slicer(text, ellipsis, elide_in_middle);
    470 
    471   // Pango will return 0 width for absurdly long strings. Cut the string in
    472   // half and try again.
    473   // This is caused by an int overflow in Pango (specifically, in
    474   // pango_glyph_string_extents_range). It's actually more subtle than just
    475   // returning 0, since on super absurdly long strings, the int can wrap and
    476   // return positive numbers again. Detecting that is probably not worth it
    477   // (eliding way too much from a ridiculous string is probably still
    478   // ridiculous), but we should check other widths for bogus values as well.
    479   if (current_text_pixel_width <= 0 && !text.empty()) {
    480     const base::string16 cut = slicer.CutString(text.length() / 2, false);
    481     return ElideText(cut, font_list, available_pixel_width, elide_behavior);
    482   }
    483 
    484   if (current_text_pixel_width <= available_pixel_width)
    485     return text;
    486 
    487   if (insert_ellipsis &&
    488       GetStringWidthF(ellipsis, font_list) > available_pixel_width)
    489     return base::string16();
    490 
    491   // Use binary search to compute the elided text.
    492   size_t lo = 0;
    493   size_t hi = text.length() - 1;
    494   size_t guess;
    495   for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) {
    496     // We check the length of the whole desired string at once to ensure we
    497     // handle kerning/ligatures/etc. correctly.
    498     const base::string16 cut = slicer.CutString(guess, insert_ellipsis);
    499     const float guess_length = GetStringWidthF(cut, font_list);
    500     // Check again that we didn't hit a Pango width overflow. If so, cut the
    501     // current string in half and start over.
    502     if (guess_length <= 0) {
    503       return ElideText(slicer.CutString(guess / 2, false),
    504                        font_list, available_pixel_width, elide_behavior);
    505     }
    506     if (guess_length > available_pixel_width)
    507       hi = guess - 1;
    508     else
    509       lo = guess + 1;
    510   }
    511 
    512   return slicer.CutString(guess, insert_ellipsis);
    513 }
    514 
    515 base::string16 ElideText(const base::string16& text,
    516                          const Font& font,
    517                          float available_pixel_width,
    518                          ElideBehavior elide_behavior) {
    519   return ElideText(text, FontList(font), available_pixel_width, elide_behavior);
    520 }
    521 
    522 SortedDisplayURL::SortedDisplayURL(const GURL& url,
    523                                    const std::string& languages) {
    524   net::AppendFormattedHost(url, languages, &sort_host_);
    525   base::string16 host_minus_www = net::StripWWW(sort_host_);
    526   url_parse::Parsed parsed;
    527   display_url_ =
    528       net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
    529                      net::UnescapeRule::SPACES, &parsed, &prefix_end_, NULL);
    530   if (sort_host_.length() > host_minus_www.length()) {
    531     prefix_end_ += sort_host_.length() - host_minus_www.length();
    532     sort_host_.swap(host_minus_www);
    533   }
    534 }
    535 
// Default constructor: |prefix_end_| is set to 0; no other member is
// explicitly initialized here.
SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) {
}
    538 
// The destructor body is intentionally empty; no explicit cleanup is done.
SortedDisplayURL::~SortedDisplayURL() {
}
    541 
    542 int SortedDisplayURL::Compare(const SortedDisplayURL& other,
    543                               icu::Collator* collator) const {
    544   // Compare on hosts first. The host won't contain 'www.'.
    545   UErrorCode compare_status = U_ZERO_ERROR;
    546   UCollationResult host_compare_result = collator->compare(
    547       static_cast<const UChar*>(sort_host_.c_str()),
    548       static_cast<int>(sort_host_.length()),
    549       static_cast<const UChar*>(other.sort_host_.c_str()),
    550       static_cast<int>(other.sort_host_.length()),
    551       compare_status);
    552   DCHECK(U_SUCCESS(compare_status));
    553   if (host_compare_result != 0)
    554     return host_compare_result;
    555 
    556   // Hosts match, compare on the portion of the url after the host.
    557   base::string16 path = this->AfterHost();
    558   base::string16 o_path = other.AfterHost();
    559   compare_status = U_ZERO_ERROR;
    560   UCollationResult path_compare_result = collator->compare(
    561       static_cast<const UChar*>(path.c_str()),
    562       static_cast<int>(path.length()),
    563       static_cast<const UChar*>(o_path.c_str()),
    564       static_cast<int>(o_path.length()),
    565       compare_status);
    566   DCHECK(U_SUCCESS(compare_status));
    567   if (path_compare_result != 0)
    568     return path_compare_result;
    569 
    570   // Hosts and paths match, compare on the complete url. This'll push the www.
    571   // ones to the end.
    572   compare_status = U_ZERO_ERROR;
    573   UCollationResult display_url_compare_result = collator->compare(
    574       static_cast<const UChar*>(display_url_.c_str()),
    575       static_cast<int>(display_url_.length()),
    576       static_cast<const UChar*>(other.display_url_.c_str()),
    577       static_cast<int>(other.display_url_.length()),
    578       compare_status);
    579   DCHECK(U_SUCCESS(compare_status));
    580   return display_url_compare_result;
    581 }
    582 
    583 base::string16 SortedDisplayURL::AfterHost() const {
    584   const size_t slash_index = display_url_.find(sort_host_, prefix_end_);
    585   if (slash_index == base::string16::npos) {
    586     NOTREACHED();
    587     return base::string16();
    588   }
    589   return display_url_.substr(slash_index + sort_host_.length());
    590 }
    591 
    592 bool ElideString(const base::string16& input, int max_len,
    593                  base::string16* output) {
    594   DCHECK_GE(max_len, 0);
    595   if (static_cast<int>(input.length()) <= max_len) {
    596     output->assign(input);
    597     return false;
    598   }
    599 
    600   switch (max_len) {
    601     case 0:
    602       output->clear();
    603       break;
    604     case 1:
    605       output->assign(input.substr(0, 1));
    606       break;
    607     case 2:
    608       output->assign(input.substr(0, 2));
    609       break;
    610     case 3:
    611       output->assign(input.substr(0, 1) + ASCIIToUTF16(".") +
    612                      input.substr(input.length() - 1));
    613       break;
    614     case 4:
    615       output->assign(input.substr(0, 1) + ASCIIToUTF16("..") +
    616                      input.substr(input.length() - 1));
    617       break;
    618     default: {
    619       int rstr_len = (max_len - 3) / 2;
    620       int lstr_len = rstr_len + ((max_len - 3) % 2);
    621       output->assign(input.substr(0, lstr_len) + ASCIIToUTF16("...") +
    622                      input.substr(input.length() - rstr_len));
    623       break;
    624     }
    625   }
    626 
    627   return true;
    628 }
    629 
    630 namespace {
    631 
// Internal class used to track progress of a rectangular string elide
// operation.  Exists so the top-level ElideRectangleString() function
// can be broken into smaller methods sharing this state.
//
// Expected call sequence, per the method comments below: construct, Init(),
// one or more AddString() calls, then Finalize().
class RectangleString {
 public:
  // Stores the limits and the destination string. |output| is kept as a raw
  // pointer and is not modified until Init() is called.
  RectangleString(size_t max_rows, size_t max_cols,
                  bool strict, base::string16 *output)
      : max_rows_(max_rows),
        max_cols_(max_cols),
        current_row_(0),
        current_col_(0),
        strict_(strict),
        suppressed_(false),
        output_(output) {}

  // Perform deferred initializations following creation.  Must be called
  // before any input can be added via AddString(). Currently this just clears
  // the output string.
  void Init() { output_->clear(); }

  // Add an input string, reformatting to fit the desired dimensions.
  // AddString() may be called multiple times to concatenate together
  // multiple strings into the region (the current caller doesn't do
  // this, however).
  void AddString(const base::string16& input);

  // Perform any deferred output processing.  Must be called after the
  // last AddString() call has occurred.
  // NOTE(review): the meaning of the bool result is not visible from this
  // declaration; presumably it reports |suppressed_| - confirm against the
  // definition.
  bool Finalize();

 private:
  // Add a line to the rectangular region at the current position,
  // either by itself or by breaking it into words.
  void AddLine(const base::string16& line);

  // Add a word to the rectangular region at the current position,
  // either by itself or by breaking it into characters.
  void AddWord(const base::string16& word);

  // Add text to the output string if the rectangular boundaries
  // have not been exceeded, advancing the current position.
  void Append(const base::string16& string);

  // Set the current position to the beginning of the next line.  If
  // |output| is true, add a newline to the output string if the rectangular
  // boundaries have not been exceeded.  If |output| is false, we assume
  // some other mechanism will (likely) do similar breaking after the fact.
  void NewLine(bool output);

  // Maximum number of rows allowed in the output string.
  size_t max_rows_;

  // Maximum number of characters allowed in the output string.
  // NOTE(review): judging by the name and by |current_col_| below, this looks
  // like a per-row limit rather than a whole-string limit - confirm.
  size_t max_cols_;

  // Current row position, always incremented and may exceed max_rows_
  // when the input can not fit in the region.  We stop appending to
  // the output string, however, when this condition occurs.  In the
  // future, we may want to expose this value to allow the caller to
  // determine how many rows would actually be required to hold the
  // formatted string.
  size_t current_row_;

  // Current character position, should never exceed max_cols_.
  size_t current_col_;

  // True when we do whitespace to newline conversions ourselves.
  bool strict_;

  // True when some of the input has been truncated.
  bool suppressed_;

  // String onto which the output is accumulated. Not owned by this class as
  // far as this declaration shows (it is only stored, never deleted, here).
  base::string16* output_;

  DISALLOW_COPY_AND_ASSIGN(RectangleString);
};
    708 
    709 void RectangleString::AddString(const base::string16& input) {
    710   base::i18n::BreakIterator lines(input,
    711                                   base::i18n::BreakIterator::BREAK_NEWLINE);
    712   if (lines.Init()) {
    713     while (lines.Advance())
    714       AddLine(lines.GetString());
    715   } else {
    716     NOTREACHED() << "BreakIterator (lines) init failed";
    717   }
    718 }
    719 
    720 bool RectangleString::Finalize() {
    721   if (suppressed_) {
    722     output_->append(ASCIIToUTF16("..."));
    723     return true;
    724   }
    725   return false;
    726 }
    727 
    728 void RectangleString::AddLine(const base::string16& line) {
    729   if (line.length() < max_cols_) {
    730     Append(line);
    731   } else {
    732     base::i18n::BreakIterator words(line,
    733                                     base::i18n::BreakIterator::BREAK_SPACE);
    734     if (words.Init()) {
    735       while (words.Advance())
    736         AddWord(words.GetString());
    737     } else {
    738       NOTREACHED() << "BreakIterator (words) init failed";
    739     }
    740   }
    741   // Account for naturally-occuring newlines.
    742   ++current_row_;
    743   current_col_ = 0;
    744 }
    745 
    746 void RectangleString::AddWord(const base::string16& word) {
    747   if (word.length() < max_cols_) {
    748     // Word can be made to fit, no need to fragment it.
    749     if (current_col_ + word.length() >= max_cols_)
    750       NewLine(strict_);
    751     Append(word);
    752   } else {
    753     // Word is so big that it must be fragmented.
    754     int array_start = 0;
    755     int char_start = 0;
    756     base::i18n::UTF16CharIterator chars(&word);
    757     while (!chars.end()) {
    758       // When boundary is hit, add as much as will fit on this line.
    759       if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) {
    760         Append(word.substr(array_start, chars.array_pos() - array_start));
    761         NewLine(true);
    762         array_start = chars.array_pos();
    763         char_start = chars.char_pos();
    764       }
    765       chars.Advance();
    766     }
    767     // Add the last remaining fragment, if any.
    768     if (array_start != chars.array_pos())
    769       Append(word.substr(array_start, chars.array_pos() - array_start));
    770   }
    771 }
    772 
    773 void RectangleString::Append(const base::string16& string) {
    774   if (current_row_ < max_rows_)
    775     output_->append(string);
    776   else
    777     suppressed_ = true;
    778   current_col_ += string.length();
    779 }
    780 
    781 void RectangleString::NewLine(bool output) {
    782   if (current_row_ < max_rows_) {
    783     if (output)
    784       output_->append(ASCIIToUTF16("\n"));
    785   } else {
    786     suppressed_ = true;
    787   }
    788   ++current_row_;
    789   current_col_ = 0;
    790 }
    791 
    792 // Internal class used to track progress of a rectangular text elide
    793 // operation.  Exists so the top-level ElideRectangleText() function
    794 // can be broken into smaller methods sharing this state.
    795 class RectangleText {
    796  public:
    797   RectangleText(const FontList& font_list,
    798                 float available_pixel_width,
    799                 int available_pixel_height,
    800                 WordWrapBehavior wrap_behavior,
    801                 std::vector<base::string16>* lines)
    802       : font_list_(font_list),
    803         line_height_(font_list.GetHeight()),
    804         available_pixel_width_(available_pixel_width),
    805         available_pixel_height_(available_pixel_height),
    806         wrap_behavior_(wrap_behavior),
    807         current_width_(0),
    808         current_height_(0),
    809         last_line_ended_in_lf_(false),
    810         lines_(lines),
    811         insufficient_width_(false),
    812         insufficient_height_(false) {}
    813 
    814   // Perform deferred initializions following creation.  Must be called
    815   // before any input can be added via AddString().
    816   void Init() { lines_->clear(); }
    817 
    818   // Add an input string, reformatting to fit the desired dimensions.
    819   // AddString() may be called multiple times to concatenate together
    820   // multiple strings into the region (the current caller doesn't do
    821   // this, however).
    822   void AddString(const base::string16& input);
    823 
    824   // Perform any deferred output processing.  Must be called after the last
    825   // AddString() call has occured. Returns a combination of
    826   // |ReformattingResultFlags| indicating whether the given width or height was
    827   // insufficient, leading to elision or truncation.
    828   int Finalize();
    829 
    830  private:
    831   // Add a line to the rectangular region at the current position,
    832   // either by itself or by breaking it into words.
    833   void AddLine(const base::string16& line);
    834 
    835   // Wrap the specified word across multiple lines.
    836   int WrapWord(const base::string16& word);
    837 
    838   // Add a long word - wrapping, eliding or truncating per the wrap behavior.
    839   int AddWordOverflow(const base::string16& word);
    840 
    841   // Add a word to the rectangluar region at the current position.
    842   int AddWord(const base::string16& word);
    843 
    844   // Append the specified |text| to the current output line, incrementing the
    845   // running width by the specified amount. This is an optimization over
    846   // |AddToCurrentLine()| when |text_width| is already known.
    847   void AddToCurrentLineWithWidth(const base::string16& text, float text_width);
    848 
    849   // Append the specified |text| to the current output line.
    850   void AddToCurrentLine(const base::string16& text);
    851 
    852   // Set the current position to the beginning of the next line.
    853   bool NewLine();
    854 
    855   // The font list used for measuring text width.
    856   const FontList& font_list_;
    857 
    858   // The height of each line of text.
    859   const int line_height_;
    860 
    861   // The number of pixels of available width in the rectangle.
    862   const float available_pixel_width_;
    863 
    864   // The number of pixels of available height in the rectangle.
    865   const int available_pixel_height_;
    866 
    867   // The wrap behavior for words that are too long to fit on a single line.
    868   const WordWrapBehavior wrap_behavior_;
    869 
    870   // The current running width.
    871   float current_width_;
    872 
    873   // The current running height.
    874   int current_height_;
    875 
    876   // The current line of text.
    877   base::string16 current_line_;
    878 
    879   // Indicates whether the last line ended with \n.
    880   bool last_line_ended_in_lf_;
    881 
    882   // The output vector of lines.
    883   std::vector<base::string16>* lines_;
    884 
    885   // Indicates whether a word was so long that it had to be truncated or elided
    886   // to fit the available width.
    887   bool insufficient_width_;
    888 
    889   // Indicates whether there were too many lines for the available height.
    890   bool insufficient_height_;
    891 
    892   DISALLOW_COPY_AND_ASSIGN(RectangleText);
    893 };
    894 
    895 void RectangleText::AddString(const base::string16& input) {
    896   base::i18n::BreakIterator lines(input,
    897                                   base::i18n::BreakIterator::BREAK_NEWLINE);
    898   if (lines.Init()) {
    899     while (!insufficient_height_ && lines.Advance()) {
    900       base::string16 line = lines.GetString();
    901       // The BREAK_NEWLINE iterator will keep the trailing newline character,
    902       // except in the case of the last line, which may not have one.  Remove
    903       // the newline character, if it exists.
    904       last_line_ended_in_lf_ = !line.empty() && line[line.length() - 1] == '\n';
    905       if (last_line_ended_in_lf_)
    906         line.resize(line.length() - 1);
    907       AddLine(line);
    908     }
    909   } else {
    910     NOTREACHED() << "BreakIterator (lines) init failed";
    911   }
    912 }
    913 
    914 int RectangleText::Finalize() {
    915   // Remove trailing whitespace from the last line or remove the last line
    916   // completely, if it's just whitespace.
    917   if (!insufficient_height_ && !lines_->empty()) {
    918     TrimWhitespace(lines_->back(), TRIM_TRAILING, &lines_->back());
    919     if (lines_->back().empty() && !last_line_ended_in_lf_)
    920       lines_->pop_back();
    921   }
    922   if (last_line_ended_in_lf_)
    923     lines_->push_back(base::string16());
    924   return (insufficient_width_ ? INSUFFICIENT_SPACE_HORIZONTAL : 0) |
    925          (insufficient_height_ ? INSUFFICIENT_SPACE_VERTICAL : 0);
    926 }
    927 
    928 void RectangleText::AddLine(const base::string16& line) {
    929   const float line_width = GetStringWidthF(line, font_list_);
    930   if (line_width <= available_pixel_width_) {
    931     AddToCurrentLineWithWidth(line, line_width);
    932   } else {
    933     // Iterate over positions that are valid to break the line at. In general,
    934     // these are word boundaries but after any punctuation following the word.
    935     base::i18n::BreakIterator words(line,
    936                                     base::i18n::BreakIterator::BREAK_LINE);
    937     if (words.Init()) {
    938       while (words.Advance()) {
    939         const bool truncate = !current_line_.empty();
    940         const base::string16& word = words.GetString();
    941         const int lines_added = AddWord(word);
    942         if (lines_added) {
    943           if (truncate) {
    944             // Trim trailing whitespace from the line that was added.
    945             const int line = lines_->size() - lines_added;
    946             TrimWhitespace(lines_->at(line), TRIM_TRAILING, &lines_->at(line));
    947           }
    948           if (ContainsOnlyWhitespace(word)) {
    949             // Skip the first space if the previous line was carried over.
    950             current_width_ = 0;
    951             current_line_.clear();
    952           }
    953         }
    954       }
    955     } else {
    956       NOTREACHED() << "BreakIterator (words) init failed";
    957     }
    958   }
    959   // Account for naturally-occuring newlines.
    960   NewLine();
    961 }
    962 
    963 int RectangleText::WrapWord(const base::string16& word) {
    964   // Word is so wide that it must be fragmented.
    965   base::string16 text = word;
    966   int lines_added = 0;
    967   bool first_fragment = true;
    968   while (!insufficient_height_ && !text.empty()) {
    969     base::string16 fragment =
    970         ElideText(text, font_list_, available_pixel_width_,
    971                   TRUNCATE_AT_END);
    972     // At least one character has to be added at every line, even if the
    973     // available space is too small.
    974     if(fragment.empty())
    975       fragment = text.substr(0, 1);
    976     if (!first_fragment && NewLine())
    977       lines_added++;
    978     AddToCurrentLine(fragment);
    979     text = text.substr(fragment.length());
    980     first_fragment = false;
    981   }
    982   return lines_added;
    983 }
    984 
    985 int RectangleText::AddWordOverflow(const base::string16& word) {
    986   int lines_added = 0;
    987 
    988   // Unless this is the very first word, put it on a new line.
    989   if (!current_line_.empty()) {
    990     if (!NewLine())
    991       return 0;
    992     lines_added++;
    993   }
    994 
    995   if (wrap_behavior_ == IGNORE_LONG_WORDS) {
    996     current_line_ = word;
    997     current_width_ = available_pixel_width_;
    998   } else if (wrap_behavior_ == WRAP_LONG_WORDS) {
    999     lines_added += WrapWord(word);
   1000   } else {
   1001     const ElideBehavior elide_behavior =
   1002         (wrap_behavior_ == ELIDE_LONG_WORDS ? ELIDE_AT_END : TRUNCATE_AT_END);
   1003     const base::string16 elided_word =
   1004         ElideText(word, font_list_, available_pixel_width_, elide_behavior);
   1005     AddToCurrentLine(elided_word);
   1006     insufficient_width_ = true;
   1007   }
   1008 
   1009   return lines_added;
   1010 }
   1011 
   1012 int RectangleText::AddWord(const base::string16& word) {
   1013   int lines_added = 0;
   1014   base::string16 trimmed;
   1015   TrimWhitespace(word, TRIM_TRAILING, &trimmed);
   1016   const float trimmed_width = GetStringWidthF(trimmed, font_list_);
   1017   if (trimmed_width <= available_pixel_width_) {
   1018     // Word can be made to fit, no need to fragment it.
   1019     if ((current_width_ + trimmed_width > available_pixel_width_) && NewLine())
   1020       lines_added++;
   1021     // Append the non-trimmed word, in case more words are added after.
   1022     AddToCurrentLine(word);
   1023   } else {
   1024     lines_added = AddWordOverflow(wrap_behavior_ == IGNORE_LONG_WORDS ?
   1025                                   trimmed : word);
   1026   }
   1027   return lines_added;
   1028 }
   1029 
   1030 void RectangleText::AddToCurrentLine(const base::string16& text) {
   1031   AddToCurrentLineWithWidth(text, GetStringWidthF(text, font_list_));
   1032 }
   1033 
   1034 void RectangleText::AddToCurrentLineWithWidth(const base::string16& text,
   1035                                               float text_width) {
   1036   if (current_height_ >= available_pixel_height_) {
   1037     insufficient_height_ = true;
   1038     return;
   1039   }
   1040   current_line_.append(text);
   1041   current_width_ += text_width;
   1042 }
   1043 
   1044 bool RectangleText::NewLine() {
   1045   bool line_added = false;
   1046   if (current_height_ < available_pixel_height_) {
   1047     lines_->push_back(current_line_);
   1048     current_line_.clear();
   1049     line_added = true;
   1050   } else {
   1051     insufficient_height_ = true;
   1052   }
   1053   current_height_ += line_height_;
   1054   current_width_ = 0;
   1055   return line_added;
   1056 }
   1057 
   1058 }  // namespace
   1059 
   1060 bool ElideRectangleString(const base::string16& input, size_t max_rows,
   1061                           size_t max_cols, bool strict,
   1062                           base::string16* output) {
   1063   RectangleString rect(max_rows, max_cols, strict, output);
   1064   rect.Init();
   1065   rect.AddString(input);
   1066   return rect.Finalize();
   1067 }
   1068 
   1069 int ElideRectangleText(const base::string16& input,
   1070                        const FontList& font_list,
   1071                        float available_pixel_width,
   1072                        int available_pixel_height,
   1073                        WordWrapBehavior wrap_behavior,
   1074                        std::vector<base::string16>* lines) {
   1075   RectangleText rect(font_list,
   1076                      available_pixel_width,
   1077                      available_pixel_height,
   1078                      wrap_behavior,
   1079                      lines);
   1080   rect.Init();
   1081   rect.AddString(input);
   1082   return rect.Finalize();
   1083 }
   1084 
   1085 base::string16 TruncateString(const base::string16& string, size_t length) {
   1086   if (string.size() <= length)
   1087     // String fits, return it.
   1088     return string;
   1089 
   1090   if (length == 0)
   1091     // No room for the elide string, return an empty string.
   1092     return base::string16();
   1093 
   1094   size_t max = length - 1;
   1095 
   1096   // Added to the end of strings that are too big.
   1097   static const base::char16 kElideString[] = { 0x2026, 0 };
   1098 
   1099   if (max == 0)
   1100     // Just enough room for the elide string.
   1101     return kElideString;
   1102 
   1103   // Use a line iterator to find the first boundary.
   1104   UErrorCode status = U_ZERO_ERROR;
   1105   scoped_ptr<icu::RuleBasedBreakIterator> bi(
   1106       static_cast<icu::RuleBasedBreakIterator*>(
   1107           icu::RuleBasedBreakIterator::createLineInstance(
   1108               icu::Locale::getDefault(), status)));
   1109   if (U_FAILURE(status))
   1110     return string.substr(0, max) + kElideString;
   1111   bi->setText(string.c_str());
   1112   int32_t index = bi->preceding(static_cast<int32_t>(max));
   1113   if (index == icu::BreakIterator::DONE) {
   1114     index = static_cast<int32_t>(max);
   1115   } else {
   1116     // Found a valid break (may be the beginning of the string). Now use
   1117     // a character iterator to find the previous non-whitespace character.
   1118     icu::StringCharacterIterator char_iterator(string.c_str());
   1119     if (index == 0) {
   1120       // No valid line breaks. Start at the end again. This ensures we break
   1121       // on a valid character boundary.
   1122       index = static_cast<int32_t>(max);
   1123     }
   1124     char_iterator.setIndex(index);
   1125     while (char_iterator.hasPrevious()) {
   1126       char_iterator.previous();
   1127       if (!(u_isspace(char_iterator.current()) ||
   1128             u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
   1129             u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
   1130         // Not a whitespace character. Advance the iterator so that we
   1131         // include the current character in the truncated string.
   1132         char_iterator.next();
   1133         break;
   1134       }
   1135     }
   1136     if (char_iterator.hasPrevious()) {
   1137       // Found a valid break point.
   1138       index = char_iterator.getIndex();
   1139     } else {
   1140       // String has leading whitespace, return the elide string.
   1141       return kElideString;
   1142     }
   1143   }
   1144   return string.substr(0, index) + kElideString;
   1145 }
   1146 
   1147 }  // namespace gfx
   1148