1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file implements utility functions for eliding and formatting UI text. 6 // 7 // Note that several of the functions declared in text_elider.h are implemented 8 // in this file using helper classes in an unnamed namespace. 9 10 #include "ui/gfx/text_elider.h" 11 12 #include <string> 13 #include <vector> 14 15 #include "base/files/file_path.h" 16 #include "base/i18n/break_iterator.h" 17 #include "base/i18n/char_iterator.h" 18 #include "base/i18n/rtl.h" 19 #include "base/memory/scoped_ptr.h" 20 #include "base/strings/string_split.h" 21 #include "base/strings/string_util.h" 22 #include "base/strings/sys_string_conversions.h" 23 #include "base/strings/utf_string_conversions.h" 24 #include "net/base/escape.h" 25 #include "net/base/net_util.h" 26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 27 #include "third_party/icu/source/common/unicode/rbbi.h" 28 #include "third_party/icu/source/common/unicode/uloc.h" 29 #include "ui/gfx/font_list.h" 30 #include "ui/gfx/text_utils.h" 31 #include "url/gurl.h" 32 33 namespace gfx { 34 35 // U+2026 in utf8 36 const char kEllipsis[] = "\xE2\x80\xA6"; 37 const base::char16 kEllipsisUTF16[] = { 0x2026, 0 }; 38 const base::char16 kForwardSlash = '/'; 39 40 namespace { 41 42 // Helper class to split + elide text, while respecting UTF16 surrogate pairs. 43 class StringSlicer { 44 public: 45 StringSlicer(const base::string16& text, 46 const base::string16& ellipsis, 47 bool elide_in_middle) 48 : text_(text), 49 ellipsis_(ellipsis), 50 elide_in_middle_(elide_in_middle) { 51 } 52 53 // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true, 54 // the middle of the string is removed to leave equal-length pieces from the 55 // beginning and end of the string; otherwise, the end of the string is 56 // removed and only the beginning remains. If |insert_ellipsis| is true, 57 // then an ellipsis character will be inserted at the cut point. 58 base::string16 CutString(size_t length, bool insert_ellipsis) { 59 const base::string16 ellipsis_text = insert_ellipsis ? ellipsis_ 60 : base::string16(); 61 62 if (!elide_in_middle_) 63 return text_.substr(0, FindValidBoundaryBefore(length)) + ellipsis_text; 64 65 // We put the extra character, if any, before the cut. 66 const size_t half_length = length / 2; 67 const size_t prefix_length = FindValidBoundaryBefore(length - half_length); 68 const size_t suffix_start_guess = text_.length() - half_length; 69 const size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess); 70 const size_t suffix_length = 71 half_length - (suffix_start_guess - suffix_start); 72 return text_.substr(0, prefix_length) + ellipsis_text + 73 text_.substr(suffix_start, suffix_length); 74 } 75 76 private: 77 // Returns a valid cut boundary at or before |index|. 78 size_t FindValidBoundaryBefore(size_t index) const { 79 DCHECK_LE(index, text_.length()); 80 if (index != text_.length()) 81 U16_SET_CP_START(text_.data(), 0, index); 82 return index; 83 } 84 85 // Returns a valid cut boundary at or after |index|. 86 size_t FindValidBoundaryAfter(size_t index) const { 87 DCHECK_LE(index, text_.length()); 88 if (index != text_.length()) 89 U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length()); 90 return index; 91 } 92 93 // The text to be sliced. 94 const base::string16& text_; 95 96 // Ellipsis string to use. 97 const base::string16& ellipsis_; 98 99 // If true, the middle of the string will be elided. 100 bool elide_in_middle_; 101 102 DISALLOW_COPY_AND_ASSIGN(StringSlicer); 103 }; 104 105 // Build a path from the first |num_components| elements in |path_elements|. 106 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate. 107 base::string16 BuildPathFromComponents( 108 const base::string16& path_prefix, 109 const std::vector<base::string16>& path_elements, 110 const base::string16& filename, 111 size_t num_components) { 112 // Add the initial elements of the path. 113 base::string16 path = path_prefix; 114 115 // Build path from first |num_components| elements. 116 for (size_t j = 0; j < num_components; ++j) 117 path += path_elements[j] + kForwardSlash; 118 119 // Add |filename|, ellipsis if necessary. 120 if (num_components != (path_elements.size() - 1)) 121 path += base::string16(kEllipsisUTF16) + kForwardSlash; 122 path += filename; 123 124 return path; 125 } 126 127 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path 128 // components and elides if possible. Returns a string containing the longest 129 // possible elided path, or an empty string if elision is not possible. 130 base::string16 ElideComponentizedPath( 131 const base::string16& url_path_prefix, 132 const std::vector<base::string16>& url_path_elements, 133 const base::string16& url_filename, 134 const base::string16& url_query, 135 const FontList& font_list, 136 float available_pixel_width) { 137 const size_t url_path_number_of_elements = url_path_elements.size(); 138 139 CHECK(url_path_number_of_elements); 140 for (size_t i = url_path_number_of_elements - 1; i > 0; --i) { 141 base::string16 elided_path = BuildPathFromComponents(url_path_prefix, 142 url_path_elements, url_filename, i); 143 if (available_pixel_width >= GetStringWidthF(elided_path, font_list)) 144 return ElideText(elided_path + url_query, font_list, 145 available_pixel_width, ELIDE_AT_END); 146 } 147 148 return base::string16(); 149 } 150 151 } // namespace 152 153 base::string16 ElideEmail(const base::string16& email, 154 const FontList& font_list, 155 float available_pixel_width) { 156 if (GetStringWidthF(email, font_list) <= available_pixel_width) 157 return email; 158 159 // Split the email into its local-part (username) and domain-part. The email 160 // spec technically allows for @ symbols in the local-part (username) of the 161 // email under some special requirements. It is guaranteed that there is no @ 162 // symbol in the domain part of the email however so splitting at the last @ 163 // symbol is safe. 164 const size_t split_index = email.find_last_of('@'); 165 DCHECK_NE(split_index, base::string16::npos); 166 base::string16 username = email.substr(0, split_index); 167 base::string16 domain = email.substr(split_index + 1); 168 DCHECK(!username.empty()); 169 DCHECK(!domain.empty()); 170 171 // Subtract the @ symbol from the available width as it is mandatory. 172 const base::string16 kAtSignUTF16 = ASCIIToUTF16("@"); 173 available_pixel_width -= GetStringWidthF(kAtSignUTF16, font_list); 174 175 // Check whether eliding the domain is necessary: if eliding the username 176 // is sufficient, the domain will not be elided. 177 const float full_username_width = GetStringWidthF(username, font_list); 178 const float available_domain_width = 179 available_pixel_width - 180 std::min(full_username_width, 181 GetStringWidthF(username.substr(0, 1) + kEllipsisUTF16, 182 font_list)); 183 if (GetStringWidthF(domain, font_list) > available_domain_width) { 184 // Elide the domain so that it only takes half of the available width. 185 // Should the username not need all the width available in its half, the 186 // domain will occupy the leftover width. 187 // If |desired_domain_width| is greater than |available_domain_width|: the 188 // minimal username elision allowed by the specifications will not fit; thus 189 // |desired_domain_width| must be <= |available_domain_width| at all cost. 190 const float desired_domain_width = 191 std::min(available_domain_width, 192 std::max(available_pixel_width - full_username_width, 193 available_pixel_width / 2)); 194 domain = ElideText(domain, font_list, desired_domain_width, 195 ELIDE_IN_MIDDLE); 196 // Failing to elide the domain such that at least one character remains 197 // (other than the ellipsis itself) remains: return a single ellipsis. 198 if (domain.length() <= 1U) 199 return base::string16(kEllipsisUTF16); 200 } 201 202 // Fit the username in the remaining width (at this point the elided username 203 // is guaranteed to fit with at least one character remaining given all the 204 // precautions taken earlier). 205 available_pixel_width -= GetStringWidthF(domain, font_list); 206 username = ElideText(username, font_list, available_pixel_width, 207 ELIDE_AT_END); 208 209 return username + kAtSignUTF16 + domain; 210 } 211 212 // TODO(pkasting): http://crbug.com/77883 This whole function gets 213 // kerning/ligatures/etc. issues potentially wrong by assuming that the width of 214 // a rendered string is always the sum of the widths of its substrings. Also I 215 // suspect it could be made simpler. 216 base::string16 ElideUrl(const GURL& url, 217 const FontList& font_list, 218 float available_pixel_width, 219 const std::string& languages) { 220 // Get a formatted string and corresponding parsing of the url. 221 url_parse::Parsed parsed; 222 const base::string16 url_string = 223 net::FormatUrl(url, languages, net::kFormatUrlOmitAll, 224 net::UnescapeRule::SPACES, &parsed, NULL, NULL); 225 if (available_pixel_width <= 0) 226 return url_string; 227 228 // If non-standard, return plain eliding. 229 if (!url.IsStandard()) 230 return ElideText(url_string, font_list, available_pixel_width, 231 ELIDE_AT_END); 232 233 // Now start eliding url_string to fit within available pixel width. 234 // Fist pass - check to see whether entire url_string fits. 235 const float pixel_width_url_string = GetStringWidthF(url_string, font_list); 236 if (available_pixel_width >= pixel_width_url_string) 237 return url_string; 238 239 // Get the path substring, including query and reference. 240 const size_t path_start_index = parsed.path.begin; 241 const size_t path_len = parsed.path.len; 242 base::string16 url_path_query_etc = url_string.substr(path_start_index); 243 base::string16 url_path = url_string.substr(path_start_index, path_len); 244 245 // Return general elided text if url minus the query fits. 246 const base::string16 url_minus_query = 247 url_string.substr(0, path_start_index + path_len); 248 if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list)) 249 return ElideText(url_string, font_list, available_pixel_width, 250 ELIDE_AT_END); 251 252 // Get Host. 253 base::string16 url_host = UTF8ToUTF16(url.host()); 254 255 // Get domain and registry information from the URL. 256 base::string16 url_domain = UTF8ToUTF16( 257 net::registry_controlled_domains::GetDomainAndRegistry( 258 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES)); 259 if (url_domain.empty()) 260 url_domain = url_host; 261 262 // Add port if required. 263 if (!url.port().empty()) { 264 url_host += UTF8ToUTF16(":" + url.port()); 265 url_domain += UTF8ToUTF16(":" + url.port()); 266 } 267 268 // Get sub domain. 269 base::string16 url_subdomain; 270 const size_t domain_start_index = url_host.find(url_domain); 271 if (domain_start_index != base::string16::npos) 272 url_subdomain = url_host.substr(0, domain_start_index); 273 const base::string16 kWwwPrefix = UTF8ToUTF16("www."); 274 if ((url_subdomain == kWwwPrefix || url_subdomain.empty() || 275 url.SchemeIsFile())) { 276 url_subdomain.clear(); 277 } 278 279 // If this is a file type, the path is now defined as everything after ":". 280 // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the 281 // domain is now C: - this is a nice hack for eliding to work pleasantly. 282 if (url.SchemeIsFile()) { 283 // Split the path string using ":" 284 std::vector<base::string16> file_path_split; 285 base::SplitString(url_path, ':', &file_path_split); 286 if (file_path_split.size() > 1) { // File is of type "file:///C:/.." 287 url_host.clear(); 288 url_domain.clear(); 289 url_subdomain.clear(); 290 291 const base::string16 kColon = UTF8ToUTF16(":"); 292 url_host = url_domain = file_path_split.at(0).substr(1) + kColon; 293 url_path_query_etc = url_path = file_path_split.at(1); 294 } 295 } 296 297 // Second Pass - remove scheme - the rest fits. 298 const float pixel_width_url_host = GetStringWidthF(url_host, font_list); 299 const float pixel_width_url_path = GetStringWidthF(url_path_query_etc, 300 font_list); 301 if (available_pixel_width >= 302 pixel_width_url_host + pixel_width_url_path) 303 return url_host + url_path_query_etc; 304 305 // Third Pass: Subdomain, domain and entire path fits. 306 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); 307 const float pixel_width_url_subdomain = 308 GetStringWidthF(url_subdomain, font_list); 309 if (available_pixel_width >= 310 pixel_width_url_subdomain + pixel_width_url_domain + 311 pixel_width_url_path) 312 return url_subdomain + url_domain + url_path_query_etc; 313 314 // Query element. 315 base::string16 url_query; 316 const float kPixelWidthDotsTrailer = GetStringWidthF( 317 base::string16(kEllipsisUTF16), font_list); 318 if (parsed.query.is_nonempty()) { 319 url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin); 320 if (available_pixel_width >= 321 (pixel_width_url_subdomain + pixel_width_url_domain + 322 pixel_width_url_path - GetStringWidthF(url_query, font_list))) { 323 return ElideText(url_subdomain + url_domain + url_path_query_etc, 324 font_list, available_pixel_width, ELIDE_AT_END); 325 } 326 } 327 328 // Parse url_path using '/'. 329 std::vector<base::string16> url_path_elements; 330 base::SplitString(url_path, kForwardSlash, &url_path_elements); 331 332 // Get filename - note that for a path ending with / 333 // such as www.google.com/intl/ads/, the file name is ads/. 334 size_t url_path_number_of_elements = url_path_elements.size(); 335 DCHECK(url_path_number_of_elements != 0); 336 base::string16 url_filename; 337 if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) { 338 url_filename = *(url_path_elements.end() - 1); 339 } else if (url_path_number_of_elements > 1) { // Path ends with a '/'. 340 url_filename = url_path_elements.at(url_path_number_of_elements - 2) + 341 kForwardSlash; 342 url_path_number_of_elements--; 343 } 344 DCHECK(url_path_number_of_elements != 0); 345 346 const size_t kMaxNumberOfUrlPathElementsAllowed = 1024; 347 if (url_path_number_of_elements <= 1 || 348 url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) { 349 // No path to elide, or too long of a path (could overflow in loop below) 350 // Just elide this as a text string. 351 return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list, 352 available_pixel_width, ELIDE_AT_END); 353 } 354 355 // Start eliding the path and replacing elements by ".../". 356 const base::string16 kEllipsisAndSlash = 357 base::string16(kEllipsisUTF16) + kForwardSlash; 358 const float pixel_width_ellipsis_slash = 359 GetStringWidthF(kEllipsisAndSlash, font_list); 360 361 // Check with both subdomain and domain. 362 base::string16 elided_path = 363 ElideComponentizedPath(url_subdomain + url_domain, url_path_elements, 364 url_filename, url_query, font_list, 365 available_pixel_width); 366 if (!elided_path.empty()) 367 return elided_path; 368 369 // Check with only domain. 370 // If a subdomain is present, add an ellipsis before domain. 371 // This is added only if the subdomain pixel width is larger than 372 // the pixel width of kEllipsis. Otherwise, subdomain remains, 373 // which means that this case has been resolved earlier. 374 base::string16 url_elided_domain = url_subdomain + url_domain; 375 if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) { 376 if (!url_subdomain.empty()) 377 url_elided_domain = kEllipsisAndSlash[0] + url_domain; 378 else 379 url_elided_domain = url_domain; 380 381 elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements, 382 url_filename, url_query, font_list, 383 available_pixel_width); 384 385 if (!elided_path.empty()) 386 return elided_path; 387 } 388 389 // Return elided domain/.../filename anyway. 390 base::string16 final_elided_url_string(url_elided_domain); 391 const float url_elided_domain_width = GetStringWidthF(url_elided_domain, 392 font_list); 393 394 // A hack to prevent trailing ".../...". 395 if ((available_pixel_width - url_elided_domain_width) > 396 pixel_width_ellipsis_slash + kPixelWidthDotsTrailer + 397 GetStringWidthF(ASCIIToUTF16("UV"), font_list)) { 398 final_elided_url_string += BuildPathFromComponents(base::string16(), 399 url_path_elements, url_filename, 1); 400 } else { 401 final_elided_url_string += url_path; 402 } 403 404 return ElideText(final_elided_url_string, font_list, available_pixel_width, 405 ELIDE_AT_END); 406 } 407 408 base::string16 ElideFilename(const base::FilePath& filename, 409 const FontList& font_list, 410 float available_pixel_width) { 411 #if defined(OS_WIN) 412 base::string16 filename_utf16 = filename.value(); 413 base::string16 extension = filename.Extension(); 414 base::string16 rootname = filename.BaseName().RemoveExtension().value(); 415 #elif defined(OS_POSIX) 416 base::string16 filename_utf16 = WideToUTF16(base::SysNativeMBToWide( 417 filename.value())); 418 base::string16 extension = WideToUTF16(base::SysNativeMBToWide( 419 filename.Extension())); 420 base::string16 rootname = WideToUTF16(base::SysNativeMBToWide( 421 filename.BaseName().RemoveExtension().value())); 422 #endif 423 424 const float full_width = GetStringWidthF(filename_utf16, font_list); 425 if (full_width <= available_pixel_width) 426 return base::i18n::GetDisplayStringInLTRDirectionality(filename_utf16); 427 428 if (rootname.empty() || extension.empty()) { 429 const base::string16 elided_name = ElideText(filename_utf16, font_list, 430 available_pixel_width, ELIDE_AT_END); 431 return base::i18n::GetDisplayStringInLTRDirectionality(elided_name); 432 } 433 434 const float ext_width = GetStringWidthF(extension, font_list); 435 const float root_width = GetStringWidthF(rootname, font_list); 436 437 // We may have trimmed the path. 438 if (root_width + ext_width <= available_pixel_width) { 439 const base::string16 elided_name = rootname + extension; 440 return base::i18n::GetDisplayStringInLTRDirectionality(elided_name); 441 } 442 443 if (ext_width >= available_pixel_width) { 444 const base::string16 elided_name = ElideText( 445 rootname + extension, font_list, available_pixel_width, 446 ELIDE_IN_MIDDLE); 447 return base::i18n::GetDisplayStringInLTRDirectionality(elided_name); 448 } 449 450 float available_root_width = available_pixel_width - ext_width; 451 base::string16 elided_name = 452 ElideText(rootname, font_list, available_root_width, ELIDE_AT_END); 453 elided_name += extension; 454 return base::i18n::GetDisplayStringInLTRDirectionality(elided_name); 455 } 456 457 base::string16 ElideText(const base::string16& text, 458 const FontList& font_list, 459 float available_pixel_width, 460 ElideBehavior elide_behavior) { 461 if (text.empty()) 462 return text; 463 464 const float current_text_pixel_width = GetStringWidthF(text, font_list); 465 const bool elide_in_middle = (elide_behavior == ELIDE_IN_MIDDLE); 466 const bool insert_ellipsis = (elide_behavior != TRUNCATE_AT_END); 467 468 const base::string16 ellipsis = base::string16(kEllipsisUTF16); 469 StringSlicer slicer(text, ellipsis, elide_in_middle); 470 471 // Pango will return 0 width for absurdly long strings. Cut the string in 472 // half and try again. 473 // This is caused by an int overflow in Pango (specifically, in 474 // pango_glyph_string_extents_range). It's actually more subtle than just 475 // returning 0, since on super absurdly long strings, the int can wrap and 476 // return positive numbers again. Detecting that is probably not worth it 477 // (eliding way too much from a ridiculous string is probably still 478 // ridiculous), but we should check other widths for bogus values as well. 479 if (current_text_pixel_width <= 0 && !text.empty()) { 480 const base::string16 cut = slicer.CutString(text.length() / 2, false); 481 return ElideText(cut, font_list, available_pixel_width, elide_behavior); 482 } 483 484 if (current_text_pixel_width <= available_pixel_width) 485 return text; 486 487 if (insert_ellipsis && 488 GetStringWidthF(ellipsis, font_list) > available_pixel_width) 489 return base::string16(); 490 491 // Use binary search to compute the elided text. 492 size_t lo = 0; 493 size_t hi = text.length() - 1; 494 size_t guess; 495 for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) { 496 // We check the length of the whole desired string at once to ensure we 497 // handle kerning/ligatures/etc. correctly. 498 const base::string16 cut = slicer.CutString(guess, insert_ellipsis); 499 const float guess_length = GetStringWidthF(cut, font_list); 500 // Check again that we didn't hit a Pango width overflow. If so, cut the 501 // current string in half and start over. 502 if (guess_length <= 0) { 503 return ElideText(slicer.CutString(guess / 2, false), 504 font_list, available_pixel_width, elide_behavior); 505 } 506 if (guess_length > available_pixel_width) 507 hi = guess - 1; 508 else 509 lo = guess + 1; 510 } 511 512 return slicer.CutString(guess, insert_ellipsis); 513 } 514 515 base::string16 ElideText(const base::string16& text, 516 const Font& font, 517 float available_pixel_width, 518 ElideBehavior elide_behavior) { 519 return ElideText(text, FontList(font), available_pixel_width, elide_behavior); 520 } 521 522 SortedDisplayURL::SortedDisplayURL(const GURL& url, 523 const std::string& languages) { 524 net::AppendFormattedHost(url, languages, &sort_host_); 525 base::string16 host_minus_www = net::StripWWW(sort_host_); 526 url_parse::Parsed parsed; 527 display_url_ = 528 net::FormatUrl(url, languages, net::kFormatUrlOmitAll, 529 net::UnescapeRule::SPACES, &parsed, &prefix_end_, NULL); 530 if (sort_host_.length() > host_minus_www.length()) { 531 prefix_end_ += sort_host_.length() - host_minus_www.length(); 532 sort_host_.swap(host_minus_www); 533 } 534 } 535 536 SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) { 537 } 538 539 SortedDisplayURL::~SortedDisplayURL() { 540 } 541 542 int SortedDisplayURL::Compare(const SortedDisplayURL& other, 543 icu::Collator* collator) const { 544 // Compare on hosts first. The host won't contain 'www.'. 545 UErrorCode compare_status = U_ZERO_ERROR; 546 UCollationResult host_compare_result = collator->compare( 547 static_cast<const UChar*>(sort_host_.c_str()), 548 static_cast<int>(sort_host_.length()), 549 static_cast<const UChar*>(other.sort_host_.c_str()), 550 static_cast<int>(other.sort_host_.length()), 551 compare_status); 552 DCHECK(U_SUCCESS(compare_status)); 553 if (host_compare_result != 0) 554 return host_compare_result; 555 556 // Hosts match, compare on the portion of the url after the host. 557 base::string16 path = this->AfterHost(); 558 base::string16 o_path = other.AfterHost(); 559 compare_status = U_ZERO_ERROR; 560 UCollationResult path_compare_result = collator->compare( 561 static_cast<const UChar*>(path.c_str()), 562 static_cast<int>(path.length()), 563 static_cast<const UChar*>(o_path.c_str()), 564 static_cast<int>(o_path.length()), 565 compare_status); 566 DCHECK(U_SUCCESS(compare_status)); 567 if (path_compare_result != 0) 568 return path_compare_result; 569 570 // Hosts and paths match, compare on the complete url. This'll push the www. 571 // ones to the end. 572 compare_status = U_ZERO_ERROR; 573 UCollationResult display_url_compare_result = collator->compare( 574 static_cast<const UChar*>(display_url_.c_str()), 575 static_cast<int>(display_url_.length()), 576 static_cast<const UChar*>(other.display_url_.c_str()), 577 static_cast<int>(other.display_url_.length()), 578 compare_status); 579 DCHECK(U_SUCCESS(compare_status)); 580 return display_url_compare_result; 581 } 582 583 base::string16 SortedDisplayURL::AfterHost() const { 584 const size_t slash_index = display_url_.find(sort_host_, prefix_end_); 585 if (slash_index == base::string16::npos) { 586 NOTREACHED(); 587 return base::string16(); 588 } 589 return display_url_.substr(slash_index + sort_host_.length()); 590 } 591 592 bool ElideString(const base::string16& input, int max_len, 593 base::string16* output) { 594 DCHECK_GE(max_len, 0); 595 if (static_cast<int>(input.length()) <= max_len) { 596 output->assign(input); 597 return false; 598 } 599 600 switch (max_len) { 601 case 0: 602 output->clear(); 603 break; 604 case 1: 605 output->assign(input.substr(0, 1)); 606 break; 607 case 2: 608 output->assign(input.substr(0, 2)); 609 break; 610 case 3: 611 output->assign(input.substr(0, 1) + ASCIIToUTF16(".") + 612 input.substr(input.length() - 1)); 613 break; 614 case 4: 615 output->assign(input.substr(0, 1) + ASCIIToUTF16("..") + 616 input.substr(input.length() - 1)); 617 break; 618 default: { 619 int rstr_len = (max_len - 3) / 2; 620 int lstr_len = rstr_len + ((max_len - 3) % 2); 621 output->assign(input.substr(0, lstr_len) + ASCIIToUTF16("...") + 622 input.substr(input.length() - rstr_len)); 623 break; 624 } 625 } 626 627 return true; 628 } 629 630 namespace { 631 632 // Internal class used to track progress of a rectangular string elide 633 // operation. Exists so the top-level ElideRectangleString() function 634 // can be broken into smaller methods sharing this state. 635 class RectangleString { 636 public: 637 RectangleString(size_t max_rows, size_t max_cols, 638 bool strict, base::string16 *output) 639 : max_rows_(max_rows), 640 max_cols_(max_cols), 641 current_row_(0), 642 current_col_(0), 643 strict_(strict), 644 suppressed_(false), 645 output_(output) {} 646 647 // Perform deferred initializations following creation. Must be called 648 // before any input can be added via AddString(). 649 void Init() { output_->clear(); } 650 651 // Add an input string, reformatting to fit the desired dimensions. 652 // AddString() may be called multiple times to concatenate together 653 // multiple strings into the region (the current caller doesn't do 654 // this, however). 655 void AddString(const base::string16& input); 656 657 // Perform any deferred output processing. Must be called after the 658 // last AddString() call has occurred. 659 bool Finalize(); 660 661 private: 662 // Add a line to the rectangular region at the current position, 663 // either by itself or by breaking it into words. 664 void AddLine(const base::string16& line); 665 666 // Add a word to the rectangular region at the current position, 667 // either by itself or by breaking it into characters. 668 void AddWord(const base::string16& word); 669 670 // Add text to the output string if the rectangular boundaries 671 // have not been exceeded, advancing the current position. 672 void Append(const base::string16& string); 673 674 // Set the current position to the beginning of the next line. If 675 // |output| is true, add a newline to the output string if the rectangular 676 // boundaries have not been exceeded. If |output| is false, we assume 677 // some other mechanism will (likely) do similar breaking after the fact. 678 void NewLine(bool output); 679 680 // Maximum number of rows allowed in the output string. 681 size_t max_rows_; 682 683 // Maximum number of characters allowed in the output string. 684 size_t max_cols_; 685 686 // Current row position, always incremented and may exceed max_rows_ 687 // when the input can not fit in the region. We stop appending to 688 // the output string, however, when this condition occurs. In the 689 // future, we may want to expose this value to allow the caller to 690 // determine how many rows would actually be required to hold the 691 // formatted string. 692 size_t current_row_; 693 694 // Current character position, should never exceed max_cols_. 695 size_t current_col_; 696 697 // True when we do whitespace to newline conversions ourselves. 698 bool strict_; 699 700 // True when some of the input has been truncated. 701 bool suppressed_; 702 703 // String onto which the output is accumulated. 704 base::string16* output_; 705 706 DISALLOW_COPY_AND_ASSIGN(RectangleString); 707 }; 708 709 void RectangleString::AddString(const base::string16& input) { 710 base::i18n::BreakIterator lines(input, 711 base::i18n::BreakIterator::BREAK_NEWLINE); 712 if (lines.Init()) { 713 while (lines.Advance()) 714 AddLine(lines.GetString()); 715 } else { 716 NOTREACHED() << "BreakIterator (lines) init failed"; 717 } 718 } 719 720 bool RectangleString::Finalize() { 721 if (suppressed_) { 722 output_->append(ASCIIToUTF16("...")); 723 return true; 724 } 725 return false; 726 } 727 728 void RectangleString::AddLine(const base::string16& line) { 729 if (line.length() < max_cols_) { 730 Append(line); 731 } else { 732 base::i18n::BreakIterator words(line, 733 base::i18n::BreakIterator::BREAK_SPACE); 734 if (words.Init()) { 735 while (words.Advance()) 736 AddWord(words.GetString()); 737 } else { 738 NOTREACHED() << "BreakIterator (words) init failed"; 739 } 740 } 741 // Account for naturally-occuring newlines. 742 ++current_row_; 743 current_col_ = 0; 744 } 745 746 void RectangleString::AddWord(const base::string16& word) { 747 if (word.length() < max_cols_) { 748 // Word can be made to fit, no need to fragment it. 749 if (current_col_ + word.length() >= max_cols_) 750 NewLine(strict_); 751 Append(word); 752 } else { 753 // Word is so big that it must be fragmented. 754 int array_start = 0; 755 int char_start = 0; 756 base::i18n::UTF16CharIterator chars(&word); 757 while (!chars.end()) { 758 // When boundary is hit, add as much as will fit on this line. 759 if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) { 760 Append(word.substr(array_start, chars.array_pos() - array_start)); 761 NewLine(true); 762 array_start = chars.array_pos(); 763 char_start = chars.char_pos(); 764 } 765 chars.Advance(); 766 } 767 // Add the last remaining fragment, if any. 768 if (array_start != chars.array_pos()) 769 Append(word.substr(array_start, chars.array_pos() - array_start)); 770 } 771 } 772 773 void RectangleString::Append(const base::string16& string) { 774 if (current_row_ < max_rows_) 775 output_->append(string); 776 else 777 suppressed_ = true; 778 current_col_ += string.length(); 779 } 780 781 void RectangleString::NewLine(bool output) { 782 if (current_row_ < max_rows_) { 783 if (output) 784 output_->append(ASCIIToUTF16("\n")); 785 } else { 786 suppressed_ = true; 787 } 788 ++current_row_; 789 current_col_ = 0; 790 } 791 792 // Internal class used to track progress of a rectangular text elide 793 // operation. Exists so the top-level ElideRectangleText() function 794 // can be broken into smaller methods sharing this state. 795 class RectangleText { 796 public: 797 RectangleText(const FontList& font_list, 798 float available_pixel_width, 799 int available_pixel_height, 800 WordWrapBehavior wrap_behavior, 801 std::vector<base::string16>* lines) 802 : font_list_(font_list), 803 line_height_(font_list.GetHeight()), 804 available_pixel_width_(available_pixel_width), 805 available_pixel_height_(available_pixel_height), 806 wrap_behavior_(wrap_behavior), 807 current_width_(0), 808 current_height_(0), 809 last_line_ended_in_lf_(false), 810 lines_(lines), 811 insufficient_width_(false), 812 insufficient_height_(false) {} 813 814 // Perform deferred initializions following creation. Must be called 815 // before any input can be added via AddString(). 816 void Init() { lines_->clear(); } 817 818 // Add an input string, reformatting to fit the desired dimensions. 819 // AddString() may be called multiple times to concatenate together 820 // multiple strings into the region (the current caller doesn't do 821 // this, however). 822 void AddString(const base::string16& input); 823 824 // Perform any deferred output processing. Must be called after the last 825 // AddString() call has occured. Returns a combination of 826 // |ReformattingResultFlags| indicating whether the given width or height was 827 // insufficient, leading to elision or truncation. 828 int Finalize(); 829 830 private: 831 // Add a line to the rectangular region at the current position, 832 // either by itself or by breaking it into words. 833 void AddLine(const base::string16& line); 834 835 // Wrap the specified word across multiple lines. 836 int WrapWord(const base::string16& word); 837 838 // Add a long word - wrapping, eliding or truncating per the wrap behavior. 839 int AddWordOverflow(const base::string16& word); 840 841 // Add a word to the rectangluar region at the current position. 842 int AddWord(const base::string16& word); 843 844 // Append the specified |text| to the current output line, incrementing the 845 // running width by the specified amount. This is an optimization over 846 // |AddToCurrentLine()| when |text_width| is already known. 847 void AddToCurrentLineWithWidth(const base::string16& text, float text_width); 848 849 // Append the specified |text| to the current output line. 850 void AddToCurrentLine(const base::string16& text); 851 852 // Set the current position to the beginning of the next line. 853 bool NewLine(); 854 855 // The font list used for measuring text width. 856 const FontList& font_list_; 857 858 // The height of each line of text. 859 const int line_height_; 860 861 // The number of pixels of available width in the rectangle. 862 const float available_pixel_width_; 863 864 // The number of pixels of available height in the rectangle. 865 const int available_pixel_height_; 866 867 // The wrap behavior for words that are too long to fit on a single line. 868 const WordWrapBehavior wrap_behavior_; 869 870 // The current running width. 871 float current_width_; 872 873 // The current running height. 874 int current_height_; 875 876 // The current line of text. 877 base::string16 current_line_; 878 879 // Indicates whether the last line ended with \n. 880 bool last_line_ended_in_lf_; 881 882 // The output vector of lines. 883 std::vector<base::string16>* lines_; 884 885 // Indicates whether a word was so long that it had to be truncated or elided 886 // to fit the available width. 887 bool insufficient_width_; 888 889 // Indicates whether there were too many lines for the available height. 890 bool insufficient_height_; 891 892 DISALLOW_COPY_AND_ASSIGN(RectangleText); 893 }; 894 895 void RectangleText::AddString(const base::string16& input) { 896 base::i18n::BreakIterator lines(input, 897 base::i18n::BreakIterator::BREAK_NEWLINE); 898 if (lines.Init()) { 899 while (!insufficient_height_ && lines.Advance()) { 900 base::string16 line = lines.GetString(); 901 // The BREAK_NEWLINE iterator will keep the trailing newline character, 902 // except in the case of the last line, which may not have one. Remove 903 // the newline character, if it exists. 904 last_line_ended_in_lf_ = !line.empty() && line[line.length() - 1] == '\n'; 905 if (last_line_ended_in_lf_) 906 line.resize(line.length() - 1); 907 AddLine(line); 908 } 909 } else { 910 NOTREACHED() << "BreakIterator (lines) init failed"; 911 } 912 } 913 914 int RectangleText::Finalize() { 915 // Remove trailing whitespace from the last line or remove the last line 916 // completely, if it's just whitespace. 917 if (!insufficient_height_ && !lines_->empty()) { 918 TrimWhitespace(lines_->back(), TRIM_TRAILING, &lines_->back()); 919 if (lines_->back().empty() && !last_line_ended_in_lf_) 920 lines_->pop_back(); 921 } 922 if (last_line_ended_in_lf_) 923 lines_->push_back(base::string16()); 924 return (insufficient_width_ ? INSUFFICIENT_SPACE_HORIZONTAL : 0) | 925 (insufficient_height_ ? INSUFFICIENT_SPACE_VERTICAL : 0); 926 } 927 928 void RectangleText::AddLine(const base::string16& line) { 929 const float line_width = GetStringWidthF(line, font_list_); 930 if (line_width <= available_pixel_width_) { 931 AddToCurrentLineWithWidth(line, line_width); 932 } else { 933 // Iterate over positions that are valid to break the line at. In general, 934 // these are word boundaries but after any punctuation following the word. 935 base::i18n::BreakIterator words(line, 936 base::i18n::BreakIterator::BREAK_LINE); 937 if (words.Init()) { 938 while (words.Advance()) { 939 const bool truncate = !current_line_.empty(); 940 const base::string16& word = words.GetString(); 941 const int lines_added = AddWord(word); 942 if (lines_added) { 943 if (truncate) { 944 // Trim trailing whitespace from the line that was added. 945 const int line = lines_->size() - lines_added; 946 TrimWhitespace(lines_->at(line), TRIM_TRAILING, &lines_->at(line)); 947 } 948 if (ContainsOnlyWhitespace(word)) { 949 // Skip the first space if the previous line was carried over. 950 current_width_ = 0; 951 current_line_.clear(); 952 } 953 } 954 } 955 } else { 956 NOTREACHED() << "BreakIterator (words) init failed"; 957 } 958 } 959 // Account for naturally-occuring newlines. 960 NewLine(); 961 } 962 963 int RectangleText::WrapWord(const base::string16& word) { 964 // Word is so wide that it must be fragmented. 965 base::string16 text = word; 966 int lines_added = 0; 967 bool first_fragment = true; 968 while (!insufficient_height_ && !text.empty()) { 969 base::string16 fragment = 970 ElideText(text, font_list_, available_pixel_width_, 971 TRUNCATE_AT_END); 972 // At least one character has to be added at every line, even if the 973 // available space is too small. 974 if(fragment.empty()) 975 fragment = text.substr(0, 1); 976 if (!first_fragment && NewLine()) 977 lines_added++; 978 AddToCurrentLine(fragment); 979 text = text.substr(fragment.length()); 980 first_fragment = false; 981 } 982 return lines_added; 983 } 984 985 int RectangleText::AddWordOverflow(const base::string16& word) { 986 int lines_added = 0; 987 988 // Unless this is the very first word, put it on a new line. 989 if (!current_line_.empty()) { 990 if (!NewLine()) 991 return 0; 992 lines_added++; 993 } 994 995 if (wrap_behavior_ == IGNORE_LONG_WORDS) { 996 current_line_ = word; 997 current_width_ = available_pixel_width_; 998 } else if (wrap_behavior_ == WRAP_LONG_WORDS) { 999 lines_added += WrapWord(word); 1000 } else { 1001 const ElideBehavior elide_behavior = 1002 (wrap_behavior_ == ELIDE_LONG_WORDS ? ELIDE_AT_END : TRUNCATE_AT_END); 1003 const base::string16 elided_word = 1004 ElideText(word, font_list_, available_pixel_width_, elide_behavior); 1005 AddToCurrentLine(elided_word); 1006 insufficient_width_ = true; 1007 } 1008 1009 return lines_added; 1010 } 1011 1012 int RectangleText::AddWord(const base::string16& word) { 1013 int lines_added = 0; 1014 base::string16 trimmed; 1015 TrimWhitespace(word, TRIM_TRAILING, &trimmed); 1016 const float trimmed_width = GetStringWidthF(trimmed, font_list_); 1017 if (trimmed_width <= available_pixel_width_) { 1018 // Word can be made to fit, no need to fragment it. 1019 if ((current_width_ + trimmed_width > available_pixel_width_) && NewLine()) 1020 lines_added++; 1021 // Append the non-trimmed word, in case more words are added after. 1022 AddToCurrentLine(word); 1023 } else { 1024 lines_added = AddWordOverflow(wrap_behavior_ == IGNORE_LONG_WORDS ? 1025 trimmed : word); 1026 } 1027 return lines_added; 1028 } 1029 1030 void RectangleText::AddToCurrentLine(const base::string16& text) { 1031 AddToCurrentLineWithWidth(text, GetStringWidthF(text, font_list_)); 1032 } 1033 1034 void RectangleText::AddToCurrentLineWithWidth(const base::string16& text, 1035 float text_width) { 1036 if (current_height_ >= available_pixel_height_) { 1037 insufficient_height_ = true; 1038 return; 1039 } 1040 current_line_.append(text); 1041 current_width_ += text_width; 1042 } 1043 1044 bool RectangleText::NewLine() { 1045 bool line_added = false; 1046 if (current_height_ < available_pixel_height_) { 1047 lines_->push_back(current_line_); 1048 current_line_.clear(); 1049 line_added = true; 1050 } else { 1051 insufficient_height_ = true; 1052 } 1053 current_height_ += line_height_; 1054 current_width_ = 0; 1055 return line_added; 1056 } 1057 1058 } // namespace 1059 1060 bool ElideRectangleString(const base::string16& input, size_t max_rows, 1061 size_t max_cols, bool strict, 1062 base::string16* output) { 1063 RectangleString rect(max_rows, max_cols, strict, output); 1064 rect.Init(); 1065 rect.AddString(input); 1066 return rect.Finalize(); 1067 } 1068 1069 int ElideRectangleText(const base::string16& input, 1070 const FontList& font_list, 1071 float available_pixel_width, 1072 int available_pixel_height, 1073 WordWrapBehavior wrap_behavior, 1074 std::vector<base::string16>* lines) { 1075 RectangleText rect(font_list, 1076 available_pixel_width, 1077 available_pixel_height, 1078 wrap_behavior, 1079 lines); 1080 rect.Init(); 1081 rect.AddString(input); 1082 return rect.Finalize(); 1083 } 1084 1085 base::string16 TruncateString(const base::string16& string, size_t length) { 1086 if (string.size() <= length) 1087 // String fits, return it. 1088 return string; 1089 1090 if (length == 0) 1091 // No room for the elide string, return an empty string. 1092 return base::string16(); 1093 1094 size_t max = length - 1; 1095 1096 // Added to the end of strings that are too big. 1097 static const base::char16 kElideString[] = { 0x2026, 0 }; 1098 1099 if (max == 0) 1100 // Just enough room for the elide string. 1101 return kElideString; 1102 1103 // Use a line iterator to find the first boundary. 1104 UErrorCode status = U_ZERO_ERROR; 1105 scoped_ptr<icu::RuleBasedBreakIterator> bi( 1106 static_cast<icu::RuleBasedBreakIterator*>( 1107 icu::RuleBasedBreakIterator::createLineInstance( 1108 icu::Locale::getDefault(), status))); 1109 if (U_FAILURE(status)) 1110 return string.substr(0, max) + kElideString; 1111 bi->setText(string.c_str()); 1112 int32_t index = bi->preceding(static_cast<int32_t>(max)); 1113 if (index == icu::BreakIterator::DONE) { 1114 index = static_cast<int32_t>(max); 1115 } else { 1116 // Found a valid break (may be the beginning of the string). Now use 1117 // a character iterator to find the previous non-whitespace character. 1118 icu::StringCharacterIterator char_iterator(string.c_str()); 1119 if (index == 0) { 1120 // No valid line breaks. Start at the end again. This ensures we break 1121 // on a valid character boundary. 1122 index = static_cast<int32_t>(max); 1123 } 1124 char_iterator.setIndex(index); 1125 while (char_iterator.hasPrevious()) { 1126 char_iterator.previous(); 1127 if (!(u_isspace(char_iterator.current()) || 1128 u_charType(char_iterator.current()) == U_CONTROL_CHAR || 1129 u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) { 1130 // Not a whitespace character. Advance the iterator so that we 1131 // include the current character in the truncated string. 1132 char_iterator.next(); 1133 break; 1134 } 1135 } 1136 if (char_iterator.hasPrevious()) { 1137 // Found a valid break point. 1138 index = char_iterator.getIndex(); 1139 } else { 1140 // String has leading whitespace, return the elide string. 1141 return kElideString; 1142 } 1143 } 1144 return string.substr(0, index) + kElideString; 1145 } 1146 1147 } // namespace gfx 1148