Home | History | Annotate | Download | only in chromium
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "cpp/src/util/canonicalize_string.h"
      6 
      7 #include "base/logging.h"
      8 #include "cpp/include/libaddressinput/util/scoped_ptr.h"
      9 #include "third_party/icu/source/common/unicode/errorcode.h"
     10 #include "third_party/icu/source/common/unicode/locid.h"
     11 #include "third_party/icu/source/common/unicode/unistr.h"
     12 #include "third_party/icu/source/common/unicode/utypes.h"
     13 #include "third_party/icu/source/i18n/unicode/coll.h"
     14 
     15 namespace i18n {
     16 namespace addressinput {
     17 
     18 namespace {
     19 
     20 class ChromeStringCanonicalizer : public StringCanonicalizer {
     21  public:
     22   ChromeStringCanonicalizer()
     23       : error_code_(U_ZERO_ERROR),
     24         collator_(
     25             icu::Collator::createInstance(
     26                 icu::Locale::getRoot(), error_code_)) {
     27     collator_->setStrength(icu::Collator::PRIMARY);
     28     DCHECK(U_SUCCESS(error_code_));
     29   }
     30 
     31   virtual ~ChromeStringCanonicalizer() {}
     32 
     33   // StringCanonicalizer implementation.
     34   virtual std::string CanonicalizeString(const std::string& original) {
     35     // Returns a canonical version of the string that can be used for comparing
     36     // strings regardless of diacritics and capitalization.
     37     //    CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
     38     //    CanonicalizeString("Texas") == CanonicalizeString("teXas");
     39     //    CanonicalizeString("Texas") != CanonicalizeString("California");
     40     //
     41     // The output is not human-readable.
     42     //    CanonicalizeString("Texas") != "Texas";
     43     icu::UnicodeString icu_str(
     44         original.c_str(), static_cast<int32_t>(original.length()));
     45     int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
     46     scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
     47     DCHECK(buffer.get());
     48     int32_t filled_size =
     49         collator_->getSortKey(icu_str, buffer.get(), buffer_size);
     50     DCHECK_EQ(buffer_size, filled_size);
     51     return std::string(reinterpret_cast<const char*>(buffer.get()));
     52   }
     53 
     54  private:
     55   UErrorCode error_code_;
     56   scoped_ptr<icu::Collator> collator_;
     57 
     58   DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer);
     59 };
     60 
     61 }  // namespace
     62 
// static
// Factory for the Chromium build: returns a newly allocated
// ChromeStringCanonicalizer, with ownership held by the returned scoped_ptr.
scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
  return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer);
}
     67 
     68 }  // namespace addressinput
     69 }  // namespace i18n
     70