Home | History | Annotate | Download | only in android
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/renderer/android/email_detector.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/memory/scoped_ptr.h"
      9 #include "base/strings/utf_string_conversions.h"
     10 #include "content/public/renderer/android_content_detection_prefixes.h"
     11 #include "net/base/escape.h"
     12 #include "third_party/icu/source/i18n/unicode/regex.h"
     13 
namespace {

// Maximum length of an email address. This is the value reported by
// EmailDetector::GetMaximumContentLength().
const size_t kMaximumEmailLength = 254;

// Regex to match email addresses.
// This is more specific than RFC 2822 (uncommon special characters are
// disallowed) in order to avoid false positives.
// Delimiters are word boundaries to allow punctuation, quote marks etc. around
// the address.
// The character classes only list upper-case letters; the matcher in
// EmailDetector::FindContent() is created with UREGEX_CASE_INSENSITIVE, so
// lower-case addresses match as well.
// NOTE(review): the final label is limited to 2-6 letters, so addresses whose
// last domain label is longer are not matched -- confirm this is intended.
const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";

}  // anonymous namespace
     27 
     28 namespace content {
     29 
     30 EmailDetector::EmailDetector() {
     31 }
     32 
     33 size_t EmailDetector::GetMaximumContentLength() {
     34   return kMaximumEmailLength;
     35 }
     36 
     37 GURL EmailDetector::GetIntentURL(const std::string& content_text) {
     38   if (content_text.empty())
     39     return GURL();
     40 
     41   return GURL(kEmailPrefix +
     42       net::EscapeQueryParamValue(content_text, true));
     43 }
     44 
     45 bool EmailDetector::FindContent(const base::string16::const_iterator& begin,
     46                                 const base::string16::const_iterator& end,
     47                                 size_t* start_pos,
     48                                 size_t* end_pos,
     49                                 std::string* content_text) {
     50   base::string16 utf16_input = base::string16(begin, end);
     51   icu::UnicodeString pattern(kEmailRegex);
     52   icu::UnicodeString input(utf16_input.data(), utf16_input.length());
     53   UErrorCode status = U_ZERO_ERROR;
     54   scoped_ptr<icu::RegexMatcher> matcher(
     55       new icu::RegexMatcher(pattern,
     56                             input,
     57                             UREGEX_CASE_INSENSITIVE,
     58                             status));
     59   if (matcher->find()) {
     60     *start_pos = matcher->start(status);
     61     DCHECK(U_SUCCESS(status));
     62     *end_pos = matcher->end(status);
     63     DCHECK(U_SUCCESS(status));
     64     icu::UnicodeString content_ustr(matcher->group(status));
     65     DCHECK(U_SUCCESS(status));
     66     base::UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(),
     67         content_text);
     68     return true;
     69   }
     70 
     71   return false;
     72 }
     73 
     74 }  // namespace content
     75