1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/renderer/android/email_detector.h" 6 7 #include "base/logging.h" 8 #include "base/memory/scoped_ptr.h" 9 #include "base/strings/utf_string_conversions.h" 10 #include "content/public/renderer/android_content_detection_prefixes.h" 11 #include "net/base/escape.h" 12 #include "third_party/icu/source/i18n/unicode/regex.h" 13 14 namespace { 15 16 // Maximum length of an email address. 17 const size_t kMaximumEmailLength = 254; 18 19 // Regex to match email addresses. 20 // This is more specific than RFC 2822 (uncommon special characters are 21 // disallowed) in order to avoid false positives. 22 // Delimiters are word boundaries to allow punctuation, quote marks etc. around 23 // the address. 24 const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b"; 25 26 } // anonymous namespace 27 28 namespace content { 29 30 EmailDetector::EmailDetector() { 31 } 32 33 size_t EmailDetector::GetMaximumContentLength() { 34 return kMaximumEmailLength; 35 } 36 37 GURL EmailDetector::GetIntentURL(const std::string& content_text) { 38 if (content_text.empty()) 39 return GURL(); 40 41 return GURL(kEmailPrefix + 42 net::EscapeQueryParamValue(content_text, true)); 43 } 44 45 bool EmailDetector::FindContent(const string16::const_iterator& begin, 46 const string16::const_iterator& end, 47 size_t* start_pos, 48 size_t* end_pos, 49 std::string* content_text) { 50 string16 utf16_input = string16(begin, end); 51 icu::UnicodeString pattern(kEmailRegex); 52 icu::UnicodeString input(utf16_input.data(), utf16_input.length()); 53 UErrorCode status = U_ZERO_ERROR; 54 scoped_ptr<icu::RegexMatcher> matcher( 55 new icu::RegexMatcher(pattern, 56 input, 57 UREGEX_CASE_INSENSITIVE, 58 status)); 59 if (matcher->find()) { 60 *start_pos = matcher->start(status); 61 DCHECK(U_SUCCESS(status)); 62 *end_pos = matcher->end(status); 63 DCHECK(U_SUCCESS(status)); 64 icu::UnicodeString content_ustr(matcher->group(status)); 65 DCHECK(U_SUCCESS(status)); 66 UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), content_text); 67 return true; 68 } 69 70 return false; 71 } 72 73 } // namespace content 74