Home | History | Annotate | Download | only in fpdftext
      1 // Copyright 2015 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "core/fpdftext/cpdf_linkextract.h"
      6 
      7 #include "testing/gtest/include/gtest/gtest.h"
      8 
      9 // Class to help test functions in CPDF_LinkExtract class.
     10 class CPDF_TestLinkExtract : public CPDF_LinkExtract {
     11  public:
     12   CPDF_TestLinkExtract() : CPDF_LinkExtract(nullptr) {}
     13 
     14  private:
     15   // Add test cases as friends to access protected member functions.
     16   // Access CheckMailLink.
     17   FRIEND_TEST(fpdf_text_int, CheckMailLink);
     18 };
     19 
     20 TEST(fpdf_text_int, CheckMailLink) {
     21   CPDF_TestLinkExtract extractor;
     22   // Check cases that fail to extract valid mail link.
     23   const wchar_t* invalid_strs[] = {
     24       L"",
     25       L"peter.pan",       // '@' is required.
     26       L"abc@server",      // Domain name needs at least one '.'.
     27       L"abc. (at) gmail.com",  // '.' can not immediately precede '@'.
     28       L"abc@xyz&q.org",   // Domain name should not contain '&'.
     29       L"abc (at) .xyz.org",    // Domain name should not start with '.'.
     30       L"fan (at) g..com"       // Domain name should not have consecutive '.'
     31   };
     32   for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) {
     33     CFX_WideString text_str(invalid_strs[i]);
     34     EXPECT_FALSE(extractor.CheckMailLink(text_str));
     35   }
     36 
     37   // Check cases that can extract valid mail link.
     38   // An array of {input_string, expected_extracted_email_address}.
     39   const wchar_t* valid_strs[][2] = {
     40       {L"peter (at) abc.d", L"peter (at) abc.d"},
     41       {L"red.teddy.b (at) abc.com", L"red.teddy.b (at) abc.com"},
     42       {L"abc_ (at) gmail.com", L"abc_ (at) gmail.com"},  // '_' is ok before '@'.
     43       {L"dummy-hi (at) gmail.com",
     44        L"dummy-hi (at) gmail.com"},                  // '-' is ok in user name.
     45       {L"a..df (at) gmail.com", L"df (at) gmail.com"},    // Stop at consecutive '.'.
     46       {L".john (at) yahoo.com", L"john (at) yahoo.com"},  // Remove heading '.'.
     47       {L"abc (at) xyz.org?/", L"abc (at) xyz.org"},       // Trim ending invalid chars.
     48       {L"fan{abc (at) xyz.org", L"abc (at) xyz.org"},     // Trim beginning invalid chars.
     49       {L"fan (at) g.com..", L"fan (at) g.com"},           // Trim the ending periods.
     50       {L"CAP.cap (at) Gmail.Com", L"CAP.cap (at) Gmail.Com"},  // Keep the original case.
     51   };
     52   for (size_t i = 0; i < FX_ArraySize(valid_strs); ++i) {
     53     CFX_WideString text_str(valid_strs[i][0]);
     54     CFX_WideString expected_str(L"mailto:");
     55     expected_str += valid_strs[i][1];
     56     EXPECT_TRUE(extractor.CheckMailLink(text_str));
     57     EXPECT_STREQ(text_str.c_str(), expected_str.c_str());
     58   }
     59 }
     60