Home | History | Annotate | Download | only in translate
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/common/translate/language_detection_util.h"
      6 
      7 #include "base/strings/string16.h"
      8 #include "base/strings/utf_string_conversions.h"
      9 #include "chrome/common/chrome_constants.h"
     10 #include "testing/gtest/include/gtest/gtest.h"
     11 
     12 typedef testing::Test LanguageDetectionUtilTest;
     13 
     14 // Tests that well-known language code typos are fixed.
     15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) {
     16   std::string language;
     17 
     18   // Strip the second and later codes.
     19   language = std::string("ja,en");
     20   LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
     21   EXPECT_EQ("ja", language);
     22 
     23   // Replace dash with hyphen.
     24   language = std::string("ja_JP");
     25   LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
     26   EXPECT_EQ("ja-JP", language);
     27 
     28   // Correct wrong cases.
     29   language = std::string("JA-jp");
     30   LanguageDetectionUtil::CorrectLanguageCodeTypo(&language);
     31   EXPECT_EQ("ja-JP", language);
     32 }
     33 
     34 // Tests if the language codes' format is invalid.
     35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) {
     36   std::string language;
     37 
     38   language = std::string("ja");
     39   EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
     40 
     41   language = std::string("ja-JP");
     42   EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
     43 
     44   language = std::string("ceb");
     45   EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
     46 
     47   language = std::string("ceb-XX");
     48   EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language));
     49 
     50   // Invalid because the sub code consists of a number.
     51   language = std::string("utf-8");
     52   EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
     53 
     54   // Invalid because of six characters after hyphen.
     55   language = std::string("ja-YUKARI");
     56   EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
     57 
     58   // Invalid because of four characters.
     59   language = std::string("DHMO");
     60   EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language));
     61 }
     62 
     63 // Tests that similar language table works.
     64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) {
     65   EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en"));
     66   EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja"));
     67   EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr"));
     68   EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr"));
     69   EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi"));
     70   EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi"));
     71 }
     72 
     73 // Tests that well-known languages which often have wrong server configuration
     74 // are handles.
     75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) {
     76   EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja"));
     77   EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US",
     78                                                                    "ja"));
     79   EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en",
     80                                                                    "zh-CN"));
     81   EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja",
     82                                                                     "en"));
     83   EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en",
     84                                                                     "he"));
     85 }
     86 
     87 // Tests that the language meta tag providing wrong information is ignored by
     88 // LanguageDetectionUtil due to disagreement between meta tag and CLD.
     89 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
     90   base::string16 contents = ASCIIToUTF16(
     91       "<html><head><meta http-equiv='Content-Language' content='ja'></head>"
     92       "<body>This is a page apparently written in English. Even though "
     93       "content-language is provided, the value will be ignored if the value "
     94       "is suspicious.</body></html>");
     95   std::string cld_language;
     96   bool is_cld_reliable;
     97   std::string language = LanguageDetectionUtil::DeterminePageLanguage(
     98       std::string("ja"), std::string(), contents, &cld_language,
     99       &is_cld_reliable);
    100   EXPECT_EQ(chrome::kUnknownLanguageCode, language);
    101   EXPECT_EQ("en", cld_language);
    102   EXPECT_TRUE(is_cld_reliable);
    103 }
    104 
    105 // Tests that the language meta tag providing "en-US" style information is
    106 // agreed by CLD.
    107 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
    108   base::string16 contents = ASCIIToUTF16(
    109       "<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
    110       "<body>This is a page apparently written in English. Even though "
    111       "content-language is provided, the value will be ignored if the value "
    112       "is suspicious.</body></html>");
    113   std::string cld_language;
    114   bool is_cld_reliable;
    115   std::string language = LanguageDetectionUtil::DeterminePageLanguage(
    116       std::string("en-US"), std::string(), contents, &cld_language,
    117       &is_cld_reliable);
    118   EXPECT_EQ("en-US", language);
    119   EXPECT_EQ("en", cld_language);
    120   EXPECT_TRUE(is_cld_reliable);
    121 }
    122 
    123 // Tests that the language meta tag providing wrong information is ignored and
    124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid
    125 // meta tag.
    126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
    127   base::string16 contents = ASCIIToUTF16(
    128       "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
    129       "<body>This is a page apparently written in English. Even though "
    130       "content-language is provided, the value will be ignored and CLD's"
    131       " language will be adopted if the value is invalid.</body></html>");
    132   std::string cld_language;
    133   bool is_cld_reliable;
    134   std::string language = LanguageDetectionUtil::DeterminePageLanguage(
    135       std::string("utf-8"), std::string(), contents, &cld_language,
    136       &is_cld_reliable);
    137   EXPECT_EQ("en", language);
    138   EXPECT_EQ("en", cld_language);
    139   EXPECT_TRUE(is_cld_reliable);
    140 }
    141 
    142 // Tests that the language meta tag providing wrong information is ignored
    143 // because of valid html lang attribute.
    144 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
    145   base::string16 contents = ASCIIToUTF16(
    146       "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
    147       "</head><body>This is a page apparently written in English. Even though "
    148       "content-language is provided, the value will be ignored if the value "
    149       "is suspicious.</body></html>");
    150   std::string cld_language;
    151   bool is_cld_reliable;
    152   std::string language = LanguageDetectionUtil::DeterminePageLanguage(
    153       std::string("ja"), std::string("en"), contents, &cld_language,
    154       &is_cld_reliable);
    155   EXPECT_EQ("en", language);
    156   EXPECT_EQ("en", cld_language);
    157   EXPECT_TRUE(is_cld_reliable);
    158 }
    159