Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/string_util.h"
      6 #include "base/utf_string_conversions.h"
      7 #include "chrome/browser/spellchecker_platform_engine.h"
      8 #include "testing/gtest/include/gtest/gtest.h"
      9 
     10 // Tests that words are properly ignored. Currently only enabled on OS X as it
     11 // is the only platform to support ignoring words. Note that in this test, we
     12 // supply a non-zero doc_tag, in order to test that ignored words are matched to
     13 // the correct document.
     14 TEST(PlatformSpellCheckTest, IgnoreWords_EN_US) {
     15   const char* kTestCases[] = {
     16     "teh",
     17     "morblier",
     18     "watre",
     19     "noooen",
     20   };
     21 
     22   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     23     const string16 word(ASCIIToUTF16(kTestCases[i]));
     24     const int doc_tag = SpellCheckerPlatform::GetDocumentTag();
     25 
     26     // The word should show up as misspelled.
     27     EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
     28 
     29     // Ignore the word.
     30     SpellCheckerPlatform::IgnoreWord(word);
     31 
     32     // The word should now show up as correctly spelled.
     33     EXPECT_TRUE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
     34 
     35     // Close the docuemnt. Any words that we had previously ignored should no
     36     // longer be ignored and thus should show up as misspelled.
     37     SpellCheckerPlatform::CloseDocumentWithTag(doc_tag);
     38 
     39     // The word should now show be spelled wrong again
     40     EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, doc_tag)) << word;
     41   }
     42 }  // Test IgnoreWords_EN_US
     43 
     44 TEST(PlatformSpellCheckTest, SpellCheckSuggestions_EN_US) {
     45   static const struct {
     46     const char* input;           // A string to be tested.
     47     const char* suggested_word;  // A suggested word that should occur.
     48   } kTestCases[] = {
     49     // We need to have separate test cases here, since hunspell and the OS X
     50     // spellchecking service occasionally differ on what they consider a valid
     51     // suggestion for a given word, although these lists could likely be
     52     // integrated somewhat. The test cases for non-Mac are in
     53     // chrome/renderer/spellcheck_unittest.cc
     54     // These words come from the wikipedia page of the most commonly
     55     // misspelled words in english.
     56     // (http://en.wikipedia.org/wiki/Commonly_misspelled_words).
     57     // However, 10.6 loads multiple dictionaries and enables many non-English
     58     // dictionaries by default. As a result, we have removed from the list any
     59     // word that is marked as correct because it is correct in another
     60     // language.
     61     {"absense", "absence"},
     62     {"acceptible", "acceptable"},
     63     {"accidentaly", "accidentally"},
     64     {"acheive", "achieve"},
     65     {"acknowlege", "acknowledge"},
     66     {"acquaintence", "acquaintance"},
     67     {"aquire", "acquire"},
     68     {"aquit", "acquit"},
     69     {"acrage", "acreage"},
     70     {"adultary", "adultery"},
     71     {"advertize", "advertise"},
     72     {"adviseable", "advisable"},
     73     {"alchohol", "alcohol"},
     74     {"alege", "allege"},
     75     {"allegaince", "allegiance"},
     76     {"allmost", "almost"},
     77     // Ideally, this test should pass. It works in firefox, but not in hunspell
     78     // or OS X.
     79     // {"alot", "a lot"},
     80     {"amatuer", "amateur"},
     81     {"ammend", "amend"},
     82     {"amung", "among"},
     83     {"anually", "annually"},
     84     {"apparant", "apparent"},
     85     {"artic", "arctic"},
     86     {"arguement", "argument"},
     87     {"athiest", "atheist"},
     88     {"athelete", "athlete"},
     89     {"avrage", "average"},
     90     {"awfull", "awful"},
     91     {"ballance", "balance"},
     92     {"basicly", "basically"},
     93     {"becuase", "because"},
     94     {"becomeing", "becoming"},
     95     {"befor", "before"},
     96     {"begining", "beginning"},
     97     {"beleive", "believe"},
     98     {"bellweather", "bellwether"},
     99     {"benifit", "benefit"},
    100     {"bouy", "buoy"},
    101     {"briliant", "brilliant"},
    102     {"burgler", "burglar"},
    103     {"camoflage", "camouflage"},
    104     {"carefull", "careful"},
    105     {"Carribean", "Caribbean"},
    106     {"catagory", "category"},
    107     {"cauhgt", "caught"},
    108     {"cieling", "ceiling"},
    109     {"cemetary", "cemetery"},
    110     {"certin", "certain"},
    111     {"changable", "changeable"},
    112     {"cheif", "chief"},
    113     {"citezen", "citizen"},
    114     {"collaegue", "colleague"},
    115     {"colum", "column"},
    116     {"comming", "coming"},
    117     {"commited", "committed"},
    118     {"compitition", "competition"},
    119     {"conceed", "concede"},
    120     {"congradulate", "congratulate"},
    121     {"consciencious", "conscientious"},
    122     {"concious", "conscious"},
    123     {"concensus", "consensus"},
    124     {"contraversy", "controversy"},
    125     {"conveniance", "convenience"},
    126     {"critecize", "criticize"},
    127     {"dacquiri", "daiquiri"},
    128     {"decieve", "deceive"},
    129     {"dicide", "decide"},
    130     {"definate", "definite"},
    131     {"definitly", "definitely"},
    132     {"desparate", "desperate"},
    133     {"develope", "develop"},
    134     {"diffrence", "difference"},
    135     {"disapear", "disappear"},
    136     {"disapoint", "disappoint"},
    137     {"disasterous", "disastrous"},
    138     {"disipline", "discipline"},
    139     {"drunkeness", "drunkenness"},
    140     {"dumbell", "dumbbell"},
    141     {"easely", "easily"},
    142     {"eigth", "eight"},
    143     {"embarass", "embarrass"},
    144     {"enviroment", "environment"},
    145     {"equiped", "equipped"},
    146     {"equiptment", "equipment"},
    147     {"exagerate", "exaggerate"},
    148     {"exellent", "excellent"},
    149     {"exsept", "except"},
    150     {"exercize", "exercise"},
    151     {"exilerate", "exhilarate"},
    152     {"existance", "existence"},
    153     {"experiance", "experience"},
    154     {"experament", "experiment"},
    155     {"explaination", "explanation"},
    156     {"facinating", "fascinating"},
    157     {"firey", "fiery"},
    158     {"finaly", "finally"},
    159     {"flourescent", "fluorescent"},
    160     {"foriegn", "foreign"},
    161     {"fourty", "forty"},
    162     {"foreward", "forward"},
    163     {"freind", "friend"},
    164     {"fundemental", "fundamental"},
    165     {"guage", "gauge"},
    166     {"generaly", "generally"},
    167     {"goverment", "government"},
    168     {"gratefull", "grateful"},
    169     {"garantee", "guarantee"},
    170     {"guidence", "guidance"},
    171     {"happyness", "happiness"},
    172     {"harrass", "harass"},
    173     {"heighth", "height"},
    174     {"heirarchy", "hierarchy"},
    175     {"humerous", "humorous"},
    176     {"hygene", "hygiene"},
    177     {"hipocrit", "hypocrite"},
    178     {"idenity", "identity"},
    179     {"ignorence", "ignorance"},
    180     {"imaginery", "imaginary"},
    181     {"immitate", "imitate"},
    182     {"immitation", "imitation"},
    183     {"imediately", "immediately"},
    184     {"incidently", "incidentally"},
    185     {"independant", "independent"},
    186     {"indispensible", "indispensable"},
    187     {"innoculate", "inoculate"},
    188     {"inteligence", "intelligence"},
    189     {"intresting", "interesting"},
    190     {"interuption", "interruption"},
    191     {"irrelevent", "irrelevant"},
    192     {"irritible", "irritable"},
    193     {"jellous", "jealous"},
    194     {"knowlege", "knowledge"},
    195     {"labratory", "laboratory"},
    196     {"lenght", "length"},
    197     {"liason", "liaison"},
    198     {"libary", "library"},
    199     {"lisence", "license"},
    200     {"lonelyness", "loneliness"},
    201     {"lieing", "lying"},
    202     {"maintenence", "maintenance"},
    203     {"manuever", "maneuver"},
    204     {"marrige", "marriage"},
    205     {"mathmatics", "mathematics"},
    206     {"medcine", "medicine"},
    207     {"miniture", "miniature"},
    208     {"minite", "minute"},
    209     {"mischevous", "mischievous"},
    210     {"mispell", "misspell"},
    211     // Maybe this one should pass, as it works in hunspell, but not in firefox.
    212     // {"misterius", "mysterious"},
    213     {"naturaly", "naturally"},
    214     {"neccessary", "necessary"},
    215     {"neice", "niece"},
    216     {"nieghbor", "neighbor"},
    217     {"nieghbour", "neighbor"},
    218     {"niether", "neither"},
    219     {"noticable", "noticeable"},
    220     {"occassion", "occasion"},
    221     {"occasionaly", "occasionally"},
    222     {"occurrance", "occurrence"},
    223     {"occured", "occurred"},
    224     {"ommision", "omission"},
    225     {"oppurtunity", "opportunity"},
    226     {"outragous", "outrageous"},
    227     {"parrallel", "parallel"},
    228     {"parliment", "parliament"},
    229     {"particurly", "particularly"},
    230     {"passtime", "pastime"},
    231     {"peculier", "peculiar"},
    232     {"percieve", "perceive"},
    233     {"pernament", "permanent"},
    234     {"perseverence", "perseverance"},
    235     {"personaly", "personally"},
    236     {"persaude", "persuade"},
    237     {"pichure", "picture"},
    238     {"peice", "piece"},
    239     {"plagerize", "plagiarize"},
    240     {"playright", "playwright"},
    241     {"plesant", "pleasant"},
    242     {"pollitical", "political"},
    243     {"posession", "possession"},
    244     {"potatos", "potatoes"},
    245     {"practicle", "practical"},
    246     {"preceed", "precede"},
    247     {"predjudice", "prejudice"},
    248     {"presance", "presence"},
    249     {"privelege", "privilege"},
    250     // This one should probably work. It does in FF and Hunspell.
    251     // {"probly", "probably"},
    252     {"proffesional", "professional"},
    253     {"promiss", "promise"},
    254     {"pronounciation", "pronunciation"},
    255     {"prufe", "proof"},
    256     {"psycology", "psychology"},
    257     {"publically", "publicly"},
    258     {"quanity", "quantity"},
    259     {"quarentine", "quarantine"},
    260     {"questionaire", "questionnaire"},
    261     {"readible", "readable"},
    262     {"realy", "really"},
    263     {"recieve", "receive"},
    264     {"reciept", "receipt"},
    265     {"reconize", "recognize"},
    266     {"recomend", "recommend"},
    267     {"refered", "referred"},
    268     {"referance", "reference"},
    269     {"relevent", "relevant"},
    270     {"religous", "religious"},
    271     {"repitition", "repetition"},
    272     {"restarant", "restaurant"},
    273     {"rythm", "rhythm"},
    274     {"rediculous", "ridiculous"},
    275     {"sacrefice", "sacrifice"},
    276     {"saftey", "safety"},
    277     {"sissors", "scissors"},
    278     {"secratary", "secretary"},
    279     {"seperate", "separate"},
    280     {"sargent", "sergeant"},
    281     {"shineing", "shining"},
    282     {"similer", "similar"},
    283     {"sinceerly", "sincerely"},
    284     {"speach", "speech"},
    285     {"strenght", "strength"},
    286     {"succesful", "successful"},
    287     {"supercede", "supersede"},
    288     {"surelly", "surely"},
    289     {"suprise", "surprise"},
    290     {"temperture", "temperature"},
    291     {"temprary", "temporary"},
    292     {"tommorrow", "tomorrow"},
    293     {"tounge", "tongue"},
    294     {"truely", "truly"},
    295     {"twelth", "twelfth"},
    296     {"tyrany", "tyranny"},
    297     {"underate", "underrate"},
    298     {"untill", "until"},
    299     {"unuseual", "unusual"},
    300     {"upholstry", "upholstery"},
    301     {"usible", "usable"},
    302     {"useing", "using"},
    303     {"usualy", "usually"},
    304     {"vaccuum", "vacuum"},
    305     {"vegatarian", "vegetarian"},
    306     {"vehical", "vehicle"},
    307     {"visious", "vicious"},
    308     {"villege", "village"},
    309     {"wierd", "weird"},
    310     {"wellcome", "welcome"},
    311     {"wellfare", "welfare"},
    312     {"wilfull", "willful"},
    313     {"withold", "withhold"},
    314     {"writting", "writing"},
    315   };
    316 
    317   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
    318     const string16 word(ASCIIToUTF16(kTestCases[i].input));
    319     EXPECT_FALSE(SpellCheckerPlatform::CheckSpelling(word, 0)) << word;
    320 
    321     // Check if the suggested words occur.
    322     std::vector<string16> suggestions;
    323     SpellCheckerPlatform::FillSuggestionList(word, &suggestions);
    324     bool suggested_word_is_present = false;
    325     const string16 suggested_word(ASCIIToUTF16(kTestCases[i].suggested_word));
    326     for (size_t j = 0; j < suggestions.size(); j++) {
    327       if (suggestions[j].compare(suggested_word) == 0) {
    328         suggested_word_is_present = true;
    329         break;
    330       }
    331     }
    332     EXPECT_TRUE(suggested_word_is_present) << suggested_word;
    333   }
    334 }
    335