Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/basictypes.h"
      6 #include "base/memory/scoped_vector.h"
      7 #include "base/utf_string_conversions.h"
      8 #include "chrome/browser/history/query_parser.h"
      9 #include "testing/gtest/include/gtest/gtest.h"
     10 
     11 class QueryParserTest : public testing::Test {
     12  public:
     13   struct TestData {
     14     const char* input;
     15     const int expected_word_count;
     16   };
     17 
     18   std::string QueryToString(const std::string& query);
     19 
     20  protected:
     21   QueryParser query_parser_;
     22 };
     23 
     24 // Test helper: Convert a user query string in 8-bit (for hardcoding
     25 // convenience) to a SQLite query string.
     26 std::string QueryParserTest::QueryToString(const std::string& query) {
     27   string16 sqlite_query;
     28   query_parser_.ParseQuery(UTF8ToUTF16(query), &sqlite_query);
     29   return UTF16ToUTF8(sqlite_query);
     30 }
     31 
     32 // Basic multi-word queries, including prefix matching.
     33 TEST_F(QueryParserTest, SimpleQueries) {
     34   EXPECT_EQ("", QueryToString(" "));
     35   EXPECT_EQ("singleword*", QueryToString("singleword"));
     36   EXPECT_EQ("spacedout*", QueryToString("  spacedout "));
     37   EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
     38   // Short words aren't prefix matches. For Korean Hangul
     39   // the minimum is 2 while for other scripts, it's 3.
     40   EXPECT_EQ("f b", QueryToString(" f b"));
     41   // KA JANG
     42   EXPECT_EQ(WideToUTF8(L"\xAC00 \xC7A5"),
     43             QueryToString(WideToUTF8(L" \xAC00 \xC7A5")));
     44   EXPECT_EQ("foo* bar*", QueryToString(" foo   bar "));
     45   // KA-JANG BICH-GO
     46   EXPECT_EQ(WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
     47             QueryToString(WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
     48 }
     49 
     50 // Quoted substring parsing.
     51 TEST_F(QueryParserTest, Quoted) {
     52   // ASCII quotes
     53   EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
     54   // Missing end quotes
     55   EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
     56   // Missing begin quotes
     57   EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
     58   // Weird formatting
     59   EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many   \"\"quotes"));
     60 }
     61 
     62 // Apostrophes within words should be preserved, but otherwise stripped.
     63 TEST_F(QueryParserTest, Apostrophes) {
     64   EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
     65   EXPECT_EQ("l'foo*", QueryToString("l'foo"));
     66   EXPECT_EQ("foo*", QueryToString("'foo"));
     67 }
     68 
     69 // Special characters.
     70 TEST_F(QueryParserTest, SpecialChars) {
     71   EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
     72 }
     73 
     74 TEST_F(QueryParserTest, NumWords) {
     75   TestData data[] = {
     76     { "blah",                  1 },
     77     { "foo \"bar baz\"",       3 },
     78     { "foo \"baz\"",           2 },
     79     { "foo \"bar baz\"  blah", 4 },
     80   };
     81 
     82   for (size_t i = 0; i < arraysize(data); ++i) {
     83     string16 query_string;
     84     EXPECT_EQ(data[i].expected_word_count,
     85               query_parser_.ParseQuery(UTF8ToUTF16(data[i].input),
     86                                        &query_string));
     87   }
     88 }
     89 
     90 TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
     91   struct TestData2 {
     92     const std::string query;
     93     const std::string text;
     94     const bool matches;
     95     const size_t m1_start;
     96     const size_t m1_end;
     97     const size_t m2_start;
     98     const size_t m2_end;
     99   } data[] = {
    100     { "foo foo",       "foo",              true,  0, 3, 0, 0 },
    101     { "foo fooey",     "fooey",            true,  0, 5, 0, 0 },
    102     { "foo fooey bar", "bar fooey",        true,  0, 3, 4, 9 },
    103     { "blah",          "blah",             true,  0, 4, 0, 0 },
    104     { "blah",          "foo",              false, 0, 0, 0, 0 },
    105     { "blah",          "blahblah",         true,  0, 4, 0, 0 },
    106     { "blah",          "foo blah",         true,  4, 8, 0, 0 },
    107     { "foo blah",      "blah",             false, 0, 0, 0, 0 },
    108     { "foo blah",      "blahx foobar",     true,  0, 4, 6, 9 },
    109     { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
    110     { "\"foo blah\"",  "foox blahx",       false, 0, 0, 0, 0 },
    111     { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
    112     { "\"foo blah\"",  "\"foo blah\"",     true,  1, 9, 0, 0 },
    113     { "foo blah",      "\"foo bar blah\"", true,  1, 4, 9, 13 },
    114   };
    115   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
    116     QueryParser parser;
    117     ScopedVector<QueryNode> query_nodes;
    118     parser.ParseQuery(UTF8ToUTF16(data[i].query), &query_nodes.get());
    119     Snippet::MatchPositions match_positions;
    120     ASSERT_EQ(data[i].matches,
    121               parser.DoesQueryMatch(UTF8ToUTF16(data[i].text),
    122                                     query_nodes.get(),
    123                                     &match_positions));
    124     size_t offset = 0;
    125     if (data[i].m1_start != 0 || data[i].m1_end != 0) {
    126       ASSERT_TRUE(match_positions.size() >= 1);
    127       EXPECT_EQ(data[i].m1_start, match_positions[0].first);
    128       EXPECT_EQ(data[i].m1_end, match_positions[0].second);
    129       offset++;
    130     }
    131     if (data[i].m2_start != 0 || data[i].m2_end != 0) {
    132       ASSERT_TRUE(match_positions.size() == 1 + offset);
    133       EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
    134       EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
    135     }
    136   }
    137 }
    138 
    139 TEST_F(QueryParserTest, ExtractQueryWords) {
    140   struct TestData2 {
    141     const std::string text;
    142     const std::string w1;
    143     const std::string w2;
    144     const std::string w3;
    145     const size_t word_count;
    146   } data[] = {
    147     { "foo",           "foo", "",    "",  1 },
    148     { "foo bar",       "foo", "bar", "",  2 },
    149     { "\"foo bar\"",   "foo", "bar", "",  2 },
    150     { "\"foo bar\" a", "foo", "bar", "a", 3 },
    151   };
    152   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
    153     std::vector<string16> results;
    154     QueryParser parser;
    155     parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results);
    156     ASSERT_EQ(data[i].word_count, results.size());
    157     EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0]));
    158     if (results.size() == 2)
    159       EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1]));
    160     if (results.size() == 3)
    161       EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2]));
    162   }
    163 }
    164