1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/basictypes.h" 6 #include "base/memory/scoped_vector.h" 7 #include "base/strings/utf_string_conversions.h" 8 #include "components/query_parser/query_parser.h" 9 #include "testing/gtest/include/gtest/gtest.h" 10 11 namespace query_parser { 12 13 class QueryParserTest : public testing::Test { 14 public: 15 struct TestData { 16 const char* input; 17 const int expected_word_count; 18 }; 19 20 std::string QueryToString(const std::string& query); 21 22 protected: 23 QueryParser query_parser_; 24 }; 25 26 // Test helper: Convert a user query string in 8-bit (for hardcoding 27 // convenience) to a SQLite query string. 28 std::string QueryParserTest::QueryToString(const std::string& query) { 29 base::string16 sqlite_query; 30 query_parser_.ParseQuery(base::UTF8ToUTF16(query), &sqlite_query); 31 return base::UTF16ToUTF8(sqlite_query); 32 } 33 34 // Basic multi-word queries, including prefix matching. 35 TEST_F(QueryParserTest, SimpleQueries) { 36 EXPECT_EQ("", QueryToString(" ")); 37 EXPECT_EQ("singleword*", QueryToString("singleword")); 38 EXPECT_EQ("spacedout*", QueryToString(" spacedout ")); 39 EXPECT_EQ("foo* bar*", QueryToString("foo bar")); 40 // Short words aren't prefix matches. For Korean Hangul 41 // the minimum is 2 while for other scripts, it's 3. 42 EXPECT_EQ("f b", QueryToString(" f b")); 43 // KA JANG 44 EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"), 45 QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5"))); 46 EXPECT_EQ("foo* bar*", QueryToString(" foo bar ")); 47 // KA-JANG BICH-GO 48 EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), 49 QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); 50 } 51 52 // Quoted substring parsing. 53 TEST_F(QueryParserTest, Quoted) { 54 // ASCII quotes 55 EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); 56 // Missing end quotes 57 EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); 58 // Missing begin quotes 59 EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); 60 // Weird formatting 61 EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes")); 62 } 63 64 // Apostrophes within words should be preserved, but otherwise stripped. 65 TEST_F(QueryParserTest, Apostrophes) { 66 EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); 67 EXPECT_EQ("l'foo*", QueryToString("l'foo")); 68 EXPECT_EQ("foo*", QueryToString("'foo")); 69 } 70 71 // Special characters. 72 TEST_F(QueryParserTest, SpecialChars) { 73 EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); 74 } 75 76 TEST_F(QueryParserTest, NumWords) { 77 TestData data[] = { 78 { "blah", 1 }, 79 { "foo \"bar baz\"", 3 }, 80 { "foo \"baz\"", 2 }, 81 { "foo \"bar baz\" blah", 4 }, 82 }; 83 84 for (size_t i = 0; i < arraysize(data); ++i) { 85 base::string16 query_string; 86 EXPECT_EQ(data[i].expected_word_count, 87 query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input), 88 &query_string)); 89 } 90 } 91 92 TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { 93 struct TestData2 { 94 const std::string query; 95 const std::string text; 96 const bool matches; 97 const size_t m1_start; 98 const size_t m1_end; 99 const size_t m2_start; 100 const size_t m2_end; 101 } data[] = { 102 { "foo", "fooey foo", true, 0, 3, 6, 9 }, 103 { "foo foo", "foo", true, 0, 3, 0, 0 }, 104 { "foo fooey", "fooey", true, 0, 5, 0, 0 }, 105 { "fooey foo", "fooey", true, 0, 5, 0, 0 }, 106 { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 }, 107 { "blah", "blah", true, 0, 4, 0, 0 }, 108 { "blah", "foo", false, 0, 0, 0, 0 }, 109 { "blah", "blahblah", true, 0, 4, 0, 0 }, 110 { "blah", "foo blah", true, 4, 8, 0, 0 }, 111 { "foo blah", "blah", false, 0, 0, 0, 0 }, 112 { "foo blah", "blahx foobar", true, 0, 4, 6, 9 }, 113 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, 114 { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 }, 115 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, 116 { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 }, 117 { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 }, 118 }; 119 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 120 QueryParser parser; 121 ScopedVector<QueryNode> query_nodes; 122 parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query), 123 &query_nodes.get()); 124 Snippet::MatchPositions match_positions; 125 ASSERT_EQ(data[i].matches, 126 parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text), 127 query_nodes.get(), 128 &match_positions)); 129 size_t offset = 0; 130 if (data[i].m1_start != 0 || data[i].m1_end != 0) { 131 ASSERT_TRUE(match_positions.size() >= 1); 132 EXPECT_EQ(data[i].m1_start, match_positions[0].first); 133 EXPECT_EQ(data[i].m1_end, match_positions[0].second); 134 offset++; 135 } 136 if (data[i].m2_start != 0 || data[i].m2_end != 0) { 137 ASSERT_TRUE(match_positions.size() == 1 + offset); 138 EXPECT_EQ(data[i].m2_start, match_positions[offset].first); 139 EXPECT_EQ(data[i].m2_end, match_positions[offset].second); 140 } 141 } 142 } 143 144 TEST_F(QueryParserTest, ParseQueryWords) { 145 struct TestData2 { 146 const std::string text; 147 const std::string w1; 148 const std::string w2; 149 const std::string w3; 150 const size_t word_count; 151 } data[] = { 152 { "foo", "foo", "", "", 1 }, 153 { "foo bar", "foo", "bar", "", 2 }, 154 { "\"foo bar\"", "foo", "bar", "", 2 }, 155 { "\"foo bar\" a", "foo", "bar", "a", 3 }, 156 }; 157 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 158 std::vector<base::string16> results; 159 QueryParser parser; 160 parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), &results); 161 ASSERT_EQ(data[i].word_count, results.size()); 162 EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0])); 163 if (results.size() == 2) 164 EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1])); 165 if (results.size() == 3) 166 EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2])); 167 } 168 } 169 170 } // namespace query_parser 171