// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
      4 
      5 #include "base/i18n/break_iterator.h"
      6 
      7 #include "base/string_piece.h"
      8 #include "base/string_util.h"
      9 #include "base/utf_string_conversions.h"
     10 #include "testing/gtest/include/gtest/gtest.h"
     11 
     12 TEST(BreakIteratorTest, BreakWordEmpty) {
     13   string16 empty;
     14   base::BreakIterator iter(&empty, base::BreakIterator::BREAK_WORD);
     15   ASSERT_TRUE(iter.Init());
     16   EXPECT_FALSE(iter.Advance());
     17   EXPECT_FALSE(iter.IsWord());
     18   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
     19   EXPECT_FALSE(iter.IsWord());
     20 }
     21 
     22 TEST(BreakIteratorTest, BreakWord) {
     23   string16 space(UTF8ToUTF16(" "));
     24   string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
     25   base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
     26   ASSERT_TRUE(iter.Init());
     27   EXPECT_TRUE(iter.Advance());
     28   EXPECT_FALSE(iter.IsWord());
     29   EXPECT_EQ(space, iter.GetString());
     30   EXPECT_TRUE(iter.Advance());
     31   EXPECT_TRUE(iter.IsWord());
     32   EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString());
     33   EXPECT_TRUE(iter.Advance());
     34   EXPECT_FALSE(iter.IsWord());
     35   EXPECT_EQ(space, iter.GetString());
     36   EXPECT_TRUE(iter.Advance());
     37   EXPECT_TRUE(iter.IsWord());
     38   EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString());
     39   EXPECT_TRUE(iter.Advance());
     40   EXPECT_FALSE(iter.IsWord());
     41   EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString());
     42   EXPECT_TRUE(iter.Advance());
     43   EXPECT_FALSE(iter.IsWord());
     44   EXPECT_EQ(space, iter.GetString());
     45   EXPECT_TRUE(iter.Advance());
     46   EXPECT_FALSE(iter.IsWord());
     47   EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString());
     48   EXPECT_TRUE(iter.Advance());
     49   EXPECT_TRUE(iter.IsWord());
     50   EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString());
     51   EXPECT_TRUE(iter.Advance());
     52   EXPECT_FALSE(iter.IsWord());
     53   EXPECT_EQ(space, iter.GetString());
     54   EXPECT_TRUE(iter.Advance());
     55   EXPECT_TRUE(iter.IsWord());
     56   EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
     57   EXPECT_FALSE(iter.Advance());
     58   EXPECT_FALSE(iter.IsWord());
     59   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
     60   EXPECT_FALSE(iter.IsWord());
     61 }
     62 
     63 TEST(BreakIteratorTest, BreakWide16) {
     64   // Two greek words separated by space.
     65   const string16 str(WideToUTF16(
     66       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
     67       L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
     68   const string16 word1(str.substr(0, 10));
     69   const string16 word2(str.substr(11, 5));
     70   base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
     71   ASSERT_TRUE(iter.Init());
     72   EXPECT_TRUE(iter.Advance());
     73   EXPECT_TRUE(iter.IsWord());
     74   EXPECT_EQ(word1, iter.GetString());
     75   EXPECT_TRUE(iter.Advance());
     76   EXPECT_FALSE(iter.IsWord());
     77   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
     78   EXPECT_TRUE(iter.Advance());
     79   EXPECT_TRUE(iter.IsWord());
     80   EXPECT_EQ(word2, iter.GetString());
     81   EXPECT_FALSE(iter.Advance());
     82   EXPECT_FALSE(iter.IsWord());
     83   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
     84   EXPECT_FALSE(iter.IsWord());
     85 }
     86 
     87 TEST(BreakIteratorTest, BreakWide32) {
     88   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
     89   const char* very_wide_char = "\xF0\x9D\x92\x9C";
     90   const string16 str(
     91       UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
     92   const string16 very_wide_word(str.substr(0, 2));
     93 
     94   base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
     95   ASSERT_TRUE(iter.Init());
     96   EXPECT_TRUE(iter.Advance());
     97   EXPECT_TRUE(iter.IsWord());
     98   EXPECT_EQ(very_wide_word, iter.GetString());
     99   EXPECT_TRUE(iter.Advance());
    100   EXPECT_FALSE(iter.IsWord());
    101   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
    102   EXPECT_TRUE(iter.Advance());
    103   EXPECT_TRUE(iter.IsWord());
    104   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
    105   EXPECT_FALSE(iter.Advance());
    106   EXPECT_FALSE(iter.IsWord());
    107   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    108   EXPECT_FALSE(iter.IsWord());
    109 }
    110 
    111 TEST(BreakIteratorTest, BreakSpaceEmpty) {
    112   string16 empty;
    113   base::BreakIterator iter(&empty, base::BreakIterator::BREAK_SPACE);
    114   ASSERT_TRUE(iter.Init());
    115   EXPECT_FALSE(iter.Advance());
    116   EXPECT_FALSE(iter.IsWord());
    117   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    118   EXPECT_FALSE(iter.IsWord());
    119 }
    120 
    121 TEST(BreakIteratorTest, BreakSpace) {
    122   string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
    123   base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
    124   ASSERT_TRUE(iter.Init());
    125   EXPECT_TRUE(iter.Advance());
    126   EXPECT_FALSE(iter.IsWord());
    127   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
    128   EXPECT_TRUE(iter.Advance());
    129   EXPECT_FALSE(iter.IsWord());
    130   EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
    131   EXPECT_TRUE(iter.Advance());
    132   EXPECT_FALSE(iter.IsWord());
    133   EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
    134   EXPECT_TRUE(iter.Advance());
    135   EXPECT_FALSE(iter.IsWord());
    136   EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
    137   EXPECT_TRUE(iter.Advance());
    138   EXPECT_FALSE(iter.IsWord());
    139   EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
    140   EXPECT_FALSE(iter.Advance());
    141   EXPECT_FALSE(iter.IsWord());
    142   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    143   EXPECT_FALSE(iter.IsWord());
    144 }
    145 
    146 TEST(BreakIteratorTest, BreakSpaceSP) {
    147   string16 str(UTF8ToUTF16(" foo bar! \npouet boom "));
    148   base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
    149   ASSERT_TRUE(iter.Init());
    150   EXPECT_TRUE(iter.Advance());
    151   EXPECT_FALSE(iter.IsWord());
    152   EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
    153   EXPECT_TRUE(iter.Advance());
    154   EXPECT_FALSE(iter.IsWord());
    155   EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
    156   EXPECT_TRUE(iter.Advance());
    157   EXPECT_FALSE(iter.IsWord());
    158   EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
    159   EXPECT_TRUE(iter.Advance());
    160   EXPECT_FALSE(iter.IsWord());
    161   EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
    162   EXPECT_TRUE(iter.Advance());
    163   EXPECT_FALSE(iter.IsWord());
    164   EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString());
    165   EXPECT_FALSE(iter.Advance());
    166   EXPECT_FALSE(iter.IsWord());
    167   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    168   EXPECT_FALSE(iter.IsWord());
    169 }
    170 
    171 TEST(BreakIteratorTest, BreakSpacekWide16) {
    172   // Two Greek words.
    173   const string16 str(WideToUTF16(
    174       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
    175       L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
    176   const string16 word1(str.substr(0, 11));
    177   const string16 word2(str.substr(11, 5));
    178   base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
    179   ASSERT_TRUE(iter.Init());
    180   EXPECT_TRUE(iter.Advance());
    181   EXPECT_FALSE(iter.IsWord());
    182   EXPECT_EQ(word1, iter.GetString());
    183   EXPECT_TRUE(iter.Advance());
    184   EXPECT_FALSE(iter.IsWord());
    185   EXPECT_EQ(word2, iter.GetString());
    186   EXPECT_FALSE(iter.Advance());
    187   EXPECT_FALSE(iter.IsWord());
    188   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    189   EXPECT_FALSE(iter.IsWord());
    190 }
    191 
    192 TEST(BreakIteratorTest, BreakSpaceWide32) {
    193   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
    194   const char* very_wide_char = "\xF0\x9D\x92\x9C";
    195   const string16 str(
    196       UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
    197   const string16 very_wide_word(str.substr(0, 3));
    198 
    199   base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
    200   ASSERT_TRUE(iter.Init());
    201   EXPECT_TRUE(iter.Advance());
    202   EXPECT_FALSE(iter.IsWord());
    203   EXPECT_EQ(very_wide_word, iter.GetString());
    204   EXPECT_TRUE(iter.Advance());
    205   EXPECT_FALSE(iter.IsWord());
    206   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
    207   EXPECT_FALSE(iter.Advance());
    208   EXPECT_FALSE(iter.IsWord());
    209   EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
    210   EXPECT_FALSE(iter.IsWord());
    211 }
    212 
    213 TEST(BreakIteratorTest, BreakLineEmpty) {
    214   string16 empty;
    215   base::BreakIterator iter(&empty, base::BreakIterator::BREAK_NEWLINE);
    216   ASSERT_TRUE(iter.Init());
    217   EXPECT_FALSE(iter.Advance());
    218   EXPECT_FALSE(iter.IsWord());
    219   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
    220   EXPECT_FALSE(iter.IsWord());
    221 }
    222 
    223 TEST(BreakIteratorTest, BreakLine) {
    224   string16 nl(UTF8ToUTF16("\n"));
    225   string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom"));
    226   base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
    227   ASSERT_TRUE(iter.Init());
    228   EXPECT_TRUE(iter.Advance());
    229   EXPECT_FALSE(iter.IsWord());
    230   EXPECT_EQ(nl, iter.GetString());
    231   EXPECT_TRUE(iter.Advance());
    232   EXPECT_FALSE(iter.IsWord());
    233   EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
    234   EXPECT_TRUE(iter.Advance());
    235   EXPECT_FALSE(iter.IsWord());
    236   EXPECT_EQ(nl, iter.GetString());
    237   EXPECT_TRUE(iter.Advance());
    238   EXPECT_FALSE(iter.IsWord());
    239   EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString());
    240   EXPECT_FALSE(iter.Advance());
    241   EXPECT_FALSE(iter.IsWord());
    242   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
    243   EXPECT_FALSE(iter.IsWord());
    244 }
    245 
    246 TEST(BreakIteratorTest, BreakLineNL) {
    247   string16 nl(UTF8ToUTF16("\n"));
    248   string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n"));
    249   base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
    250   ASSERT_TRUE(iter.Init());
    251   EXPECT_TRUE(iter.Advance());
    252   EXPECT_FALSE(iter.IsWord());
    253   EXPECT_EQ(nl, iter.GetString());
    254   EXPECT_TRUE(iter.Advance());
    255   EXPECT_FALSE(iter.IsWord());
    256   EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
    257   EXPECT_TRUE(iter.Advance());
    258   EXPECT_FALSE(iter.IsWord());
    259   EXPECT_EQ(nl, iter.GetString());
    260   EXPECT_TRUE(iter.Advance());
    261   EXPECT_FALSE(iter.IsWord());
    262   EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString());
    263   EXPECT_FALSE(iter.Advance());
    264   EXPECT_FALSE(iter.IsWord());
    265   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
    266   EXPECT_FALSE(iter.IsWord());
    267 }
    268 
    269 TEST(BreakIteratorTest, BreakLineWide16) {
    270   // Two Greek words separated by newline.
    271   const string16 str(WideToUTF16(
    272       L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
    273       L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2"));
    274   const string16 line1(str.substr(0, 11));
    275   const string16 line2(str.substr(11, 5));
    276   base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
    277   ASSERT_TRUE(iter.Init());
    278   EXPECT_TRUE(iter.Advance());
    279   EXPECT_FALSE(iter.IsWord());
    280   EXPECT_EQ(line1, iter.GetString());
    281   EXPECT_TRUE(iter.Advance());
    282   EXPECT_FALSE(iter.IsWord());
    283   EXPECT_EQ(line2, iter.GetString());
    284   EXPECT_FALSE(iter.Advance());
    285   EXPECT_FALSE(iter.IsWord());
    286   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
    287   EXPECT_FALSE(iter.IsWord());
    288 }
    289 
    290 TEST(BreakIteratorTest, BreakLineWide32) {
    291   // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
    292   const char* very_wide_char = "\xF0\x9D\x92\x9C";
    293   const string16 str(
    294       UTF8ToUTF16(StringPrintf("%s\na", very_wide_char)));
    295   const string16 very_wide_line(str.substr(0, 3));
    296   base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
    297   ASSERT_TRUE(iter.Init());
    298   EXPECT_TRUE(iter.Advance());
    299   EXPECT_FALSE(iter.IsWord());
    300   EXPECT_EQ(very_wide_line, iter.GetString());
    301   EXPECT_TRUE(iter.Advance());
    302   EXPECT_FALSE(iter.IsWord());
    303   EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
    304   EXPECT_FALSE(iter.Advance());
    305   EXPECT_FALSE(iter.IsWord());
    306   EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
    307   EXPECT_FALSE(iter.IsWord());
    308 }
    309