Home | History | Annotate | Download | only in tests
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <gtest/gtest.h>
     18 #include <UnicodeUtils.h>
     19 
     20 #include "LayoutUtils.h"
     21 
     22 namespace {
     23 
     24 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
     25     const size_t BUF_SIZE = 256U;
     26     uint16_t buf[BUF_SIZE];
     27     size_t expected_breakpoint = 0U;
     28     size_t size = 0U;
     29 
     30     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
     31     EXPECT_EQ(expected_breakpoint,
     32               getNextWordBreakForCache(buf, offset_in, size))
     33         << "Expected position is [" << query_str << "] from offset " << offset_in;
     34 }
     35 
     36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
     37     const size_t BUF_SIZE = 256U;
     38     uint16_t buf[BUF_SIZE];
     39     size_t expected_breakpoint = 0U;
     40     size_t size = 0U;
     41 
     42     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
     43     EXPECT_EQ(expected_breakpoint,
     44               getPrevWordBreakForCache(buf, offset_in, size))
     45         << "Expected position is [" << query_str << "] from offset " << offset_in;
     46 }
     47 
     48 TEST(WordBreakTest, goNextWordBreakTest) {
     49     ExpectNextWordBreakForCache(0, "|");
     50 
     51     // Continue for spaces.
     52     ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |");
     53     ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |");
     54     ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |");
     55     ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |");
     56     ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |");
     57     ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |");
     58 
     59     // Space makes word break.
     60     ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'");
     61     ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'");
     62     ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'");
     63     ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |");
     64     ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |");
     65     ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |");
     66     ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |");
     67 
     68     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'");
     69     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'");
     70     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'");
     71     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |");
     72     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |");
     73     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |");
     74     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |");
     75 
     76     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'");
     77     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'");
     78     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'");
     79     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
     80     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     81     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     82     ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     83     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     84 
     85     // CJK ideographs makes word break.
     86     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00   U+4E00   U+4E00   U+4E00");
     87     ExpectNextWordBreakForCache(1, "U+4E00   U+4E00 | U+4E00   U+4E00   U+4E00");
     88     ExpectNextWordBreakForCache(2, "U+4E00   U+4E00   U+4E00 | U+4E00   U+4E00");
     89     ExpectNextWordBreakForCache(3, "U+4E00   U+4E00   U+4E00   U+4E00 | U+4E00");
     90     ExpectNextWordBreakForCache(4, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     91     ExpectNextWordBreakForCache(5, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     92     ExpectNextWordBreakForCache(1000,
     93                              "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     94 
     95     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C   U+4E09   U+56DB   U+4E94");
     96     ExpectNextWordBreakForCache(1, "U+4E00   U+4E8C | U+4E09   U+56DB   U+4E94");
     97     ExpectNextWordBreakForCache(2, "U+4E00   U+4E8C   U+4E09 | U+56DB   U+4E94");
     98     ExpectNextWordBreakForCache(3, "U+4E00   U+4E8C   U+4E09   U+56DB | U+4E94");
     99     ExpectNextWordBreakForCache(4, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
    100     ExpectNextWordBreakForCache(5, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
    101     ExpectNextWordBreakForCache(1000,
    102                              "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
    103 
    104     ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    105     ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    106     ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    107     ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    108     ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    109     ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
    110     ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
    111 
    112     // Continue if trailing characters is Unicode combining characters.
    113     ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00");
    114     ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00");
    115     ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |");
    116     ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |");
    117     ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |");
    118 
    119     // Surrogate pairs.
    120     ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |");
    121     ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |");
    122     ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |");
    123     ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |");
    124     ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |");
    125     ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |");
    126 
    127     // Broken surrogate pairs.
    128     // U+D84D is leading surrogate but there is no trailing surrogate for it.
    129     ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |");
    130     ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |");
    131     ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |");
    132     ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |");
    133     ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |");
    134 
    135     ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |");
    136     ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |");
    137     ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |");
    138     ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |");
    139     ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |");
    140 
    141     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
    142     ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |");
    143     ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |");
    144     ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |");
    145     ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |");
    146     ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |");
    147 
    148     ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |");
    149     ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |");
    150     ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |");
    151     ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |");
    152     ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |");
    153 
    154     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
    155     ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |");
    156     ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |");
    157     ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |");
    158     ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |");
    159 
    160     // Tone marks.
    161     // CJK ideographic char + Tone mark + CJK ideographic char
    162     ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444");
    163     ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444");
    164     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |");
    165     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |");
    166     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |");
    167 
    168     // Variation Selectors.
    169     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    170     ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B");
    171     ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B");
    172     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |");
    173     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |");
    174     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |");
    175 
    176     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    177     ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B");
    178     ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B");
    179     ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B");
    180     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |");
    181     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |");
    182     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |");
    183     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |");
    184 
    185     // CJK ideographic char + Tone mark + Variation Character(VS1)
    186     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444");
    187     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444");
    188     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444");
    189     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |");
    190     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |");
    191     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |");
    192 
    193     // CJK ideographic char + Tone mark + Variation Character(VS17)
    194     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444");
    195     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444");
    196     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444");
    197     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444");
    198     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |");
    199     ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |");
    200     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |");
    201 
    202     // CJK ideographic char + Variation Character(VS1) + Tone mark
    203     ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444");
    204     ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444");
    205     ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444");
    206     ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |");
    207     ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |");
    208     ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |");
    209 
    210     // CJK ideographic char + Variation Character(VS17) + Tone mark
    211     ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444");
    212     ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444");
    213     ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444");
    214     ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444");
    215     ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |");
    216     ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |");
    217     ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |");
    218 
    219     // Following test cases are unusual usage of variation selectors and tone
    220     // marks for caching up the further behavior changes, e.g. index of bounds
    221     // or crashes. Please feel free to update the test expectations if the
    222     // behavior change makes sense to you.
    223 
    224     // Isolated Tone marks and Variation Selectors
    225     ExpectNextWordBreakForCache(0, "U+FE00 |");
    226     ExpectNextWordBreakForCache(1, "U+FE00 |");
    227     ExpectNextWordBreakForCache(1000, "U+FE00 |");
    228     ExpectNextWordBreakForCache(0, "U+E0100 |");
    229     ExpectNextWordBreakForCache(1000, "U+E0100 |");
    230     ExpectNextWordBreakForCache(0, "U+302D |");
    231     ExpectNextWordBreakForCache(1000, "U+302D |");
    232 
    233     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    234     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B");
    235     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B");
    236     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B");
    237     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |");
    238     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |");
    239     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |");
    240 
    241     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    242     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B");
    243     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B");
    244     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B");
    245     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B");
    246     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B");
    247     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |");
    248     ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |");
    249     ExpectNextWordBreakForCache(1000,
    250                              "U+845B U+E0100 U+E0100 U+845B |");
    251 
    252     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    253     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B");
    254     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B");
    255     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B");
    256     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B");
    257     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |");
    258     ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |");
    259     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |");
    260 
    261     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    262     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B");
    263     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B");
    264     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B");
    265     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B");
    266     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |");
    267     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |");
    268     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |");
    269 
    270     // Tone mark. + Tone mark
    271     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444");
    272     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444");
    273     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444");
    274     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |");
    275     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |");
    276     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |");
    277 }
    278 
    279 TEST(WordBreakTest, goPrevWordBreakTest) {
    280     ExpectPrevWordBreakForCache(0, "|");
    281 
    282     // Continue for spaces.
    283     ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'");
    284     ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'");
    285     ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'");
    286     ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'");
    287     ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'");
    288     ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'");
    289 
    290     // Space makes word break.
    291     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'");
    292     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'");
    293     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'");
    294     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'");
    295     ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'");
    296     ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'");
    297     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'");
    298 
    299     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'");
    300     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'");
    301     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'");
    302     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'");
    303     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'");
    304     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'");
    305     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'");
    306 
    307     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    308     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    309     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    310     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'");
    311     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'");
    312     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    313     ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    314     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    315 
    316     // CJK ideographs makes word break.
    317     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    318     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    319     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00");
    320     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00");
    321     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00");
    322     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    323     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    324 
    325     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    326     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    327     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94");
    328     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94");
    329     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94");
    330     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    331     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    332 
    333     // Mixed case.
    334     ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    335     ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    336     ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    337     ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    338     ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    339     ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    340     ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    341     ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    342 
    343     // Continue if trailing characters is Unicode combining characters.
    344     ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00");
    345     ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00");
    346     ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00");
    347     ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00");
    348     ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00");
    349 
    350     // Surrogate pairs.
    351     ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618");
    352     ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618");
    353     ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618");
    354     ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618");
    355     ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618");
    356     ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618");
    357 
    358     // Broken surrogate pairs.
    359     // U+D84D is leading surrogate but there is no trailing surrogate for it.
    360     ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618");
    361     ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618");
    362     ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618");
    363     ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618");
    364     ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618");
    365 
    366     ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D");
    367     ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D");
    368     ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D");
    369     ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D");
    370     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D");
    371 
    372     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
    373     ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618");
    374     ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618");
    375     ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618");
    376     ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618");
    377     ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618");
    378 
    379     ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D");
    380     ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D");
    381     ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D");
    382     ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D");
    383     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D");
    384 
    385     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
    386     ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8");
    387     ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8");
    388     ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8");
    389     ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8");
    390 
    391     // Tone marks.
    392     // CJK ideographic char + Tone mark + CJK ideographic char
    393     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444");
    394     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444");
    395     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444");
    396     ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444");
    397     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444");
    398 
    399     // Variation Selectors.
    400     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    401     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B");
    402     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B");
    403     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B");
    404     ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B");
    405     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B");
    406 
    407     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    408     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B");
    409     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B");
    410     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B");
    411     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B");
    412     ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B");
    413     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B");
    414     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B");
    415 
    416     // CJK ideographic char + Tone mark + Variation Character(VS1)
    417     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444");
    418     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444");
    419     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444");
    420     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444");
    421     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444");
    422     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444");
    423 
    424     // CJK ideographic char + Tone mark + Variation Character(VS17)
    425     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444");
    426     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444");
    427     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444");
    428     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444");
    429     ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444");
    430     ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444");
    431     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444");
    432 
    433     // CJK ideographic char + Variation Character(VS1) + Tone mark
    434     ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444");
    435     ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444");
    436     ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444");
    437     ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444");
    438     ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444");
    439     ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444");
    440 
    441     // CJK ideographic char + Variation Character(VS17) + Tone mark
    442     ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444");
    443     ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444");
    444     ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444");
    445     ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444");
    446     ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444");
    447     ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444");
    448     ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444");
    449 
    450     // Following test cases are unusual usage of variation selectors and tone
    451     // marks for caching up the further behavior changes, e.g. index of bounds
    452     // or crashes. Please feel free to update the test expectations if the
    453     // behavior change makes sense to you.
    454 
    455     // Isolated Tone marks and Variation Selectors
    456     ExpectPrevWordBreakForCache(0, "| U+FE00");
    457     ExpectPrevWordBreakForCache(1, "| U+FE00");
    458     ExpectPrevWordBreakForCache(1000, "| U+FE00");
    459     ExpectPrevWordBreakForCache(0, "| U+E0100");
    460     ExpectPrevWordBreakForCache(1000, "| U+E0100");
    461     ExpectPrevWordBreakForCache(0, "| U+302D");
    462     ExpectPrevWordBreakForCache(1000, "| U+302D");
    463 
    464     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    465     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B");
    466     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B");
    467     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B");
    468     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B");
    469     ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B");
    470     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B");
    471 
    472     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    473     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B");
    474     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B");
    475     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B");
    476     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B");
    477     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B");
    478     ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B");
    479     ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B");
    480     ExpectPrevWordBreakForCache(1000,
    481                              "U+845B U+E0100 U+E0100 | U+845B");
    482 
    483     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    484     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B");
    485     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B");
    486     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B");
    487     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B");
    488     ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B");
    489     ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B");
    490     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B");
    491 
    492     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    493     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B");
    494     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B");
    495     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B");
    496     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B");
    497     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B");
    498     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B");
    499     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B");
    500 
    501     // Tone mark. + Tone mark
    502     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444");
    503     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444");
    504     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444");
    505     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444");
    506     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444");
    507     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444");
    508 }
    509 
    510 }  // namespace
    511