Home | History | Annotate | Download | only in unittest
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "LayoutUtils.h"
     18 
     19 #include <gtest/gtest.h>
     20 
     21 #include "UnicodeUtils.h"
     22 
     23 namespace minikin {
     24 
     25 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
     26     const size_t BUF_SIZE = 256U;
     27     uint16_t buf[BUF_SIZE];
     28     size_t expected_breakpoint = 0U;
     29     size_t size = 0U;
     30 
     31     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
     32     EXPECT_EQ(expected_breakpoint, getNextWordBreakForCache(buf, offset_in, size))
     33             << "Expected position is [" << query_str << "] from offset " << offset_in;
     34 }
     35 
     36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
     37     const size_t BUF_SIZE = 256U;
     38     uint16_t buf[BUF_SIZE];
     39     size_t expected_breakpoint = 0U;
     40     size_t size = 0U;
     41 
     42     ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
     43     EXPECT_EQ(expected_breakpoint, getPrevWordBreakForCache(buf, offset_in, size))
     44             << "Expected position is [" << query_str << "] from offset " << offset_in;
     45 }
     46 
     47 TEST(WordBreakTest, goNextWordBreakTest) {
     48     ExpectNextWordBreakForCache(0, "|");
     49 
     50     // Continue for spaces.
     51     ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |");
     52     ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |");
     53     ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |");
     54     ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |");
     55     ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |");
     56     ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |");
     57 
     58     // Space makes word break.
     59     ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'");
     60     ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'");
     61     ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'");
     62     ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |");
     63     ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |");
     64     ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |");
     65     ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |");
     66 
     67     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'");
     68     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'");
     69     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'");
     70     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |");
     71     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |");
     72     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |");
     73     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |");
     74 
     75     ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'");
     76     ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'");
     77     ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'");
     78     ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
     79     ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     80     ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     81     ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     82     ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |");
     83 
     84     // CJK ideographs makes word break.
     85     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00   U+4E00   U+4E00   U+4E00");
     86     ExpectNextWordBreakForCache(1, "U+4E00   U+4E00 | U+4E00   U+4E00   U+4E00");
     87     ExpectNextWordBreakForCache(2, "U+4E00   U+4E00   U+4E00 | U+4E00   U+4E00");
     88     ExpectNextWordBreakForCache(3, "U+4E00   U+4E00   U+4E00   U+4E00 | U+4E00");
     89     ExpectNextWordBreakForCache(4, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     90     ExpectNextWordBreakForCache(5, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     91     ExpectNextWordBreakForCache(1000, "U+4E00   U+4E00   U+4E00   U+4E00   U+4E00 |");
     92 
     93     ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C   U+4E09   U+56DB   U+4E94");
     94     ExpectNextWordBreakForCache(1, "U+4E00   U+4E8C | U+4E09   U+56DB   U+4E94");
     95     ExpectNextWordBreakForCache(2, "U+4E00   U+4E8C   U+4E09 | U+56DB   U+4E94");
     96     ExpectNextWordBreakForCache(3, "U+4E00   U+4E8C   U+4E09   U+56DB | U+4E94");
     97     ExpectNextWordBreakForCache(4, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
     98     ExpectNextWordBreakForCache(5, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
     99     ExpectNextWordBreakForCache(1000, "U+4E00   U+4E8C   U+4E09   U+56DB   U+4E94 |");
    100 
    101     ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    102     ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    103     ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    104     ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    105     ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    106     ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
    107     ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |");
    108 
    109     // Continue if trailing characters is Unicode combining characters.
    110     ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00");
    111     ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00");
    112     ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |");
    113     ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |");
    114     ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |");
    115 
    116     // Surrogate pairs.
    117     ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |");
    118     ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |");
    119     ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |");
    120     ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |");
    121     ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |");
    122     ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |");
    123 
    124     // Broken surrogate pairs.
    125     // U+D84D is leading surrogate but there is no trailing surrogate for it.
    126     ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |");
    127     ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |");
    128     ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |");
    129     ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |");
    130     ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |");
    131 
    132     ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |");
    133     ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |");
    134     ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |");
    135     ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |");
    136     ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |");
    137 
    138     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
    139     ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |");
    140     ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |");
    141     ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |");
    142     ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |");
    143     ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |");
    144 
    145     ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |");
    146     ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |");
    147     ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |");
    148     ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |");
    149     ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |");
    150 
    151     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
    152     ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |");
    153     ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |");
    154     ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |");
    155     ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |");
    156 
    157     // Tone marks.
    158     // CJK ideographic char + Tone mark + CJK ideographic char
    159     ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444");
    160     ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444");
    161     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |");
    162     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |");
    163     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |");
    164 
    165     // Variation Selectors.
    166     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    167     ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B");
    168     ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B");
    169     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |");
    170     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |");
    171     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |");
    172 
    173     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    174     ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B");
    175     ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B");
    176     ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B");
    177     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |");
    178     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |");
    179     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |");
    180     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |");
    181 
    182     // CJK ideographic char + Tone mark + Variation Character(VS1)
    183     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444");
    184     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444");
    185     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444");
    186     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |");
    187     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |");
    188     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |");
    189 
    190     // CJK ideographic char + Tone mark + Variation Character(VS17)
    191     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444");
    192     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444");
    193     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444");
    194     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444");
    195     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |");
    196     ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |");
    197     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |");
    198 
    199     // CJK ideographic char + Variation Character(VS1) + Tone mark
    200     ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444");
    201     ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444");
    202     ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444");
    203     ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |");
    204     ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |");
    205     ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |");
    206 
    207     // CJK ideographic char + Variation Character(VS17) + Tone mark
    208     ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444");
    209     ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444");
    210     ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444");
    211     ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444");
    212     ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |");
    213     ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |");
    214     ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |");
    215 
    216     // Following test cases are unusual usage of variation selectors and tone
    217     // marks for caching up the further behavior changes, e.g. index of bounds
    218     // or crashes. Please feel free to update the test expectations if the
    219     // behavior change makes sense to you.
    220 
    221     // Isolated Tone marks and Variation Selectors
    222     ExpectNextWordBreakForCache(0, "U+FE00 |");
    223     ExpectNextWordBreakForCache(1, "U+FE00 |");
    224     ExpectNextWordBreakForCache(1000, "U+FE00 |");
    225     ExpectNextWordBreakForCache(0, "U+E0100 |");
    226     ExpectNextWordBreakForCache(1000, "U+E0100 |");
    227     ExpectNextWordBreakForCache(0, "U+302D |");
    228     ExpectNextWordBreakForCache(1000, "U+302D |");
    229 
    230     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    231     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B");
    232     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B");
    233     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B");
    234     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |");
    235     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |");
    236     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |");
    237 
    238     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    239     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B");
    240     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B");
    241     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B");
    242     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B");
    243     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B");
    244     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |");
    245     ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |");
    246     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+E0100 U+845B |");
    247 
    248     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    249     ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B");
    250     ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B");
    251     ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B");
    252     ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B");
    253     ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |");
    254     ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |");
    255     ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |");
    256 
    257     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    258     ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B");
    259     ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B");
    260     ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B");
    261     ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B");
    262     ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |");
    263     ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |");
    264     ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |");
    265 
    266     // Tone mark. + Tone mark
    267     ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444");
    268     ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444");
    269     ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444");
    270     ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |");
    271     ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |");
    272     ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |");
    273 }
    274 
    275 TEST(WordBreakTest, goPrevWordBreakTest) {
    276     ExpectPrevWordBreakForCache(0, "|");
    277 
    278     // Continue for spaces.
    279     ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'");
    280     ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'");
    281     ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'");
    282     ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'");
    283     ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'");
    284     ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'");
    285 
    286     // Space makes word break.
    287     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'");
    288     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'");
    289     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'");
    290     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'");
    291     ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'");
    292     ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'");
    293     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'");
    294 
    295     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'");
    296     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'");
    297     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'");
    298     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'");
    299     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'");
    300     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'");
    301     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'");
    302 
    303     ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    304     ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    305     ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'");
    306     ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'");
    307     ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'");
    308     ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    309     ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    310     ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'");
    311 
    312     // CJK ideographs makes word break.
    313     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    314     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00");
    315     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00");
    316     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00");
    317     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00");
    318     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    319     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00");
    320 
    321     ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    322     ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94");
    323     ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94");
    324     ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94");
    325     ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94");
    326     ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    327     ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94");
    328 
    329     // Mixed case.
    330     ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    331     ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    332     ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    333     ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00");
    334     ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00");
    335     ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00");
    336     ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    337     ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00");
    338 
    339     // Continue if trailing characters is Unicode combining characters.
    340     ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00");
    341     ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00");
    342     ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00");
    343     ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00");
    344     ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00");
    345 
    346     // Surrogate pairs.
    347     ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618");
    348     ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618");
    349     ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618");
    350     ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618");
    351     ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618");
    352     ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618");
    353 
    354     // Broken surrogate pairs.
    355     // U+D84D is leading surrogate but there is no trailing surrogate for it.
    356     ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618");
    357     ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618");
    358     ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618");
    359     ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618");
    360     ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618");
    361 
    362     ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D");
    363     ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D");
    364     ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D");
    365     ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D");
    366     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D");
    367 
    368     // U+DE0D is trailing surrogate but there is no leading surrogate for it.
    369     ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618");
    370     ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618");
    371     ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618");
    372     ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618");
    373     ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618");
    374 
    375     ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D");
    376     ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D");
    377     ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D");
    378     ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D");
    379     ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D");
    380 
    381     // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag.
    382     ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8");
    383     ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8");
    384     ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8");
    385     ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8");
    386 
    387     // Tone marks.
    388     // CJK ideographic char + Tone mark + CJK ideographic char
    389     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444");
    390     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444");
    391     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444");
    392     ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444");
    393     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444");
    394 
    395     // Variation Selectors.
    396     // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
    397     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B");
    398     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B");
    399     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B");
    400     ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B");
    401     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B");
    402 
    403     // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
    404     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B");
    405     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B");
    406     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B");
    407     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B");
    408     ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B");
    409     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B");
    410     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B");
    411 
    412     // CJK ideographic char + Tone mark + Variation Character(VS1)
    413     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444");
    414     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444");
    415     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444");
    416     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444");
    417     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444");
    418     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444");
    419 
    420     // CJK ideographic char + Tone mark + Variation Character(VS17)
    421     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444");
    422     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444");
    423     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444");
    424     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444");
    425     ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444");
    426     ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444");
    427     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444");
    428 
    429     // CJK ideographic char + Variation Character(VS1) + Tone mark
    430     ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444");
    431     ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444");
    432     ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444");
    433     ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444");
    434     ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444");
    435     ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444");
    436 
    437     // CJK ideographic char + Variation Character(VS17) + Tone mark
    438     ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444");
    439     ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444");
    440     ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444");
    441     ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444");
    442     ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444");
    443     ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444");
    444     ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444");
    445 
    446     // Following test cases are unusual usage of variation selectors and tone
    447     // marks for caching up the further behavior changes, e.g. index of bounds
    448     // or crashes. Please feel free to update the test expectations if the
    449     // behavior change makes sense to you.
    450 
    451     // Isolated Tone marks and Variation Selectors
    452     ExpectPrevWordBreakForCache(0, "| U+FE00");
    453     ExpectPrevWordBreakForCache(1, "| U+FE00");
    454     ExpectPrevWordBreakForCache(1000, "| U+FE00");
    455     ExpectPrevWordBreakForCache(0, "| U+E0100");
    456     ExpectPrevWordBreakForCache(1000, "| U+E0100");
    457     ExpectPrevWordBreakForCache(0, "| U+302D");
    458     ExpectPrevWordBreakForCache(1000, "| U+302D");
    459 
    460     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
    461     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B");
    462     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B");
    463     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B");
    464     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B");
    465     ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B");
    466     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B");
    467 
    468     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
    469     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B");
    470     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B");
    471     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B");
    472     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B");
    473     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B");
    474     ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B");
    475     ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B");
    476     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+E0100 | U+845B");
    477 
    478     // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
    479     ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B");
    480     ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B");
    481     ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B");
    482     ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B");
    483     ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B");
    484     ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B");
    485     ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B");
    486 
    487     // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
    488     ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B");
    489     ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B");
    490     ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B");
    491     ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B");
    492     ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B");
    493     ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B");
    494     ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B");
    495 
    496     // Tone mark. + Tone mark
    497     ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444");
    498     ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444");
    499     ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444");
    500     ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444");
    501     ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444");
    502     ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444");
    503 }
    504 
    505 }  // namespace minikin
    506