1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <gtest/gtest.h> 18 #include <UnicodeUtils.h> 19 20 #include "LayoutUtils.h" 21 22 namespace { 23 24 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) { 25 const size_t BUF_SIZE = 256U; 26 uint16_t buf[BUF_SIZE]; 27 size_t expected_breakpoint = 0U; 28 size_t size = 0U; 29 30 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); 31 EXPECT_EQ(expected_breakpoint, 32 getNextWordBreakForCache(buf, offset_in, size)) 33 << "Expected position is [" << query_str << "] from offset " << offset_in; 34 } 35 36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) { 37 const size_t BUF_SIZE = 256U; 38 uint16_t buf[BUF_SIZE]; 39 size_t expected_breakpoint = 0U; 40 size_t size = 0U; 41 42 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); 43 EXPECT_EQ(expected_breakpoint, 44 getPrevWordBreakForCache(buf, offset_in, size)) 45 << "Expected position is [" << query_str << "] from offset " << offset_in; 46 } 47 48 TEST(WordBreakTest, goNextWordBreakTest) { 49 ExpectNextWordBreakForCache(0, "|"); 50 51 // Continue for spaces. 52 ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |"); 53 ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |"); 54 ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |"); 55 ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |"); 56 ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |"); 57 ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |"); 58 59 // Space makes word break. 60 ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'"); 61 ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'"); 62 ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'"); 63 ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |"); 64 ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |"); 65 ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |"); 66 ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |"); 67 68 ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'"); 69 ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'"); 70 ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'"); 71 ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |"); 72 ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |"); 73 ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |"); 74 ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |"); 75 76 ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 77 ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 78 ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'"); 79 ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 80 ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 81 ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 82 ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 83 ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 84 85 // CJK ideographs makes word break. 86 ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); 87 ExpectNextWordBreakForCache(1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); 88 ExpectNextWordBreakForCache(2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); 89 ExpectNextWordBreakForCache(3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 90 ExpectNextWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 91 ExpectNextWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 92 ExpectNextWordBreakForCache(1000, 93 "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 94 95 ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); 96 ExpectNextWordBreakForCache(1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); 97 ExpectNextWordBreakForCache(2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); 98 ExpectNextWordBreakForCache(3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 99 ExpectNextWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 100 ExpectNextWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 101 ExpectNextWordBreakForCache(1000, 102 "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 103 104 ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 105 ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 106 ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 107 ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); 108 ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 109 ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); 110 ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); 111 112 // Continue if trailing characters is Unicode combining characters. 113 ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00"); 114 ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00"); 115 ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |"); 116 ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |"); 117 ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |"); 118 119 // Surrogate pairs. 120 ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |"); 121 ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |"); 122 ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |"); 123 ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |"); 124 ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |"); 125 ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |"); 126 127 // Broken surrogate pairs. 128 // U+D84D is leading surrogate but there is no trailing surrogate for it. 129 ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |"); 130 ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |"); 131 ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |"); 132 ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |"); 133 ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |"); 134 135 ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |"); 136 ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |"); 137 ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |"); 138 ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |"); 139 ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |"); 140 141 // U+DE0D is trailing surrogate but there is no leading surrogate for it. 142 ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |"); 143 ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |"); 144 ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |"); 145 ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |"); 146 ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |"); 147 148 ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |"); 149 ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |"); 150 ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |"); 151 ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |"); 152 ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |"); 153 154 // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. 155 ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |"); 156 ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |"); 157 ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |"); 158 ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |"); 159 160 // Tone marks. 161 // CJK ideographic char + Tone mark + CJK ideographic char 162 ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444"); 163 ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444"); 164 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |"); 165 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |"); 166 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |"); 167 168 // Variation Selectors. 169 // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char 170 ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B"); 171 ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B"); 172 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |"); 173 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |"); 174 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |"); 175 176 // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char 177 ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B"); 178 ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B"); 179 ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B"); 180 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |"); 181 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |"); 182 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |"); 183 ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |"); 184 185 // CJK ideographic char + Tone mark + Variation Character(VS1) 186 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444"); 187 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444"); 188 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444"); 189 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |"); 190 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |"); 191 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |"); 192 193 // CJK ideographic char + Tone mark + Variation Character(VS17) 194 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444"); 195 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444"); 196 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444"); 197 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444"); 198 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |"); 199 ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |"); 200 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |"); 201 202 // CJK ideographic char + Variation Character(VS1) + Tone mark 203 ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444"); 204 ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444"); 205 ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444"); 206 ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |"); 207 ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |"); 208 ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |"); 209 210 // CJK ideographic char + Variation Character(VS17) + Tone mark 211 ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444"); 212 ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444"); 213 ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444"); 214 ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444"); 215 ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |"); 216 ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |"); 217 ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |"); 218 219 // Following test cases are unusual usage of variation selectors and tone 220 // marks for caching up the further behavior changes, e.g. index of bounds 221 // or crashes. Please feel free to update the test expectations if the 222 // behavior change makes sense to you. 223 224 // Isolated Tone marks and Variation Selectors 225 ExpectNextWordBreakForCache(0, "U+FE00 |"); 226 ExpectNextWordBreakForCache(1, "U+FE00 |"); 227 ExpectNextWordBreakForCache(1000, "U+FE00 |"); 228 ExpectNextWordBreakForCache(0, "U+E0100 |"); 229 ExpectNextWordBreakForCache(1000, "U+E0100 |"); 230 ExpectNextWordBreakForCache(0, "U+302D |"); 231 ExpectNextWordBreakForCache(1000, "U+302D |"); 232 233 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) 234 ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B"); 235 ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B"); 236 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B"); 237 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |"); 238 ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |"); 239 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |"); 240 241 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) 242 ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B"); 243 ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B"); 244 ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B"); 245 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B"); 246 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B"); 247 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |"); 248 ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |"); 249 ExpectNextWordBreakForCache(1000, 250 "U+845B U+E0100 U+E0100 U+845B |"); 251 252 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) 253 ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B"); 254 ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B"); 255 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B"); 256 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B"); 257 ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |"); 258 ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |"); 259 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |"); 260 261 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) 262 ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B"); 263 ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B"); 264 ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B"); 265 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B"); 266 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |"); 267 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |"); 268 ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |"); 269 270 // Tone mark. + Tone mark 271 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444"); 272 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444"); 273 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444"); 274 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |"); 275 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |"); 276 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |"); 277 } 278 279 TEST(WordBreakTest, goPrevWordBreakTest) { 280 ExpectPrevWordBreakForCache(0, "|"); 281 282 // Continue for spaces. 283 ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'"); 284 ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'"); 285 ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'"); 286 ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'"); 287 ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'"); 288 ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'"); 289 290 // Space makes word break. 291 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'"); 292 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'"); 293 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'"); 294 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'"); 295 ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'"); 296 ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'"); 297 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'"); 298 299 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'"); 300 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'"); 301 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'"); 302 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'"); 303 ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'"); 304 ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'"); 305 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'"); 306 307 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 308 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 309 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 310 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 311 ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'"); 312 ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 313 ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 314 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 315 316 // CJK ideographs makes word break. 317 ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); 318 ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); 319 ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); 320 ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); 321 ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); 322 ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 323 ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 324 325 ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); 326 ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); 327 ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); 328 ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); 329 ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); 330 ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 331 ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 332 333 // Mixed case. 334 ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 335 ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 336 ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 337 ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 338 ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 339 ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); 340 ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 341 ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 342 343 // Continue if trailing characters is Unicode combining characters. 344 ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00"); 345 ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00"); 346 ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00"); 347 ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00"); 348 ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00"); 349 350 // Surrogate pairs. 351 ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618"); 352 ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618"); 353 ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618"); 354 ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618"); 355 ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618"); 356 ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618"); 357 358 // Broken surrogate pairs. 359 // U+D84D is leading surrogate but there is no trailing surrogate for it. 360 ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618"); 361 ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618"); 362 ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618"); 363 ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618"); 364 ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618"); 365 366 ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D"); 367 ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D"); 368 ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D"); 369 ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D"); 370 ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D"); 371 372 // U+DE0D is trailing surrogate but there is no leading surrogate for it. 373 ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618"); 374 ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618"); 375 ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618"); 376 ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618"); 377 ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618"); 378 379 ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D"); 380 ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D"); 381 ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D"); 382 ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D"); 383 ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D"); 384 385 // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. 386 ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8"); 387 ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8"); 388 ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8"); 389 ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8"); 390 391 // Tone marks. 392 // CJK ideographic char + Tone mark + CJK ideographic char 393 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444"); 394 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444"); 395 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444"); 396 ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444"); 397 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444"); 398 399 // Variation Selectors. 400 // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char 401 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B"); 402 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B"); 403 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B"); 404 ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B"); 405 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B"); 406 407 // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char 408 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B"); 409 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B"); 410 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B"); 411 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B"); 412 ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B"); 413 ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B"); 414 ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B"); 415 416 // CJK ideographic char + Tone mark + Variation Character(VS1) 417 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444"); 418 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444"); 419 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444"); 420 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444"); 421 ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444"); 422 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444"); 423 424 // CJK ideographic char + Tone mark + Variation Character(VS17) 425 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444"); 426 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444"); 427 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444"); 428 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444"); 429 ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444"); 430 ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444"); 431 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444"); 432 433 // CJK ideographic char + Variation Character(VS1) + Tone mark 434 ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444"); 435 ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444"); 436 ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444"); 437 ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444"); 438 ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444"); 439 ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444"); 440 441 // CJK ideographic char + Variation Character(VS17) + Tone mark 442 ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444"); 443 ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444"); 444 ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444"); 445 ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444"); 446 ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444"); 447 ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444"); 448 ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444"); 449 450 // Following test cases are unusual usage of variation selectors and tone 451 // marks for caching up the further behavior changes, e.g. index of bounds 452 // or crashes. Please feel free to update the test expectations if the 453 // behavior change makes sense to you. 454 455 // Isolated Tone marks and Variation Selectors 456 ExpectPrevWordBreakForCache(0, "| U+FE00"); 457 ExpectPrevWordBreakForCache(1, "| U+FE00"); 458 ExpectPrevWordBreakForCache(1000, "| U+FE00"); 459 ExpectPrevWordBreakForCache(0, "| U+E0100"); 460 ExpectPrevWordBreakForCache(1000, "| U+E0100"); 461 ExpectPrevWordBreakForCache(0, "| U+302D"); 462 ExpectPrevWordBreakForCache(1000, "| U+302D"); 463 464 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) 465 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B"); 466 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B"); 467 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B"); 468 ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B"); 469 ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B"); 470 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B"); 471 472 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) 473 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B"); 474 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B"); 475 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B"); 476 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B"); 477 ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B"); 478 ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B"); 479 ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B"); 480 ExpectPrevWordBreakForCache(1000, 481 "U+845B U+E0100 U+E0100 | U+845B"); 482 483 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) 484 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B"); 485 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B"); 486 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B"); 487 ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B"); 488 ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B"); 489 ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B"); 490 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B"); 491 492 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) 493 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B"); 494 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B"); 495 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B"); 496 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B"); 497 ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B"); 498 ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B"); 499 ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B"); 500 501 // Tone mark. + Tone mark 502 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444"); 503 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444"); 504 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444"); 505 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444"); 506 ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444"); 507 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444"); 508 } 509 510 } // namespace 511