1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "LayoutUtils.h" 18 19 #include <gtest/gtest.h> 20 21 #include "UnicodeUtils.h" 22 23 namespace minikin { 24 25 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) { 26 const size_t BUF_SIZE = 256U; 27 uint16_t buf[BUF_SIZE]; 28 size_t expected_breakpoint = 0U; 29 size_t size = 0U; 30 31 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); 32 EXPECT_EQ(expected_breakpoint, getNextWordBreakForCache(U16StringPiece(buf, size), offset_in)) 33 << "Expected position is [" << query_str << "] from offset " << offset_in; 34 } 35 36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) { 37 const size_t BUF_SIZE = 256U; 38 uint16_t buf[BUF_SIZE]; 39 size_t expected_breakpoint = 0U; 40 size_t size = 0U; 41 42 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); 43 EXPECT_EQ(expected_breakpoint, getPrevWordBreakForCache(U16StringPiece(buf, size), offset_in)) 44 << "Expected position is [" << query_str << "] from offset " << offset_in; 45 } 46 47 TEST(WordBreakTest, goNextWordBreakTest) { 48 ExpectNextWordBreakForCache(0, "|"); 49 50 // Continue for spaces. 51 ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |"); 52 ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |"); 53 ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |"); 54 ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |"); 55 ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |"); 56 ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |"); 57 58 // Space makes word break. 59 ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'"); 60 ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'"); 61 ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'"); 62 ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |"); 63 ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |"); 64 ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |"); 65 ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |"); 66 67 ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'"); 68 ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'"); 69 ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'"); 70 ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |"); 71 ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |"); 72 ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |"); 73 ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |"); 74 75 ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 76 ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 77 ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'"); 78 ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 79 ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 80 ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 81 ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 82 ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"); 83 84 // CJK ideographs makes word break. 85 ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); 86 ExpectNextWordBreakForCache(1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); 87 ExpectNextWordBreakForCache(2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); 88 ExpectNextWordBreakForCache(3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 89 ExpectNextWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 90 ExpectNextWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 91 ExpectNextWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); 92 93 ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); 94 ExpectNextWordBreakForCache(1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); 95 ExpectNextWordBreakForCache(2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); 96 ExpectNextWordBreakForCache(3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 97 ExpectNextWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 98 ExpectNextWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 99 ExpectNextWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); 100 101 ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 102 ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 103 ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 104 ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); 105 ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 106 ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); 107 ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); 108 109 // Continue if trailing characters is Unicode combining characters. 110 ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00"); 111 ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00"); 112 ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |"); 113 ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |"); 114 ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |"); 115 116 // Surrogate pairs. 117 ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |"); 118 ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |"); 119 ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |"); 120 ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |"); 121 ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |"); 122 ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |"); 123 124 // Broken surrogate pairs. 125 // U+D84D is leading surrogate but there is no trailing surrogate for it. 126 ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |"); 127 ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |"); 128 ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |"); 129 ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |"); 130 ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |"); 131 132 ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |"); 133 ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |"); 134 ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |"); 135 ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |"); 136 ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |"); 137 138 // U+DE0D is trailing surrogate but there is no leading surrogate for it. 139 ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |"); 140 ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |"); 141 ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |"); 142 ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |"); 143 ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |"); 144 145 ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |"); 146 ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |"); 147 ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |"); 148 ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |"); 149 ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |"); 150 151 // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. 152 ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |"); 153 ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |"); 154 ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |"); 155 ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |"); 156 157 // Tone marks. 158 // CJK ideographic char + Tone mark + CJK ideographic char 159 ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444"); 160 ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444"); 161 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |"); 162 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |"); 163 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |"); 164 165 // Variation Selectors. 166 // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char 167 ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B"); 168 ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B"); 169 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |"); 170 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |"); 171 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |"); 172 173 // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char 174 ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B"); 175 ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B"); 176 ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B"); 177 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |"); 178 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |"); 179 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |"); 180 ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |"); 181 182 // CJK ideographic char + Tone mark + Variation Character(VS1) 183 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444"); 184 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444"); 185 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444"); 186 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |"); 187 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |"); 188 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |"); 189 190 // CJK ideographic char + Tone mark + Variation Character(VS17) 191 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444"); 192 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444"); 193 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+E0100 | U+4444"); 194 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444"); 195 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |"); 196 ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |"); 197 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |"); 198 199 // CJK ideographic char + Variation Character(VS1) + Tone mark 200 ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444"); 201 ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444"); 202 ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444"); 203 ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |"); 204 ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |"); 205 ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |"); 206 207 // CJK ideographic char + Variation Character(VS17) + Tone mark 208 ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444"); 209 ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444"); 210 ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444"); 211 ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444"); 212 ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |"); 213 ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |"); 214 ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |"); 215 216 // Following test cases are unusual usage of variation selectors and tone 217 // marks for caching up the further behavior changes, e.g. index of bounds 218 // or crashes. Please feel free to update the test expectations if the 219 // behavior change makes sense to you. 220 221 // Isolated Tone marks and Variation Selectors 222 ExpectNextWordBreakForCache(0, "U+FE00 |"); 223 ExpectNextWordBreakForCache(1, "U+FE00 |"); 224 ExpectNextWordBreakForCache(1000, "U+FE00 |"); 225 ExpectNextWordBreakForCache(0, "U+E0100 |"); 226 ExpectNextWordBreakForCache(1000, "U+E0100 |"); 227 ExpectNextWordBreakForCache(0, "U+302D |"); 228 ExpectNextWordBreakForCache(1000, "U+302D |"); 229 230 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) 231 ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B"); 232 ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B"); 233 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B"); 234 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |"); 235 ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |"); 236 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |"); 237 238 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) 239 ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B"); 240 ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B"); 241 ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B"); 242 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B"); 243 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B"); 244 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |"); 245 ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |"); 246 ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+E0100 U+845B |"); 247 248 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) 249 ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B"); 250 ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B"); 251 ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B"); 252 ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B"); 253 ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |"); 254 ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |"); 255 ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |"); 256 257 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) 258 ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B"); 259 ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B"); 260 ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B"); 261 ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B"); 262 ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |"); 263 ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |"); 264 ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |"); 265 266 // Tone mark. + Tone mark 267 ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444"); 268 ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444"); 269 ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444"); 270 ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |"); 271 ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |"); 272 ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |"); 273 } 274 275 TEST(WordBreakTest, goPrevWordBreakTest) { 276 ExpectPrevWordBreakForCache(0, "|"); 277 278 // Continue for spaces. 279 ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'"); 280 ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'"); 281 ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'"); 282 ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'"); 283 ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'"); 284 ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'"); 285 286 // Space makes word break. 287 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'"); 288 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'"); 289 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'"); 290 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'"); 291 ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'"); 292 ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'"); 293 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'"); 294 295 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'"); 296 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'"); 297 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'"); 298 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'"); 299 ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'"); 300 ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'"); 301 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'"); 302 303 ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 304 ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 305 ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); 306 ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'"); 307 ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'"); 308 ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 309 ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 310 ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); 311 312 // CJK ideographs makes word break. 313 ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); 314 ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); 315 ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); 316 ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); 317 ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); 318 ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 319 ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); 320 321 ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); 322 ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); 323 ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); 324 ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); 325 ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); 326 ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 327 ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); 328 329 // Mixed case. 330 ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 331 ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 332 ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 333 ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); 334 ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); 335 ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); 336 ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 337 ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); 338 339 // Continue if trailing characters is Unicode combining characters. 340 ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00"); 341 ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00"); 342 ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00"); 343 ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00"); 344 ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00"); 345 346 // Surrogate pairs. 347 ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618"); 348 ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618"); 349 ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618"); 350 ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618"); 351 ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618"); 352 ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618"); 353 354 // Broken surrogate pairs. 355 // U+D84D is leading surrogate but there is no trailing surrogate for it. 356 ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618"); 357 ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618"); 358 ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618"); 359 ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618"); 360 ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618"); 361 362 ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D"); 363 ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D"); 364 ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D"); 365 ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D"); 366 ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D"); 367 368 // U+DE0D is trailing surrogate but there is no leading surrogate for it. 369 ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618"); 370 ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618"); 371 ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618"); 372 ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618"); 373 ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618"); 374 375 ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D"); 376 ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D"); 377 ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D"); 378 ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D"); 379 ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D"); 380 381 // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. 382 ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8"); 383 ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8"); 384 ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8"); 385 ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8"); 386 387 // Tone marks. 388 // CJK ideographic char + Tone mark + CJK ideographic char 389 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444"); 390 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444"); 391 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444"); 392 ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444"); 393 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444"); 394 395 // Variation Selectors. 396 // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char 397 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B"); 398 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B"); 399 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B"); 400 ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B"); 401 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B"); 402 403 // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char 404 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B"); 405 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B"); 406 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B"); 407 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B"); 408 ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B"); 409 ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B"); 410 ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B"); 411 412 // CJK ideographic char + Tone mark + Variation Character(VS1) 413 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444"); 414 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444"); 415 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444"); 416 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444"); 417 ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444"); 418 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+FE00 | U+4444"); 419 420 // CJK ideographic char + Tone mark + Variation Character(VS17) 421 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444"); 422 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444"); 423 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444"); 424 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444"); 425 ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444"); 426 ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444"); 427 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444"); 428 429 // CJK ideographic char + Variation Character(VS1) + Tone mark 430 ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444"); 431 ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444"); 432 ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444"); 433 ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444"); 434 ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444"); 435 ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444"); 436 437 // CJK ideographic char + Variation Character(VS17) + Tone mark 438 ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444"); 439 ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444"); 440 ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444"); 441 ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444"); 442 ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444"); 443 ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444"); 444 ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444"); 445 446 // Following test cases are unusual usage of variation selectors and tone 447 // marks for caching up the further behavior changes, e.g. index of bounds 448 // or crashes. Please feel free to update the test expectations if the 449 // behavior change makes sense to you. 450 451 // Isolated Tone marks and Variation Selectors 452 ExpectPrevWordBreakForCache(0, "| U+FE00"); 453 ExpectPrevWordBreakForCache(1, "| U+FE00"); 454 ExpectPrevWordBreakForCache(1000, "| U+FE00"); 455 ExpectPrevWordBreakForCache(0, "| U+E0100"); 456 ExpectPrevWordBreakForCache(1000, "| U+E0100"); 457 ExpectPrevWordBreakForCache(0, "| U+302D"); 458 ExpectPrevWordBreakForCache(1000, "| U+302D"); 459 460 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) 461 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B"); 462 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B"); 463 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B"); 464 ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B"); 465 ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B"); 466 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B"); 467 468 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) 469 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B"); 470 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B"); 471 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B"); 472 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B"); 473 ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B"); 474 ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B"); 475 ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B"); 476 ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+E0100 | U+845B"); 477 478 // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) 479 ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B"); 480 ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B"); 481 ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B"); 482 ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B"); 483 ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B"); 484 ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B"); 485 ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B"); 486 487 // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) 488 ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B"); 489 ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B"); 490 ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B"); 491 ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B"); 492 ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B"); 493 ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B"); 494 ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B"); 495 496 // Tone mark. + Tone mark 497 ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444"); 498 ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444"); 499 ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444"); 500 ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444"); 501 ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444"); 502 ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444"); 503 } 504 505 } // namespace minikin 506