Home | History | Annotate | Download | only in unittest
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "minikin/GraphemeBreak.h"
     18 
     19 #include <vector>
     20 
     21 #include <gtest/gtest.h>
     22 
     23 #include "UnicodeUtils.h"
     24 
     25 namespace minikin {
     26 
     27 bool IsBreak(const char* src) {
     28     const size_t BUF_SIZE = 256;
     29     uint16_t buf[BUF_SIZE];
     30     size_t offset;
     31     size_t size;
     32     ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
     33     return GraphemeBreak::isGraphemeBreak(nullptr, buf, 0, size, offset);
     34 }
     35 
     36 bool IsBreakWithAdvances(const float* advances, const char* src) {
     37     const size_t BUF_SIZE = 256;
     38     uint16_t buf[BUF_SIZE];
     39     size_t offset;
     40     size_t size;
     41     ParseUnicode(buf, BUF_SIZE, src, &size, &offset);
     42     return GraphemeBreak::isGraphemeBreak(advances, buf, 0, size, offset);
     43 }
     44 
     45 TEST(GraphemeBreak, utf16) {
     46     EXPECT_FALSE(IsBreak("U+D83C | U+DC31"));  // emoji, U+1F431
     47 
     48     // tests for invalid UTF-16
     49     EXPECT_TRUE(IsBreak("U+D800 | U+D800"));  // two leading surrogates
     50     EXPECT_TRUE(IsBreak("U+DC00 | U+DC00"));  // two trailing surrogates
     51     EXPECT_TRUE(IsBreak("'a' | U+D800"));     // lonely leading surrogate
     52     EXPECT_TRUE(IsBreak("U+DC00 | 'a'"));     // lonely trailing surrogate
     53     EXPECT_TRUE(IsBreak("U+D800 | 'a'"));     // leading surrogate followed by non-surrogate
     54     EXPECT_TRUE(IsBreak("'a' | U+DC00"));     // non-surrogate followed by trailing surrogate
     55 }
     56 
     57 TEST(GraphemeBreak, rules) {
     58     // Rule GB1, sot ; Rule GB2,  eot
     59     EXPECT_TRUE(IsBreak("| 'a'"));
     60     EXPECT_TRUE(IsBreak("'a' |"));
     61 
     62     // Rule GB3, CR x LF
     63     EXPECT_FALSE(IsBreak("U+000D | U+000A"));  // CR x LF
     64 
     65     // Rule GB4, (Control | CR | LF) 
     66     EXPECT_TRUE(IsBreak("'a' | U+2028"));  // Line separator
     67     EXPECT_TRUE(IsBreak("'a' | U+000D"));  // LF
     68     EXPECT_TRUE(IsBreak("'a' | U+000A"));  // CR
     69 
     70     // Rule GB5,  (Control | CR | LF)
     71     EXPECT_TRUE(IsBreak("U+2028 | 'a'"));  // Line separator
     72     EXPECT_TRUE(IsBreak("U+000D | 'a'"));  // LF
     73     EXPECT_TRUE(IsBreak("U+000A | 'a'"));  // CR
     74 
     75     // Rule GB6, L x ( L | V | LV | LVT )
     76     EXPECT_FALSE(IsBreak("U+1100 | U+1100"));  // L x L
     77     EXPECT_FALSE(IsBreak("U+1100 | U+1161"));  // L x V
     78     EXPECT_FALSE(IsBreak("U+1100 | U+AC00"));  // L x LV
     79     EXPECT_FALSE(IsBreak("U+1100 | U+AC01"));  // L x LVT
     80 
     81     // Rule GB7, ( LV | V ) x ( V | T )
     82     EXPECT_FALSE(IsBreak("U+AC00 | U+1161"));  // LV x V
     83     EXPECT_FALSE(IsBreak("U+1161 | U+1161"));  // V x V
     84     EXPECT_FALSE(IsBreak("U+AC00 | U+11A8"));  // LV x T
     85     EXPECT_FALSE(IsBreak("U+1161 | U+11A8"));  // V x T
     86 
     87     // Rule GB8, ( LVT | T ) x T
     88     EXPECT_FALSE(IsBreak("U+AC01 | U+11A8"));  // LVT x T
     89     EXPECT_FALSE(IsBreak("U+11A8 | U+11A8"));  // T x T
     90 
     91     // Other hangul pairs not counted above _are_ breaks (GB10)
     92     EXPECT_TRUE(IsBreak("U+AC00 | U+1100"));  // LV x L
     93     EXPECT_TRUE(IsBreak("U+AC01 | U+1100"));  // LVT x L
     94     EXPECT_TRUE(IsBreak("U+11A8 | U+1100"));  // T x L
     95     EXPECT_TRUE(IsBreak("U+11A8 | U+AC00"));  // T x LV
     96     EXPECT_TRUE(IsBreak("U+11A8 | U+AC01"));  // T x LVT
     97 
     98     // Rule GB12 and Rule GB13, Regional_Indicator x Regional_Indicator
     99     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8"));
    100     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));   // Regional indicator pair (flag)
    101     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
    102     EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
    103 
    104     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA"));   // Regional indicator pair (flag)
    105     EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
    106     // Same case as the two above, knowing that the first two characters ligate, which is what
    107     // would typically happen.
    108     const float firstPairLigated[] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0};  // Two entries per codepoint
    109     EXPECT_TRUE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
    110     EXPECT_FALSE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
    111     // Repeat the tests, But now the font doesn't have a ligature for the first two characters,
    112     // while it does have a ligature for the last two. This could happen for fonts that do not
    113     // support some (potentially encoded later than they were developed) flags.
    114     const float secondPairLigated[] = {1.0, 0.0, 1.0, 0.0, 0.0, 0.0};
    115     EXPECT_FALSE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA"));
    116     EXPECT_TRUE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA"));
    117 
    118     EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA"));   // Regional indicator pair (flag)
    119     EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA"));  // Regional indicator pair (flag)
    120 
    121     EXPECT_TRUE(
    122             IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
    123     EXPECT_FALSE(
    124             IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
    125     EXPECT_FALSE(
    126             IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8"));  // Regional indicator pair (flag)
    127 
    128     // Rule GB9, x (Extend | ZWJ)
    129     EXPECT_FALSE(IsBreak("'a' | U+0301"));  // combining accent
    130     EXPECT_FALSE(IsBreak("'a' | U+200D"));  // ZWJ
    131     // Rule GB9a, x SpacingMark
    132     EXPECT_FALSE(IsBreak("U+0915 | U+093E"));  // KA, AA (spacing mark)
    133     // Rule GB9b, Prepend x
    134     // see tailoring test for prepend, as current ICU doesn't have any characters in the class
    135 
    136     // Rule GB999, Any  Any
    137     EXPECT_TRUE(IsBreak("'a' | 'b'"));
    138     EXPECT_TRUE(IsBreak("'f' | 'i'"));              // probable ligature
    139     EXPECT_TRUE(IsBreak("U+0644 | U+0627"));        // probable ligature, lam + alef
    140     EXPECT_TRUE(IsBreak("U+4E00 | U+4E00"));        // CJK ideographs
    141     EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8"));  // Regional indicator pair (flag)
    142     EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'"));  // Regional indicator pair (flag)
    143 
    144     // Extended rule for emoji tag sequence.
    145     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 'a'"));
    146     EXPECT_TRUE(IsBreak("'a' U+1F3F4 | 'a'"));
    147 
    148     // Immediate tag_term after tag_base.
    149     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E007F 'a'"));
    150     EXPECT_FALSE(IsBreak("U+1F3F4 | U+E007F"));
    151     EXPECT_TRUE(IsBreak("'a' U+1F3F4 U+E007F | 'a'"));
    152 
    153     // Flag sequence
    154     // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag
    155     // of Scotland.
    156     // U+1F3F4 is WAVING BLACK FLAG. This can be a tag_base character.
    157     // U+E0067 is TAG LATIN SMALL LETTER G. This can be a part of tag_spec.
    158     // U+E0062 is TAG LATIN SMALL LETTER B. This can be a part of tag_spec.
    159     // U+E0073 is TAG LATIN SMALL LETTER S. This can be a part of tag_spec.
    160     // U+E0063 is TAG LATIN SMALL LETTER C. This can be a part of tag_spec.
    161     // U+E0074 is TAG LATIN SMALL LETTER T. This can be a part of tag_spec.
    162     // U+E007F is CANCEL TAG. This is a tag_term character.
    163     EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
    164     EXPECT_FALSE(IsBreak("U+1F3F4 | U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
    165     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 | U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"));
    166     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 | U+E0073 U+E0063 U+E0074 U+E007F"));
    167     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 | U+E0063 U+E0074 U+E007F"));
    168     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 | U+E0074 U+E007F"));
    169     EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 | U+E007F"));
    170     EXPECT_TRUE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F | 'a'"));
    171 }
    172 
    173 TEST(GraphemeBreak, tailoring) {
    174     // control characters that we interpret as "extend"
    175     EXPECT_FALSE(IsBreak("'a' | U+00AD"));   // soft hyphen
    176     EXPECT_FALSE(IsBreak("'a' | U+200B"));   // zwsp
    177     EXPECT_FALSE(IsBreak("'a' | U+200E"));   // lrm
    178     EXPECT_FALSE(IsBreak("'a' | U+202A"));   // lre
    179     EXPECT_FALSE(IsBreak("'a' | U+E0041"));  // tag character
    180 
    181     // UTC-approved characters for the Prepend class
    182     EXPECT_FALSE(IsBreak("U+06DD | U+0661"));  // arabic subtending mark + digit one
    183 
    184     EXPECT_TRUE(IsBreak("U+0E01 | U+0E33"));  // Thai sara am
    185 
    186     // virama is not a grapheme break, but "pure killer" is
    187     EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
    188     EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
    189     EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
    190     EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01"));   // thai phinthu = pure killer
    191 
    192     // Repetition of above tests, but with a given advances array that implies everything
    193     // became just one cluster.
    194     const float conjoined[] = {1.0, 0.0, 0.0};
    195     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
    196                                      "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
    197     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
    198                                      "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
    199     EXPECT_FALSE(IsBreakWithAdvances(conjoined,
    200                                      "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
    201     EXPECT_TRUE(IsBreakWithAdvances(conjoined,
    202                                     "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
    203 
    204     // Repetition of above tests, but with a given advances array that the virama did not
    205     // form a cluster with the following consonant. The difference is that there is now
    206     // a grapheme break after the virama in ka+virama+ka.
    207     const float separate[] = {1.0, 0.0, 1.0};
    208     EXPECT_FALSE(IsBreakWithAdvances(separate,
    209                                      "U+0915 | U+094D U+0915"));  // Devanagari ka+virama+ka
    210     EXPECT_TRUE(IsBreakWithAdvances(separate,
    211                                     "U+0915 U+094D | U+0915"));  // Devanagari ka+virama+ka
    212     EXPECT_FALSE(IsBreakWithAdvances(separate,
    213                                      "U+0E01 | U+0E3A U+0E01"));  // thai phinthu = pure killer
    214     EXPECT_TRUE(IsBreakWithAdvances(separate,
    215                                     "U+0E01 U+0E3A | U+0E01"));  // thai phinthu = pure killer
    216 
    217     // suppress grapheme breaks in zwj emoji sequences
    218     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468"));
    219     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468"));
    220     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468"));
    221     EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466"));
    222     EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466"));
    223     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466"));
    224     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466"));
    225     EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466"));
    226     EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8"));
    227 
    228     // Do not break before and after zwj with all kind of emoji characters.
    229     EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464"));
    230     EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464"));
    231 
    232     // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break
    233     EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764"));
    234 }
    235 
    236 TEST(GraphemeBreak, emojiModifiers) {
    237     EXPECT_FALSE(IsBreak("U+261D | U+1F3FB"));   // white up pointing index + modifier
    238     EXPECT_FALSE(IsBreak("U+270C | U+1F3FB"));   // victory hand + modifier
    239     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB"));  // boy + modifier
    240     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC"));  // boy + modifier
    241     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD"));  // boy + modifier
    242     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE"));  // boy + modifier
    243     EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF"));  // boy + modifier
    244     EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF"));  // sign of the horns + modifier
    245     EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF"));  // selfie (Unicode 9) + modifier
    246     // Reptition of the tests above, with the knowledge that they are ligated.
    247     const float ligated1_2[] = {1.0, 0.0, 0.0};
    248     const float ligated2_2[] = {1.0, 0.0, 0.0, 0.0};
    249     EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+261D | U+1F3FB"));
    250     EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+270C | U+1F3FB"));
    251     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FB"));
    252     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FC"));
    253     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FD"));
    254     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FE"));
    255     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FF"));
    256     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F918 | U+1F3FF"));
    257     EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F933 | U+1F3FF"));
    258     // Reptition of the tests above, with the knowledge that they are not ligated.
    259     const float unligated1_2[] = {1.0, 1.0, 0.0};
    260     const float unligated2_2[] = {1.0, 0.0, 1.0, 0.0};
    261     EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+261D | U+1F3FB"));
    262     EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+270C | U+1F3FB"));
    263     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FB"));
    264     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FC"));
    265     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FD"));
    266     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FE"));
    267     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FF"));
    268     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F918 | U+1F3FF"));
    269     EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F933 | U+1F3FF"));
    270 
    271     // adding extend characters between emoji base and modifier doesn't affect grapheme cluster
    272     EXPECT_FALSE(IsBreak("U+270C U+FE0E | U+1F3FB"));  // victory hand + text style + modifier
    273     EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
    274     // Reptition of the two tests above, with the knowledge that they are ligated.
    275     const float ligated1_1_2[] = {1.0, 0.0, 0.0, 0.0};
    276     EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
    277     EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
    278     // Reptition of the first two tests, with the knowledge that they are not ligated.
    279     const float unligated1_1_2[] = {1.0, 0.0, 1.0, 0.0};
    280     EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0E | U+1F3FB"));
    281     EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0F | U+1F3FB"));
    282 
    283     // heart is not an emoji base
    284     EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB"));         // heart + modifier
    285     EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB"));  // heart + emoji style + modifier
    286     EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB"));  // heart + emoji style + modifier
    287     EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB"));        // modifier + modifier
    288 
    289     // rat is not an emoji modifer
    290     EXPECT_TRUE(IsBreak("U+1F466 | U+1F400"));  // boy + rat
    291 }
    292 
    293 TEST(GraphemeBreak, genderBalancedEmoji) {
    294     // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE.
    295     EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+1F4BC"));
    296     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F4BC"));
    297     // The above two cases, when the ligature is not supported in the font. We now expect a break
    298     // between them.
    299     const float unligated2_1_2[] = {1.0, 0.0, 0.0, 1.0, 0.0};
    300     EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 | U+200D U+1F4BC"));
    301     EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 U+200D | U+1F4BC"));
    302 
    303     // U+2695 has now emoji property, so should be part of ZWJ sequence.
    304     EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+2695"));
    305     EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2695"));
    306     // The above two cases, when the ligature is not supported in the font. We now expect a break
    307     // between them.
    308     const float unligated2_1_1[] = {1.0, 0.0, 0.0, 1.0};
    309     EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 | U+200D U+2695"));
    310     EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 U+200D | U+2695"));
    311 }
    312 
    313 TEST(GraphemeBreak, offsets) {
    314     uint16_t string[] = {0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301};
    315     EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 2));
    316     EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 3));
    317     EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 4));
    318     EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 5));
    319 }
    320 
    321 }  // namespace minikin
    322