1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Some UTF character seqeuences in this file were taken from 6 // https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt 7 8 #include <gtest/gtest.h> 9 #include <stdio.h> 10 11 extern "C" { 12 #include "cras_utf8.h" 13 } 14 15 namespace { 16 17 TEST(UTF8, ValidStress) { 18 size_t pos; 19 20 EXPECT_EQ(1, valid_utf8_string("The greek word 'kosme': " 21 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce" 22 "\xbc\xce\xb5", &pos)); 23 EXPECT_EQ(35, pos); 24 25 EXPECT_EQ(1, valid_utf8_string("Playback", &pos)); 26 EXPECT_EQ(8, pos); 27 28 EXPECT_EQ(1, valid_utf8_string("The Euro sign: \xe2\x82\xac", &pos)); 29 EXPECT_EQ(18, pos); 30 31 /* First possible sequence of a certain length. */ 32 EXPECT_EQ(1, valid_utf8_string("\x01", &pos)); 33 EXPECT_EQ(1, pos); 34 EXPECT_EQ(1, valid_utf8_string("\xc2\x80", &pos)); 35 EXPECT_EQ(2, pos); 36 EXPECT_EQ(1, valid_utf8_string("\xe0\xa0\x80", &pos)); 37 EXPECT_EQ(3, pos); 38 EXPECT_EQ(1, valid_utf8_string("\xe1\x80\x80", &pos)); 39 EXPECT_EQ(3, pos); 40 EXPECT_EQ(1, valid_utf8_string("\xf0\x90\x80\x80", &pos)); 41 EXPECT_EQ(4, pos); 42 EXPECT_EQ(1, valid_utf8_string("\xf1\x80\x80\x80", &pos)); 43 EXPECT_EQ(4, pos); 44 45 /* Last possible sequence of a certain length. */ 46 EXPECT_EQ(1, valid_utf8_string("\x7f", &pos)); 47 EXPECT_EQ(1, pos); 48 EXPECT_EQ(1, valid_utf8_string("\xdf\xbf", &pos)); 49 EXPECT_EQ(2, pos); 50 EXPECT_EQ(1, valid_utf8_string("\xef\xbf\xbf", &pos)); 51 EXPECT_EQ(3, pos); 52 EXPECT_EQ(1, valid_utf8_string("\xf4\x8f\xbf\xbf", &pos)); 53 EXPECT_EQ(4, pos); 54 55 /* Other boundary conditions. */ 56 EXPECT_EQ(1, valid_utf8_string("\xed\x9f\xbf", &pos)); 57 EXPECT_EQ(3, pos); 58 EXPECT_EQ(1, valid_utf8_string("\xee\x80\x80", &pos)); 59 EXPECT_EQ(3, pos); 60 EXPECT_EQ(1, valid_utf8_string("\xef\xbf\xbd", &pos)); 61 EXPECT_EQ(3, pos); 62 EXPECT_EQ(1, valid_utf8_string("\xf0\xbf\xbf\xbf", &pos)); 63 EXPECT_EQ(4, pos); 64 65 /* BOM sequence. */ 66 EXPECT_EQ(1, valid_utf8_string("\xef\xbb\xbf", &pos)); 67 EXPECT_EQ(3, pos); 68 69 /* Valid UTF-8 that shouldn't appear in text; chose to allow 70 * these characters anyway. */ 71 EXPECT_EQ(1, valid_utf8_string("U+FFFE: \xef\xbf\xbe", &pos)); 72 EXPECT_EQ(11, pos); 73 EXPECT_EQ(1, valid_utf8_string("U+FDD0: \xef\xb7\x90", &pos)); 74 EXPECT_EQ(11, pos); 75 EXPECT_EQ(1, valid_utf8_string("\xf0\x9f\xbf\xbe", &pos)); 76 EXPECT_EQ(4, pos); 77 } 78 79 TEST(UTF8, InvalidStress) { 80 size_t pos; 81 82 /* Malformed continuation bytes. */ 83 EXPECT_EQ(0, valid_utf8_string("\x80", &pos)); 84 EXPECT_EQ(0, pos); 85 EXPECT_EQ(0, valid_utf8_string("\xbf", &pos)); 86 EXPECT_EQ(0, pos); 87 EXPECT_EQ(0, valid_utf8_string("\x80\xbf", &pos)); 88 EXPECT_EQ(0, pos); 89 EXPECT_EQ(0, valid_utf8_string("\xc2\x80\xbf", &pos)); 90 EXPECT_EQ(2, pos); 91 92 /* Lonely start characters. */ 93 EXPECT_EQ(0, valid_utf8_string("\xc2 \xc3 \xc4 ", &pos)); 94 EXPECT_EQ(1, pos); 95 96 /* Out of range cases. */ 97 EXPECT_EQ(0, valid_utf8_string("\xf4\x90\xbf\xbf", &pos)); 98 EXPECT_EQ(1, pos); 99 EXPECT_EQ(0, valid_utf8_string(" \xf5\x80", &pos)); 100 EXPECT_EQ(1, pos); 101 EXPECT_EQ(0, valid_utf8_string(" \xe0\x80\x80", &pos)); 102 EXPECT_EQ(2, pos); 103 EXPECT_EQ(0, valid_utf8_string("\xf4\x80\x80\xcf", &pos)); 104 EXPECT_EQ(3, pos); 105 106 /* Stop in mid-sequence. */ 107 EXPECT_EQ(0, valid_utf8_string("\xf4\x80", &pos)); 108 EXPECT_EQ(2, pos); 109 110 /* Bad characters. */ 111 EXPECT_EQ(0, valid_utf8_string("\xff", &pos)); 112 EXPECT_EQ(0, pos); 113 EXPECT_EQ(0, valid_utf8_string("\xfe", &pos)); 114 EXPECT_EQ(0, pos); 115 116 /* Overlong representations of ASCII characters. */ 117 EXPECT_EQ(0, valid_utf8_string("This represents the / character with too" 118 "many bytes: \xe0\x80\xaf", &pos)); 119 EXPECT_EQ(53, pos); 120 EXPECT_EQ(0, valid_utf8_string("This represents the / character with too" 121 "many bytes: \xf0\x80\x80\xaf", &pos)); 122 EXPECT_EQ(53, pos); 123 124 /* Should not be interpreted as the ASCII NUL character. */ 125 EXPECT_EQ(0, valid_utf8_string("This represents the NUL character with too" 126 "many bytes: \xe0\x80\x80", &pos)); 127 EXPECT_EQ(55, pos); 128 EXPECT_EQ(0, valid_utf8_string("This represents the NUL character with too" 129 "many bytes: \xf0\x80\x80\x80", &pos)); 130 EXPECT_EQ(55, pos); 131 132 /* Single UTF-16 surrogates. */ 133 EXPECT_EQ(0, valid_utf8_string("\xed\xa0\x80", &pos)); 134 EXPECT_EQ(1, pos); 135 EXPECT_EQ(0, valid_utf8_string("\xed\xad\xbf", &pos)); 136 EXPECT_EQ(1, pos); 137 EXPECT_EQ(0, valid_utf8_string("\xed\xae\x80", &pos)); 138 EXPECT_EQ(1, pos); 139 EXPECT_EQ(0, valid_utf8_string("\xed\xaf\xbf", &pos)); 140 EXPECT_EQ(1, pos); 141 EXPECT_EQ(0, valid_utf8_string("\xed\xb0\x80", &pos)); 142 EXPECT_EQ(1, pos); 143 EXPECT_EQ(0, valid_utf8_string("\xed\xbe\x80", &pos)); 144 EXPECT_EQ(1, pos); 145 EXPECT_EQ(0, valid_utf8_string("\xed\xbf\xbf", &pos)); 146 EXPECT_EQ(1, pos); 147 } 148 149 } // namespace 150 151 int main(int argc, char **argv) { 152 ::testing::InitGoogleTest(&argc, argv); 153 return RUN_ALL_TESTS(); 154 } 155