1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include <sys/cdefs.h> 19 #if defined(__BIONIC__) 20 #define HAVE_UCHAR 1 21 #elif defined(__GLIBC__) 22 #define HAVE_UCHAR __GLIBC_PREREQ(2, 16) 23 #endif 24 25 #include <gtest/gtest.h> 26 27 #include <errno.h> 28 #include <limits.h> 29 #include <locale.h> 30 #include <stdint.h> 31 32 #if HAVE_UCHAR 33 #include <uchar.h> 34 #endif 35 36 TEST(uchar, sizeof_uchar_t) { 37 #if HAVE_UCHAR 38 EXPECT_EQ(2U, sizeof(char16_t)); 39 EXPECT_EQ(4U, sizeof(char32_t)); 40 #else 41 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 42 #endif 43 } 44 45 TEST(uchar, start_state) { 46 #if HAVE_UCHAR 47 char out[MB_LEN_MAX]; 48 mbstate_t ps; 49 50 // Any non-initial state is invalid when calling c32rtomb. 51 memset(&ps, 0, sizeof(ps)); 52 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps)); 53 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps)); 54 EXPECT_EQ(EILSEQ, errno); 55 56 // If the first argument to c32rtomb is NULL or the second is L'\0' the shift 57 // state should be reset. 58 memset(&ps, 0, sizeof(ps)); 59 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps)); 60 EXPECT_EQ(1U, c32rtomb(NULL, 0x00a2, &ps)); 61 EXPECT_TRUE(mbsinit(&ps)); 62 63 memset(&ps, 0, sizeof(ps)); 64 EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xf0\xa4", 1, &ps)); 65 EXPECT_EQ(1U, c32rtomb(out, L'\0', &ps)); 66 EXPECT_TRUE(mbsinit(&ps)); 67 #else 68 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 69 #endif 70 } 71 72 TEST(uchar, c16rtomb_null_out) { 73 #if HAVE_UCHAR 74 EXPECT_EQ(1U, c16rtomb(NULL, L'\0', NULL)); 75 EXPECT_EQ(1U, c16rtomb(NULL, L'h', NULL)); 76 #else 77 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 78 #endif 79 } 80 81 TEST(uchar, c16rtomb_null_char) { 82 #if HAVE_UCHAR 83 char bytes[MB_LEN_MAX]; 84 EXPECT_EQ(1U, c16rtomb(bytes, L'\0', NULL)); 85 #else 86 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 87 #endif 88 } 89 90 TEST(uchar, c16rtomb) { 91 #if HAVE_UCHAR 92 char bytes[MB_LEN_MAX]; 93 94 memset(bytes, 0, sizeof(bytes)); 95 EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL)); 96 EXPECT_EQ('h', bytes[0]); 97 98 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 99 uselocale(LC_GLOBAL_LOCALE); 100 101 // 1-byte UTF-8. 102 memset(bytes, 0, sizeof(bytes)); 103 EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL)); 104 EXPECT_EQ('h', bytes[0]); 105 // 2-byte UTF-8. 106 memset(bytes, 0, sizeof(bytes)); 107 EXPECT_EQ(2U, c16rtomb(bytes, 0x00a2, NULL)); 108 EXPECT_EQ('\xc2', bytes[0]); 109 EXPECT_EQ('\xa2', bytes[1]); 110 // 3-byte UTF-8. 111 memset(bytes, 0, sizeof(bytes)); 112 EXPECT_EQ(3U, c16rtomb(bytes, 0x20ac, NULL)); 113 EXPECT_EQ('\xe2', bytes[0]); 114 EXPECT_EQ('\x82', bytes[1]); 115 EXPECT_EQ('\xac', bytes[2]); 116 #else 117 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 118 #endif 119 } 120 121 TEST(uchar, c16rtomb_surrogate) { 122 #if HAVE_UCHAR 123 char bytes[MB_LEN_MAX]; 124 125 memset(bytes, 0, sizeof(bytes)); 126 EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL)); 127 EXPECT_EQ(4U, c16rtomb(bytes, 0xdfcd, NULL)); 128 EXPECT_EQ('\xf4', bytes[0]); 129 EXPECT_EQ('\x8a', bytes[1]); 130 EXPECT_EQ('\xaf', bytes[2]); 131 EXPECT_EQ('\x8d', bytes[3]); 132 #else 133 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 134 #endif 135 } 136 137 TEST(uchar, c16rtomb_invalid) { 138 #if HAVE_UCHAR 139 char bytes[MB_LEN_MAX]; 140 141 memset(bytes, 0, sizeof(bytes)); 142 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdfcd, NULL)); 143 144 EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL)); 145 EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdbea, NULL)); 146 #else 147 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 148 #endif 149 } 150 151 TEST(uchar, mbrtoc16_null) { 152 #if HAVE_UCHAR 153 ASSERT_EQ(0U, mbrtoc16(NULL, NULL, 0, NULL)); 154 #else 155 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 156 #endif 157 } 158 159 TEST(uchar, mbrtoc16_zero_len) { 160 #if HAVE_UCHAR 161 char16_t out; 162 163 out = L'x'; 164 ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL)); 165 ASSERT_EQ(L'x', out); 166 167 ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL)); 168 ASSERT_EQ(0U, mbrtoc16(&out, "", 0, NULL)); 169 ASSERT_EQ(1U, mbrtoc16(&out, "hello", 1, NULL)); 170 ASSERT_EQ(L'h', out); 171 #else 172 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 173 #endif 174 } 175 176 TEST(uchar, mbrtoc16) { 177 #if HAVE_UCHAR 178 char16_t out; 179 180 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 181 uselocale(LC_GLOBAL_LOCALE); 182 183 // 1-byte UTF-8. 184 ASSERT_EQ(1U, mbrtoc16(&out, "abcdef", 6, NULL)); 185 ASSERT_EQ(L'a', out); 186 // 2-byte UTF-8. 187 ASSERT_EQ(2U, mbrtoc16(&out, "\xc2\xa2" "cdef", 6, NULL)); 188 ASSERT_EQ(static_cast<char16_t>(0x00a2), out); 189 // 3-byte UTF-8. 190 ASSERT_EQ(3U, mbrtoc16(&out, "\xe2\x82\xac" "def", 6, NULL)); 191 ASSERT_EQ(static_cast<char16_t>(0x20ac), out); 192 #else 193 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 194 #endif 195 } 196 197 TEST(uchar, mbrtoc16_surrogate) { 198 #if HAVE_UCHAR 199 char16_t out; 200 201 ASSERT_EQ(static_cast<size_t>(-3), 202 mbrtoc16(&out, "\xf4\x8a\xaf\x8d", 6, NULL)); 203 ASSERT_EQ(static_cast<char16_t>(0xdbea), out); 204 ASSERT_EQ(4U, mbrtoc16(&out, "\xf4\x8a\xaf\x8d" "ef", 6, NULL)); 205 ASSERT_EQ(static_cast<char16_t>(0xdfcd), out); 206 #else 207 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 208 #endif 209 } 210 211 TEST(uchar, mbrtoc16_reserved_range) { 212 #if HAVE_UCHAR 213 char16_t out; 214 ASSERT_EQ(static_cast<size_t>(-1), 215 mbrtoc16(&out, "\xf0\x80\xbf\xbf", 6, NULL)); 216 #else 217 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 218 #endif 219 } 220 221 TEST(uchar, mbrtoc16_beyond_range) { 222 #if HAVE_UCHAR 223 char16_t out; 224 ASSERT_EQ(static_cast<size_t>(-1), 225 mbrtoc16(&out, "\xf5\x80\x80\x80", 6, NULL)); 226 #else 227 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 228 #endif 229 } 230 231 #if HAVE_UCHAR 232 void test_mbrtoc16_incomplete(mbstate_t* ps) { 233 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 234 uselocale(LC_GLOBAL_LOCALE); 235 236 char16_t out; 237 // 2-byte UTF-8. 238 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); 239 ASSERT_EQ(1U, mbrtoc16(&out, "\xa2" "cdef", 5, ps)); 240 ASSERT_EQ(static_cast<char16_t>(0x00a2), out); 241 ASSERT_TRUE(mbsinit(ps)); 242 // 3-byte UTF-8. 243 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps)); 244 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps)); 245 ASSERT_EQ(1U, mbrtoc16(&out, "\xac" "def", 4, ps)); 246 ASSERT_EQ(static_cast<char16_t>(0x20ac), out); 247 ASSERT_TRUE(mbsinit(ps)); 248 // 4-byte UTF-8. 249 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps)); 250 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps)); 251 ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, "\x8d" "ef", 3, ps)); 252 ASSERT_EQ(static_cast<char16_t>(0xdbea), out); 253 ASSERT_EQ(1U, mbrtoc16(&out, "\x80" "ef", 3, ps)); 254 ASSERT_EQ(static_cast<char16_t>(0xdfcd), out); 255 ASSERT_TRUE(mbsinit(ps)); 256 257 // Invalid 2-byte 258 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps)); 259 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps)); 260 ASSERT_EQ(EILSEQ, errno); 261 } 262 #endif 263 264 TEST(uchar, mbrtoc16_incomplete) { 265 #if HAVE_UCHAR 266 mbstate_t ps; 267 memset(&ps, 0, sizeof(ps)); 268 269 test_mbrtoc16_incomplete(&ps); 270 test_mbrtoc16_incomplete(NULL); 271 #else 272 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 273 #endif 274 } 275 276 TEST(uchar, c32rtomb) { 277 #if HAVE_UCHAR 278 EXPECT_EQ(1U, c32rtomb(NULL, L'\0', NULL)); 279 EXPECT_EQ(1U, c32rtomb(NULL, L'h', NULL)); 280 281 char bytes[MB_LEN_MAX]; 282 283 EXPECT_EQ(1U, c32rtomb(bytes, L'\0', NULL)); 284 285 memset(bytes, 0, sizeof(bytes)); 286 EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL)); 287 EXPECT_EQ('h', bytes[0]); 288 289 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 290 uselocale(LC_GLOBAL_LOCALE); 291 292 // 1-byte UTF-8. 293 memset(bytes, 0, sizeof(bytes)); 294 EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL)); 295 EXPECT_EQ('h', bytes[0]); 296 // 2-byte UTF-8. 297 memset(bytes, 0, sizeof(bytes)); 298 EXPECT_EQ(2U, c32rtomb(bytes, 0x00a2, NULL)); 299 EXPECT_EQ('\xc2', bytes[0]); 300 EXPECT_EQ('\xa2', bytes[1]); 301 // 3-byte UTF-8. 302 memset(bytes, 0, sizeof(bytes)); 303 EXPECT_EQ(3U, c32rtomb(bytes, 0x20ac, NULL)); 304 EXPECT_EQ('\xe2', bytes[0]); 305 EXPECT_EQ('\x82', bytes[1]); 306 EXPECT_EQ('\xac', bytes[2]); 307 // 4-byte UTF-8. 308 memset(bytes, 0, sizeof(bytes)); 309 EXPECT_EQ(4U, c32rtomb(bytes, 0x24b62, NULL)); 310 EXPECT_EQ('\xf0', bytes[0]); 311 EXPECT_EQ('\xa4', bytes[1]); 312 EXPECT_EQ('\xad', bytes[2]); 313 EXPECT_EQ('\xa2', bytes[3]); 314 // Invalid code point. 315 EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(bytes, 0xffffffff, NULL)); 316 EXPECT_EQ(EILSEQ, errno); 317 #else 318 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 319 #endif 320 } 321 322 TEST(uchar, mbrtoc32) { 323 #if HAVE_UCHAR 324 char32_t out[8]; 325 326 out[0] = L'x'; 327 ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL)); 328 ASSERT_EQ(static_cast<char32_t>(L'x'), out[0]); 329 330 ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL)); 331 ASSERT_EQ(0U, mbrtoc32(out, "", 0, NULL)); 332 ASSERT_EQ(1U, mbrtoc32(out, "hello", 1, NULL)); 333 ASSERT_EQ(static_cast<char32_t>(L'h'), out[0]); 334 335 ASSERT_EQ(0U, mbrtoc32(NULL, "hello", 0, NULL)); 336 ASSERT_EQ(0U, mbrtoc32(NULL, "", 0, NULL)); 337 ASSERT_EQ(1U, mbrtoc32(NULL, "hello", 1, NULL)); 338 339 ASSERT_EQ(0U, mbrtoc32(NULL, NULL, 0, NULL)); 340 341 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 342 uselocale(LC_GLOBAL_LOCALE); 343 344 // 1-byte UTF-8. 345 ASSERT_EQ(1U, mbrtoc32(out, "abcdef", 6, NULL)); 346 ASSERT_EQ(static_cast<char32_t>(L'a'), out[0]); 347 // 2-byte UTF-8. 348 ASSERT_EQ(2U, mbrtoc32(out, "\xc2\xa2" "cdef", 6, NULL)); 349 ASSERT_EQ(static_cast<char32_t>(0x00a2), out[0]); 350 // 3-byte UTF-8. 351 ASSERT_EQ(3U, mbrtoc32(out, "\xe2\x82\xac" "def", 6, NULL)); 352 ASSERT_EQ(static_cast<char32_t>(0x20ac), out[0]); 353 // 4-byte UTF-8. 354 ASSERT_EQ(4U, mbrtoc32(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL)); 355 ASSERT_EQ(static_cast<char32_t>(0x24b62), out[0]); 356 #if defined(__BIONIC__) // glibc allows this. 357 // Illegal 5-byte UTF-8. 358 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL)); 359 ASSERT_EQ(EILSEQ, errno); 360 #endif 361 // Illegal over-long sequence. 362 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf0\x82\x82\xac" "ef", 6, NULL)); 363 ASSERT_EQ(EILSEQ, errno); 364 #else 365 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 366 #endif 367 } 368 369 #if HAVE_UCHAR 370 void test_mbrtoc32_incomplete(mbstate_t* ps) { 371 ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8")); 372 uselocale(LC_GLOBAL_LOCALE); 373 374 char32_t out; 375 // 2-byte UTF-8. 376 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); 377 ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "cdef", 5, ps)); 378 ASSERT_EQ(static_cast<char32_t>(0x00a2), out); 379 ASSERT_TRUE(mbsinit(ps)); 380 // 3-byte UTF-8. 381 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps)); 382 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps)); 383 ASSERT_EQ(1U, mbrtoc32(&out, "\xac" "def", 4, ps)); 384 ASSERT_EQ(static_cast<char32_t>(0x20ac), out); 385 ASSERT_TRUE(mbsinit(ps)); 386 // 4-byte UTF-8. 387 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps)); 388 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps)); 389 ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "ef", 3, ps)); 390 ASSERT_EQ(static_cast<char32_t>(0x24b62), out); 391 ASSERT_TRUE(mbsinit(ps)); 392 393 // Invalid 2-byte 394 ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps)); 395 ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps)); 396 ASSERT_EQ(EILSEQ, errno); 397 } 398 #endif 399 400 TEST(uchar, mbrtoc32_incomplete) { 401 #if HAVE_UCHAR 402 mbstate_t ps; 403 memset(&ps, 0, sizeof(ps)); 404 405 test_mbrtoc32_incomplete(&ps); 406 test_mbrtoc32_incomplete(NULL); 407 #else 408 GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; 409 #endif 410 } 411 412