Home | History | Annotate | Download | only in tests
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include <sys/cdefs.h>
     19 #if defined(__BIONIC__)
     20 #define HAVE_UCHAR 1
     21 #elif defined(__GLIBC__)
     22 #include <features.h>
     23 #define HAVE_UCHAR __GLIBC_PREREQ(2, 16)
     24 #endif
     25 
     26 #include <gtest/gtest.h>
     27 
     28 #include <errno.h>
     29 #include <limits.h>
     30 #include <locale.h>
     31 #include <stdint.h>
     32 
     33 #if HAVE_UCHAR
     34 #include <uchar.h>
     35 #endif
     36 
     37 TEST(uchar, sizeof_uchar_t) {
     38 #if HAVE_UCHAR
     39   EXPECT_EQ(2U, sizeof(char16_t));
     40   EXPECT_EQ(4U, sizeof(char32_t));
     41 #else
     42   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     43 #endif
     44 }
     45 
     46 TEST(uchar, start_state) {
     47 #if HAVE_UCHAR
     48   char out[MB_LEN_MAX];
     49   mbstate_t ps;
     50 
     51   // Any non-initial state is invalid when calling c32rtomb.
     52   memset(&ps, 0, sizeof(ps));
     53   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps));
     54   EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps));
     55   EXPECT_EQ(EILSEQ, errno);
     56 
     57   // If the first argument to c32rtomb is NULL or the second is L'\0' the shift
     58   // state should be reset.
     59   memset(&ps, 0, sizeof(ps));
     60   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps));
     61   EXPECT_EQ(1U, c32rtomb(NULL, 0x00a2, &ps));
     62   EXPECT_TRUE(mbsinit(&ps));
     63 
     64   memset(&ps, 0, sizeof(ps));
     65   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xf0\xa4", 1, &ps));
     66   EXPECT_EQ(1U, c32rtomb(out, L'\0', &ps));
     67   EXPECT_TRUE(mbsinit(&ps));
     68 #else
     69   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     70 #endif
     71 }
     72 
     73 TEST(uchar, c16rtomb_null_out) {
     74 #if HAVE_UCHAR
     75   EXPECT_EQ(1U, c16rtomb(NULL, L'\0', NULL));
     76   EXPECT_EQ(1U, c16rtomb(NULL, L'h', NULL));
     77 #else
     78   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     79 #endif
     80 }
     81 
     82 TEST(uchar, c16rtomb_null_char) {
     83 #if HAVE_UCHAR
     84   char bytes[MB_LEN_MAX];
     85   EXPECT_EQ(1U, c16rtomb(bytes, L'\0', NULL));
     86 #else
     87   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     88 #endif
     89 }
     90 
     91 TEST(uchar, c16rtomb) {
     92 #if HAVE_UCHAR
     93   char bytes[MB_LEN_MAX];
     94 
     95   memset(bytes, 0, sizeof(bytes));
     96   EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL));
     97   EXPECT_EQ('h', bytes[0]);
     98 
     99   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    100   uselocale(LC_GLOBAL_LOCALE);
    101 
    102   // 1-byte UTF-8.
    103   memset(bytes, 0, sizeof(bytes));
    104   EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL));
    105   EXPECT_EQ('h', bytes[0]);
    106   // 2-byte UTF-8.
    107   memset(bytes, 0, sizeof(bytes));
    108   EXPECT_EQ(2U, c16rtomb(bytes, 0x00a2, NULL));
    109   EXPECT_EQ('\xc2', bytes[0]);
    110   EXPECT_EQ('\xa2', bytes[1]);
    111   // 3-byte UTF-8.
    112   memset(bytes, 0, sizeof(bytes));
    113   EXPECT_EQ(3U, c16rtomb(bytes, 0x20ac, NULL));
    114   EXPECT_EQ('\xe2', bytes[0]);
    115   EXPECT_EQ('\x82', bytes[1]);
    116   EXPECT_EQ('\xac', bytes[2]);
    117 #else
    118   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    119 #endif
    120 }
    121 
    122 TEST(uchar, c16rtomb_surrogate) {
    123 #if HAVE_UCHAR
    124   char bytes[MB_LEN_MAX];
    125 
    126   memset(bytes, 0, sizeof(bytes));
    127   EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL));
    128   EXPECT_EQ(4U, c16rtomb(bytes, 0xdfcd, NULL));
    129   EXPECT_EQ('\xf4', bytes[0]);
    130   EXPECT_EQ('\x8a', bytes[1]);
    131   EXPECT_EQ('\xaf', bytes[2]);
    132   EXPECT_EQ('\x8d', bytes[3]);
    133 #else
    134   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    135 #endif
    136 }
    137 
    138 TEST(uchar, c16rtomb_invalid) {
    139 #if HAVE_UCHAR
    140   char bytes[MB_LEN_MAX];
    141 
    142   memset(bytes, 0, sizeof(bytes));
    143   EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdfcd, NULL));
    144 
    145   EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL));
    146   EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdbea, NULL));
    147 #else
    148   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    149 #endif
    150 }
    151 
    152 TEST(uchar, mbrtoc16_null) {
    153 #if HAVE_UCHAR
    154   ASSERT_EQ(0U, mbrtoc16(NULL, NULL, 0, NULL));
    155 #else
    156   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    157 #endif
    158 }
    159 
    160 TEST(uchar, mbrtoc16_zero_len) {
    161 #if HAVE_UCHAR
    162   char16_t out;
    163 
    164   out = L'x';
    165   ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL));
    166   ASSERT_EQ(L'x', out);
    167 
    168   ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL));
    169   ASSERT_EQ(0U, mbrtoc16(&out, "", 0, NULL));
    170   ASSERT_EQ(1U, mbrtoc16(&out, "hello", 1, NULL));
    171   ASSERT_EQ(L'h', out);
    172 #else
    173   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    174 #endif
    175 }
    176 
    177 TEST(uchar, mbrtoc16) {
    178 #if HAVE_UCHAR
    179   char16_t out;
    180 
    181   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    182   uselocale(LC_GLOBAL_LOCALE);
    183 
    184   // 1-byte UTF-8.
    185   ASSERT_EQ(1U, mbrtoc16(&out, "abcdef", 6, NULL));
    186   ASSERT_EQ(L'a', out);
    187   // 2-byte UTF-8.
    188   ASSERT_EQ(2U, mbrtoc16(&out, "\xc2\xa2" "cdef", 6, NULL));
    189   ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
    190   // 3-byte UTF-8.
    191   ASSERT_EQ(3U, mbrtoc16(&out, "\xe2\x82\xac" "def", 6, NULL));
    192   ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
    193 #else
    194   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    195 #endif
    196 }
    197 
    198 TEST(uchar, mbrtoc16_surrogate) {
    199 #if HAVE_UCHAR
    200   char16_t out;
    201 
    202   ASSERT_EQ(static_cast<size_t>(-3),
    203             mbrtoc16(&out, "\xf4\x8a\xaf\x8d", 6, NULL));
    204   ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
    205   ASSERT_EQ(4U, mbrtoc16(&out, "\xf4\x8a\xaf\x8d" "ef", 6, NULL));
    206   ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
    207 #else
    208   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    209 #endif
    210 }
    211 
    212 TEST(uchar, mbrtoc16_reserved_range) {
    213 #if HAVE_UCHAR
    214   char16_t out;
    215   ASSERT_EQ(static_cast<size_t>(-1),
    216             mbrtoc16(&out, "\xf0\x80\xbf\xbf", 6, NULL));
    217 #else
    218   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    219 #endif
    220 }
    221 
    222 TEST(uchar, mbrtoc16_beyond_range) {
    223 #if HAVE_UCHAR
    224   char16_t out;
    225   ASSERT_EQ(static_cast<size_t>(-1),
    226             mbrtoc16(&out, "\xf5\x80\x80\x80", 6, NULL));
    227 #else
    228   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    229 #endif
    230 }
    231 
    232 #if HAVE_UCHAR
    233 void test_mbrtoc16_incomplete(mbstate_t* ps) {
    234   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    235   uselocale(LC_GLOBAL_LOCALE);
    236 
    237   char16_t out;
    238   // 2-byte UTF-8.
    239   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
    240   ASSERT_EQ(1U, mbrtoc16(&out, "\xa2" "cdef", 5, ps));
    241   ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
    242   ASSERT_TRUE(mbsinit(ps));
    243   // 3-byte UTF-8.
    244   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps));
    245   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps));
    246   ASSERT_EQ(1U, mbrtoc16(&out, "\xac" "def", 4, ps));
    247   ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
    248   ASSERT_TRUE(mbsinit(ps));
    249   // 4-byte UTF-8.
    250   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps));
    251   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps));
    252   ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, "\x8d" "ef", 3, ps));
    253   ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
    254   ASSERT_EQ(1U, mbrtoc16(&out, "\x80" "ef", 3, ps));
    255   ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
    256   ASSERT_TRUE(mbsinit(ps));
    257 
    258   // Invalid 2-byte
    259   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
    260   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps));
    261   ASSERT_EQ(EILSEQ, errno);
    262 }
    263 #endif
    264 
    265 TEST(uchar, mbrtoc16_incomplete) {
    266 #if HAVE_UCHAR
    267   mbstate_t ps;
    268   memset(&ps, 0, sizeof(ps));
    269 
    270   test_mbrtoc16_incomplete(&ps);
    271   test_mbrtoc16_incomplete(NULL);
    272 #else
    273   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    274 #endif
    275 }
    276 
    277 TEST(uchar, c32rtomb) {
    278 #if HAVE_UCHAR
    279   EXPECT_EQ(1U, c32rtomb(NULL, L'\0', NULL));
    280   EXPECT_EQ(1U, c32rtomb(NULL, L'h', NULL));
    281 
    282   char bytes[MB_LEN_MAX];
    283 
    284   EXPECT_EQ(1U, c32rtomb(bytes, L'\0', NULL));
    285 
    286   memset(bytes, 0, sizeof(bytes));
    287   EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL));
    288   EXPECT_EQ('h', bytes[0]);
    289 
    290   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    291   uselocale(LC_GLOBAL_LOCALE);
    292 
    293   // 1-byte UTF-8.
    294   memset(bytes, 0, sizeof(bytes));
    295   EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL));
    296   EXPECT_EQ('h', bytes[0]);
    297   // 2-byte UTF-8.
    298   memset(bytes, 0, sizeof(bytes));
    299   EXPECT_EQ(2U, c32rtomb(bytes, 0x00a2, NULL));
    300   EXPECT_EQ('\xc2', bytes[0]);
    301   EXPECT_EQ('\xa2', bytes[1]);
    302   // 3-byte UTF-8.
    303   memset(bytes, 0, sizeof(bytes));
    304   EXPECT_EQ(3U, c32rtomb(bytes, 0x20ac, NULL));
    305   EXPECT_EQ('\xe2', bytes[0]);
    306   EXPECT_EQ('\x82', bytes[1]);
    307   EXPECT_EQ('\xac', bytes[2]);
    308   // 4-byte UTF-8.
    309   memset(bytes, 0, sizeof(bytes));
    310   EXPECT_EQ(4U, c32rtomb(bytes, 0x24b62, NULL));
    311   EXPECT_EQ('\xf0', bytes[0]);
    312   EXPECT_EQ('\xa4', bytes[1]);
    313   EXPECT_EQ('\xad', bytes[2]);
    314   EXPECT_EQ('\xa2', bytes[3]);
    315   // Invalid code point.
    316   EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(bytes, 0xffffffff, NULL));
    317   EXPECT_EQ(EILSEQ, errno);
    318 #else
    319   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    320 #endif
    321 }
    322 
    323 TEST(uchar, mbrtoc32) {
    324 #if HAVE_UCHAR
    325   char32_t out[8];
    326 
    327   out[0] = L'x';
    328   ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL));
    329   ASSERT_EQ(static_cast<char32_t>(L'x'), out[0]);
    330 
    331   ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL));
    332   ASSERT_EQ(0U, mbrtoc32(out, "", 0, NULL));
    333   ASSERT_EQ(1U, mbrtoc32(out, "hello", 1, NULL));
    334   ASSERT_EQ(static_cast<char32_t>(L'h'), out[0]);
    335 
    336   ASSERT_EQ(0U, mbrtoc32(NULL, "hello", 0, NULL));
    337   ASSERT_EQ(0U, mbrtoc32(NULL, "", 0, NULL));
    338   ASSERT_EQ(1U, mbrtoc32(NULL, "hello", 1, NULL));
    339 
    340   ASSERT_EQ(0U, mbrtoc32(NULL, NULL, 0, NULL));
    341 
    342   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    343   uselocale(LC_GLOBAL_LOCALE);
    344 
    345   // 1-byte UTF-8.
    346   ASSERT_EQ(1U, mbrtoc32(out, "abcdef", 6, NULL));
    347   ASSERT_EQ(static_cast<char32_t>(L'a'), out[0]);
    348   // 2-byte UTF-8.
    349   ASSERT_EQ(2U, mbrtoc32(out, "\xc2\xa2" "cdef", 6, NULL));
    350   ASSERT_EQ(static_cast<char32_t>(0x00a2), out[0]);
    351   // 3-byte UTF-8.
    352   ASSERT_EQ(3U, mbrtoc32(out, "\xe2\x82\xac" "def", 6, NULL));
    353   ASSERT_EQ(static_cast<char32_t>(0x20ac), out[0]);
    354   // 4-byte UTF-8.
    355   ASSERT_EQ(4U, mbrtoc32(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL));
    356   ASSERT_EQ(static_cast<char32_t>(0x24b62), out[0]);
    357 #if defined(__BIONIC__) // glibc allows this.
    358   // Illegal 5-byte UTF-8.
    359   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL));
    360   ASSERT_EQ(EILSEQ, errno);
    361 #endif
    362   // Illegal over-long sequence.
    363   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf0\x82\x82\xac" "ef", 6, NULL));
    364   ASSERT_EQ(EILSEQ, errno);
    365 #else
    366   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    367 #endif
    368 }
    369 
    370 #if HAVE_UCHAR
    371 void test_mbrtoc32_incomplete(mbstate_t* ps) {
    372   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    373   uselocale(LC_GLOBAL_LOCALE);
    374 
    375   char32_t out;
    376   // 2-byte UTF-8.
    377   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
    378   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "cdef", 5, ps));
    379   ASSERT_EQ(static_cast<char32_t>(0x00a2), out);
    380   ASSERT_TRUE(mbsinit(ps));
    381   // 3-byte UTF-8.
    382   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps));
    383   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps));
    384   ASSERT_EQ(1U, mbrtoc32(&out, "\xac" "def", 4, ps));
    385   ASSERT_EQ(static_cast<char32_t>(0x20ac), out);
    386   ASSERT_TRUE(mbsinit(ps));
    387   // 4-byte UTF-8.
    388   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps));
    389   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps));
    390   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "ef", 3, ps));
    391   ASSERT_EQ(static_cast<char32_t>(0x24b62), out);
    392   ASSERT_TRUE(mbsinit(ps));
    393 
    394   // Invalid 2-byte
    395   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
    396   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps));
    397   ASSERT_EQ(EILSEQ, errno);
    398 }
    399 #endif
    400 
    401 TEST(uchar, mbrtoc32_incomplete) {
    402 #if HAVE_UCHAR
    403   mbstate_t ps;
    404   memset(&ps, 0, sizeof(ps));
    405 
    406   test_mbrtoc32_incomplete(&ps);
    407   test_mbrtoc32_incomplete(NULL);
    408 #else
    409   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    410 #endif
    411 }
    412 
    413