Home | History | Annotate | Download | only in tests
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include <sys/cdefs.h>
     19 #if defined(__BIONIC__)
     20 #define HAVE_UCHAR 1
     21 #elif defined(__GLIBC__)
     22 #define HAVE_UCHAR __GLIBC_PREREQ(2, 16)
     23 #endif
     24 
     25 #include <gtest/gtest.h>
     26 
     27 #include <errno.h>
     28 #include <limits.h>
     29 #include <locale.h>
     30 #include <stdint.h>
     31 
     32 #if HAVE_UCHAR
     33 #include <uchar.h>
     34 #endif
     35 
     36 TEST(uchar, sizeof_uchar_t) {
     37 #if HAVE_UCHAR
     38   EXPECT_EQ(2U, sizeof(char16_t));
     39   EXPECT_EQ(4U, sizeof(char32_t));
     40 #else
     41   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     42 #endif
     43 }
     44 
     45 TEST(uchar, start_state) {
     46 #if HAVE_UCHAR
     47   char out[MB_LEN_MAX];
     48   mbstate_t ps;
     49 
     50   // Any non-initial state is invalid when calling c32rtomb.
     51   memset(&ps, 0, sizeof(ps));
     52   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps));
     53   EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(out, 0x00a2, &ps));
     54   EXPECT_EQ(EILSEQ, errno);
     55 
     56   // If the first argument to c32rtomb is NULL or the second is L'\0' the shift
     57   // state should be reset.
     58   memset(&ps, 0, sizeof(ps));
     59   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xc2", 1, &ps));
     60   EXPECT_EQ(1U, c32rtomb(NULL, 0x00a2, &ps));
     61   EXPECT_TRUE(mbsinit(&ps));
     62 
     63   memset(&ps, 0, sizeof(ps));
     64   EXPECT_EQ(static_cast<size_t>(-2), mbrtoc32(NULL, "\xf0\xa4", 1, &ps));
     65   EXPECT_EQ(1U, c32rtomb(out, L'\0', &ps));
     66   EXPECT_TRUE(mbsinit(&ps));
     67 #else
     68   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     69 #endif
     70 }
     71 
     72 TEST(uchar, c16rtomb_null_out) {
     73 #if HAVE_UCHAR
     74   EXPECT_EQ(1U, c16rtomb(NULL, L'\0', NULL));
     75   EXPECT_EQ(1U, c16rtomb(NULL, L'h', NULL));
     76 #else
     77   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     78 #endif
     79 }
     80 
     81 TEST(uchar, c16rtomb_null_char) {
     82 #if HAVE_UCHAR
     83   char bytes[MB_LEN_MAX];
     84   EXPECT_EQ(1U, c16rtomb(bytes, L'\0', NULL));
     85 #else
     86   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
     87 #endif
     88 }
     89 
     90 TEST(uchar, c16rtomb) {
     91 #if HAVE_UCHAR
     92   char bytes[MB_LEN_MAX];
     93 
     94   memset(bytes, 0, sizeof(bytes));
     95   EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL));
     96   EXPECT_EQ('h', bytes[0]);
     97 
     98   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
     99   uselocale(LC_GLOBAL_LOCALE);
    100 
    101   // 1-byte UTF-8.
    102   memset(bytes, 0, sizeof(bytes));
    103   EXPECT_EQ(1U, c16rtomb(bytes, L'h', NULL));
    104   EXPECT_EQ('h', bytes[0]);
    105   // 2-byte UTF-8.
    106   memset(bytes, 0, sizeof(bytes));
    107   EXPECT_EQ(2U, c16rtomb(bytes, 0x00a2, NULL));
    108   EXPECT_EQ('\xc2', bytes[0]);
    109   EXPECT_EQ('\xa2', bytes[1]);
    110   // 3-byte UTF-8.
    111   memset(bytes, 0, sizeof(bytes));
    112   EXPECT_EQ(3U, c16rtomb(bytes, 0x20ac, NULL));
    113   EXPECT_EQ('\xe2', bytes[0]);
    114   EXPECT_EQ('\x82', bytes[1]);
    115   EXPECT_EQ('\xac', bytes[2]);
    116 #else
    117   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    118 #endif
    119 }
    120 
    121 TEST(uchar, c16rtomb_surrogate) {
    122 #if HAVE_UCHAR
    123   char bytes[MB_LEN_MAX];
    124 
    125   memset(bytes, 0, sizeof(bytes));
    126   EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL));
    127   EXPECT_EQ(4U, c16rtomb(bytes, 0xdfcd, NULL));
    128   EXPECT_EQ('\xf4', bytes[0]);
    129   EXPECT_EQ('\x8a', bytes[1]);
    130   EXPECT_EQ('\xaf', bytes[2]);
    131   EXPECT_EQ('\x8d', bytes[3]);
    132 #else
    133   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    134 #endif
    135 }
    136 
    137 TEST(uchar, c16rtomb_invalid) {
    138 #if HAVE_UCHAR
    139   char bytes[MB_LEN_MAX];
    140 
    141   memset(bytes, 0, sizeof(bytes));
    142   EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdfcd, NULL));
    143 
    144   EXPECT_EQ(0U, c16rtomb(bytes, 0xdbea, NULL));
    145   EXPECT_EQ(static_cast<size_t>(-1), c16rtomb(bytes, 0xdbea, NULL));
    146 #else
    147   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    148 #endif
    149 }
    150 
    151 TEST(uchar, mbrtoc16_null) {
    152 #if HAVE_UCHAR
    153   ASSERT_EQ(0U, mbrtoc16(NULL, NULL, 0, NULL));
    154 #else
    155   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    156 #endif
    157 }
    158 
    159 TEST(uchar, mbrtoc16_zero_len) {
    160 #if HAVE_UCHAR
    161   char16_t out;
    162 
    163   out = L'x';
    164   ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL));
    165   ASSERT_EQ(L'x', out);
    166 
    167   ASSERT_EQ(0U, mbrtoc16(&out, "hello", 0, NULL));
    168   ASSERT_EQ(0U, mbrtoc16(&out, "", 0, NULL));
    169   ASSERT_EQ(1U, mbrtoc16(&out, "hello", 1, NULL));
    170   ASSERT_EQ(L'h', out);
    171 #else
    172   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    173 #endif
    174 }
    175 
    176 TEST(uchar, mbrtoc16) {
    177 #if HAVE_UCHAR
    178   char16_t out;
    179 
    180   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    181   uselocale(LC_GLOBAL_LOCALE);
    182 
    183   // 1-byte UTF-8.
    184   ASSERT_EQ(1U, mbrtoc16(&out, "abcdef", 6, NULL));
    185   ASSERT_EQ(L'a', out);
    186   // 2-byte UTF-8.
    187   ASSERT_EQ(2U, mbrtoc16(&out, "\xc2\xa2" "cdef", 6, NULL));
    188   ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
    189   // 3-byte UTF-8.
    190   ASSERT_EQ(3U, mbrtoc16(&out, "\xe2\x82\xac" "def", 6, NULL));
    191   ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
    192 #else
    193   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    194 #endif
    195 }
    196 
    197 TEST(uchar, mbrtoc16_surrogate) {
    198 #if HAVE_UCHAR
    199   char16_t out;
    200 
    201   ASSERT_EQ(static_cast<size_t>(-3),
    202             mbrtoc16(&out, "\xf4\x8a\xaf\x8d", 6, NULL));
    203   ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
    204   ASSERT_EQ(4U, mbrtoc16(&out, "\xf4\x8a\xaf\x8d" "ef", 6, NULL));
    205   ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
    206 #else
    207   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    208 #endif
    209 }
    210 
    211 TEST(uchar, mbrtoc16_reserved_range) {
    212 #if HAVE_UCHAR
    213   char16_t out;
    214   ASSERT_EQ(static_cast<size_t>(-1),
    215             mbrtoc16(&out, "\xf0\x80\xbf\xbf", 6, NULL));
    216 #else
    217   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    218 #endif
    219 }
    220 
    221 TEST(uchar, mbrtoc16_beyond_range) {
    222 #if HAVE_UCHAR
    223   char16_t out;
    224   ASSERT_EQ(static_cast<size_t>(-1),
    225             mbrtoc16(&out, "\xf5\x80\x80\x80", 6, NULL));
    226 #else
    227   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    228 #endif
    229 }
    230 
    231 #if HAVE_UCHAR
    232 void test_mbrtoc16_incomplete(mbstate_t* ps) {
    233   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    234   uselocale(LC_GLOBAL_LOCALE);
    235 
    236   char16_t out;
    237   // 2-byte UTF-8.
    238   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
    239   ASSERT_EQ(1U, mbrtoc16(&out, "\xa2" "cdef", 5, ps));
    240   ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
    241   ASSERT_TRUE(mbsinit(ps));
    242   // 3-byte UTF-8.
    243   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps));
    244   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps));
    245   ASSERT_EQ(1U, mbrtoc16(&out, "\xac" "def", 4, ps));
    246   ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
    247   ASSERT_TRUE(mbsinit(ps));
    248   // 4-byte UTF-8.
    249   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps));
    250   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps));
    251   ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, "\x8d" "ef", 3, ps));
    252   ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
    253   ASSERT_EQ(1U, mbrtoc16(&out, "\x80" "ef", 3, ps));
    254   ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
    255   ASSERT_TRUE(mbsinit(ps));
    256 
    257   // Invalid 2-byte
    258   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
    259   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps));
    260   ASSERT_EQ(EILSEQ, errno);
    261 }
    262 #endif
    263 
    264 TEST(uchar, mbrtoc16_incomplete) {
    265 #if HAVE_UCHAR
    266   mbstate_t ps;
    267   memset(&ps, 0, sizeof(ps));
    268 
    269   test_mbrtoc16_incomplete(&ps);
    270   test_mbrtoc16_incomplete(NULL);
    271 #else
    272   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    273 #endif
    274 }
    275 
    276 TEST(uchar, c32rtomb) {
    277 #if HAVE_UCHAR
    278   EXPECT_EQ(1U, c32rtomb(NULL, L'\0', NULL));
    279   EXPECT_EQ(1U, c32rtomb(NULL, L'h', NULL));
    280 
    281   char bytes[MB_LEN_MAX];
    282 
    283   memset(bytes, 1, sizeof(bytes));
    284   EXPECT_EQ(1U, c32rtomb(bytes, L'\0', NULL));
    285   EXPECT_EQ('\0', bytes[0]);
    286   EXPECT_EQ('\x01', bytes[1]);
    287 
    288   memset(bytes, 0, sizeof(bytes));
    289   EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL));
    290   EXPECT_EQ('h', bytes[0]);
    291 
    292   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    293   uselocale(LC_GLOBAL_LOCALE);
    294 
    295   // 1-byte UTF-8.
    296   memset(bytes, 0, sizeof(bytes));
    297   EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL));
    298   EXPECT_EQ('h', bytes[0]);
    299   // 2-byte UTF-8.
    300   memset(bytes, 0, sizeof(bytes));
    301   EXPECT_EQ(2U, c32rtomb(bytes, 0x00a2, NULL));
    302   EXPECT_EQ('\xc2', bytes[0]);
    303   EXPECT_EQ('\xa2', bytes[1]);
    304   // 3-byte UTF-8.
    305   memset(bytes, 0, sizeof(bytes));
    306   EXPECT_EQ(3U, c32rtomb(bytes, 0x20ac, NULL));
    307   EXPECT_EQ('\xe2', bytes[0]);
    308   EXPECT_EQ('\x82', bytes[1]);
    309   EXPECT_EQ('\xac', bytes[2]);
    310   // 4-byte UTF-8.
    311   memset(bytes, 0, sizeof(bytes));
    312   EXPECT_EQ(4U, c32rtomb(bytes, 0x24b62, NULL));
    313   EXPECT_EQ('\xf0', bytes[0]);
    314   EXPECT_EQ('\xa4', bytes[1]);
    315   EXPECT_EQ('\xad', bytes[2]);
    316   EXPECT_EQ('\xa2', bytes[3]);
    317   // Invalid code point.
    318   EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(bytes, 0xffffffff, NULL));
    319   EXPECT_EQ(EILSEQ, errno);
    320 #else
    321   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    322 #endif
    323 }
    324 
    325 TEST(uchar, mbrtoc32) {
    326 #if HAVE_UCHAR
    327   char32_t out[8];
    328 
    329   out[0] = L'x';
    330   ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL));
    331   ASSERT_EQ(static_cast<char32_t>(L'x'), out[0]);
    332 
    333   ASSERT_EQ(0U, mbrtoc32(out, "hello", 0, NULL));
    334   ASSERT_EQ(0U, mbrtoc32(out, "", 0, NULL));
    335   ASSERT_EQ(1U, mbrtoc32(out, "hello", 1, NULL));
    336   ASSERT_EQ(static_cast<char32_t>(L'h'), out[0]);
    337 
    338   ASSERT_EQ(0U, mbrtoc32(NULL, "hello", 0, NULL));
    339   ASSERT_EQ(0U, mbrtoc32(NULL, "", 0, NULL));
    340   ASSERT_EQ(1U, mbrtoc32(NULL, "hello", 1, NULL));
    341 
    342   ASSERT_EQ(0U, mbrtoc32(NULL, NULL, 0, NULL));
    343 
    344   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    345   uselocale(LC_GLOBAL_LOCALE);
    346 
    347   // 1-byte UTF-8.
    348   ASSERT_EQ(1U, mbrtoc32(out, "abcdef", 6, NULL));
    349   ASSERT_EQ(static_cast<char32_t>(L'a'), out[0]);
    350   // 2-byte UTF-8.
    351   ASSERT_EQ(2U, mbrtoc32(out, "\xc2\xa2" "cdef", 6, NULL));
    352   ASSERT_EQ(static_cast<char32_t>(0x00a2), out[0]);
    353   // 3-byte UTF-8.
    354   ASSERT_EQ(3U, mbrtoc32(out, "\xe2\x82\xac" "def", 6, NULL));
    355   ASSERT_EQ(static_cast<char32_t>(0x20ac), out[0]);
    356   // 4-byte UTF-8.
    357   ASSERT_EQ(4U, mbrtoc32(out, "\xf0\xa4\xad\xa2" "ef", 6, NULL));
    358   ASSERT_EQ(static_cast<char32_t>(0x24b62), out[0]);
    359 #if defined(__BIONIC__) // glibc allows this.
    360   // Illegal 5-byte UTF-8.
    361   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf8\xa1\xa2\xa3\xa4" "f", 6, NULL));
    362   ASSERT_EQ(EILSEQ, errno);
    363 #endif
    364   // Illegal over-long sequence.
    365   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(out, "\xf0\x82\x82\xac" "ef", 6, NULL));
    366   ASSERT_EQ(EILSEQ, errno);
    367 #else
    368   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    369 #endif
    370 }
    371 
    372 #if HAVE_UCHAR
    373 void test_mbrtoc32_incomplete(mbstate_t* ps) {
    374   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
    375   uselocale(LC_GLOBAL_LOCALE);
    376 
    377   char32_t out;
    378   // 2-byte UTF-8.
    379   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
    380   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "cdef", 5, ps));
    381   ASSERT_EQ(static_cast<char32_t>(0x00a2), out);
    382   ASSERT_TRUE(mbsinit(ps));
    383   // 3-byte UTF-8.
    384   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps));
    385   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps));
    386   ASSERT_EQ(1U, mbrtoc32(&out, "\xac" "def", 4, ps));
    387   ASSERT_EQ(static_cast<char32_t>(0x20ac), out);
    388   ASSERT_TRUE(mbsinit(ps));
    389   // 4-byte UTF-8.
    390   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps));
    391   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps));
    392   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "ef", 3, ps));
    393   ASSERT_EQ(static_cast<char32_t>(0x24b62), out);
    394   ASSERT_TRUE(mbsinit(ps));
    395 
    396   // Invalid 2-byte
    397   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
    398   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps));
    399   ASSERT_EQ(EILSEQ, errno);
    400 }
    401 #endif
    402 
    403 TEST(uchar, mbrtoc32_incomplete) {
    404 #if HAVE_UCHAR
    405   mbstate_t ps;
    406   memset(&ps, 0, sizeof(ps));
    407 
    408   test_mbrtoc32_incomplete(&ps);
    409   test_mbrtoc32_incomplete(NULL);
    410 #else
    411   GTEST_LOG_(INFO) << "uchar.h is unavailable.\n";
    412 #endif
    413 }
    414