Home | History | Annotate | Download | only in tests
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <gtest/gtest.h>
     18 #include <unicode/utf.h>
     19 #include <cstdlib>
     20 
     21 // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
     22 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
     23 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
     24         size_t* offset) {
     25     size_t input_ix = 0;
     26     size_t output_ix = 0;
     27     bool seen_offset = false;
     28 
     29     while (src[input_ix] != 0) {
     30         switch (src[input_ix]) {
     31         case '\'':
     32             // single ASCII char
     33             ASSERT_LT(src[input_ix], 0x80);
     34             input_ix++;
     35             ASSERT_NE(src[input_ix], 0);
     36             ASSERT_LT(output_ix, buf_size);
     37             buf[output_ix++] = (uint16_t)src[input_ix++];
     38             ASSERT_EQ(src[input_ix], '\'');
     39             input_ix++;
     40             break;
     41         case 'u':
     42         case 'U': {
     43             // Unicode codepoint in hex syntax
     44             input_ix++;
     45             ASSERT_EQ(src[input_ix], '+');
     46             input_ix++;
     47             char* endptr = (char*)src + input_ix;
     48             unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
     49             size_t num_hex_digits = endptr - (src + input_ix);
     50             ASSERT_GE(num_hex_digits, 4u);  // also triggers on invalid number syntax, digits = 0
     51             ASSERT_LE(num_hex_digits, 6u);
     52             ASSERT_LE(codepoint, 0x10FFFFu);
     53             input_ix += num_hex_digits;
     54             if (U16_LENGTH(codepoint) == 1) {
     55                 ASSERT_LE(output_ix + 1, buf_size);
     56                 buf[output_ix++] = codepoint;
     57             } else {
     58                 // UTF-16 encoding
     59                 ASSERT_LE(output_ix + 2, buf_size);
     60                 buf[output_ix++] = U16_LEAD(codepoint);
     61                 buf[output_ix++] = U16_TRAIL(codepoint);
     62             }
     63             break;
     64         }
     65         case ' ':
     66             input_ix++;
     67             break;
     68         case '|':
     69             ASSERT_FALSE(seen_offset);
     70             ASSERT_NE(offset, nullptr);
     71             *offset = output_ix;
     72             seen_offset = true;
     73             input_ix++;
     74             break;
     75         default:
     76             FAIL();  // unexpected character
     77         }
     78     }
     79     ASSERT_NE(result_size, nullptr);
     80     *result_size = output_ix;
     81     ASSERT_TRUE(seen_offset || offset == nullptr);
     82 }
     83 
     84 TEST(UnicodeUtils, parse) {
     85     const size_t BUF_SIZE = 256;
     86     uint16_t buf[BUF_SIZE];
     87     size_t offset;
     88     size_t size;
     89     ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset);
     90     EXPECT_EQ(size, 4u);
     91     EXPECT_EQ(offset, 3u);
     92     EXPECT_EQ(buf[0], 0x000D);
     93     EXPECT_EQ(buf[1], 0xD83D);
     94     EXPECT_EQ(buf[2], 0xDC31);
     95     EXPECT_EQ(buf[3], 'a');
     96 }
     97