1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "Unicode_test" 18 #include <utils/Log.h> 19 #include <utils/Unicode.h> 20 21 #include <gtest/gtest.h> 22 23 namespace android { 24 25 class UnicodeTest : public testing::Test { 26 protected: 27 virtual void SetUp() { 28 } 29 30 virtual void TearDown() { 31 } 32 33 char16_t const * const kSearchString = u"I am a leaf on the wind."; 34 }; 35 36 TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) { 37 ssize_t measured; 38 39 const uint8_t str[] = { }; 40 41 measured = utf8_to_utf16_length(str, 0); 42 EXPECT_EQ(0, measured) 43 << "Zero length input should return zero length output."; 44 } 45 46 TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) { 47 ssize_t measured; 48 49 // U+0030 or ASCII '0' 50 const uint8_t str[] = { 0x30 }; 51 52 measured = utf8_to_utf16_length(str, sizeof(str)); 53 EXPECT_EQ(1, measured) 54 << "ASCII glyphs should have a length of 1 char16_t"; 55 } 56 57 TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) { 58 ssize_t measured; 59 60 // U+2323 SMILE 61 const uint8_t str[] = { 0xE2, 0x8C, 0xA3 }; 62 63 measured = utf8_to_utf16_length(str, sizeof(str)); 64 EXPECT_EQ(1, measured) 65 << "Plane 1 glyphs should have a length of 1 char16_t"; 66 } 67 68 TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) { 69 ssize_t measured; 70 71 // U+10000 72 const uint8_t str[] = { 0xF0, 0x90, 0x80, 0x80 }; 73 74 measured = utf8_to_utf16_length(str, sizeof(str)); 75 EXPECT_EQ(2, measured) 76 << "Surrogate pairs should have a length of 2 char16_t"; 77 } 78 79 TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) { 80 ssize_t measured; 81 82 // Truncated U+2323 SMILE 83 // U+2323 SMILE 84 const uint8_t str[] = { 0xE2, 0x8C }; 85 86 measured = utf8_to_utf16_length(str, sizeof(str)); 87 EXPECT_EQ(-1, measured) 88 << "Truncated UTF-8 should return -1 to indicate invalid"; 89 } 90 91 TEST_F(UnicodeTest, UTF8toUTF16Normal) { 92 const uint8_t str[] = { 93 0x30, // U+0030, 1 UTF-16 character 94 0xC4, 0x80, // U+0100, 1 UTF-16 character 95 0xE2, 0x8C, 0xA3, // U+2323, 1 UTF-16 character 96 0xF0, 0x90, 0x80, 0x80, // U+10000, 2 UTF-16 character 97 }; 98 99 char16_t output[1 + 1 + 1 + 2 + 1]; // Room for NULL 100 101 utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0])); 102 103 EXPECT_EQ(0x0030, output[0]) 104 << "should be U+0030"; 105 EXPECT_EQ(0x0100, output[1]) 106 << "should be U+0100"; 107 EXPECT_EQ(0x2323, output[2]) 108 << "should be U+2323"; 109 EXPECT_EQ(0xD800, output[3]) 110 << "should be first half of surrogate U+10000"; 111 EXPECT_EQ(0xDC00, output[4]) 112 << "should be second half of surrogate U+10000"; 113 EXPECT_EQ(NULL, output[5]) 114 << "should be NULL terminated"; 115 } 116 117 TEST_F(UnicodeTest, strstr16EmptyTarget) { 118 EXPECT_EQ(strstr16(kSearchString, u""), kSearchString) 119 << "should return the original pointer"; 120 } 121 122 TEST_F(UnicodeTest, strstr16SameString) { 123 const char16_t* result = strstr16(kSearchString, kSearchString); 124 EXPECT_EQ(kSearchString, result) 125 << "should return the original pointer"; 126 } 127 128 TEST_F(UnicodeTest, strstr16TargetStartOfString) { 129 const char16_t* result = strstr16(kSearchString, u"I am"); 130 EXPECT_EQ(kSearchString, result) 131 << "should return the original pointer"; 132 } 133 134 135 TEST_F(UnicodeTest, strstr16TargetEndOfString) { 136 const char16_t* result = strstr16(kSearchString, u"wind."); 137 EXPECT_EQ(kSearchString+19, result); 138 } 139 140 TEST_F(UnicodeTest, strstr16TargetWithinString) { 141 const char16_t* result = strstr16(kSearchString, u"leaf"); 142 EXPECT_EQ(kSearchString+7, result); 143 } 144 145 TEST_F(UnicodeTest, strstr16TargetNotPresent) { 146 const char16_t* result = strstr16(kSearchString, u"soar"); 147 EXPECT_EQ(nullptr, result); 148 } 149 150 // http://b/29267949 151 // Test that overreading in utf8_to_utf16_length is detected 152 TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) { 153 // An utf8 char starting with \xc4 is two bytes long. 154 // Add extra zeros so no extra memory is read in case the code doesn't 155 // work as expected. 156 static char utf8[] = "\xc4\x00\x00\x00"; 157 ASSERT_DEATH(utf8_to_utf16_length((uint8_t *) utf8, strlen(utf8), 158 true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */); 159 } 160 161 } 162