Home | History | Annotate | Download | only in Support
      1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "llvm/Support/ConvertUTF.h"
     11 #include "gtest/gtest.h"
     12 #include <string>
     13 #include <vector>
     14 #include <utility>
     15 
     16 using namespace llvm;
     17 
     18 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
     19   // Src is the look of disapproval.
     20   static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
     21   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
     22   std::string Result;
     23   bool Success = convertUTF16ToUTF8String(Ref, Result);
     24   EXPECT_TRUE(Success);
     25   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
     26   EXPECT_EQ(Expected, Result);
     27 }
     28 
     29 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
     30   // Src is the look of disapproval.
     31   static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
     32   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
     33   std::string Result;
     34   bool Success = convertUTF16ToUTF8String(Ref, Result);
     35   EXPECT_TRUE(Success);
     36   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
     37   EXPECT_EQ(Expected, Result);
     38 }
     39 
     40 TEST(ConvertUTFTest, OddLengthInput) {
     41   std::string Result;
     42   bool Success = convertUTF16ToUTF8String(ArrayRef<char>("xxxxx", 5), Result);
     43   EXPECT_FALSE(Success);
     44 }
     45 
     46 TEST(ConvertUTFTest, Empty) {
     47   std::string Result;
     48   bool Success = convertUTF16ToUTF8String(ArrayRef<char>(), Result);
     49   EXPECT_TRUE(Success);
     50   EXPECT_TRUE(Result.empty());
     51 }
     52 
     53 TEST(ConvertUTFTest, HasUTF16BOM) {
     54   bool HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xff\xfe", 2));
     55   EXPECT_TRUE(HasBOM);
     56   HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff", 2));
     57   EXPECT_TRUE(HasBOM);
     58   HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff ", 3));
     59   EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
     60   HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe\xff\x00asdf", 6));
     61   EXPECT_TRUE(HasBOM);
     62 
     63   HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>());
     64   EXPECT_FALSE(HasBOM);
     65   HasBOM = hasUTF16ByteOrderMark(ArrayRef<char>("\xfe", 1));
     66   EXPECT_FALSE(HasBOM);
     67 }
     68 
     69 struct ConvertUTFResultContainer {
     70   ConversionResult ErrorCode;
     71   std::vector<unsigned> UnicodeScalars;
     72 
     73   ConvertUTFResultContainer(ConversionResult ErrorCode)
     74       : ErrorCode(ErrorCode) {}
     75 
     76   ConvertUTFResultContainer
     77   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
     78               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
     79               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
     80               unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
     81     ConvertUTFResultContainer Result(*this);
     82     if (US0 != 0x110000)
     83       Result.UnicodeScalars.push_back(US0);
     84     if (US1 != 0x110000)
     85       Result.UnicodeScalars.push_back(US1);
     86     if (US2 != 0x110000)
     87       Result.UnicodeScalars.push_back(US2);
     88     if (US3 != 0x110000)
     89       Result.UnicodeScalars.push_back(US3);
     90     if (US4 != 0x110000)
     91       Result.UnicodeScalars.push_back(US4);
     92     if (US5 != 0x110000)
     93       Result.UnicodeScalars.push_back(US5);
     94     if (US6 != 0x110000)
     95       Result.UnicodeScalars.push_back(US6);
     96     if (US7 != 0x110000)
     97       Result.UnicodeScalars.push_back(US7);
     98     return Result;
     99   }
    100 };
    101 
    102 std::pair<ConversionResult, std::vector<unsigned>>
    103 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
    104   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
    105 
    106   const UTF8 *SourceNext = SourceStart;
    107   std::vector<UTF32> Decoded(S.size(), 0);
    108   UTF32 *TargetStart = Decoded.data();
    109 
    110   auto ErrorCode =
    111       ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
    112                          Decoded.data() + Decoded.size(), lenientConversion);
    113 
    114   Decoded.resize(TargetStart - Decoded.data());
    115 
    116   return std::make_pair(ErrorCode, Decoded);
    117 }
    118 
    119 std::pair<ConversionResult, std::vector<unsigned>>
    120 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
    121   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
    122 
    123   const UTF8 *SourceNext = SourceStart;
    124   std::vector<UTF32> Decoded(S.size(), 0);
    125   UTF32 *TargetStart = Decoded.data();
    126 
    127   auto ErrorCode = ConvertUTF8toUTF32Partial(
    128       &SourceNext, SourceStart + S.size(), &TargetStart,
    129       Decoded.data() + Decoded.size(), lenientConversion);
    130 
    131   Decoded.resize(TargetStart - Decoded.data());
    132 
    133   return std::make_pair(ErrorCode, Decoded);
    134 }
    135 
    136 ::testing::AssertionResult
    137 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
    138                                  StringRef S, bool Partial = false) {
    139   ConversionResult ErrorCode;
    140   std::vector<unsigned> Decoded;
    141   if (!Partial)
    142     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
    143   else
    144 
    145     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
    146   if (Expected.ErrorCode != ErrorCode)
    147     return ::testing::AssertionFailure() << "Expected error code "
    148                                          << Expected.ErrorCode << ", actual "
    149                                          << ErrorCode;
    150 
    151   if (Expected.UnicodeScalars != Decoded)
    152     return ::testing::AssertionFailure()
    153            << "Expected lenient decoded result:\n"
    154            << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
    155            << "Actual result:\n" << ::testing::PrintToString(Decoded);
    156 
    157   return ::testing::AssertionSuccess();
    158 }
    159 
    160 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
    161 
    162   //
    163   // 1-byte sequences
    164   //
    165 
    166   // U+0041 LATIN CAPITAL LETTER A
    167   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    168       ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
    169 
    170   //
    171   // 2-byte sequences
    172   //
    173 
    174   // U+0283 LATIN SMALL LETTER ESH
    175   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    176       ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
    177       "\xca\x83"));
    178 
    179   // U+03BA GREEK SMALL LETTER KAPPA
    180   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
    181   // U+03C3 GREEK SMALL LETTER SIGMA
    182   // U+03BC GREEK SMALL LETTER MU
    183   // U+03B5 GREEK SMALL LETTER EPSILON
    184   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    185       ConvertUTFResultContainer(conversionOK)
    186           .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
    187       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
    188 
    189   //
    190   // 3-byte sequences
    191   //
    192 
    193   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
    194   // U+6587 CJK UNIFIED IDEOGRAPH-6587
    195   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    196       ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
    197       "\xe4\xbe\x8b\xe6\x96\x87"));
    198 
    199   // U+D55C HANGUL SYLLABLE HAN
    200   // U+AE00 HANGUL SYLLABLE GEUL
    201   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    202       ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
    203       "\xed\x95\x9c\xea\xb8\x80"));
    204 
    205   // U+1112 HANGUL CHOSEONG HIEUH
    206   // U+1161 HANGUL JUNGSEONG A
    207   // U+11AB HANGUL JONGSEONG NIEUN
    208   // U+1100 HANGUL CHOSEONG KIYEOK
    209   // U+1173 HANGUL JUNGSEONG EU
    210   // U+11AF HANGUL JONGSEONG RIEUL
    211   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    212       ConvertUTFResultContainer(conversionOK)
    213           .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
    214       "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
    215       "\xe1\x86\xaf"));
    216 
    217   //
    218   // 4-byte sequences
    219   //
    220 
    221   // U+E0100 VARIATION SELECTOR-17
    222   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    223       ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
    224       "\xf3\xa0\x84\x80"));
    225 
    226   //
    227   // First possible sequence of a certain length
    228   //
    229 
    230   // U+0000 NULL
    231   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    232       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
    233       StringRef("\x00", 1)));
    234 
    235   // U+0080 PADDING CHARACTER
    236   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    237       ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
    238       "\xc2\x80"));
    239 
    240   // U+0800 SAMARITAN LETTER ALAF
    241   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    242       ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
    243       "\xe0\xa0\x80"));
    244 
    245   // U+10000 LINEAR B SYLLABLE B008 A
    246   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    247       ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
    248       "\xf0\x90\x80\x80"));
    249 
    250   // U+200000 (invalid)
    251   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    252       ConvertUTFResultContainer(sourceIllegal)
    253           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    254       "\xf8\x88\x80\x80\x80"));
    255 
    256   // U+4000000 (invalid)
    257   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    258       ConvertUTFResultContainer(sourceIllegal)
    259           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    260       "\xfc\x84\x80\x80\x80\x80"));
    261 
    262   //
    263   // Last possible sequence of a certain length
    264   //
    265 
    266   // U+007F DELETE
    267   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    268       ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
    269 
    270   // U+07FF (unassigned)
    271   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    272       ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
    273       "\xdf\xbf"));
    274 
    275   // U+FFFF (noncharacter)
    276   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    277       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
    278       "\xef\xbf\xbf"));
    279 
    280   // U+1FFFFF (invalid)
    281   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    282       ConvertUTFResultContainer(sourceIllegal)
    283           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    284       "\xf7\xbf\xbf\xbf"));
    285 
    286   // U+3FFFFFF (invalid)
    287   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    288       ConvertUTFResultContainer(sourceIllegal)
    289           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    290       "\xfb\xbf\xbf\xbf\xbf"));
    291 
    292   // U+7FFFFFFF (invalid)
    293   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    294       ConvertUTFResultContainer(sourceIllegal)
    295           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    296       "\xfd\xbf\xbf\xbf\xbf\xbf"));
    297 
    298   //
    299   // Other boundary conditions
    300   //
    301 
    302   // U+D7FF (unassigned)
    303   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    304       ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
    305       "\xed\x9f\xbf"));
    306 
    307   // U+E000 (private use)
    308   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    309       ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
    310       "\xee\x80\x80"));
    311 
    312   // U+FFFD REPLACEMENT CHARACTER
    313   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    314       ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
    315       "\xef\xbf\xbd"));
    316 
    317   // U+10FFFF (noncharacter)
    318   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    319       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
    320       "\xf4\x8f\xbf\xbf"));
    321 
    322   // U+110000 (invalid)
    323   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    324       ConvertUTFResultContainer(sourceIllegal)
    325           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    326       "\xf4\x90\x80\x80"));
    327 
    328   //
    329   // Unexpected continuation bytes
    330   //
    331 
    332   // A sequence of unexpected continuation bytes that don't follow a first
    333   // byte, every byte is a maximal subpart.
    334 
    335   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    336       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
    337   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    338       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
    339   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    340       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    341       "\x80\x80"));
    342   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    343       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    344       "\x80\xbf"));
    345   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    346       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    347       "\xbf\x80"));
    348   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    349       ConvertUTFResultContainer(sourceIllegal)
    350           .withScalars(0xfffd, 0xfffd, 0xfffd),
    351       "\x80\xbf\x80"));
    352   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    353       ConvertUTFResultContainer(sourceIllegal)
    354           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    355       "\x80\xbf\x80\xbf"));
    356   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    357       ConvertUTFResultContainer(sourceIllegal)
    358           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    359       "\x80\xbf\x82\xbf\xaa"));
    360   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    361       ConvertUTFResultContainer(sourceIllegal)
    362           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    363       "\xaa\xb0\xbb\xbf\xaa\xa0"));
    364   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    365       ConvertUTFResultContainer(sourceIllegal)
    366           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    367       "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
    368 
    369   // All continuation bytes (0x80--0xbf).
    370   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    371       ConvertUTFResultContainer(sourceIllegal)
    372           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    373                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    374           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    375                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    376           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    377                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    378           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    379                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    380           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    381                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    382           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    383                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    384           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    385                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    386           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    387                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    388       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    389       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    390       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    391       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
    392 
    393   //
    394   // Lonely start bytes
    395   //
    396 
    397   // Start bytes of 2-byte sequences (0xc0--0xdf).
    398   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    399       ConvertUTFResultContainer(sourceIllegal)
    400           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    401                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    402           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    403                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    404           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    405                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    406           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    407                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    408       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    409       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
    410 
    411   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    412       ConvertUTFResultContainer(sourceIllegal)
    413           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    414                        0xfffd, 0x0020, 0xfffd, 0x0020)
    415           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    416                        0xfffd, 0x0020, 0xfffd, 0x0020)
    417           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    418                        0xfffd, 0x0020, 0xfffd, 0x0020)
    419           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    420                        0xfffd, 0x0020, 0xfffd, 0x0020)
    421           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    422                        0xfffd, 0x0020, 0xfffd, 0x0020)
    423           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    424                        0xfffd, 0x0020, 0xfffd, 0x0020)
    425           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    426                        0xfffd, 0x0020, 0xfffd, 0x0020)
    427           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    428                        0xfffd, 0x0020, 0xfffd, 0x0020),
    429       "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
    430       "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
    431       "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
    432       "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
    433 
    434   // Start bytes of 3-byte sequences (0xe0--0xef).
    435   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    436       ConvertUTFResultContainer(sourceIllegal)
    437           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    438                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    439           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    440                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    441       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
    442 
    443   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    444       ConvertUTFResultContainer(sourceIllegal)
    445           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    446                        0xfffd, 0x0020, 0xfffd, 0x0020)
    447           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    448                        0xfffd, 0x0020, 0xfffd, 0x0020)
    449           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    450                        0xfffd, 0x0020, 0xfffd, 0x0020)
    451           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    452                        0xfffd, 0x0020, 0xfffd, 0x0020),
    453       "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
    454       "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
    455 
    456   // Start bytes of 4-byte sequences (0xf0--0xf7).
    457   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    458       ConvertUTFResultContainer(sourceIllegal)
    459           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    460                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    461       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
    462 
    463   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    464       ConvertUTFResultContainer(sourceIllegal)
    465           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    466                        0xfffd, 0x0020, 0xfffd, 0x0020)
    467           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    468                        0xfffd, 0x0020, 0xfffd, 0x0020),
    469       "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
    470 
    471   // Start bytes of 5-byte sequences (0xf8--0xfb).
    472   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    473       ConvertUTFResultContainer(sourceIllegal)
    474           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    475       "\xf8\xf9\xfa\xfb"));
    476 
    477   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    478       ConvertUTFResultContainer(sourceIllegal)
    479           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    480                        0xfffd, 0x0020, 0xfffd, 0x0020),
    481       "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
    482 
    483   // Start bytes of 6-byte sequences (0xfc--0xfd).
    484   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    485       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    486       "\xfc\xfd"));
    487 
    488   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    489       ConvertUTFResultContainer(sourceIllegal)
    490           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
    491       "\xfc\x20\xfd\x20"));
    492 
    493   //
    494   // Other bytes (0xc0--0xc1, 0xfe--0xff).
    495   //
    496 
    497   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    498       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
    499   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    500       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
    501   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    502       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
    503   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    504       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
    505 
    506   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    507       ConvertUTFResultContainer(sourceIllegal)
    508           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    509       "\xc0\xc1\xfe\xff"));
    510 
    511   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    512       ConvertUTFResultContainer(sourceIllegal)
    513           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    514       "\xfe\xfe\xff\xff"));
    515 
    516   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    517       ConvertUTFResultContainer(sourceIllegal)
    518           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    519       "\xfe\x80\x80\x80\x80\x80"));
    520 
    521   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    522       ConvertUTFResultContainer(sourceIllegal)
    523           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    524       "\xff\x80\x80\x80\x80\x80"));
    525 
    526   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    527       ConvertUTFResultContainer(sourceIllegal)
    528           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    529                        0xfffd, 0x0020, 0xfffd, 0x0020),
    530       "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
    531 
    532   //
    533   // Sequences with one continuation byte missing
    534   //
    535 
    536   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    537       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
    538   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    539       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
    540   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    541       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    542       "\xe0\xa0"));
    543   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    544       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    545       "\xe0\xbf"));
    546   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    547       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    548       "\xe1\x80"));
    549   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    550       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    551       "\xec\xbf"));
    552   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    553       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    554       "\xed\x80"));
    555   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    556       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    557       "\xed\x9f"));
    558   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    559       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    560       "\xee\x80"));
    561   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    562       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    563       "\xef\xbf"));
    564   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    565       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    566       "\xf0\x90\x80"));
    567   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    568       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    569       "\xf0\xbf\xbf"));
    570   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    571       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    572       "\xf1\x80\x80"));
    573   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    574       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    575       "\xf3\xbf\xbf"));
    576   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    577       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    578       "\xf4\x80\x80"));
    579   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    580       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    581       "\xf4\x8f\xbf"));
    582 
    583   // Overlong sequences with one trailing byte missing.
    584   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    585       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    586       "\xc0"));
    587   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    588       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    589       "\xc1"));
    590   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    591       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    592       "\xe0\x80"));
    593   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    594       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    595       "\xe0\x9f"));
    596   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    597       ConvertUTFResultContainer(sourceIllegal)
    598           .withScalars(0xfffd, 0xfffd, 0xfffd),
    599       "\xf0\x80\x80"));
    600   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    601       ConvertUTFResultContainer(sourceIllegal)
    602           .withScalars(0xfffd, 0xfffd, 0xfffd),
    603       "\xf0\x8f\x80"));
    604   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    605       ConvertUTFResultContainer(sourceIllegal)
    606           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    607       "\xf8\x80\x80\x80"));
    608   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    609       ConvertUTFResultContainer(sourceIllegal)
    610           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    611       "\xfc\x80\x80\x80\x80"));
    612 
    613   // Sequences that represent surrogates with one trailing byte missing.
    614   // High surrogates
    615   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    616       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    617       "\xed\xa0"));
    618   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    619       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    620       "\xed\xac"));
    621   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    622       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    623       "\xed\xaf"));
    624   // Low surrogates
    625   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    626       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    627       "\xed\xb0"));
    628   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    629       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    630       "\xed\xb4"));
    631   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    632       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    633       "\xed\xbf"));
    634 
    635   // Ill-formed 4-byte sequences.
    636   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    637   // U+1100xx (invalid)
    638   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    639       ConvertUTFResultContainer(sourceIllegal)
    640           .withScalars(0xfffd, 0xfffd, 0xfffd),
    641       "\xf4\x90\x80"));
    642   // U+13FBxx (invalid)
    643   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    644       ConvertUTFResultContainer(sourceIllegal)
    645           .withScalars(0xfffd, 0xfffd, 0xfffd),
    646       "\xf4\xbf\xbf"));
    647   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    648       ConvertUTFResultContainer(sourceIllegal)
    649           .withScalars(0xfffd, 0xfffd, 0xfffd),
    650       "\xf5\x80\x80"));
    651   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    652       ConvertUTFResultContainer(sourceIllegal)
    653           .withScalars(0xfffd, 0xfffd, 0xfffd),
    654       "\xf6\x80\x80"));
    655   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    656       ConvertUTFResultContainer(sourceIllegal)
    657           .withScalars(0xfffd, 0xfffd, 0xfffd),
    658       "\xf7\x80\x80"));
    659   // U+1FFBxx (invalid)
    660   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    661       ConvertUTFResultContainer(sourceIllegal)
    662           .withScalars(0xfffd, 0xfffd, 0xfffd),
    663       "\xf7\xbf\xbf"));
    664 
    665   // Ill-formed 5-byte sequences.
    666   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    667   // U+2000xx (invalid)
    668   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    669       ConvertUTFResultContainer(sourceIllegal)
    670           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    671       "\xf8\x88\x80\x80"));
    672   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    673       ConvertUTFResultContainer(sourceIllegal)
    674           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    675       "\xf8\xbf\xbf\xbf"));
    676   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    677       ConvertUTFResultContainer(sourceIllegal)
    678           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    679       "\xf9\x80\x80\x80"));
    680   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    681       ConvertUTFResultContainer(sourceIllegal)
    682           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    683       "\xfa\x80\x80\x80"));
    684   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    685       ConvertUTFResultContainer(sourceIllegal)
    686           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    687       "\xfb\x80\x80\x80"));
    688   // U+3FFFFxx (invalid)
    689   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    690       ConvertUTFResultContainer(sourceIllegal)
    691           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    692       "\xfb\xbf\xbf\xbf"));
    693 
    694   // Ill-formed 6-byte sequences.
    695   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    696   // U+40000xx (invalid)
    697   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    698       ConvertUTFResultContainer(sourceIllegal)
    699           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    700       "\xfc\x84\x80\x80\x80"));
    701   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    702       ConvertUTFResultContainer(sourceIllegal)
    703           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    704       "\xfc\xbf\xbf\xbf\xbf"));
    705   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    706       ConvertUTFResultContainer(sourceIllegal)
    707           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    708       "\xfd\x80\x80\x80\x80"));
    709   // U+7FFFFFxx (invalid)
    710   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    711       ConvertUTFResultContainer(sourceIllegal)
    712           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    713       "\xfd\xbf\xbf\xbf\xbf"));
    714 
    715   //
    716   // Sequences with two continuation bytes missing
    717   //
    718 
    719   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    720       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    721       "\xf0\x90"));
    722   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    723       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    724       "\xf0\xbf"));
    725   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    726       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    727       "\xf1\x80"));
    728   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    729       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    730       "\xf3\xbf"));
    731   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    732       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    733       "\xf4\x80"));
    734   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    735       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    736       "\xf4\x8f"));
    737 
    738   // Overlong sequences with two trailing bytes missing.
    739   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    740       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
    741   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    742       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    743       "\xf0\x80"));
    744   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    745       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    746       "\xf0\x8f"));
    747   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    748       ConvertUTFResultContainer(sourceIllegal)
    749           .withScalars(0xfffd, 0xfffd, 0xfffd),
    750       "\xf8\x80\x80"));
    751   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    752       ConvertUTFResultContainer(sourceIllegal)
    753           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    754       "\xfc\x80\x80\x80"));
    755 
    756   // Sequences that represent surrogates with two trailing bytes missing.
    757   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    758       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
    759 
    760   // Ill-formed 4-byte sequences.
    761   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    762   // U+110yxx (invalid)
    763   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    764       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    765       "\xf4\x90"));
    766   // U+13Fyxx (invalid)
    767   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    768       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    769       "\xf4\xbf"));
    770   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    771       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    772       "\xf5\x80"));
    773   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    774       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    775       "\xf6\x80"));
    776   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    777       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    778       "\xf7\x80"));
    779   // U+1FFyxx (invalid)
    780   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    781       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    782       "\xf7\xbf"));
    783 
    784   // Ill-formed 5-byte sequences.
    785   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    786   // U+200yxx (invalid)
    787   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    788       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    789       "\xf8\x88\x80"));
    790   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    791       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    792       "\xf8\xbf\xbf"));
    793   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    794       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    795       "\xf9\x80\x80"));
    796   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    797       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    798       "\xfa\x80\x80"));
    799   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    800       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    801       "\xfb\x80\x80"));
    802   // U+3FFFyxx (invalid)
    803   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    804       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    805       "\xfb\xbf\xbf"));
    806 
    807   // Ill-formed 6-byte sequences.
    808   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    809   // U+4000yxx (invalid)
    810   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    811       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    812       "\xfc\x84\x80\x80"));
    813   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    814       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    815       "\xfc\xbf\xbf\xbf"));
    816   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    817       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    818       "\xfd\x80\x80\x80"));
    819   // U+7FFFFyxx (invalid)
    820   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    821       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    822       "\xfd\xbf\xbf\xbf"));
    823 
    824   //
    825   // Sequences with three continuation bytes missing
    826   //
    827 
    828   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    829       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
    830   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    831       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
    832   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    833       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
    834   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    835       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
    836   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    837       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
    838 
    839   // Broken overlong sequences.
    840   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    841       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
    842   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    843       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    844       "\xf8\x80"));
    845   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    846       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    847       "\xfc\x80\x80"));
    848 
    849   // Ill-formed 4-byte sequences.
    850   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    851   // U+14yyxx (invalid)
    852   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    853       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
    854   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    855       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
    856   // U+1Cyyxx (invalid)
    857   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    858       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
    859 
    860   // Ill-formed 5-byte sequences.
    861   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    862   // U+20yyxx (invalid)
    863   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    864       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    865       "\xf8\x88"));
    866   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    867       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    868       "\xf8\xbf"));
    869   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    870       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    871       "\xf9\x80"));
    872   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    873       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    874       "\xfa\x80"));
    875   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    876       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    877       "\xfb\x80"));
    878   // U+3FCyyxx (invalid)
    879   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    880       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    881       "\xfb\xbf"));
    882 
    883   // Ill-formed 6-byte sequences.
    884   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    885   // U+400yyxx (invalid)
    886   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    887       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    888       "\xfc\x84\x80"));
    889   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    890       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    891       "\xfc\xbf\xbf"));
    892   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    893       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    894       "\xfd\x80\x80"));
    895   // U+7FFCyyxx (invalid)
    896   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    897       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    898       "\xfd\xbf\xbf"));
    899 
    900   //
    901   // Sequences with four continuation bytes missing
    902   //
    903 
    904   // Ill-formed 5-byte sequences.
    905   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    906   // U+uzyyxx (invalid)
    907   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    908       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
    909   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    910       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
    911   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    912       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
    913   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    914       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
    915   // U+3zyyxx (invalid)
    916   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    917       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
    918 
    919   // Broken overlong sequences.
    920   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    921       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
    922   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    923       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    924       "\xfc\x80"));
    925 
    926   // Ill-formed 6-byte sequences.
    927   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    928   // U+uzzyyxx (invalid)
    929   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    930       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    931       "\xfc\x84"));
    932   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    933       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    934       "\xfc\xbf"));
    935   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    936       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    937       "\xfd\x80"));
    938   // U+7Fzzyyxx (invalid)
    939   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    940       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    941       "\xfd\xbf"));
    942 
    943   //
    944   // Sequences with five continuation bytes missing
    945   //
    946 
    947   // Ill-formed 6-byte sequences.
    948   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    949   // U+uzzyyxx (invalid)
    950   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    951       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
    952   // U+uuzzyyxx (invalid)
    953   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    954       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
    955 
    956   //
    957   // Consecutive sequences with trailing bytes missing
    958   //
    959 
    960   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    961       ConvertUTFResultContainer(sourceIllegal)
    962           .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
    963           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
    964           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
    965           .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
    966           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
    967           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    968       "\xc0" "\xe0\x80" "\xf0\x80\x80"
    969       "\xf8\x80\x80\x80"
    970       "\xfc\x80\x80\x80\x80"
    971       "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
    972       "\xfb\xbf\xbf\xbf"
    973       "\xfd\xbf\xbf\xbf\xbf"));
    974 
    975   //
    976   // Overlong UTF-8 sequences
    977   //
    978 
    979   // U+002F SOLIDUS
    980   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    981       ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
    982 
    983   // Overlong sequences of the above.
    984   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    985       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    986       "\xc0\xaf"));
    987   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    988       ConvertUTFResultContainer(sourceIllegal)
    989           .withScalars(0xfffd, 0xfffd, 0xfffd),
    990       "\xe0\x80\xaf"));
    991   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    992       ConvertUTFResultContainer(sourceIllegal)
    993           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    994       "\xf0\x80\x80\xaf"));
    995   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    996       ConvertUTFResultContainer(sourceIllegal)
    997           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    998       "\xf8\x80\x80\x80\xaf"));
    999   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1000       ConvertUTFResultContainer(sourceIllegal)
   1001           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1002       "\xfc\x80\x80\x80\x80\xaf"));
   1003 
   1004   // U+0000 NULL
   1005   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1006       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
   1007       StringRef("\x00", 1)));
   1008 
   1009   // Overlong sequences of the above.
   1010   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1011       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1012       "\xc0\x80"));
   1013   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1014       ConvertUTFResultContainer(sourceIllegal)
   1015           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1016       "\xe0\x80\x80"));
   1017   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1018       ConvertUTFResultContainer(sourceIllegal)
   1019           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1020       "\xf0\x80\x80\x80"));
   1021   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1022       ConvertUTFResultContainer(sourceIllegal)
   1023           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1024       "\xf8\x80\x80\x80\x80"));
   1025   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1026       ConvertUTFResultContainer(sourceIllegal)
   1027           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1028       "\xfc\x80\x80\x80\x80\x80"));
   1029 
   1030   // Other overlong sequences.
   1031   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1032       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1033       "\xc0\xbf"));
   1034   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1035       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1036       "\xc1\x80"));
   1037   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1038       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1039       "\xc1\xbf"));
   1040   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1041       ConvertUTFResultContainer(sourceIllegal)
   1042           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1043       "\xe0\x9f\xbf"));
   1044   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1045       ConvertUTFResultContainer(sourceIllegal)
   1046           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1047       "\xed\xa0\x80"));
   1048   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1049       ConvertUTFResultContainer(sourceIllegal)
   1050           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1051       "\xed\xbf\xbf"));
   1052   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1053       ConvertUTFResultContainer(sourceIllegal)
   1054           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1055       "\xf0\x8f\x80\x80"));
   1056   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1057       ConvertUTFResultContainer(sourceIllegal)
   1058           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1059       "\xf0\x8f\xbf\xbf"));
   1060   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1061       ConvertUTFResultContainer(sourceIllegal)
   1062           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1063       "\xf8\x87\xbf\xbf\xbf"));
   1064   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1065       ConvertUTFResultContainer(sourceIllegal)
   1066           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1067       "\xfc\x83\xbf\xbf\xbf\xbf"));
   1068 
   1069   //
   1070   // Isolated surrogates
   1071   //
   1072 
   1073   // Unicode 6.3.0:
   1074   //
   1075   //    D71.  High-surrogate code point: A Unicode code point in the range
   1076   //    U+D800 to U+DBFF.
   1077   //
   1078   //    D73.  Low-surrogate code point: A Unicode code point in the range
   1079   //    U+DC00 to U+DFFF.
   1080 
   1081   // Note: U+E0100 is <DB40 DD00> in UTF16.
   1082 
   1083   // High surrogates
   1084 
   1085   // U+D800
   1086   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1087       ConvertUTFResultContainer(sourceIllegal)
   1088           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1089       "\xed\xa0\x80"));
   1090 
   1091   // U+DB40
   1092   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1093       ConvertUTFResultContainer(sourceIllegal)
   1094           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1095       "\xed\xac\xa0"));
   1096 
   1097   // U+DBFF
   1098   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1099       ConvertUTFResultContainer(sourceIllegal)
   1100           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1101       "\xed\xaf\xbf"));
   1102 
   1103   // Low surrogates
   1104 
   1105   // U+DC00
   1106   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1107       ConvertUTFResultContainer(sourceIllegal)
   1108           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1109       "\xed\xb0\x80"));
   1110 
   1111   // U+DD00
   1112   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1113       ConvertUTFResultContainer(sourceIllegal)
   1114           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1115       "\xed\xb4\x80"));
   1116 
   1117   // U+DFFF
   1118   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1119       ConvertUTFResultContainer(sourceIllegal)
   1120           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1121       "\xed\xbf\xbf"));
   1122 
   1123   // Surrogate pairs
   1124 
   1125   // U+D800 U+DC00
   1126   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1127       ConvertUTFResultContainer(sourceIllegal)
   1128           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1129       "\xed\xa0\x80\xed\xb0\x80"));
   1130 
   1131   // U+D800 U+DD00
   1132   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1133       ConvertUTFResultContainer(sourceIllegal)
   1134           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1135       "\xed\xa0\x80\xed\xb4\x80"));
   1136 
   1137   // U+D800 U+DFFF
   1138   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1139       ConvertUTFResultContainer(sourceIllegal)
   1140           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1141       "\xed\xa0\x80\xed\xbf\xbf"));
   1142 
   1143   // U+DB40 U+DC00
   1144   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1145       ConvertUTFResultContainer(sourceIllegal)
   1146           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1147       "\xed\xac\xa0\xed\xb0\x80"));
   1148 
   1149   // U+DB40 U+DD00
   1150   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1151       ConvertUTFResultContainer(sourceIllegal)
   1152           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1153       "\xed\xac\xa0\xed\xb4\x80"));
   1154 
   1155   // U+DB40 U+DFFF
   1156   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1157       ConvertUTFResultContainer(sourceIllegal)
   1158           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1159       "\xed\xac\xa0\xed\xbf\xbf"));
   1160 
   1161   // U+DBFF U+DC00
   1162   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1163       ConvertUTFResultContainer(sourceIllegal)
   1164           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1165       "\xed\xaf\xbf\xed\xb0\x80"));
   1166 
   1167   // U+DBFF U+DD00
   1168   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1169       ConvertUTFResultContainer(sourceIllegal)
   1170           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1171       "\xed\xaf\xbf\xed\xb4\x80"));
   1172 
   1173   // U+DBFF U+DFFF
   1174   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1175       ConvertUTFResultContainer(sourceIllegal)
   1176           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1177       "\xed\xaf\xbf\xed\xbf\xbf"));
   1178 
   1179   //
   1180   // Noncharacters
   1181   //
   1182 
   1183   // Unicode 6.3.0:
   1184   //
   1185   //    D14.  Noncharacter: A code point that is permanently reserved for
   1186   //    internal use and that should never be interchanged. Noncharacters
   1187   //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
   1188   //    and the values U+FDD0..U+FDEF.
   1189 
   1190   // U+FFFE
   1191   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1192       ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
   1193       "\xef\xbf\xbe"));
   1194 
   1195   // U+FFFF
   1196   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1197       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
   1198       "\xef\xbf\xbf"));
   1199 
   1200   // U+1FFFE
   1201   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1202       ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
   1203       "\xf0\x9f\xbf\xbe"));
   1204 
   1205   // U+1FFFF
   1206   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1207       ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
   1208       "\xf0\x9f\xbf\xbf"));
   1209 
   1210   // U+2FFFE
   1211   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1212       ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
   1213       "\xf0\xaf\xbf\xbe"));
   1214 
   1215   // U+2FFFF
   1216   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1217       ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
   1218       "\xf0\xaf\xbf\xbf"));
   1219 
   1220   // U+3FFFE
   1221   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1222       ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
   1223       "\xf0\xbf\xbf\xbe"));
   1224 
   1225   // U+3FFFF
   1226   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1227       ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
   1228       "\xf0\xbf\xbf\xbf"));
   1229 
   1230   // U+4FFFE
   1231   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1232       ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
   1233       "\xf1\x8f\xbf\xbe"));
   1234 
   1235   // U+4FFFF
   1236   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1237       ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
   1238       "\xf1\x8f\xbf\xbf"));
   1239 
   1240   // U+5FFFE
   1241   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1242       ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
   1243       "\xf1\x9f\xbf\xbe"));
   1244 
   1245   // U+5FFFF
   1246   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1247       ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
   1248       "\xf1\x9f\xbf\xbf"));
   1249 
   1250   // U+6FFFE
   1251   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1252       ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
   1253       "\xf1\xaf\xbf\xbe"));
   1254 
   1255   // U+6FFFF
   1256   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1257       ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
   1258       "\xf1\xaf\xbf\xbf"));
   1259 
   1260   // U+7FFFE
   1261   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1262       ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
   1263       "\xf1\xbf\xbf\xbe"));
   1264 
   1265   // U+7FFFF
   1266   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1267       ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
   1268       "\xf1\xbf\xbf\xbf"));
   1269 
   1270   // U+8FFFE
   1271   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1272       ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
   1273       "\xf2\x8f\xbf\xbe"));
   1274 
   1275   // U+8FFFF
   1276   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1277       ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
   1278       "\xf2\x8f\xbf\xbf"));
   1279 
   1280   // U+9FFFE
   1281   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1282       ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
   1283       "\xf2\x9f\xbf\xbe"));
   1284 
   1285   // U+9FFFF
   1286   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1287       ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
   1288       "\xf2\x9f\xbf\xbf"));
   1289 
   1290   // U+AFFFE
   1291   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1292       ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
   1293       "\xf2\xaf\xbf\xbe"));
   1294 
   1295   // U+AFFFF
   1296   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1297       ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
   1298       "\xf2\xaf\xbf\xbf"));
   1299 
   1300   // U+BFFFE
   1301   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1302       ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
   1303       "\xf2\xbf\xbf\xbe"));
   1304 
   1305   // U+BFFFF
   1306   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1307       ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
   1308       "\xf2\xbf\xbf\xbf"));
   1309 
   1310   // U+CFFFE
   1311   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1312       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
   1313       "\xf3\x8f\xbf\xbe"));
   1314 
   1315   // U+CFFFF
   1316   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1317       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
   1318       "\xf3\x8f\xbf\xbf"));
   1319 
   1320   // U+DFFFE
   1321   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1322       ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
   1323       "\xf3\x9f\xbf\xbe"));
   1324 
   1325   // U+DFFFF
   1326   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1327       ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
   1328       "\xf3\x9f\xbf\xbf"));
   1329 
   1330   // U+EFFFE
   1331   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1332       ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
   1333       "\xf3\xaf\xbf\xbe"));
   1334 
   1335   // U+EFFFF
   1336   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1337       ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
   1338       "\xf3\xaf\xbf\xbf"));
   1339 
   1340   // U+FFFFE
   1341   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1342       ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
   1343       "\xf3\xbf\xbf\xbe"));
   1344 
   1345   // U+FFFFF
   1346   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1347       ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
   1348       "\xf3\xbf\xbf\xbf"));
   1349 
   1350   // U+10FFFE
   1351   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1352       ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
   1353       "\xf4\x8f\xbf\xbe"));
   1354 
   1355   // U+10FFFF
   1356   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1357       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
   1358       "\xf4\x8f\xbf\xbf"));
   1359 
   1360   // U+FDD0
   1361   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1362       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
   1363       "\xef\xb7\x90"));
   1364 
   1365   // U+FDD1
   1366   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1367       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
   1368       "\xef\xb7\x91"));
   1369 
   1370   // U+FDD2
   1371   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1372       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
   1373       "\xef\xb7\x92"));
   1374 
   1375   // U+FDD3
   1376   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1377       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
   1378       "\xef\xb7\x93"));
   1379 
   1380   // U+FDD4
   1381   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1382       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
   1383       "\xef\xb7\x94"));
   1384 
   1385   // U+FDD5
   1386   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1387       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
   1388       "\xef\xb7\x95"));
   1389 
   1390   // U+FDD6
   1391   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1392       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
   1393       "\xef\xb7\x96"));
   1394 
   1395   // U+FDD7
   1396   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1397       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
   1398       "\xef\xb7\x97"));
   1399 
   1400   // U+FDD8
   1401   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1402       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
   1403       "\xef\xb7\x98"));
   1404 
   1405   // U+FDD9
   1406   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1407       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
   1408       "\xef\xb7\x99"));
   1409 
   1410   // U+FDDA
   1411   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1412       ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
   1413       "\xef\xb7\x9a"));
   1414 
   1415   // U+FDDB
   1416   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1417       ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
   1418       "\xef\xb7\x9b"));
   1419 
   1420   // U+FDDC
   1421   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1422       ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
   1423       "\xef\xb7\x9c"));
   1424 
   1425   // U+FDDD
   1426   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1427       ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
   1428       "\xef\xb7\x9d"));
   1429 
   1430   // U+FDDE
   1431   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1432       ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
   1433       "\xef\xb7\x9e"));
   1434 
   1435   // U+FDDF
   1436   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1437       ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
   1438       "\xef\xb7\x9f"));
   1439 
   1440   // U+FDE0
   1441   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1442       ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
   1443       "\xef\xb7\xa0"));
   1444 
   1445   // U+FDE1
   1446   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1447       ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
   1448       "\xef\xb7\xa1"));
   1449 
   1450   // U+FDE2
   1451   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1452       ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
   1453       "\xef\xb7\xa2"));
   1454 
   1455   // U+FDE3
   1456   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1457       ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
   1458       "\xef\xb7\xa3"));
   1459 
   1460   // U+FDE4
   1461   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1462       ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
   1463       "\xef\xb7\xa4"));
   1464 
   1465   // U+FDE5
   1466   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1467       ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
   1468       "\xef\xb7\xa5"));
   1469 
   1470   // U+FDE6
   1471   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1472       ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
   1473       "\xef\xb7\xa6"));
   1474 
   1475   // U+FDE7
   1476   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1477       ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
   1478       "\xef\xb7\xa7"));
   1479 
   1480   // U+FDE8
   1481   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1482       ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
   1483       "\xef\xb7\xa8"));
   1484 
   1485   // U+FDE9
   1486   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1487       ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
   1488       "\xef\xb7\xa9"));
   1489 
   1490   // U+FDEA
   1491   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1492       ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
   1493       "\xef\xb7\xaa"));
   1494 
   1495   // U+FDEB
   1496   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1497       ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
   1498       "\xef\xb7\xab"));
   1499 
   1500   // U+FDEC
   1501   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1502       ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
   1503       "\xef\xb7\xac"));
   1504 
   1505   // U+FDED
   1506   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1507       ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
   1508       "\xef\xb7\xad"));
   1509 
   1510   // U+FDEE
   1511   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1512       ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
   1513       "\xef\xb7\xae"));
   1514 
   1515   // U+FDEF
   1516   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1517       ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
   1518       "\xef\xb7\xaf"));
   1519 
   1520   // U+FDF0
   1521   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1522       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
   1523       "\xef\xb7\xb0"));
   1524 
   1525   // U+FDF1
   1526   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1527       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
   1528       "\xef\xb7\xb1"));
   1529 
   1530   // U+FDF2
   1531   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1532       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
   1533       "\xef\xb7\xb2"));
   1534 
   1535   // U+FDF3
   1536   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1537       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
   1538       "\xef\xb7\xb3"));
   1539 
   1540   // U+FDF4
   1541   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1542       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
   1543       "\xef\xb7\xb4"));
   1544 
   1545   // U+FDF5
   1546   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1547       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
   1548       "\xef\xb7\xb5"));
   1549 
   1550   // U+FDF6
   1551   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1552       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
   1553       "\xef\xb7\xb6"));
   1554 
   1555   // U+FDF7
   1556   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1557       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
   1558       "\xef\xb7\xb7"));
   1559 
   1560   // U+FDF8
   1561   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1562       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
   1563       "\xef\xb7\xb8"));
   1564 
   1565   // U+FDF9
   1566   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1567       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
   1568       "\xef\xb7\xb9"));
   1569 
   1570   // U+FDFA
   1571   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1572       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
   1573       "\xef\xb7\xba"));
   1574 
   1575   // U+FDFB
   1576   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1577       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
   1578       "\xef\xb7\xbb"));
   1579 
   1580   // U+FDFC
   1581   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1582       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
   1583       "\xef\xb7\xbc"));
   1584 
   1585   // U+FDFD
   1586   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1587       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
   1588       "\xef\xb7\xbd"));
   1589 
   1590   // U+FDFE
   1591   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1592       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
   1593       "\xef\xb7\xbe"));
   1594 
   1595   // U+FDFF
   1596   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1597       ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
   1598       "\xef\xb7\xbf"));
   1599 }
   1600 
   1601 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
   1602   // U+0041 LATIN CAPITAL LETTER A
   1603   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1604       ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
   1605       "\x41", true));
   1606 
   1607   //
   1608   // Sequences with one continuation byte missing
   1609   //
   1610 
   1611   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1612       ConvertUTFResultContainer(sourceExhausted),
   1613       "\xc2", true));
   1614   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1615       ConvertUTFResultContainer(sourceExhausted),
   1616       "\xdf", true));
   1617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1618       ConvertUTFResultContainer(sourceExhausted),
   1619       "\xe0\xa0", true));
   1620   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1621       ConvertUTFResultContainer(sourceExhausted),
   1622       "\xe0\xbf", true));
   1623   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1624       ConvertUTFResultContainer(sourceExhausted),
   1625       "\xe1\x80", true));
   1626   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1627       ConvertUTFResultContainer(sourceExhausted),
   1628       "\xec\xbf", true));
   1629   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1630       ConvertUTFResultContainer(sourceExhausted),
   1631       "\xed\x80", true));
   1632   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1633       ConvertUTFResultContainer(sourceExhausted),
   1634       "\xed\x9f", true));
   1635   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1636       ConvertUTFResultContainer(sourceExhausted),
   1637       "\xee\x80", true));
   1638   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1639       ConvertUTFResultContainer(sourceExhausted),
   1640       "\xef\xbf", true));
   1641   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1642       ConvertUTFResultContainer(sourceExhausted),
   1643       "\xf0\x90\x80", true));
   1644   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1645       ConvertUTFResultContainer(sourceExhausted),
   1646       "\xf0\xbf\xbf", true));
   1647   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1648       ConvertUTFResultContainer(sourceExhausted),
   1649       "\xf1\x80\x80", true));
   1650   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1651       ConvertUTFResultContainer(sourceExhausted),
   1652       "\xf3\xbf\xbf", true));
   1653   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1654       ConvertUTFResultContainer(sourceExhausted),
   1655       "\xf4\x80\x80", true));
   1656   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1657       ConvertUTFResultContainer(sourceExhausted),
   1658       "\xf4\x8f\xbf", true));
   1659 
   1660   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1661       ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
   1662       "\x41\xc2", true));
   1663 }
   1664 
   1665