Home | History | Annotate | Download | only in Support
      1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "llvm/Support/ConvertUTF.h"
     11 #include "llvm/Support/Format.h"
     12 #include "gtest/gtest.h"
     13 #include <string>
     14 #include <utility>
     15 #include <vector>
     16 
     17 using namespace llvm;
     18 
     19 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
     20   // Src is the look of disapproval.
     21   static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
     22   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
     23   std::string Result;
     24   bool Success = convertUTF16ToUTF8String(Ref, Result);
     25   EXPECT_TRUE(Success);
     26   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
     27   EXPECT_EQ(Expected, Result);
     28 }
     29 
     30 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
     31   // Src is the look of disapproval.
     32   static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
     33   ArrayRef<char> Ref(Src, sizeof(Src) - 1);
     34   std::string Result;
     35   bool Success = convertUTF16ToUTF8String(Ref, Result);
     36   EXPECT_TRUE(Success);
     37   std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
     38   EXPECT_EQ(Expected, Result);
     39 }
     40 
     41 TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
     42   // Src is the look of disapproval.
     43   static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
     44   StringRef Ref(Src, sizeof(Src) - 1);
     45   SmallVector<UTF16, 5> Result;
     46   bool Success = convertUTF8ToUTF16String(Ref, Result);
     47   EXPECT_TRUE(Success);
     48   static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
     49   ASSERT_EQ(3u, Result.size());
     50   for (int I = 0, E = 3; I != E; ++I)
     51     EXPECT_EQ(Expected[I], Result[I]);
     52 }
     53 
     54 TEST(ConvertUTFTest, OddLengthInput) {
     55   std::string Result;
     56   bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
     57   EXPECT_FALSE(Success);
     58 }
     59 
     60 TEST(ConvertUTFTest, Empty) {
     61   std::string Result;
     62   bool Success = convertUTF16ToUTF8String(None, Result);
     63   EXPECT_TRUE(Success);
     64   EXPECT_TRUE(Result.empty());
     65 }
     66 
     67 TEST(ConvertUTFTest, HasUTF16BOM) {
     68   bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
     69   EXPECT_TRUE(HasBOM);
     70   HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
     71   EXPECT_TRUE(HasBOM);
     72   HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
     73   EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
     74   HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
     75   EXPECT_TRUE(HasBOM);
     76 
     77   HasBOM = hasUTF16ByteOrderMark(None);
     78   EXPECT_FALSE(HasBOM);
     79   HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
     80   EXPECT_FALSE(HasBOM);
     81 }
     82 
     83 struct ConvertUTFResultContainer {
     84   ConversionResult ErrorCode;
     85   std::vector<unsigned> UnicodeScalars;
     86 
     87   ConvertUTFResultContainer(ConversionResult ErrorCode)
     88       : ErrorCode(ErrorCode) {}
     89 
     90   ConvertUTFResultContainer
     91   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
     92               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
     93               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
     94               unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
     95     ConvertUTFResultContainer Result(*this);
     96     if (US0 != 0x110000)
     97       Result.UnicodeScalars.push_back(US0);
     98     if (US1 != 0x110000)
     99       Result.UnicodeScalars.push_back(US1);
    100     if (US2 != 0x110000)
    101       Result.UnicodeScalars.push_back(US2);
    102     if (US3 != 0x110000)
    103       Result.UnicodeScalars.push_back(US3);
    104     if (US4 != 0x110000)
    105       Result.UnicodeScalars.push_back(US4);
    106     if (US5 != 0x110000)
    107       Result.UnicodeScalars.push_back(US5);
    108     if (US6 != 0x110000)
    109       Result.UnicodeScalars.push_back(US6);
    110     if (US7 != 0x110000)
    111       Result.UnicodeScalars.push_back(US7);
    112     return Result;
    113   }
    114 };
    115 
    116 std::pair<ConversionResult, std::vector<unsigned>>
    117 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
    118   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
    119 
    120   const UTF8 *SourceNext = SourceStart;
    121   std::vector<UTF32> Decoded(S.size(), 0);
    122   UTF32 *TargetStart = Decoded.data();
    123 
    124   auto ErrorCode =
    125       ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
    126                          Decoded.data() + Decoded.size(), lenientConversion);
    127 
    128   Decoded.resize(TargetStart - Decoded.data());
    129 
    130   return std::make_pair(ErrorCode, Decoded);
    131 }
    132 
    133 std::pair<ConversionResult, std::vector<unsigned>>
    134 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
    135   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
    136 
    137   const UTF8 *SourceNext = SourceStart;
    138   std::vector<UTF32> Decoded(S.size(), 0);
    139   UTF32 *TargetStart = Decoded.data();
    140 
    141   auto ErrorCode = ConvertUTF8toUTF32Partial(
    142       &SourceNext, SourceStart + S.size(), &TargetStart,
    143       Decoded.data() + Decoded.size(), lenientConversion);
    144 
    145   Decoded.resize(TargetStart - Decoded.data());
    146 
    147   return std::make_pair(ErrorCode, Decoded);
    148 }
    149 
    150 ::testing::AssertionResult
    151 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
    152                                  StringRef S, bool Partial = false) {
    153   ConversionResult ErrorCode;
    154   std::vector<unsigned> Decoded;
    155   if (!Partial)
    156     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
    157   else
    158     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
    159 
    160   if (Expected.ErrorCode != ErrorCode)
    161     return ::testing::AssertionFailure() << "Expected error code "
    162                                          << Expected.ErrorCode << ", actual "
    163                                          << ErrorCode;
    164 
    165   if (Expected.UnicodeScalars != Decoded)
    166     return ::testing::AssertionFailure()
    167            << "Expected lenient decoded result:\n"
    168            << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
    169            << "Actual result:\n" << ::testing::PrintToString(Decoded);
    170 
    171   return ::testing::AssertionSuccess();
    172 }
    173 
    174 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
    175 
    176   //
    177   // 1-byte sequences
    178   //
    179 
    180   // U+0041 LATIN CAPITAL LETTER A
    181   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    182       ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
    183 
    184   //
    185   // 2-byte sequences
    186   //
    187 
    188   // U+0283 LATIN SMALL LETTER ESH
    189   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    190       ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
    191       "\xca\x83"));
    192 
    193   // U+03BA GREEK SMALL LETTER KAPPA
    194   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
    195   // U+03C3 GREEK SMALL LETTER SIGMA
    196   // U+03BC GREEK SMALL LETTER MU
    197   // U+03B5 GREEK SMALL LETTER EPSILON
    198   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    199       ConvertUTFResultContainer(conversionOK)
    200           .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
    201       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
    202 
    203   //
    204   // 3-byte sequences
    205   //
    206 
    207   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
    208   // U+6587 CJK UNIFIED IDEOGRAPH-6587
    209   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    210       ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
    211       "\xe4\xbe\x8b\xe6\x96\x87"));
    212 
    213   // U+D55C HANGUL SYLLABLE HAN
    214   // U+AE00 HANGUL SYLLABLE GEUL
    215   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    216       ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
    217       "\xed\x95\x9c\xea\xb8\x80"));
    218 
    219   // U+1112 HANGUL CHOSEONG HIEUH
    220   // U+1161 HANGUL JUNGSEONG A
    221   // U+11AB HANGUL JONGSEONG NIEUN
    222   // U+1100 HANGUL CHOSEONG KIYEOK
    223   // U+1173 HANGUL JUNGSEONG EU
    224   // U+11AF HANGUL JONGSEONG RIEUL
    225   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    226       ConvertUTFResultContainer(conversionOK)
    227           .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
    228       "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
    229       "\xe1\x86\xaf"));
    230 
    231   //
    232   // 4-byte sequences
    233   //
    234 
    235   // U+E0100 VARIATION SELECTOR-17
    236   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    237       ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
    238       "\xf3\xa0\x84\x80"));
    239 
    240   //
    241   // First possible sequence of a certain length
    242   //
    243 
    244   // U+0000 NULL
    245   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    246       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
    247       StringRef("\x00", 1)));
    248 
    249   // U+0080 PADDING CHARACTER
    250   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    251       ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
    252       "\xc2\x80"));
    253 
    254   // U+0800 SAMARITAN LETTER ALAF
    255   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    256       ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
    257       "\xe0\xa0\x80"));
    258 
    259   // U+10000 LINEAR B SYLLABLE B008 A
    260   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    261       ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
    262       "\xf0\x90\x80\x80"));
    263 
    264   // U+200000 (invalid)
    265   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    266       ConvertUTFResultContainer(sourceIllegal)
    267           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    268       "\xf8\x88\x80\x80\x80"));
    269 
    270   // U+4000000 (invalid)
    271   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    272       ConvertUTFResultContainer(sourceIllegal)
    273           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    274       "\xfc\x84\x80\x80\x80\x80"));
    275 
    276   //
    277   // Last possible sequence of a certain length
    278   //
    279 
    280   // U+007F DELETE
    281   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    282       ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
    283 
    284   // U+07FF (unassigned)
    285   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    286       ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
    287       "\xdf\xbf"));
    288 
    289   // U+FFFF (noncharacter)
    290   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    291       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
    292       "\xef\xbf\xbf"));
    293 
    294   // U+1FFFFF (invalid)
    295   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    296       ConvertUTFResultContainer(sourceIllegal)
    297           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    298       "\xf7\xbf\xbf\xbf"));
    299 
    300   // U+3FFFFFF (invalid)
    301   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    302       ConvertUTFResultContainer(sourceIllegal)
    303           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    304       "\xfb\xbf\xbf\xbf\xbf"));
    305 
    306   // U+7FFFFFFF (invalid)
    307   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    308       ConvertUTFResultContainer(sourceIllegal)
    309           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    310       "\xfd\xbf\xbf\xbf\xbf\xbf"));
    311 
    312   //
    313   // Other boundary conditions
    314   //
    315 
    316   // U+D7FF (unassigned)
    317   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    318       ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
    319       "\xed\x9f\xbf"));
    320 
    321   // U+E000 (private use)
    322   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    323       ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
    324       "\xee\x80\x80"));
    325 
    326   // U+FFFD REPLACEMENT CHARACTER
    327   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    328       ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
    329       "\xef\xbf\xbd"));
    330 
    331   // U+10FFFF (noncharacter)
    332   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    333       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
    334       "\xf4\x8f\xbf\xbf"));
    335 
    336   // U+110000 (invalid)
    337   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    338       ConvertUTFResultContainer(sourceIllegal)
    339           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    340       "\xf4\x90\x80\x80"));
    341 
    342   //
    343   // Unexpected continuation bytes
    344   //
    345 
    346   // A sequence of unexpected continuation bytes that don't follow a first
    347   // byte, every byte is a maximal subpart.
    348 
    349   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    350       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
    351   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    352       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
    353   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    354       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    355       "\x80\x80"));
    356   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    357       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    358       "\x80\xbf"));
    359   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    360       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    361       "\xbf\x80"));
    362   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    363       ConvertUTFResultContainer(sourceIllegal)
    364           .withScalars(0xfffd, 0xfffd, 0xfffd),
    365       "\x80\xbf\x80"));
    366   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    367       ConvertUTFResultContainer(sourceIllegal)
    368           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    369       "\x80\xbf\x80\xbf"));
    370   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    371       ConvertUTFResultContainer(sourceIllegal)
    372           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    373       "\x80\xbf\x82\xbf\xaa"));
    374   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    375       ConvertUTFResultContainer(sourceIllegal)
    376           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    377       "\xaa\xb0\xbb\xbf\xaa\xa0"));
    378   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    379       ConvertUTFResultContainer(sourceIllegal)
    380           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    381       "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
    382 
    383   // All continuation bytes (0x80--0xbf).
    384   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    385       ConvertUTFResultContainer(sourceIllegal)
    386           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    387                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    388           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    389                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    390           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    391                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    392           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    393                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    394           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    395                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    396           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    397                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    398           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    399                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    400           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    401                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    402       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    403       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    404       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    405       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
    406 
    407   //
    408   // Lonely start bytes
    409   //
    410 
    411   // Start bytes of 2-byte sequences (0xc0--0xdf).
    412   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    413       ConvertUTFResultContainer(sourceIllegal)
    414           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    415                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    416           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    417                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    418           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    419                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    420           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    421                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    422       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    423       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
    424 
    425   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    426       ConvertUTFResultContainer(sourceIllegal)
    427           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    428                        0xfffd, 0x0020, 0xfffd, 0x0020)
    429           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    430                        0xfffd, 0x0020, 0xfffd, 0x0020)
    431           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    432                        0xfffd, 0x0020, 0xfffd, 0x0020)
    433           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    434                        0xfffd, 0x0020, 0xfffd, 0x0020)
    435           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    436                        0xfffd, 0x0020, 0xfffd, 0x0020)
    437           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    438                        0xfffd, 0x0020, 0xfffd, 0x0020)
    439           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    440                        0xfffd, 0x0020, 0xfffd, 0x0020)
    441           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    442                        0xfffd, 0x0020, 0xfffd, 0x0020),
    443       "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
    444       "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
    445       "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
    446       "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
    447 
    448   // Start bytes of 3-byte sequences (0xe0--0xef).
    449   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    450       ConvertUTFResultContainer(sourceIllegal)
    451           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    452                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
    453           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    454                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    455       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
    456 
    457   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    458       ConvertUTFResultContainer(sourceIllegal)
    459           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    460                        0xfffd, 0x0020, 0xfffd, 0x0020)
    461           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    462                        0xfffd, 0x0020, 0xfffd, 0x0020)
    463           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    464                        0xfffd, 0x0020, 0xfffd, 0x0020)
    465           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    466                        0xfffd, 0x0020, 0xfffd, 0x0020),
    467       "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
    468       "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
    469 
    470   // Start bytes of 4-byte sequences (0xf0--0xf7).
    471   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    472       ConvertUTFResultContainer(sourceIllegal)
    473           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
    474                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
    475       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
    476 
    477   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    478       ConvertUTFResultContainer(sourceIllegal)
    479           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    480                        0xfffd, 0x0020, 0xfffd, 0x0020)
    481           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    482                        0xfffd, 0x0020, 0xfffd, 0x0020),
    483       "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
    484 
    485   // Start bytes of 5-byte sequences (0xf8--0xfb).
    486   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    487       ConvertUTFResultContainer(sourceIllegal)
    488           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    489       "\xf8\xf9\xfa\xfb"));
    490 
    491   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    492       ConvertUTFResultContainer(sourceIllegal)
    493           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    494                        0xfffd, 0x0020, 0xfffd, 0x0020),
    495       "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
    496 
    497   // Start bytes of 6-byte sequences (0xfc--0xfd).
    498   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    499       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    500       "\xfc\xfd"));
    501 
    502   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    503       ConvertUTFResultContainer(sourceIllegal)
    504           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
    505       "\xfc\x20\xfd\x20"));
    506 
    507   //
    508   // Other bytes (0xc0--0xc1, 0xfe--0xff).
    509   //
    510 
    511   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    512       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
    513   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    514       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
    515   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    516       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
    517   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    518       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
    519 
    520   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    521       ConvertUTFResultContainer(sourceIllegal)
    522           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    523       "\xc0\xc1\xfe\xff"));
    524 
    525   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    526       ConvertUTFResultContainer(sourceIllegal)
    527           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    528       "\xfe\xfe\xff\xff"));
    529 
    530   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    531       ConvertUTFResultContainer(sourceIllegal)
    532           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    533       "\xfe\x80\x80\x80\x80\x80"));
    534 
    535   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    536       ConvertUTFResultContainer(sourceIllegal)
    537           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    538       "\xff\x80\x80\x80\x80\x80"));
    539 
    540   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    541       ConvertUTFResultContainer(sourceIllegal)
    542           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
    543                        0xfffd, 0x0020, 0xfffd, 0x0020),
    544       "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
    545 
    546   //
    547   // Sequences with one continuation byte missing
    548   //
    549 
    550   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    551       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
    552   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    553       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
    554   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    555       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    556       "\xe0\xa0"));
    557   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    558       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    559       "\xe0\xbf"));
    560   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    561       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    562       "\xe1\x80"));
    563   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    564       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    565       "\xec\xbf"));
    566   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    567       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    568       "\xed\x80"));
    569   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    570       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    571       "\xed\x9f"));
    572   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    573       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    574       "\xee\x80"));
    575   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    576       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    577       "\xef\xbf"));
    578   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    579       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    580       "\xf0\x90\x80"));
    581   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    582       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    583       "\xf0\xbf\xbf"));
    584   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    585       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    586       "\xf1\x80\x80"));
    587   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    588       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    589       "\xf3\xbf\xbf"));
    590   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    591       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    592       "\xf4\x80\x80"));
    593   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    594       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    595       "\xf4\x8f\xbf"));
    596 
    597   // Overlong sequences with one trailing byte missing.
    598   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    599       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    600       "\xc0"));
    601   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    602       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    603       "\xc1"));
    604   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    605       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    606       "\xe0\x80"));
    607   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    608       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    609       "\xe0\x9f"));
    610   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    611       ConvertUTFResultContainer(sourceIllegal)
    612           .withScalars(0xfffd, 0xfffd, 0xfffd),
    613       "\xf0\x80\x80"));
    614   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    615       ConvertUTFResultContainer(sourceIllegal)
    616           .withScalars(0xfffd, 0xfffd, 0xfffd),
    617       "\xf0\x8f\x80"));
    618   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    619       ConvertUTFResultContainer(sourceIllegal)
    620           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    621       "\xf8\x80\x80\x80"));
    622   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    623       ConvertUTFResultContainer(sourceIllegal)
    624           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    625       "\xfc\x80\x80\x80\x80"));
    626 
    627   // Sequences that represent surrogates with one trailing byte missing.
    628   // High surrogates
    629   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    630       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    631       "\xed\xa0"));
    632   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    633       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    634       "\xed\xac"));
    635   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    636       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    637       "\xed\xaf"));
    638   // Low surrogates
    639   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    640       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    641       "\xed\xb0"));
    642   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    643       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    644       "\xed\xb4"));
    645   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    646       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    647       "\xed\xbf"));
    648 
    649   // Ill-formed 4-byte sequences.
    650   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    651   // U+1100xx (invalid)
    652   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    653       ConvertUTFResultContainer(sourceIllegal)
    654           .withScalars(0xfffd, 0xfffd, 0xfffd),
    655       "\xf4\x90\x80"));
    656   // U+13FBxx (invalid)
    657   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    658       ConvertUTFResultContainer(sourceIllegal)
    659           .withScalars(0xfffd, 0xfffd, 0xfffd),
    660       "\xf4\xbf\xbf"));
    661   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    662       ConvertUTFResultContainer(sourceIllegal)
    663           .withScalars(0xfffd, 0xfffd, 0xfffd),
    664       "\xf5\x80\x80"));
    665   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    666       ConvertUTFResultContainer(sourceIllegal)
    667           .withScalars(0xfffd, 0xfffd, 0xfffd),
    668       "\xf6\x80\x80"));
    669   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    670       ConvertUTFResultContainer(sourceIllegal)
    671           .withScalars(0xfffd, 0xfffd, 0xfffd),
    672       "\xf7\x80\x80"));
    673   // U+1FFBxx (invalid)
    674   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    675       ConvertUTFResultContainer(sourceIllegal)
    676           .withScalars(0xfffd, 0xfffd, 0xfffd),
    677       "\xf7\xbf\xbf"));
    678 
    679   // Ill-formed 5-byte sequences.
    680   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    681   // U+2000xx (invalid)
    682   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    683       ConvertUTFResultContainer(sourceIllegal)
    684           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    685       "\xf8\x88\x80\x80"));
    686   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    687       ConvertUTFResultContainer(sourceIllegal)
    688           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    689       "\xf8\xbf\xbf\xbf"));
    690   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    691       ConvertUTFResultContainer(sourceIllegal)
    692           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    693       "\xf9\x80\x80\x80"));
    694   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    695       ConvertUTFResultContainer(sourceIllegal)
    696           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    697       "\xfa\x80\x80\x80"));
    698   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    699       ConvertUTFResultContainer(sourceIllegal)
    700           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    701       "\xfb\x80\x80\x80"));
    702   // U+3FFFFxx (invalid)
    703   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    704       ConvertUTFResultContainer(sourceIllegal)
    705           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    706       "\xfb\xbf\xbf\xbf"));
    707 
    708   // Ill-formed 6-byte sequences.
    709   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    710   // U+40000xx (invalid)
    711   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    712       ConvertUTFResultContainer(sourceIllegal)
    713           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    714       "\xfc\x84\x80\x80\x80"));
    715   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    716       ConvertUTFResultContainer(sourceIllegal)
    717           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    718       "\xfc\xbf\xbf\xbf\xbf"));
    719   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    720       ConvertUTFResultContainer(sourceIllegal)
    721           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    722       "\xfd\x80\x80\x80\x80"));
    723   // U+7FFFFFxx (invalid)
    724   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    725       ConvertUTFResultContainer(sourceIllegal)
    726           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    727       "\xfd\xbf\xbf\xbf\xbf"));
    728 
    729   //
    730   // Sequences with two continuation bytes missing
    731   //
    732 
    733   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    734       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    735       "\xf0\x90"));
    736   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    737       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    738       "\xf0\xbf"));
    739   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    740       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    741       "\xf1\x80"));
    742   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    743       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    744       "\xf3\xbf"));
    745   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    746       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    747       "\xf4\x80"));
    748   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    749       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
    750       "\xf4\x8f"));
    751 
    752   // Overlong sequences with two trailing bytes missing.
    753   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    754       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
    755   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    756       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    757       "\xf0\x80"));
    758   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    759       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    760       "\xf0\x8f"));
    761   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    762       ConvertUTFResultContainer(sourceIllegal)
    763           .withScalars(0xfffd, 0xfffd, 0xfffd),
    764       "\xf8\x80\x80"));
    765   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    766       ConvertUTFResultContainer(sourceIllegal)
    767           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    768       "\xfc\x80\x80\x80"));
    769 
    770   // Sequences that represent surrogates with two trailing bytes missing.
    771   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    772       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
    773 
    774   // Ill-formed 4-byte sequences.
    775   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    776   // U+110yxx (invalid)
    777   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    778       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    779       "\xf4\x90"));
    780   // U+13Fyxx (invalid)
    781   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    782       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    783       "\xf4\xbf"));
    784   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    785       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    786       "\xf5\x80"));
    787   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    788       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    789       "\xf6\x80"));
    790   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    791       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    792       "\xf7\x80"));
    793   // U+1FFyxx (invalid)
    794   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    795       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    796       "\xf7\xbf"));
    797 
    798   // Ill-formed 5-byte sequences.
    799   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    800   // U+200yxx (invalid)
    801   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    802       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    803       "\xf8\x88\x80"));
    804   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    805       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    806       "\xf8\xbf\xbf"));
    807   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    808       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    809       "\xf9\x80\x80"));
    810   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    811       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    812       "\xfa\x80\x80"));
    813   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    814       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    815       "\xfb\x80\x80"));
    816   // U+3FFFyxx (invalid)
    817   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    818       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    819       "\xfb\xbf\xbf"));
    820 
    821   // Ill-formed 6-byte sequences.
    822   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    823   // U+4000yxx (invalid)
    824   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    825       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    826       "\xfc\x84\x80\x80"));
    827   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    828       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    829       "\xfc\xbf\xbf\xbf"));
    830   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    831       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    832       "\xfd\x80\x80\x80"));
    833   // U+7FFFFyxx (invalid)
    834   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    835       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
    836       "\xfd\xbf\xbf\xbf"));
    837 
    838   //
    839   // Sequences with three continuation bytes missing
    840   //
    841 
    842   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    843       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
    844   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    845       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
    846   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    847       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
    848   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    849       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
    850   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    851       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
    852 
    853   // Broken overlong sequences.
    854   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    855       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
    856   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    857       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    858       "\xf8\x80"));
    859   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    860       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    861       "\xfc\x80\x80"));
    862 
    863   // Ill-formed 4-byte sequences.
    864   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    865   // U+14yyxx (invalid)
    866   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    867       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
    868   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    869       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
    870   // U+1Cyyxx (invalid)
    871   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    872       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
    873 
    874   // Ill-formed 5-byte sequences.
    875   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    876   // U+20yyxx (invalid)
    877   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    878       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    879       "\xf8\x88"));
    880   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    881       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    882       "\xf8\xbf"));
    883   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    884       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    885       "\xf9\x80"));
    886   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    887       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    888       "\xfa\x80"));
    889   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    890       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    891       "\xfb\x80"));
    892   // U+3FCyyxx (invalid)
    893   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    894       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    895       "\xfb\xbf"));
    896 
    897   // Ill-formed 6-byte sequences.
    898   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    899   // U+400yyxx (invalid)
    900   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    901       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    902       "\xfc\x84\x80"));
    903   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    904       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    905       "\xfc\xbf\xbf"));
    906   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    907       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    908       "\xfd\x80\x80"));
    909   // U+7FFCyyxx (invalid)
    910   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    911       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
    912       "\xfd\xbf\xbf"));
    913 
    914   //
    915   // Sequences with four continuation bytes missing
    916   //
    917 
    918   // Ill-formed 5-byte sequences.
    919   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    920   // U+uzyyxx (invalid)
    921   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    922       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
    923   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    924       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
    925   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    926       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
    927   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    928       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
    929   // U+3zyyxx (invalid)
    930   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    931       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
    932 
    933   // Broken overlong sequences.
    934   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    935       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
    936   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    937       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    938       "\xfc\x80"));
    939 
    940   // Ill-formed 6-byte sequences.
    941   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    942   // U+uzzyyxx (invalid)
    943   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    944       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    945       "\xfc\x84"));
    946   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    947       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    948       "\xfc\xbf"));
    949   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    950       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    951       "\xfd\x80"));
    952   // U+7Fzzyyxx (invalid)
    953   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    954       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
    955       "\xfd\xbf"));
    956 
    957   //
    958   // Sequences with five continuation bytes missing
    959   //
    960 
    961   // Ill-formed 6-byte sequences.
    962   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
    963   // U+uzzyyxx (invalid)
    964   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    965       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
    966   // U+uuzzyyxx (invalid)
    967   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    968       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
    969 
    970   //
    971   // Consecutive sequences with trailing bytes missing
    972   //
    973 
    974   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    975       ConvertUTFResultContainer(sourceIllegal)
    976           .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
    977           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
    978           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
    979           .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
    980           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
    981           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
    982       "\xc0" "\xe0\x80" "\xf0\x80\x80"
    983       "\xf8\x80\x80\x80"
    984       "\xfc\x80\x80\x80\x80"
    985       "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
    986       "\xfb\xbf\xbf\xbf"
    987       "\xfd\xbf\xbf\xbf\xbf"));
    988 
    989   //
    990   // Overlong UTF-8 sequences
    991   //
    992 
    993   // U+002F SOLIDUS
    994   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    995       ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
    996 
    997   // Overlong sequences of the above.
    998   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
    999       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1000       "\xc0\xaf"));
   1001   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1002       ConvertUTFResultContainer(sourceIllegal)
   1003           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1004       "\xe0\x80\xaf"));
   1005   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1006       ConvertUTFResultContainer(sourceIllegal)
   1007           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1008       "\xf0\x80\x80\xaf"));
   1009   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1010       ConvertUTFResultContainer(sourceIllegal)
   1011           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1012       "\xf8\x80\x80\x80\xaf"));
   1013   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1014       ConvertUTFResultContainer(sourceIllegal)
   1015           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1016       "\xfc\x80\x80\x80\x80\xaf"));
   1017 
   1018   // U+0000 NULL
   1019   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1020       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
   1021       StringRef("\x00", 1)));
   1022 
   1023   // Overlong sequences of the above.
   1024   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1025       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1026       "\xc0\x80"));
   1027   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1028       ConvertUTFResultContainer(sourceIllegal)
   1029           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1030       "\xe0\x80\x80"));
   1031   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1032       ConvertUTFResultContainer(sourceIllegal)
   1033           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1034       "\xf0\x80\x80\x80"));
   1035   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1036       ConvertUTFResultContainer(sourceIllegal)
   1037           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1038       "\xf8\x80\x80\x80\x80"));
   1039   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1040       ConvertUTFResultContainer(sourceIllegal)
   1041           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1042       "\xfc\x80\x80\x80\x80\x80"));
   1043 
   1044   // Other overlong sequences.
   1045   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1046       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1047       "\xc0\xbf"));
   1048   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1049       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1050       "\xc1\x80"));
   1051   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1052       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
   1053       "\xc1\xbf"));
   1054   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1055       ConvertUTFResultContainer(sourceIllegal)
   1056           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1057       "\xe0\x9f\xbf"));
   1058   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1059       ConvertUTFResultContainer(sourceIllegal)
   1060           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1061       "\xed\xa0\x80"));
   1062   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1063       ConvertUTFResultContainer(sourceIllegal)
   1064           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1065       "\xed\xbf\xbf"));
   1066   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1067       ConvertUTFResultContainer(sourceIllegal)
   1068           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1069       "\xf0\x8f\x80\x80"));
   1070   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1071       ConvertUTFResultContainer(sourceIllegal)
   1072           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1073       "\xf0\x8f\xbf\xbf"));
   1074   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1075       ConvertUTFResultContainer(sourceIllegal)
   1076           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1077       "\xf8\x87\xbf\xbf\xbf"));
   1078   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1079       ConvertUTFResultContainer(sourceIllegal)
   1080           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1081       "\xfc\x83\xbf\xbf\xbf\xbf"));
   1082 
   1083   //
   1084   // Isolated surrogates
   1085   //
   1086 
   1087   // Unicode 6.3.0:
   1088   //
   1089   //    D71.  High-surrogate code point: A Unicode code point in the range
   1090   //    U+D800 to U+DBFF.
   1091   //
   1092   //    D73.  Low-surrogate code point: A Unicode code point in the range
   1093   //    U+DC00 to U+DFFF.
   1094 
   1095   // Note: U+E0100 is <DB40 DD00> in UTF16.
   1096 
   1097   // High surrogates
   1098 
   1099   // U+D800
   1100   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1101       ConvertUTFResultContainer(sourceIllegal)
   1102           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1103       "\xed\xa0\x80"));
   1104 
   1105   // U+DB40
   1106   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1107       ConvertUTFResultContainer(sourceIllegal)
   1108           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1109       "\xed\xac\xa0"));
   1110 
   1111   // U+DBFF
   1112   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1113       ConvertUTFResultContainer(sourceIllegal)
   1114           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1115       "\xed\xaf\xbf"));
   1116 
   1117   // Low surrogates
   1118 
   1119   // U+DC00
   1120   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1121       ConvertUTFResultContainer(sourceIllegal)
   1122           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1123       "\xed\xb0\x80"));
   1124 
   1125   // U+DD00
   1126   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1127       ConvertUTFResultContainer(sourceIllegal)
   1128           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1129       "\xed\xb4\x80"));
   1130 
   1131   // U+DFFF
   1132   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1133       ConvertUTFResultContainer(sourceIllegal)
   1134           .withScalars(0xfffd, 0xfffd, 0xfffd),
   1135       "\xed\xbf\xbf"));
   1136 
   1137   // Surrogate pairs
   1138 
   1139   // U+D800 U+DC00
   1140   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1141       ConvertUTFResultContainer(sourceIllegal)
   1142           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1143       "\xed\xa0\x80\xed\xb0\x80"));
   1144 
   1145   // U+D800 U+DD00
   1146   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1147       ConvertUTFResultContainer(sourceIllegal)
   1148           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1149       "\xed\xa0\x80\xed\xb4\x80"));
   1150 
   1151   // U+D800 U+DFFF
   1152   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1153       ConvertUTFResultContainer(sourceIllegal)
   1154           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1155       "\xed\xa0\x80\xed\xbf\xbf"));
   1156 
   1157   // U+DB40 U+DC00
   1158   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1159       ConvertUTFResultContainer(sourceIllegal)
   1160           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1161       "\xed\xac\xa0\xed\xb0\x80"));
   1162 
   1163   // U+DB40 U+DD00
   1164   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1165       ConvertUTFResultContainer(sourceIllegal)
   1166           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1167       "\xed\xac\xa0\xed\xb4\x80"));
   1168 
   1169   // U+DB40 U+DFFF
   1170   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1171       ConvertUTFResultContainer(sourceIllegal)
   1172           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1173       "\xed\xac\xa0\xed\xbf\xbf"));
   1174 
   1175   // U+DBFF U+DC00
   1176   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1177       ConvertUTFResultContainer(sourceIllegal)
   1178           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1179       "\xed\xaf\xbf\xed\xb0\x80"));
   1180 
   1181   // U+DBFF U+DD00
   1182   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1183       ConvertUTFResultContainer(sourceIllegal)
   1184           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1185       "\xed\xaf\xbf\xed\xb4\x80"));
   1186 
   1187   // U+DBFF U+DFFF
   1188   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1189       ConvertUTFResultContainer(sourceIllegal)
   1190           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
   1191       "\xed\xaf\xbf\xed\xbf\xbf"));
   1192 
   1193   //
   1194   // Noncharacters
   1195   //
   1196 
   1197   // Unicode 6.3.0:
   1198   //
   1199   //    D14.  Noncharacter: A code point that is permanently reserved for
   1200   //    internal use and that should never be interchanged. Noncharacters
   1201   //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
   1202   //    and the values U+FDD0..U+FDEF.
   1203 
   1204   // U+FFFE
   1205   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1206       ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
   1207       "\xef\xbf\xbe"));
   1208 
   1209   // U+FFFF
   1210   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1211       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
   1212       "\xef\xbf\xbf"));
   1213 
   1214   // U+1FFFE
   1215   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1216       ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
   1217       "\xf0\x9f\xbf\xbe"));
   1218 
   1219   // U+1FFFF
   1220   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1221       ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
   1222       "\xf0\x9f\xbf\xbf"));
   1223 
   1224   // U+2FFFE
   1225   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1226       ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
   1227       "\xf0\xaf\xbf\xbe"));
   1228 
   1229   // U+2FFFF
   1230   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1231       ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
   1232       "\xf0\xaf\xbf\xbf"));
   1233 
   1234   // U+3FFFE
   1235   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1236       ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
   1237       "\xf0\xbf\xbf\xbe"));
   1238 
   1239   // U+3FFFF
   1240   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1241       ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
   1242       "\xf0\xbf\xbf\xbf"));
   1243 
   1244   // U+4FFFE
   1245   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1246       ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
   1247       "\xf1\x8f\xbf\xbe"));
   1248 
   1249   // U+4FFFF
   1250   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1251       ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
   1252       "\xf1\x8f\xbf\xbf"));
   1253 
   1254   // U+5FFFE
   1255   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1256       ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
   1257       "\xf1\x9f\xbf\xbe"));
   1258 
   1259   // U+5FFFF
   1260   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1261       ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
   1262       "\xf1\x9f\xbf\xbf"));
   1263 
   1264   // U+6FFFE
   1265   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1266       ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
   1267       "\xf1\xaf\xbf\xbe"));
   1268 
   1269   // U+6FFFF
   1270   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1271       ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
   1272       "\xf1\xaf\xbf\xbf"));
   1273 
   1274   // U+7FFFE
   1275   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1276       ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
   1277       "\xf1\xbf\xbf\xbe"));
   1278 
   1279   // U+7FFFF
   1280   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1281       ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
   1282       "\xf1\xbf\xbf\xbf"));
   1283 
   1284   // U+8FFFE
   1285   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1286       ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
   1287       "\xf2\x8f\xbf\xbe"));
   1288 
   1289   // U+8FFFF
   1290   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1291       ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
   1292       "\xf2\x8f\xbf\xbf"));
   1293 
   1294   // U+9FFFE
   1295   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1296       ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
   1297       "\xf2\x9f\xbf\xbe"));
   1298 
   1299   // U+9FFFF
   1300   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1301       ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
   1302       "\xf2\x9f\xbf\xbf"));
   1303 
   1304   // U+AFFFE
   1305   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1306       ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
   1307       "\xf2\xaf\xbf\xbe"));
   1308 
   1309   // U+AFFFF
   1310   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1311       ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
   1312       "\xf2\xaf\xbf\xbf"));
   1313 
   1314   // U+BFFFE
   1315   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1316       ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
   1317       "\xf2\xbf\xbf\xbe"));
   1318 
   1319   // U+BFFFF
   1320   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1321       ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
   1322       "\xf2\xbf\xbf\xbf"));
   1323 
   1324   // U+CFFFE
   1325   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1326       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
   1327       "\xf3\x8f\xbf\xbe"));
   1328 
   1329   // U+CFFFF
   1330   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1331       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
   1332       "\xf3\x8f\xbf\xbf"));
   1333 
   1334   // U+DFFFE
   1335   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1336       ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
   1337       "\xf3\x9f\xbf\xbe"));
   1338 
   1339   // U+DFFFF
   1340   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1341       ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
   1342       "\xf3\x9f\xbf\xbf"));
   1343 
   1344   // U+EFFFE
   1345   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1346       ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
   1347       "\xf3\xaf\xbf\xbe"));
   1348 
   1349   // U+EFFFF
   1350   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1351       ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
   1352       "\xf3\xaf\xbf\xbf"));
   1353 
   1354   // U+FFFFE
   1355   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1356       ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
   1357       "\xf3\xbf\xbf\xbe"));
   1358 
   1359   // U+FFFFF
   1360   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1361       ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
   1362       "\xf3\xbf\xbf\xbf"));
   1363 
   1364   // U+10FFFE
   1365   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1366       ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
   1367       "\xf4\x8f\xbf\xbe"));
   1368 
   1369   // U+10FFFF
   1370   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1371       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
   1372       "\xf4\x8f\xbf\xbf"));
   1373 
   1374   // U+FDD0
   1375   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1376       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
   1377       "\xef\xb7\x90"));
   1378 
   1379   // U+FDD1
   1380   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1381       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
   1382       "\xef\xb7\x91"));
   1383 
   1384   // U+FDD2
   1385   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1386       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
   1387       "\xef\xb7\x92"));
   1388 
   1389   // U+FDD3
   1390   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1391       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
   1392       "\xef\xb7\x93"));
   1393 
   1394   // U+FDD4
   1395   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1396       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
   1397       "\xef\xb7\x94"));
   1398 
   1399   // U+FDD5
   1400   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1401       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
   1402       "\xef\xb7\x95"));
   1403 
   1404   // U+FDD6
   1405   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1406       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
   1407       "\xef\xb7\x96"));
   1408 
   1409   // U+FDD7
   1410   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1411       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
   1412       "\xef\xb7\x97"));
   1413 
   1414   // U+FDD8
   1415   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1416       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
   1417       "\xef\xb7\x98"));
   1418 
   1419   // U+FDD9
   1420   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1421       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
   1422       "\xef\xb7\x99"));
   1423 
   1424   // U+FDDA
   1425   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1426       ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
   1427       "\xef\xb7\x9a"));
   1428 
   1429   // U+FDDB
   1430   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1431       ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
   1432       "\xef\xb7\x9b"));
   1433 
   1434   // U+FDDC
   1435   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1436       ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
   1437       "\xef\xb7\x9c"));
   1438 
   1439   // U+FDDD
   1440   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1441       ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
   1442       "\xef\xb7\x9d"));
   1443 
   1444   // U+FDDE
   1445   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1446       ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
   1447       "\xef\xb7\x9e"));
   1448 
   1449   // U+FDDF
   1450   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1451       ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
   1452       "\xef\xb7\x9f"));
   1453 
   1454   // U+FDE0
   1455   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1456       ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
   1457       "\xef\xb7\xa0"));
   1458 
   1459   // U+FDE1
   1460   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1461       ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
   1462       "\xef\xb7\xa1"));
   1463 
   1464   // U+FDE2
   1465   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1466       ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
   1467       "\xef\xb7\xa2"));
   1468 
   1469   // U+FDE3
   1470   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1471       ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
   1472       "\xef\xb7\xa3"));
   1473 
   1474   // U+FDE4
   1475   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1476       ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
   1477       "\xef\xb7\xa4"));
   1478 
   1479   // U+FDE5
   1480   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1481       ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
   1482       "\xef\xb7\xa5"));
   1483 
   1484   // U+FDE6
   1485   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1486       ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
   1487       "\xef\xb7\xa6"));
   1488 
   1489   // U+FDE7
   1490   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1491       ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
   1492       "\xef\xb7\xa7"));
   1493 
   1494   // U+FDE8
   1495   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1496       ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
   1497       "\xef\xb7\xa8"));
   1498 
   1499   // U+FDE9
   1500   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1501       ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
   1502       "\xef\xb7\xa9"));
   1503 
   1504   // U+FDEA
   1505   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1506       ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
   1507       "\xef\xb7\xaa"));
   1508 
   1509   // U+FDEB
   1510   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1511       ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
   1512       "\xef\xb7\xab"));
   1513 
   1514   // U+FDEC
   1515   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1516       ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
   1517       "\xef\xb7\xac"));
   1518 
   1519   // U+FDED
   1520   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1521       ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
   1522       "\xef\xb7\xad"));
   1523 
   1524   // U+FDEE
   1525   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1526       ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
   1527       "\xef\xb7\xae"));
   1528 
   1529   // U+FDEF
   1530   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1531       ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
   1532       "\xef\xb7\xaf"));
   1533 
   1534   // U+FDF0
   1535   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1536       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
   1537       "\xef\xb7\xb0"));
   1538 
   1539   // U+FDF1
   1540   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1541       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
   1542       "\xef\xb7\xb1"));
   1543 
   1544   // U+FDF2
   1545   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1546       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
   1547       "\xef\xb7\xb2"));
   1548 
   1549   // U+FDF3
   1550   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1551       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
   1552       "\xef\xb7\xb3"));
   1553 
   1554   // U+FDF4
   1555   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1556       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
   1557       "\xef\xb7\xb4"));
   1558 
   1559   // U+FDF5
   1560   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1561       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
   1562       "\xef\xb7\xb5"));
   1563 
   1564   // U+FDF6
   1565   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1566       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
   1567       "\xef\xb7\xb6"));
   1568 
   1569   // U+FDF7
   1570   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1571       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
   1572       "\xef\xb7\xb7"));
   1573 
   1574   // U+FDF8
   1575   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1576       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
   1577       "\xef\xb7\xb8"));
   1578 
   1579   // U+FDF9
   1580   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1581       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
   1582       "\xef\xb7\xb9"));
   1583 
   1584   // U+FDFA
   1585   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1586       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
   1587       "\xef\xb7\xba"));
   1588 
   1589   // U+FDFB
   1590   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1591       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
   1592       "\xef\xb7\xbb"));
   1593 
   1594   // U+FDFC
   1595   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1596       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
   1597       "\xef\xb7\xbc"));
   1598 
   1599   // U+FDFD
   1600   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1601       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
   1602       "\xef\xb7\xbd"));
   1603 
   1604   // U+FDFE
   1605   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1606       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
   1607       "\xef\xb7\xbe"));
   1608 
   1609   // U+FDFF
   1610   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1611       ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
   1612       "\xef\xb7\xbf"));
   1613 }
   1614 
   1615 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
   1616   // U+0041 LATIN CAPITAL LETTER A
   1617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1618       ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
   1619       "\x41", true));
   1620 
   1621   //
   1622   // Sequences with one continuation byte missing
   1623   //
   1624 
   1625   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1626       ConvertUTFResultContainer(sourceExhausted),
   1627       "\xc2", true));
   1628   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1629       ConvertUTFResultContainer(sourceExhausted),
   1630       "\xdf", true));
   1631   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1632       ConvertUTFResultContainer(sourceExhausted),
   1633       "\xe0\xa0", true));
   1634   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1635       ConvertUTFResultContainer(sourceExhausted),
   1636       "\xe0\xbf", true));
   1637   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1638       ConvertUTFResultContainer(sourceExhausted),
   1639       "\xe1\x80", true));
   1640   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1641       ConvertUTFResultContainer(sourceExhausted),
   1642       "\xec\xbf", true));
   1643   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1644       ConvertUTFResultContainer(sourceExhausted),
   1645       "\xed\x80", true));
   1646   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1647       ConvertUTFResultContainer(sourceExhausted),
   1648       "\xed\x9f", true));
   1649   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1650       ConvertUTFResultContainer(sourceExhausted),
   1651       "\xee\x80", true));
   1652   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1653       ConvertUTFResultContainer(sourceExhausted),
   1654       "\xef\xbf", true));
   1655   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1656       ConvertUTFResultContainer(sourceExhausted),
   1657       "\xf0\x90\x80", true));
   1658   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1659       ConvertUTFResultContainer(sourceExhausted),
   1660       "\xf0\xbf\xbf", true));
   1661   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1662       ConvertUTFResultContainer(sourceExhausted),
   1663       "\xf1\x80\x80", true));
   1664   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1665       ConvertUTFResultContainer(sourceExhausted),
   1666       "\xf3\xbf\xbf", true));
   1667   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1668       ConvertUTFResultContainer(sourceExhausted),
   1669       "\xf4\x80\x80", true));
   1670   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1671       ConvertUTFResultContainer(sourceExhausted),
   1672       "\xf4\x8f\xbf", true));
   1673 
   1674   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
   1675       ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
   1676       "\x41\xc2", true));
   1677 }
   1678 
   1679