Home | History | Annotate | Download | only in AST
      1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "clang/AST/CommentLexer.h"
     11 #include "clang/AST/CommentCommandTraits.h"
     12 #include "clang/Basic/CommentOptions.h"
     13 #include "clang/Basic/Diagnostic.h"
     14 #include "clang/Basic/DiagnosticOptions.h"
     15 #include "clang/Basic/FileManager.h"
     16 #include "clang/Basic/SourceManager.h"
     17 #include "llvm/ADT/STLExtras.h"
     18 #include "gtest/gtest.h"
     19 #include <vector>
     20 
     21 using namespace llvm;
     22 using namespace clang;
     23 
     24 namespace clang {
     25 namespace comments {
     26 
     27 namespace {
     28 class CommentLexerTest : public ::testing::Test {
     29 protected:
     30   CommentLexerTest()
     31     : FileMgr(FileMgrOpts),
     32       DiagID(new DiagnosticIDs()),
     33       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
     34       SourceMgr(Diags, FileMgr),
     35       Traits(Allocator, CommentOptions()) {
     36   }
     37 
     38   FileSystemOptions FileMgrOpts;
     39   FileManager FileMgr;
     40   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
     41   DiagnosticsEngine Diags;
     42   SourceManager SourceMgr;
     43   llvm::BumpPtrAllocator Allocator;
     44   CommandTraits Traits;
     45 
     46   void lexString(const char *Source, std::vector<Token> &Toks);
     47 
     48   StringRef getCommandName(const Token &Tok) {
     49     return Traits.getCommandInfo(Tok.getCommandID())->Name;
     50   }
     51 
     52   StringRef getVerbatimBlockName(const Token &Tok) {
     53     return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
     54   }
     55 
     56   StringRef getVerbatimLineName(const Token &Tok) {
     57     return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
     58   }
     59 };
     60 
     61 void CommentLexerTest::lexString(const char *Source,
     62                                  std::vector<Token> &Toks) {
     63   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Source);
     64   FileID File = SourceMgr.createFileID(std::move(Buf));
     65   SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
     66 
     67   Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
     68 
     69   while (1) {
     70     Token Tok;
     71     L.lex(Tok);
     72     if (Tok.is(tok::eof))
     73       break;
     74     Toks.push_back(Tok);
     75   }
     76 }
     77 
     78 } // unnamed namespace
     79 
     80 // Empty source range should be handled.
     81 TEST_F(CommentLexerTest, Basic1) {
     82   const char *Source = "";
     83   std::vector<Token> Toks;
     84 
     85   lexString(Source, Toks);
     86 
     87   ASSERT_EQ(0U, Toks.size());
     88 }
     89 
     90 // Empty comments should be handled.
     91 TEST_F(CommentLexerTest, Basic2) {
     92   const char *Sources[] = {
     93     "//", "///", "//!", "///<", "//!<"
     94   };
     95   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     96     std::vector<Token> Toks;
     97 
     98     lexString(Sources[i], Toks);
     99 
    100     ASSERT_EQ(1U, Toks.size());
    101 
    102     ASSERT_EQ(tok::newline, Toks[0].getKind());
    103   }
    104 }
    105 
    106 // Empty comments should be handled.
    107 TEST_F(CommentLexerTest, Basic3) {
    108   const char *Sources[] = {
    109     "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
    110   };
    111   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    112     std::vector<Token> Toks;
    113 
    114     lexString(Sources[i], Toks);
    115 
    116     ASSERT_EQ(2U, Toks.size());
    117 
    118     ASSERT_EQ(tok::newline, Toks[0].getKind());
    119     ASSERT_EQ(tok::newline, Toks[1].getKind());
    120   }
    121 }
    122 
    123 // Single comment with plain text.
    124 TEST_F(CommentLexerTest, Basic4) {
    125   const char *Sources[] = {
    126     "// Meow",   "/// Meow",    "//! Meow",
    127     "// Meow\n", "// Meow\r\n", "//! Meow\r",
    128   };
    129 
    130   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    131     std::vector<Token> Toks;
    132 
    133     lexString(Sources[i], Toks);
    134 
    135     ASSERT_EQ(2U, Toks.size());
    136 
    137     ASSERT_EQ(tok::text,          Toks[0].getKind());
    138     ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
    139 
    140     ASSERT_EQ(tok::newline,       Toks[1].getKind());
    141   }
    142 }
    143 
    144 // Single comment with plain text.
    145 TEST_F(CommentLexerTest, Basic5) {
    146   const char *Sources[] = {
    147     "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
    148   };
    149 
    150   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    151     std::vector<Token> Toks;
    152 
    153     lexString(Sources[i], Toks);
    154 
    155     ASSERT_EQ(3U, Toks.size());
    156 
    157     ASSERT_EQ(tok::text,          Toks[0].getKind());
    158     ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
    159 
    160     ASSERT_EQ(tok::newline,       Toks[1].getKind());
    161     ASSERT_EQ(tok::newline,       Toks[2].getKind());
    162   }
    163 }
    164 
    165 // Test newline escaping.
    166 TEST_F(CommentLexerTest, Basic6) {
    167   const char *Sources[] = {
    168     "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
    169     "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
    170     "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
    171   };
    172 
    173   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    174     std::vector<Token> Toks;
    175 
    176     lexString(Sources[i], Toks);
    177 
    178     ASSERT_EQ(10U, Toks.size());
    179 
    180     ASSERT_EQ(tok::text,         Toks[0].getKind());
    181     ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
    182     ASSERT_EQ(tok::text,         Toks[1].getKind());
    183     ASSERT_EQ(StringRef("\\"),   Toks[1].getText());
    184     ASSERT_EQ(tok::newline,      Toks[2].getKind());
    185 
    186     ASSERT_EQ(tok::text,         Toks[3].getKind());
    187     ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
    188     ASSERT_EQ(tok::text,         Toks[4].getKind());
    189     ASSERT_EQ(StringRef("\\"),   Toks[4].getText());
    190     ASSERT_EQ(tok::text,         Toks[5].getKind());
    191     ASSERT_EQ(StringRef(" "),    Toks[5].getText());
    192     ASSERT_EQ(tok::newline,      Toks[6].getKind());
    193 
    194     ASSERT_EQ(tok::text,         Toks[7].getKind());
    195     ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
    196     ASSERT_EQ(tok::newline,      Toks[8].getKind());
    197 
    198     ASSERT_EQ(tok::newline,      Toks[9].getKind());
    199   }
    200 }
    201 
    202 // Check that we skip C-style aligned stars correctly.
    203 TEST_F(CommentLexerTest, Basic7) {
    204   const char *Source =
    205     "/* Aaa\n"
    206     " * Bbb\r\n"
    207     "\t* Ccc\n"
    208     "  ! Ddd\n"
    209     "  * Eee\n"
    210     "  ** Fff\n"
    211     " */";
    212   std::vector<Token> Toks;
    213 
    214   lexString(Source, Toks);
    215 
    216   ASSERT_EQ(15U, Toks.size());
    217 
    218   ASSERT_EQ(tok::text,         Toks[0].getKind());
    219   ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
    220   ASSERT_EQ(tok::newline,      Toks[1].getKind());
    221 
    222   ASSERT_EQ(tok::text,         Toks[2].getKind());
    223   ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
    224   ASSERT_EQ(tok::newline,      Toks[3].getKind());
    225 
    226   ASSERT_EQ(tok::text,         Toks[4].getKind());
    227   ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
    228   ASSERT_EQ(tok::newline,      Toks[5].getKind());
    229 
    230   ASSERT_EQ(tok::text,            Toks[6].getKind());
    231   ASSERT_EQ(StringRef("  ! Ddd"), Toks[6].getText());
    232   ASSERT_EQ(tok::newline,         Toks[7].getKind());
    233 
    234   ASSERT_EQ(tok::text,         Toks[8].getKind());
    235   ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
    236   ASSERT_EQ(tok::newline,      Toks[9].getKind());
    237 
    238   ASSERT_EQ(tok::text,          Toks[10].getKind());
    239   ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
    240   ASSERT_EQ(tok::newline,       Toks[11].getKind());
    241 
    242   ASSERT_EQ(tok::text,         Toks[12].getKind());
    243   ASSERT_EQ(StringRef(" "),    Toks[12].getText());
    244 
    245   ASSERT_EQ(tok::newline,      Toks[13].getKind());
    246   ASSERT_EQ(tok::newline,      Toks[14].getKind());
    247 }
    248 
    249 // A command marker followed by comment end.
    250 TEST_F(CommentLexerTest, DoxygenCommand1) {
    251   const char *Sources[] = { "//@", "///@", "//!@" };
    252   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    253     std::vector<Token> Toks;
    254 
    255     lexString(Sources[i], Toks);
    256 
    257     ASSERT_EQ(2U, Toks.size());
    258 
    259     ASSERT_EQ(tok::text,          Toks[0].getKind());
    260     ASSERT_EQ(StringRef("@"),     Toks[0].getText());
    261 
    262     ASSERT_EQ(tok::newline,       Toks[1].getKind());
    263   }
    264 }
    265 
    266 // A command marker followed by comment end.
    267 TEST_F(CommentLexerTest, DoxygenCommand2) {
    268   const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
    269   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    270     std::vector<Token> Toks;
    271 
    272     lexString(Sources[i], Toks);
    273 
    274     ASSERT_EQ(3U, Toks.size());
    275 
    276     ASSERT_EQ(tok::text,          Toks[0].getKind());
    277     ASSERT_EQ(StringRef("@"),     Toks[0].getText());
    278 
    279     ASSERT_EQ(tok::newline,       Toks[1].getKind());
    280     ASSERT_EQ(tok::newline,       Toks[2].getKind());
    281   }
    282 }
    283 
    284 // A command marker followed by comment end.
    285 TEST_F(CommentLexerTest, DoxygenCommand3) {
    286   const char *Sources[] = { "/*\\*/", "/**\\*/" };
    287   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    288     std::vector<Token> Toks;
    289 
    290     lexString(Sources[i], Toks);
    291 
    292     ASSERT_EQ(3U, Toks.size());
    293 
    294     ASSERT_EQ(tok::text,           Toks[0].getKind());
    295     ASSERT_EQ(StringRef("\\"),     Toks[0].getText());
    296 
    297     ASSERT_EQ(tok::newline,        Toks[1].getKind());
    298     ASSERT_EQ(tok::newline,        Toks[2].getKind());
    299   }
    300 }
    301 
    302 // Doxygen escape sequences.
    303 TEST_F(CommentLexerTest, DoxygenCommand4) {
    304   const char *Sources[] = {
    305     "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
    306     "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
    307   };
    308   const char *Text[] = {
    309     " ",
    310     "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
    311     "<",  " ", ">", " ", "%", " ", "\"", " ", ".", " ",
    312     "::", ""
    313   };
    314 
    315   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    316     std::vector<Token> Toks;
    317 
    318     lexString(Sources[i], Toks);
    319 
    320     ASSERT_EQ(array_lengthof(Text), Toks.size());
    321 
    322     for (size_t j = 0, e = Toks.size(); j != e; j++) {
    323       if(Toks[j].is(tok::text))
    324         ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
    325           << "index " << i;
    326     }
    327   }
    328 }
    329 
    330 // A command marker followed by a non-letter that is not a part of an escape
    331 // sequence.
    332 TEST_F(CommentLexerTest, DoxygenCommand5) {
    333   const char *Source = "/// \\^ \\0";
    334   std::vector<Token> Toks;
    335 
    336   lexString(Source, Toks);
    337 
    338   ASSERT_EQ(6U, Toks.size());
    339 
    340   ASSERT_EQ(tok::text,       Toks[0].getKind());
    341   ASSERT_EQ(StringRef(" "),  Toks[0].getText());
    342 
    343   ASSERT_EQ(tok::text,       Toks[1].getKind());
    344   ASSERT_EQ(StringRef("\\"), Toks[1].getText());
    345 
    346   ASSERT_EQ(tok::text,       Toks[2].getKind());
    347   ASSERT_EQ(StringRef("^ "), Toks[2].getText());
    348 
    349   ASSERT_EQ(tok::text,       Toks[3].getKind());
    350   ASSERT_EQ(StringRef("\\"), Toks[3].getText());
    351 
    352   ASSERT_EQ(tok::text,       Toks[4].getKind());
    353   ASSERT_EQ(StringRef("0"),  Toks[4].getText());
    354 
    355   ASSERT_EQ(tok::newline,    Toks[5].getKind());
    356 }
    357 
    358 TEST_F(CommentLexerTest, DoxygenCommand6) {
    359   const char *Source = "/// \\brief Aaa.";
    360   std::vector<Token> Toks;
    361 
    362   lexString(Source, Toks);
    363 
    364   ASSERT_EQ(4U, Toks.size());
    365 
    366   ASSERT_EQ(tok::text,          Toks[0].getKind());
    367   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
    368 
    369   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
    370   ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
    371 
    372   ASSERT_EQ(tok::text,          Toks[2].getKind());
    373   ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
    374 
    375   ASSERT_EQ(tok::newline,       Toks[3].getKind());
    376 }
    377 
    378 TEST_F(CommentLexerTest, DoxygenCommand7) {
    379   const char *Source = "/// \\em\\em \\em\t\\em\n";
    380   std::vector<Token> Toks;
    381 
    382   lexString(Source, Toks);
    383 
    384   ASSERT_EQ(8U, Toks.size());
    385 
    386   ASSERT_EQ(tok::text,       Toks[0].getKind());
    387   ASSERT_EQ(StringRef(" "),  Toks[0].getText());
    388 
    389   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
    390   ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
    391 
    392   ASSERT_EQ(tok::backslash_command, Toks[2].getKind());
    393   ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
    394 
    395   ASSERT_EQ(tok::text,       Toks[3].getKind());
    396   ASSERT_EQ(StringRef(" "),  Toks[3].getText());
    397 
    398   ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
    399   ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
    400 
    401   ASSERT_EQ(tok::text,       Toks[5].getKind());
    402   ASSERT_EQ(StringRef("\t"), Toks[5].getText());
    403 
    404   ASSERT_EQ(tok::backslash_command, Toks[6].getKind());
    405   ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
    406 
    407   ASSERT_EQ(tok::newline,    Toks[7].getKind());
    408 }
    409 
    410 TEST_F(CommentLexerTest, DoxygenCommand8) {
    411   const char *Source = "/// @em@em @em\t@em\n";
    412   std::vector<Token> Toks;
    413 
    414   lexString(Source, Toks);
    415 
    416   ASSERT_EQ(8U, Toks.size());
    417 
    418   ASSERT_EQ(tok::text,       Toks[0].getKind());
    419   ASSERT_EQ(StringRef(" "),  Toks[0].getText());
    420 
    421   ASSERT_EQ(tok::at_command, Toks[1].getKind());
    422   ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
    423 
    424   ASSERT_EQ(tok::at_command, Toks[2].getKind());
    425   ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
    426 
    427   ASSERT_EQ(tok::text,       Toks[3].getKind());
    428   ASSERT_EQ(StringRef(" "),  Toks[3].getText());
    429 
    430   ASSERT_EQ(tok::at_command, Toks[4].getKind());
    431   ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
    432 
    433   ASSERT_EQ(tok::text,       Toks[5].getKind());
    434   ASSERT_EQ(StringRef("\t"), Toks[5].getText());
    435 
    436   ASSERT_EQ(tok::at_command, Toks[6].getKind());
    437   ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
    438 
    439   ASSERT_EQ(tok::newline,    Toks[7].getKind());
    440 }
    441 
    442 TEST_F(CommentLexerTest, DoxygenCommand9) {
    443   const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
    444   std::vector<Token> Toks;
    445 
    446   lexString(Source, Toks);
    447 
    448   ASSERT_EQ(8U, Toks.size());
    449 
    450   ASSERT_EQ(tok::text,        Toks[0].getKind());
    451   ASSERT_EQ(StringRef(" "),   Toks[0].getText());
    452 
    453   ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
    454   ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
    455 
    456   ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
    457   ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
    458 
    459   ASSERT_EQ(tok::text,        Toks[3].getKind());
    460   ASSERT_EQ(StringRef(" "),   Toks[3].getText());
    461 
    462   ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
    463   ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
    464 
    465   ASSERT_EQ(tok::text,        Toks[5].getKind());
    466   ASSERT_EQ(StringRef("\t"),  Toks[5].getText());
    467 
    468   ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
    469   ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
    470 
    471   ASSERT_EQ(tok::newline,     Toks[7].getKind());
    472 }
    473 
    474 TEST_F(CommentLexerTest, DoxygenCommand10) {
    475   const char *Source = "// \\c\n";
    476   std::vector<Token> Toks;
    477 
    478   lexString(Source, Toks);
    479 
    480   ASSERT_EQ(3U, Toks.size());
    481 
    482   ASSERT_EQ(tok::text,      Toks[0].getKind());
    483   ASSERT_EQ(StringRef(" "), Toks[0].getText());
    484 
    485   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
    486   ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
    487 
    488   ASSERT_EQ(tok::newline,   Toks[2].getKind());
    489 }
    490 
    491 TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
    492   const char *Source =
    493     "/// \\NewBlockCommand Aaa.\n"
    494     "/// @NewBlockCommand Aaa.\n";
    495 
    496   Traits.registerBlockCommand(StringRef("NewBlockCommand"));
    497 
    498   std::vector<Token> Toks;
    499 
    500   lexString(Source, Toks);
    501 
    502   ASSERT_EQ(8U, Toks.size());
    503 
    504   ASSERT_EQ(tok::text,          Toks[0].getKind());
    505   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
    506 
    507   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
    508   ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
    509 
    510   ASSERT_EQ(tok::text,          Toks[2].getKind());
    511   ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
    512 
    513   ASSERT_EQ(tok::newline,       Toks[3].getKind());
    514 
    515   ASSERT_EQ(tok::text,          Toks[4].getKind());
    516   ASSERT_EQ(StringRef(" "),     Toks[4].getText());
    517 
    518   ASSERT_EQ(tok::at_command,    Toks[5].getKind());
    519   ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
    520 
    521   ASSERT_EQ(tok::text,          Toks[6].getKind());
    522   ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
    523 
    524   ASSERT_EQ(tok::newline,       Toks[7].getKind());
    525 }
    526 
    527 TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
    528   const char *Source =
    529     "/// \\Foo\n"
    530     "/// \\Bar Baz\n"
    531     "/// \\Blech quux=corge\n";
    532 
    533   Traits.registerBlockCommand(StringRef("Foo"));
    534   Traits.registerBlockCommand(StringRef("Bar"));
    535   Traits.registerBlockCommand(StringRef("Blech"));
    536 
    537   std::vector<Token> Toks;
    538 
    539   lexString(Source, Toks);
    540 
    541   ASSERT_EQ(11U, Toks.size());
    542 
    543   ASSERT_EQ(tok::text,      Toks[0].getKind());
    544   ASSERT_EQ(StringRef(" "), Toks[0].getText());
    545 
    546   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
    547   ASSERT_EQ(StringRef("Foo"), getCommandName(Toks[1]));
    548 
    549   ASSERT_EQ(tok::newline,     Toks[2].getKind());
    550 
    551   ASSERT_EQ(tok::text,      Toks[3].getKind());
    552   ASSERT_EQ(StringRef(" "), Toks[3].getText());
    553 
    554   ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
    555   ASSERT_EQ(StringRef("Bar"), getCommandName(Toks[4]));
    556 
    557   ASSERT_EQ(tok::text,         Toks[5].getKind());
    558   ASSERT_EQ(StringRef(" Baz"), Toks[5].getText());
    559 
    560   ASSERT_EQ(tok::newline,     Toks[6].getKind());
    561 
    562   ASSERT_EQ(tok::text,      Toks[7].getKind());
    563   ASSERT_EQ(StringRef(" "), Toks[7].getText());
    564 
    565   ASSERT_EQ(tok::backslash_command, Toks[8].getKind());
    566   ASSERT_EQ(StringRef("Blech"), getCommandName(Toks[8]));
    567 
    568   ASSERT_EQ(tok::text,                Toks[9].getKind());
    569   ASSERT_EQ(StringRef(" quux=corge"), Toks[9].getText());
    570 
    571   ASSERT_EQ(tok::newline,     Toks[10].getKind());
    572 }
    573 
    574 // Empty verbatim block.
    575 TEST_F(CommentLexerTest, VerbatimBlock1) {
    576   const char *Sources[] = {
    577     "/// \\verbatim\\endverbatim\n//",
    578     "/** \\verbatim\\endverbatim*/"
    579   };
    580 
    581   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    582     std::vector<Token> Toks;
    583 
    584     lexString(Sources[i], Toks);
    585 
    586     ASSERT_EQ(5U, Toks.size());
    587 
    588     ASSERT_EQ(tok::text,                 Toks[0].getKind());
    589     ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    590 
    591     ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    592     ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    593 
    594     ASSERT_EQ(tok::verbatim_block_end,   Toks[2].getKind());
    595     ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[2]));
    596 
    597     ASSERT_EQ(tok::newline,              Toks[3].getKind());
    598     ASSERT_EQ(tok::newline,              Toks[4].getKind());
    599   }
    600 }
    601 
    602 // Empty verbatim block without an end command.
    603 TEST_F(CommentLexerTest, VerbatimBlock2) {
    604   const char *Source = "/// \\verbatim";
    605 
    606   std::vector<Token> Toks;
    607 
    608   lexString(Source, Toks);
    609 
    610   ASSERT_EQ(3U, Toks.size());
    611 
    612   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    613   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    614 
    615   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    616   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    617 
    618   ASSERT_EQ(tok::newline,              Toks[2].getKind());
    619 }
    620 
    621 // Empty verbatim block without an end command.
    622 TEST_F(CommentLexerTest, VerbatimBlock3) {
    623   const char *Source = "/** \\verbatim*/";
    624 
    625   std::vector<Token> Toks;
    626 
    627   lexString(Source, Toks);
    628 
    629   ASSERT_EQ(4U, Toks.size());
    630 
    631   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    632   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    633 
    634   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    635   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    636 
    637   ASSERT_EQ(tok::newline,              Toks[2].getKind());
    638   ASSERT_EQ(tok::newline,              Toks[3].getKind());
    639 }
    640 
    641 // Single-line verbatim block.
    642 TEST_F(CommentLexerTest, VerbatimBlock4) {
    643   const char *Sources[] = {
    644     "/// Meow \\verbatim aaa \\endverbatim\n//",
    645     "/** Meow \\verbatim aaa \\endverbatim*/"
    646   };
    647 
    648   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    649     std::vector<Token> Toks;
    650 
    651     lexString(Sources[i], Toks);
    652 
    653     ASSERT_EQ(6U, Toks.size());
    654 
    655     ASSERT_EQ(tok::text,                 Toks[0].getKind());
    656     ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
    657 
    658     ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    659     ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    660 
    661     ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
    662     ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
    663 
    664     ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
    665     ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[3]));
    666 
    667     ASSERT_EQ(tok::newline,              Toks[4].getKind());
    668     ASSERT_EQ(tok::newline,              Toks[5].getKind());
    669   }
    670 }
    671 
    672 // Single-line verbatim block without an end command.
    673 TEST_F(CommentLexerTest, VerbatimBlock5) {
    674   const char *Sources[] = {
    675     "/// Meow \\verbatim aaa \n//",
    676     "/** Meow \\verbatim aaa */"
    677   };
    678 
    679   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    680     std::vector<Token> Toks;
    681 
    682     lexString(Sources[i], Toks);
    683 
    684     ASSERT_EQ(5U, Toks.size());
    685 
    686     ASSERT_EQ(tok::text,                 Toks[0].getKind());
    687     ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
    688 
    689     ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    690     ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    691 
    692     ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
    693     ASSERT_EQ(StringRef(" aaa "),        Toks[2].getVerbatimBlockText());
    694 
    695     ASSERT_EQ(tok::newline,              Toks[3].getKind());
    696     ASSERT_EQ(tok::newline,              Toks[4].getKind());
    697   }
    698 }
    699 
    700 TEST_F(CommentLexerTest, VerbatimBlock6) {
    701   const char *Source =
    702     "// \\verbatim\n"
    703     "// Aaa\n"
    704     "//\n"
    705     "// Bbb\n"
    706     "// \\endverbatim\n";
    707 
    708   std::vector<Token> Toks;
    709 
    710   lexString(Source, Toks);
    711 
    712   ASSERT_EQ(10U, Toks.size());
    713 
    714   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    715   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    716 
    717   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    718   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    719 
    720   ASSERT_EQ(tok::newline,              Toks[2].getKind());
    721 
    722   ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
    723   ASSERT_EQ(StringRef(" Aaa"),         Toks[3].getVerbatimBlockText());
    724 
    725   ASSERT_EQ(tok::newline,              Toks[4].getKind());
    726 
    727   ASSERT_EQ(tok::newline,              Toks[5].getKind());
    728 
    729   ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
    730   ASSERT_EQ(StringRef(" Bbb"),         Toks[6].getVerbatimBlockText());
    731 
    732   ASSERT_EQ(tok::newline,              Toks[7].getKind());
    733 
    734   ASSERT_EQ(tok::verbatim_block_end,   Toks[8].getKind());
    735   ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[8]));
    736 
    737   ASSERT_EQ(tok::newline,              Toks[9].getKind());
    738 }
    739 
    740 TEST_F(CommentLexerTest, VerbatimBlock7) {
    741   const char *Source =
    742     "/* \\verbatim\n"
    743     " * Aaa\n"
    744     " *\n"
    745     " * Bbb\n"
    746     " * \\endverbatim\n"
    747     " */";
    748 
    749   std::vector<Token> Toks;
    750 
    751   lexString(Source, Toks);
    752 
    753   ASSERT_EQ(10U, Toks.size());
    754 
    755   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    756   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    757 
    758   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    759   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    760 
    761   ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
    762   ASSERT_EQ(StringRef(" Aaa"),         Toks[2].getVerbatimBlockText());
    763 
    764   ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
    765   ASSERT_EQ(StringRef(""),             Toks[3].getVerbatimBlockText());
    766 
    767   ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
    768   ASSERT_EQ(StringRef(" Bbb"),         Toks[4].getVerbatimBlockText());
    769 
    770   ASSERT_EQ(tok::verbatim_block_end,   Toks[5].getKind());
    771   ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[5]));
    772 
    773   ASSERT_EQ(tok::newline,              Toks[6].getKind());
    774 
    775   ASSERT_EQ(tok::text,                 Toks[7].getKind());
    776   ASSERT_EQ(StringRef(" "),            Toks[7].getText());
    777 
    778   ASSERT_EQ(tok::newline,              Toks[8].getKind());
    779   ASSERT_EQ(tok::newline,              Toks[9].getKind());
    780 }
    781 
    782 // Complex test for verbatim blocks.
    783 TEST_F(CommentLexerTest, VerbatimBlock8) {
    784   const char *Source =
    785     "/* Meow \\verbatim aaa\\$\\@\n"
    786     "bbb \\endverbati\r"
    787     "ccc\r\n"
    788     "ddd \\endverbatim Blah \\verbatim eee\n"
    789     "\\endverbatim BlahBlah*/";
    790   std::vector<Token> Toks;
    791 
    792   lexString(Source, Toks);
    793 
    794   ASSERT_EQ(14U, Toks.size());
    795 
    796   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    797   ASSERT_EQ(StringRef(" Meow "),       Toks[0].getText());
    798 
    799   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    800   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[1]));
    801 
    802   ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
    803   ASSERT_EQ(StringRef(" aaa\\$\\@"),   Toks[2].getVerbatimBlockText());
    804 
    805   ASSERT_EQ(tok::verbatim_block_line,  Toks[3].getKind());
    806   ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
    807 
    808   ASSERT_EQ(tok::verbatim_block_line,  Toks[4].getKind());
    809   ASSERT_EQ(StringRef("ccc"),          Toks[4].getVerbatimBlockText());
    810 
    811   ASSERT_EQ(tok::verbatim_block_line,  Toks[5].getKind());
    812   ASSERT_EQ(StringRef("ddd "),         Toks[5].getVerbatimBlockText());
    813 
    814   ASSERT_EQ(tok::verbatim_block_end,   Toks[6].getKind());
    815   ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[6]));
    816 
    817   ASSERT_EQ(tok::text,                 Toks[7].getKind());
    818   ASSERT_EQ(StringRef(" Blah "),       Toks[7].getText());
    819 
    820   ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
    821   ASSERT_EQ(StringRef("verbatim"),     getVerbatimBlockName(Toks[8]));
    822 
    823   ASSERT_EQ(tok::verbatim_block_line,  Toks[9].getKind());
    824   ASSERT_EQ(StringRef(" eee"),         Toks[9].getVerbatimBlockText());
    825 
    826   ASSERT_EQ(tok::verbatim_block_end,   Toks[10].getKind());
    827   ASSERT_EQ(StringRef("endverbatim"),  getVerbatimBlockName(Toks[10]));
    828 
    829   ASSERT_EQ(tok::text,                 Toks[11].getKind());
    830   ASSERT_EQ(StringRef(" BlahBlah"),    Toks[11].getText());
    831 
    832   ASSERT_EQ(tok::newline,              Toks[12].getKind());
    833   ASSERT_EQ(tok::newline,              Toks[13].getKind());
    834 }
    835 
    836 // LaTeX verbatim blocks.
    837 TEST_F(CommentLexerTest, VerbatimBlock9) {
    838   const char *Source =
    839     "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
    840   std::vector<Token> Toks;
    841 
    842   lexString(Source, Toks);
    843 
    844   ASSERT_EQ(13U, Toks.size());
    845 
    846   ASSERT_EQ(tok::text,                 Toks[0].getKind());
    847   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
    848 
    849   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
    850   ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[1]));
    851 
    852   ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
    853   ASSERT_EQ(StringRef(" Aaa "),        Toks[2].getVerbatimBlockText());
    854 
    855   ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
    856   ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[3]));
    857 
    858   ASSERT_EQ(tok::text,                 Toks[4].getKind());
    859   ASSERT_EQ(StringRef(" "),            Toks[4].getText());
    860 
    861   ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
    862   ASSERT_EQ(StringRef("f["),           getVerbatimBlockName(Toks[5]));
    863 
    864   ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
    865   ASSERT_EQ(StringRef(" Bbb "),        Toks[6].getVerbatimBlockText());
    866 
    867   ASSERT_EQ(tok::verbatim_block_end,   Toks[7].getKind());
    868   ASSERT_EQ(StringRef("f]"),           getVerbatimBlockName(Toks[7]));
    869 
    870   ASSERT_EQ(tok::text,                 Toks[8].getKind());
    871   ASSERT_EQ(StringRef(" "),            Toks[8].getText());
    872 
    873   ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
    874   ASSERT_EQ(StringRef("f{"),           getVerbatimBlockName(Toks[9]));
    875 
    876   ASSERT_EQ(tok::verbatim_block_line,  Toks[10].getKind());
    877   ASSERT_EQ(StringRef(" Ccc "),        Toks[10].getVerbatimBlockText());
    878 
    879   ASSERT_EQ(tok::verbatim_block_end,   Toks[11].getKind());
    880   ASSERT_EQ(StringRef("f}"),           getVerbatimBlockName(Toks[11]));
    881 
    882   ASSERT_EQ(tok::newline,              Toks[12].getKind());
    883 }
    884 
    885 // Empty verbatim line.
    886 TEST_F(CommentLexerTest, VerbatimLine1) {
    887   const char *Sources[] = {
    888     "/// \\fn\n//",
    889     "/** \\fn*/"
    890   };
    891 
    892   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    893     std::vector<Token> Toks;
    894 
    895     lexString(Sources[i], Toks);
    896 
    897     ASSERT_EQ(4U, Toks.size());
    898 
    899     ASSERT_EQ(tok::text,               Toks[0].getKind());
    900     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
    901 
    902     ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
    903     ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
    904 
    905     ASSERT_EQ(tok::newline,            Toks[2].getKind());
    906     ASSERT_EQ(tok::newline,            Toks[3].getKind());
    907   }
    908 }
    909 
    910 // Verbatim line with Doxygen escape sequences, which should not be expanded.
    911 TEST_F(CommentLexerTest, VerbatimLine2) {
    912   const char *Sources[] = {
    913     "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
    914     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
    915   };
    916 
    917   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
    918     std::vector<Token> Toks;
    919 
    920     lexString(Sources[i], Toks);
    921 
    922     ASSERT_EQ(5U, Toks.size());
    923 
    924     ASSERT_EQ(tok::text,               Toks[0].getKind());
    925     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
    926 
    927     ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
    928     ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
    929 
    930     ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
    931     ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
    932                                        Toks[2].getVerbatimLineText());
    933 
    934     ASSERT_EQ(tok::newline,            Toks[3].getKind());
    935     ASSERT_EQ(tok::newline,            Toks[4].getKind());
    936   }
    937 }
    938 
    939 // Verbatim line should not eat anything from next source line.
    940 TEST_F(CommentLexerTest, VerbatimLine3) {
    941   const char *Source =
    942     "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
    943     " * Meow\n"
    944     " */";
    945 
    946   std::vector<Token> Toks;
    947 
    948   lexString(Source, Toks);
    949 
    950   ASSERT_EQ(9U, Toks.size());
    951 
    952   ASSERT_EQ(tok::text,               Toks[0].getKind());
    953   ASSERT_EQ(StringRef(" "),          Toks[0].getText());
    954 
    955   ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
    956   ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
    957 
    958   ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
    959   ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
    960                                      Toks[2].getVerbatimLineText());
    961   ASSERT_EQ(tok::newline,            Toks[3].getKind());
    962 
    963   ASSERT_EQ(tok::text,               Toks[4].getKind());
    964   ASSERT_EQ(StringRef(" Meow"),      Toks[4].getText());
    965   ASSERT_EQ(tok::newline,            Toks[5].getKind());
    966 
    967   ASSERT_EQ(tok::text,               Toks[6].getKind());
    968   ASSERT_EQ(StringRef(" "),          Toks[6].getText());
    969 
    970   ASSERT_EQ(tok::newline,            Toks[7].getKind());
    971   ASSERT_EQ(tok::newline,            Toks[8].getKind());
    972 }
    973 
    974 TEST_F(CommentLexerTest, HTML1) {
    975   const char *Source =
    976     "// <";
    977 
    978   std::vector<Token> Toks;
    979 
    980   lexString(Source, Toks);
    981 
    982   ASSERT_EQ(3U, Toks.size());
    983 
    984   ASSERT_EQ(tok::text,      Toks[0].getKind());
    985   ASSERT_EQ(StringRef(" "), Toks[0].getText());
    986 
    987   ASSERT_EQ(tok::text,      Toks[1].getKind());
    988   ASSERT_EQ(StringRef("<"), Toks[1].getText());
    989 
    990   ASSERT_EQ(tok::newline,   Toks[2].getKind());
    991 }
    992 
    993 TEST_F(CommentLexerTest, HTML2) {
    994   const char *Source =
    995     "// a<2";
    996 
    997   std::vector<Token> Toks;
    998 
    999   lexString(Source, Toks);
   1000 
   1001   ASSERT_EQ(4U, Toks.size());
   1002 
   1003   ASSERT_EQ(tok::text,       Toks[0].getKind());
   1004   ASSERT_EQ(StringRef(" a"), Toks[0].getText());
   1005 
   1006   ASSERT_EQ(tok::text,       Toks[1].getKind());
   1007   ASSERT_EQ(StringRef("<"),  Toks[1].getText());
   1008 
   1009   ASSERT_EQ(tok::text,       Toks[2].getKind());
   1010   ASSERT_EQ(StringRef("2"),  Toks[2].getText());
   1011 
   1012   ASSERT_EQ(tok::newline,    Toks[3].getKind());
   1013 }
   1014 
   1015 TEST_F(CommentLexerTest, HTML3) {
   1016   const char *Source =
   1017     "// < img";
   1018 
   1019   std::vector<Token> Toks;
   1020 
   1021   lexString(Source, Toks);
   1022 
   1023   ASSERT_EQ(4U, Toks.size());
   1024 
   1025   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1026   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1027 
   1028   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1029   ASSERT_EQ(StringRef("<"),    Toks[1].getText());
   1030 
   1031   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1032   ASSERT_EQ(StringRef(" img"), Toks[2].getText());
   1033 
   1034   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1035 }
   1036 
   1037 TEST_F(CommentLexerTest, HTML4) {
   1038   const char *Sources[] = {
   1039     "// <img",
   1040     "// <img "
   1041   };
   1042 
   1043   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1044     std::vector<Token> Toks;
   1045 
   1046     lexString(Sources[i], Toks);
   1047 
   1048     ASSERT_EQ(3U, Toks.size());
   1049 
   1050     ASSERT_EQ(tok::text,           Toks[0].getKind());
   1051     ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1052 
   1053     ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1054     ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1055 
   1056     ASSERT_EQ(tok::newline,        Toks[2].getKind());
   1057   }
   1058 }
   1059 
   1060 TEST_F(CommentLexerTest, HTML5) {
   1061   const char *Source =
   1062     "// <img 42";
   1063 
   1064   std::vector<Token> Toks;
   1065 
   1066   lexString(Source, Toks);
   1067 
   1068   ASSERT_EQ(4U, Toks.size());
   1069 
   1070   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1071   ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1072 
   1073   ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1074   ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1075 
   1076   ASSERT_EQ(tok::text,           Toks[2].getKind());
   1077   ASSERT_EQ(StringRef("42"),     Toks[2].getText());
   1078 
   1079   ASSERT_EQ(tok::newline,        Toks[3].getKind());
   1080 }
   1081 
   1082 TEST_F(CommentLexerTest, HTML6) {
   1083   const char *Source = "// <img> Meow";
   1084 
   1085   std::vector<Token> Toks;
   1086 
   1087   lexString(Source, Toks);
   1088 
   1089   ASSERT_EQ(5U, Toks.size());
   1090 
   1091   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1092   ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1093 
   1094   ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1095   ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1096 
   1097   ASSERT_EQ(tok::html_greater,   Toks[2].getKind());
   1098 
   1099   ASSERT_EQ(tok::text,           Toks[3].getKind());
   1100   ASSERT_EQ(StringRef(" Meow"),  Toks[3].getText());
   1101 
   1102   ASSERT_EQ(tok::newline,        Toks[4].getKind());
   1103 }
   1104 
   1105 TEST_F(CommentLexerTest, HTML7) {
   1106   const char *Source = "// <img=";
   1107 
   1108   std::vector<Token> Toks;
   1109 
   1110   lexString(Source, Toks);
   1111 
   1112   ASSERT_EQ(4U, Toks.size());
   1113 
   1114   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1115   ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1116 
   1117   ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1118   ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1119 
   1120   ASSERT_EQ(tok::text,           Toks[2].getKind());
   1121   ASSERT_EQ(StringRef("="),      Toks[2].getText());
   1122 
   1123   ASSERT_EQ(tok::newline,        Toks[3].getKind());
   1124 }
   1125 
   1126 TEST_F(CommentLexerTest, HTML8) {
   1127   const char *Source = "// <img src=> Meow";
   1128 
   1129   std::vector<Token> Toks;
   1130 
   1131   lexString(Source, Toks);
   1132 
   1133   ASSERT_EQ(7U, Toks.size());
   1134 
   1135   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1136   ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1137 
   1138   ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1139   ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1140 
   1141   ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
   1142   ASSERT_EQ(StringRef("src"),   Toks[2].getHTMLIdent());
   1143 
   1144   ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
   1145 
   1146   ASSERT_EQ(tok::html_greater,   Toks[4].getKind());
   1147 
   1148   ASSERT_EQ(tok::text,           Toks[5].getKind());
   1149   ASSERT_EQ(StringRef(" Meow"),  Toks[5].getText());
   1150 
   1151   ASSERT_EQ(tok::newline,        Toks[6].getKind());
   1152 }
   1153 
   1154 TEST_F(CommentLexerTest, HTML9) {
   1155   const char *Sources[] = {
   1156     "// <img src",
   1157     "// <img src "
   1158   };
   1159 
   1160   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1161     std::vector<Token> Toks;
   1162 
   1163     lexString(Sources[i], Toks);
   1164 
   1165     ASSERT_EQ(4U, Toks.size());
   1166 
   1167     ASSERT_EQ(tok::text,           Toks[0].getKind());
   1168     ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1169 
   1170     ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1171     ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1172 
   1173     ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
   1174     ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
   1175 
   1176     ASSERT_EQ(tok::newline,        Toks[3].getKind());
   1177   }
   1178 }
   1179 
   1180 TEST_F(CommentLexerTest, HTML10) {
   1181   const char *Sources[] = {
   1182     "// <img src=",
   1183     "// <img src ="
   1184   };
   1185 
   1186   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1187     std::vector<Token> Toks;
   1188 
   1189     lexString(Sources[i], Toks);
   1190 
   1191     ASSERT_EQ(5U, Toks.size());
   1192 
   1193     ASSERT_EQ(tok::text,           Toks[0].getKind());
   1194     ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1195 
   1196     ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1197     ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1198 
   1199     ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
   1200     ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
   1201 
   1202     ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
   1203 
   1204     ASSERT_EQ(tok::newline,        Toks[4].getKind());
   1205   }
   1206 }
   1207 
   1208 TEST_F(CommentLexerTest, HTML11) {
   1209   const char *Sources[] = {
   1210     "// <img src=\"",
   1211     "// <img src = \"",
   1212     "// <img src=\'",
   1213     "// <img src = \'"
   1214   };
   1215 
   1216   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1217     std::vector<Token> Toks;
   1218 
   1219     lexString(Sources[i], Toks);
   1220 
   1221     ASSERT_EQ(6U, Toks.size());
   1222 
   1223     ASSERT_EQ(tok::text,               Toks[0].getKind());
   1224     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
   1225 
   1226     ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
   1227     ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
   1228 
   1229     ASSERT_EQ(tok::html_ident,         Toks[2].getKind());
   1230     ASSERT_EQ(StringRef("src"),        Toks[2].getHTMLIdent());
   1231 
   1232     ASSERT_EQ(tok::html_equals,        Toks[3].getKind());
   1233 
   1234     ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
   1235     ASSERT_EQ(StringRef(""),           Toks[4].getHTMLQuotedString());
   1236 
   1237     ASSERT_EQ(tok::newline,            Toks[5].getKind());
   1238   }
   1239 }
   1240 
   1241 TEST_F(CommentLexerTest, HTML12) {
   1242   const char *Source = "// <img src=@";
   1243 
   1244   std::vector<Token> Toks;
   1245 
   1246   lexString(Source, Toks);
   1247 
   1248   ASSERT_EQ(6U, Toks.size());
   1249 
   1250   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1251   ASSERT_EQ(StringRef(" "),      Toks[0].getText());
   1252 
   1253   ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
   1254   ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());
   1255 
   1256   ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
   1257   ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());
   1258 
   1259   ASSERT_EQ(tok::html_equals,    Toks[3].getKind());
   1260 
   1261   ASSERT_EQ(tok::text,           Toks[4].getKind());
   1262   ASSERT_EQ(StringRef("@"),      Toks[4].getText());
   1263 
   1264   ASSERT_EQ(tok::newline,        Toks[5].getKind());
   1265 }
   1266 
   1267 TEST_F(CommentLexerTest, HTML13) {
   1268   const char *Sources[] = {
   1269     "// <img src=\"val\\\"\\'val",
   1270     "// <img src=\"val\\\"\\'val\"",
   1271     "// <img src=\'val\\\"\\'val",
   1272     "// <img src=\'val\\\"\\'val\'"
   1273   };
   1274 
   1275   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1276     std::vector<Token> Toks;
   1277 
   1278     lexString(Sources[i], Toks);
   1279 
   1280     ASSERT_EQ(6U, Toks.size());
   1281 
   1282     ASSERT_EQ(tok::text,                  Toks[0].getKind());
   1283     ASSERT_EQ(StringRef(" "),             Toks[0].getText());
   1284 
   1285     ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
   1286     ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
   1287 
   1288     ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
   1289     ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
   1290 
   1291     ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
   1292 
   1293     ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
   1294     ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
   1295 
   1296     ASSERT_EQ(tok::newline,               Toks[5].getKind());
   1297   }
   1298 }
   1299 
   1300 TEST_F(CommentLexerTest, HTML14) {
   1301   const char *Sources[] = {
   1302     "// <img src=\"val\\\"\\'val\">",
   1303     "// <img src=\'val\\\"\\'val\'>"
   1304   };
   1305 
   1306   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1307     std::vector<Token> Toks;
   1308 
   1309     lexString(Sources[i], Toks);
   1310 
   1311     ASSERT_EQ(7U, Toks.size());
   1312 
   1313     ASSERT_EQ(tok::text,                  Toks[0].getKind());
   1314     ASSERT_EQ(StringRef(" "),             Toks[0].getText());
   1315 
   1316     ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
   1317     ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());
   1318 
   1319     ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
   1320     ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());
   1321 
   1322     ASSERT_EQ(tok::html_equals,           Toks[3].getKind());
   1323 
   1324     ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
   1325     ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
   1326 
   1327     ASSERT_EQ(tok::html_greater,          Toks[5].getKind());
   1328 
   1329     ASSERT_EQ(tok::newline,               Toks[6].getKind());
   1330   }
   1331 }
   1332 
   1333 TEST_F(CommentLexerTest, HTML15) {
   1334   const char *Sources[] = {
   1335     "// <img/>",
   1336     "// <img />"
   1337   };
   1338 
   1339   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1340     std::vector<Token> Toks;
   1341 
   1342     lexString(Sources[i], Toks);
   1343 
   1344     ASSERT_EQ(4U, Toks.size());
   1345 
   1346     ASSERT_EQ(tok::text,               Toks[0].getKind());
   1347     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
   1348 
   1349     ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
   1350     ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
   1351 
   1352     ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
   1353 
   1354     ASSERT_EQ(tok::newline,            Toks[3].getKind());
   1355   }
   1356 }
   1357 
   1358 TEST_F(CommentLexerTest, HTML16) {
   1359   const char *Sources[] = {
   1360     "// <img/ Aaa",
   1361     "// <img / Aaa"
   1362   };
   1363 
   1364   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1365     std::vector<Token> Toks;
   1366 
   1367     lexString(Sources[i], Toks);
   1368 
   1369     ASSERT_EQ(5U, Toks.size());
   1370 
   1371     ASSERT_EQ(tok::text,               Toks[0].getKind());
   1372     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
   1373 
   1374     ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
   1375     ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());
   1376 
   1377     ASSERT_EQ(tok::text,               Toks[2].getKind());
   1378     ASSERT_EQ(StringRef("/"),          Toks[2].getText());
   1379 
   1380     ASSERT_EQ(tok::text,               Toks[3].getKind());
   1381     ASSERT_EQ(StringRef(" Aaa"),       Toks[3].getText());
   1382 
   1383     ASSERT_EQ(tok::newline,            Toks[4].getKind());
   1384   }
   1385 }
   1386 
   1387 TEST_F(CommentLexerTest, HTML17) {
   1388   const char *Source = "// </";
   1389 
   1390   std::vector<Token> Toks;
   1391 
   1392   lexString(Source, Toks);
   1393 
   1394   ASSERT_EQ(3U, Toks.size());
   1395 
   1396   ASSERT_EQ(tok::text,       Toks[0].getKind());
   1397   ASSERT_EQ(StringRef(" "),  Toks[0].getText());
   1398 
   1399   ASSERT_EQ(tok::text,       Toks[1].getKind());
   1400   ASSERT_EQ(StringRef("</"), Toks[1].getText());
   1401 
   1402   ASSERT_EQ(tok::newline,    Toks[2].getKind());
   1403 }
   1404 
   1405 TEST_F(CommentLexerTest, HTML18) {
   1406   const char *Source = "// </@";
   1407 
   1408   std::vector<Token> Toks;
   1409 
   1410   lexString(Source, Toks);
   1411 
   1412   ASSERT_EQ(4U, Toks.size());
   1413 
   1414   ASSERT_EQ(tok::text,       Toks[0].getKind());
   1415   ASSERT_EQ(StringRef(" "),  Toks[0].getText());
   1416 
   1417   ASSERT_EQ(tok::text,       Toks[1].getKind());
   1418   ASSERT_EQ(StringRef("</"), Toks[1].getText());
   1419 
   1420   ASSERT_EQ(tok::text,       Toks[2].getKind());
   1421   ASSERT_EQ(StringRef("@"),  Toks[2].getText());
   1422 
   1423   ASSERT_EQ(tok::newline,    Toks[3].getKind());
   1424 }
   1425 
   1426 TEST_F(CommentLexerTest, HTML19) {
   1427   const char *Source = "// </img";
   1428 
   1429   std::vector<Token> Toks;
   1430 
   1431   lexString(Source, Toks);
   1432 
   1433   ASSERT_EQ(3U, Toks.size());
   1434 
   1435   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1436   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1437 
   1438   ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
   1439   ASSERT_EQ(StringRef("img"),  Toks[1].getHTMLTagEndName());
   1440 
   1441   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1442 }
   1443 
   1444 TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
   1445   const char *Source = "// <tag>";
   1446 
   1447   std::vector<Token> Toks;
   1448 
   1449   lexString(Source, Toks);
   1450 
   1451   ASSERT_EQ(4U, Toks.size());
   1452 
   1453   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1454   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1455 
   1456   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1457   ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
   1458 
   1459   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1460   ASSERT_EQ(StringRef(">"),    Toks[2].getText());
   1461 
   1462   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1463 }
   1464 
   1465 TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
   1466   const char *Source = "// </tag>";
   1467 
   1468   std::vector<Token> Toks;
   1469 
   1470   lexString(Source, Toks);
   1471 
   1472   ASSERT_EQ(4U, Toks.size());
   1473 
   1474   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1475   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1476 
   1477   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1478   ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
   1479 
   1480   ASSERT_EQ(tok::text,          Toks[2].getKind());
   1481   ASSERT_EQ(StringRef(">"),     Toks[2].getText());
   1482 
   1483   ASSERT_EQ(tok::newline,       Toks[3].getKind());
   1484 }
   1485 
   1486 TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
   1487   const char *Source = "// &";
   1488 
   1489   std::vector<Token> Toks;
   1490 
   1491   lexString(Source, Toks);
   1492 
   1493   ASSERT_EQ(3U, Toks.size());
   1494 
   1495   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1496   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1497 
   1498   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1499   ASSERT_EQ(StringRef("&"),    Toks[1].getText());
   1500 
   1501   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1502 }
   1503 
   1504 TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
   1505   const char *Source = "// &!";
   1506 
   1507   std::vector<Token> Toks;
   1508 
   1509   lexString(Source, Toks);
   1510 
   1511   ASSERT_EQ(4U, Toks.size());
   1512 
   1513   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1514   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1515 
   1516   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1517   ASSERT_EQ(StringRef("&"),    Toks[1].getText());
   1518 
   1519   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1520   ASSERT_EQ(StringRef("!"),    Toks[2].getText());
   1521 
   1522   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1523 }
   1524 
   1525 TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
   1526   const char *Source = "// &amp";
   1527 
   1528   std::vector<Token> Toks;
   1529 
   1530   lexString(Source, Toks);
   1531 
   1532   ASSERT_EQ(3U, Toks.size());
   1533 
   1534   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1535   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1536 
   1537   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1538   ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
   1539 
   1540   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1541 }
   1542 
   1543 TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
   1544   const char *Source = "// &amp!";
   1545 
   1546   std::vector<Token> Toks;
   1547 
   1548   lexString(Source, Toks);
   1549 
   1550   ASSERT_EQ(4U, Toks.size());
   1551 
   1552   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1553   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1554 
   1555   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1556   ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
   1557 
   1558   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1559   ASSERT_EQ(StringRef("!"),    Toks[2].getText());
   1560 
   1561   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1562 }
   1563 
   1564 TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
   1565   const char *Source = "// &#";
   1566 
   1567   std::vector<Token> Toks;
   1568 
   1569   lexString(Source, Toks);
   1570 
   1571   ASSERT_EQ(3U, Toks.size());
   1572 
   1573   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1574   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1575 
   1576   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1577   ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
   1578 
   1579   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1580 }
   1581 
   1582 TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
   1583   const char *Source = "// &#a";
   1584 
   1585   std::vector<Token> Toks;
   1586 
   1587   lexString(Source, Toks);
   1588 
   1589   ASSERT_EQ(4U, Toks.size());
   1590 
   1591   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1592   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1593 
   1594   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1595   ASSERT_EQ(StringRef("&#"),   Toks[1].getText());
   1596 
   1597   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1598   ASSERT_EQ(StringRef("a"),    Toks[2].getText());
   1599 
   1600   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1601 }
   1602 
   1603 TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
   1604   const char *Source = "// &#42";
   1605 
   1606   std::vector<Token> Toks;
   1607 
   1608   lexString(Source, Toks);
   1609 
   1610   ASSERT_EQ(3U, Toks.size());
   1611 
   1612   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1613   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1614 
   1615   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1616   ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
   1617 
   1618   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1619 }
   1620 
   1621 TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
   1622   const char *Source = "// &#42a";
   1623 
   1624   std::vector<Token> Toks;
   1625 
   1626   lexString(Source, Toks);
   1627 
   1628   ASSERT_EQ(4U, Toks.size());
   1629 
   1630   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1631   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1632 
   1633   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1634   ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
   1635 
   1636   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1637   ASSERT_EQ(StringRef("a"),    Toks[2].getText());
   1638 
   1639   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1640 }
   1641 
   1642 TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
   1643   const char *Source = "// &#x";
   1644 
   1645   std::vector<Token> Toks;
   1646 
   1647   lexString(Source, Toks);
   1648 
   1649   ASSERT_EQ(3U, Toks.size());
   1650 
   1651   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1652   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1653 
   1654   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1655   ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
   1656 
   1657   ASSERT_EQ(tok::newline,      Toks[2].getKind());
   1658 }
   1659 
   1660 TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
   1661   const char *Source = "// &#xz";
   1662 
   1663   std::vector<Token> Toks;
   1664 
   1665   lexString(Source, Toks);
   1666 
   1667   ASSERT_EQ(4U, Toks.size());
   1668 
   1669   ASSERT_EQ(tok::text,         Toks[0].getKind());
   1670   ASSERT_EQ(StringRef(" "),    Toks[0].getText());
   1671 
   1672   ASSERT_EQ(tok::text,         Toks[1].getKind());
   1673   ASSERT_EQ(StringRef("&#x"),  Toks[1].getText());
   1674 
   1675   ASSERT_EQ(tok::text,         Toks[2].getKind());
   1676   ASSERT_EQ(StringRef("z"),    Toks[2].getText());
   1677 
   1678   ASSERT_EQ(tok::newline,      Toks[3].getKind());
   1679 }
   1680 
   1681 TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
   1682   const char *Source = "// &#xab";
   1683 
   1684   std::vector<Token> Toks;
   1685 
   1686   lexString(Source, Toks);
   1687 
   1688   ASSERT_EQ(3U, Toks.size());
   1689 
   1690   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1691   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1692 
   1693   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1694   ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
   1695 
   1696   ASSERT_EQ(tok::newline,       Toks[2].getKind());
   1697 }
   1698 
   1699 TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
   1700   const char *Source = "// &#xaBz";
   1701 
   1702   std::vector<Token> Toks;
   1703 
   1704   lexString(Source, Toks);
   1705 
   1706   ASSERT_EQ(4U, Toks.size());
   1707 
   1708   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1709   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1710 
   1711   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1712   ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
   1713 
   1714   ASSERT_EQ(tok::text,          Toks[2].getKind());
   1715   ASSERT_EQ(StringRef("z"),     Toks[2].getText());
   1716 
   1717   ASSERT_EQ(tok::newline,       Toks[3].getKind());
   1718 }
   1719 
   1720 TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
   1721   const char *Source = "// &amp;";
   1722 
   1723   std::vector<Token> Toks;
   1724 
   1725   lexString(Source, Toks);
   1726 
   1727   ASSERT_EQ(3U, Toks.size());
   1728 
   1729   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1730   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1731 
   1732   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1733   ASSERT_EQ(StringRef("&"),     Toks[1].getText());
   1734 
   1735   ASSERT_EQ(tok::newline,       Toks[2].getKind());
   1736 }
   1737 
   1738 TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
          // Lexes two named references written back to back ("&amp;&lt;") and
          // checks that each one yields its own text token: "&" and then "<".
   1739   const char *Source = "// &amp;&lt;";
   1740 
   1741   std::vector<Token> Toks;
   1742 
   1743   lexString(Source, Toks);
   1744 
          // Check the token count first so the Toks[0..3] accesses below are in
          // range (a failing ASSERT_* ends the test immediately).
   1745   ASSERT_EQ(4U, Toks.size());
   1746 
          // The single space between "//" and '&' is its own text token.
   1747   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1748   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1749 
   1750   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1751   ASSERT_EQ(StringRef("&"),     Toks[1].getText());
   1752 
          // The second reference is not merged into the previous token.
   1753   ASSERT_EQ(tok::text,          Toks[2].getKind());
   1754   ASSERT_EQ(StringRef("<"),     Toks[2].getText());
   1755 
   1756   ASSERT_EQ(tok::newline,       Toks[3].getKind());
   1757 }
   1758 
   1759 TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
          // Lexes a named reference followed by ordinary text ("&amp; meow") and
          // checks that the reference becomes the text token "&" while the
          // trailing " meow" (leading space included) is a separate text token.
   1760   const char *Source = "// &amp; meow";
   1761 
   1762   std::vector<Token> Toks;
   1763 
   1764   lexString(Source, Toks);
   1765 
          // Check the token count first so the Toks[0..3] accesses below are in
          // range (a failing ASSERT_* ends the test immediately).
   1766   ASSERT_EQ(4U, Toks.size());
   1767 
          // The single space between "//" and '&' is its own text token.
   1768   ASSERT_EQ(tok::text,          Toks[0].getKind());
   1769   ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1770 
   1771   ASSERT_EQ(tok::text,          Toks[1].getKind());
   1772   ASSERT_EQ(StringRef("&"),     Toks[1].getText());
   1773 
          // Plain text after the reference is not folded into the "&" token.
   1774   ASSERT_EQ(tok::text,          Toks[2].getKind());
   1775   ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
   1776 
   1777   ASSERT_EQ(tok::newline,       Toks[3].getKind());
   1778 }
   1779 
   1780 TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
          // Checks that several spellings of the same numeric character reference
          // all lex to the text token "=": decimal (61), and hexadecimal (3D) with
          // a lower- or upper-case 'x' prefix and lower- or upper-case digit.
   1781   const char *Sources[] = {
   1782     "// &#61;",
   1783     "// &#x3d;",
   1784     "// &#X3d;",
   1785     "// &#X3D;"
   1786   };
   1787 
          // Each source is lexed into a fresh token vector and must satisfy the
          // same expectations.
   1788   for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
   1789     std::vector<Token> Toks;
   1790 
   1791     lexString(Sources[i], Toks);
   1792 
            // Check the token count first so the Toks[0..2] accesses below are in
            // range (a failing ASSERT_* ends the test immediately).
   1793     ASSERT_EQ(3U, Toks.size());
   1794 
            // The single space between "//" and '&' is its own text token.
   1795     ASSERT_EQ(tok::text,          Toks[0].getKind());
   1796     ASSERT_EQ(StringRef(" "),     Toks[0].getText());
   1797 
   1798     ASSERT_EQ(tok::text,          Toks[1].getKind());
   1799     ASSERT_EQ(StringRef("="),     Toks[1].getText());
   1800 
   1801     ASSERT_EQ(tok::newline,       Toks[2].getKind());
   1802   }
   1803 }
   1804 
   1805 TEST_F(CommentLexerTest, MultipleComments) {
          // Lexes one buffer holding four consecutive comments in different
          // styles -- "//", "///", a two-line "/* ... */" and "/** ... */" -- and
          // checks the combined token stream produced for all of them.
   1806   const char *Source =
   1807     "// Aaa\n"
   1808     "/// Bbb\n"
   1809     "/* Ccc\n"
   1810     " * Ddd*/\n"
   1811     "/** Eee*/";
   1812 
   1813   std::vector<Token> Toks;
   1814 
   1815   lexString(Source, Toks);
   1816 
          // Check the token count first so the Toks[0..11] accesses below are in
          // range (a failing ASSERT_* ends the test immediately).
   1817   ASSERT_EQ(12U, Toks.size());
   1818 
          // "// Aaa": text followed by a single newline token.
   1819   ASSERT_EQ(tok::text,           Toks[0].getKind());
   1820   ASSERT_EQ(StringRef(" Aaa"),   Toks[0].getText());
   1821   ASSERT_EQ(tok::newline,        Toks[1].getKind());
   1822 
          // "/// Bbb": same shape as the "//" comment above.
   1823   ASSERT_EQ(tok::text,           Toks[2].getKind());
   1824   ASSERT_EQ(StringRef(" Bbb"),   Toks[2].getText());
   1825   ASSERT_EQ(tok::newline,        Toks[3].getKind());
   1826 
          // First line of the "/* ... */" comment.
   1827   ASSERT_EQ(tok::text,           Toks[4].getKind());
   1828   ASSERT_EQ(StringRef(" Ccc"),   Toks[4].getText());
   1829   ASSERT_EQ(tok::newline,        Toks[5].getKind());
   1830 
          // Second line of the "/* ... */" comment: the expected text is " Ddd",
          // i.e. without the leading " *" decoration present in the source.
          // Two newline tokens are expected after it.
          // NOTE(review): presumably one marks the end of the block comment and
          // the other the line break that follows -- confirm against the lexer.
   1831   ASSERT_EQ(tok::text,           Toks[6].getKind());
   1832   ASSERT_EQ(StringRef(" Ddd"),   Toks[6].getText());
   1833   ASSERT_EQ(tok::newline,        Toks[7].getKind());
   1834   ASSERT_EQ(tok::newline,        Toks[8].getKind());
   1835 
          // "/** Eee*/": text token for the comment body.
   1836   ASSERT_EQ(tok::text,           Toks[9].getKind());
   1837   ASSERT_EQ(StringRef(" Eee"),   Toks[9].getText());
   1838 
          // As with the previous block comment, two newline tokens are expected,
          // even though the source has no trailing '\n' here.
   1839   ASSERT_EQ(tok::newline,        Toks[10].getKind());
   1840   ASSERT_EQ(tok::newline,        Toks[11].getKind());
   1841 }
   1842 
   1843 } // end namespace comments
   1844 } // end namespace clang
   1845 
   1846