Home | History | Annotate | Download | only in Script
      1 /*===- ScriptScanner.ll ---------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===*/
      9 
     10 %{
     11 /* C/C++ Declarations */
     12 
     13 #include "mcld/Script/ScriptScanner.h"
     14 #include "mcld/Script/ScriptFile.h"
     15 #include "mcld/Support/MsgHandling.h"
     16 #include <llvm/ADT/StringRef.h>
     17 #include <string>
     18 
     19 typedef mcld::ScriptParser::token token;
     20 typedef mcld::ScriptParser::token_type token_type;
     21 
     22 #define yyterminate() return token::END
     23 #define YY_NO_UNISTD_H
     24 %}
     25 
     26 %{
     27 #ifdef __clang__
     28 #pragma clang diagnostic push
     29 #pragma clang diagnostic ignored "-Wdeprecated-register"
     30 #endif
     31 %}
     32 
     33 /* Flex Declarations and Options */
     34 %option c++
     35 %option batch
     36 %option noyywrap
     37 %option nounput
     38 %option stack
     39 
     40 %{
     41 #define YY_USER_ACTION  yylloc->columns(yyleng);
     42 %}
     43 
     44 /* abbrev. of RE @ref binutils ld/ldlex.l */
     45 FILENAMECHAR1   [_a-zA-Z\/\.\\\$\_\~]
     46 SYMBOLCHARN     [_a-zA-Z\/\.\\\$\_\~0-9]
     47 NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\~]
     48 WILDCHAR        [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\,\~\?\*\^\!]
     49 WS [ \t\r]
     50 
     51 /* Start conditions */
     52 %s LDSCRIPT
     53 %s EXPRESSION
     54 
     55 %% /* Regular Expressions */
     56 
     57  /* code to place at the beginning of yylex() */
     58 %{
     59   /* reset location */
     60   yylloc->step();
     61 
     62   /* determine the initial parser state */
     63   if (m_Kind == ScriptFile::Unknown) {
     64     m_Kind = pScriptFile.getKind();
     65     switch (pScriptFile.getKind()) {
     66     case ScriptFile::LDScript:
     67     case ScriptFile::Expression:
     68       return token::LINKER_SCRIPT;
     69     case ScriptFile::VersionScript:
     70     case ScriptFile::DynamicList:
     71     default:
     72       assert(0 && "Unsupported script type!");
     73       break;
     74     }
     75   }
     76 %}
     77 
     78  /* Entry Point */
     79 <LDSCRIPT>"ENTRY"                      { return token::ENTRY; }
     80  /* File Commands */
     81 <LDSCRIPT>"INCLUDE"                    { return token::INCLUDE; }
     82 <LDSCRIPT>"INPUT"                      { return token::INPUT; }
     83 <LDSCRIPT>"GROUP"                      { return token::GROUP; }
     84 <LDSCRIPT>"AS_NEEDED"                  { return token::AS_NEEDED; }
     85 <LDSCRIPT>"OUTPUT"                     { return token::OUTPUT; }
     86 <LDSCRIPT>"SEARCH_DIR"                 { return token::SEARCH_DIR; }
     87 <LDSCRIPT>"STARTUP"                    { return token::STARTUP; }
     88  /* Format Commands */
     89 <LDSCRIPT>"OUTPUT_FORMAT"              { return token::OUTPUT_FORMAT; }
     90 <LDSCRIPT>"TARGET"                     { return token::TARGET; }
     91  /* Misc Commands */
     92 <LDSCRIPT>"ASSERT"                     { return token::ASSERT; }
     93 <LDSCRIPT>"EXTERN"                     { return token::EXTERN; }
     94 <LDSCRIPT>"FORCE_COMMON_ALLOCATION"    { return token::FORCE_COMMON_ALLOCATION; }
     95 <LDSCRIPT>"INHIBIT_COMMON_ALLOCATION"  { return token::INHIBIT_COMMON_ALLOCATION; }
     96 <LDSCRIPT>"INSERT"                     { return token::INSERT; }
     97 <LDSCRIPT>"NOCROSSREFS"                { return token::NOCROSSREFS; }
     98 <LDSCRIPT>"OUTPUT_ARCH"                { return token::OUTPUT_ARCH; }
     99 <LDSCRIPT>"LD_FEATURE"                 { return token::LD_FEATURE; }
 /* Assignments */
    101 <LDSCRIPT,EXPRESSION>"HIDDEN"          { return token::HIDDEN; }
    102 <LDSCRIPT,EXPRESSION>"PROVIDE"         { return token::PROVIDE; }
    103 <LDSCRIPT,EXPRESSION>"PROVIDE_HIDDEN"  { return token::PROVIDE_HIDDEN; }
    104  /* SECTIONS Command */
    105 <LDSCRIPT>"SECTIONS"                   { return token::SECTIONS; }
    106  /* MEMORY Command */
    107 <LDSCRIPT>"MEMORY"                     { return token::MEMORY; }
    108  /* PHDRS Command */
    109 <LDSCRIPT>"PHDRS"                      { return token::PHDRS; }
    110  /* Builtin Functions */
    111 <EXPRESSION>"ABSOLUTE"                 { return token::ABSOLUTE; }
    112 <EXPRESSION>"ADDR"                     { return token::ADDR; }
    113 <LDSCRIPT,EXPRESSION>"ALIGN"           { return token::ALIGN; }
    114 <EXPRESSION>"ALIGNOF"                  { return token::ALIGNOF; }
    115 <EXPRESSION>"BLOCK"                    { return token::BLOCK; }
    116 <EXPRESSION>"DATA_SEGMENT_ALIGN"       { return token::DATA_SEGMENT_ALIGN; }
    117 <EXPRESSION>"DATA_SEGMENT_END"         { return token::DATA_SEGMENT_END; }
    118 <EXPRESSION>"DATA_SEGMENT_RELRO_END"   { return token::DATA_SEGMENT_RELRO_END; }
    119 <EXPRESSION>"DEFINED"                  { return token::DEFINED; }
    120 <EXPRESSION>"LENGTH"                   { return token::LENGTH; }
    121 <EXPRESSION>"LOADADDR"                 { return token::LOADADDR; }
    122 <EXPRESSION>"MAX"                      { return token::MAX; }
    123 <EXPRESSION>"MIN"                      { return token::MIN; }
    124 <EXPRESSION>"NEXT"                     { return token::NEXT; }
    125 <EXPRESSION>"ORIGIN"                   { return token::ORIGIN; }
    126 <EXPRESSION>"SEGMENT_START"            { return token::SEGMENT_START; }
    127 <EXPRESSION>"SIZEOF"                   { return token::SIZEOF; }
    128 <EXPRESSION>"SIZEOF_HEADERS"           { return token::SIZEOF_HEADERS; }
    129 <EXPRESSION>"CONSTANT"                 { return token::CONSTANT; }
    130  /* Symbolic Constants */
    131 <EXPRESSION>"MAXPAGESIZE"              { return token::MAXPAGESIZE; }
    132 <EXPRESSION>"COMMONPAGESIZE"           { return token::COMMONPAGESIZE; }
    133  /* Input Section Description */
    134 <LDSCRIPT>"EXCLUDE_FILE"               { return token::EXCLUDE_FILE; }
    135 <LDSCRIPT>"KEEP"                       { return token::KEEP; }
    136 <LDSCRIPT>"SORT"                       { return token::SORT_BY_NAME; }
    137 <LDSCRIPT>"SORT_BY_NAME"               { return token::SORT_BY_NAME; }
    138 <LDSCRIPT>"SORT_BY_ALIGNMENT"          { return token::SORT_BY_ALIGNMENT; }
    139 <LDSCRIPT>"SORT_NONE"                  { return token::SORT_NONE; }
    140 <LDSCRIPT>"SORT_BY_INIT_PRIORITY"      { return token::SORT_BY_INIT_PRIORITY; }
    141  /* Output Section Data */
    142 <LDSCRIPT>"BYTE"                       { return token::BYTE; }
    143 <LDSCRIPT>"SHORT"                      { return token::SHORT; }
    144 <LDSCRIPT>"LONG"                       { return token::LONG; }
    145 <LDSCRIPT>"QUAD"                       { return token::QUAD; }
    146 <LDSCRIPT>"SQUAD"                      { return token::SQUAD; }
    147 <LDSCRIPT>"FILL"                       { return token::FILL; }
    148  /* Output Section Discarding */
    149 <LDSCRIPT>"DISCARD"                    { return token::DISCARD; }
    150  /* Output Section Keywords */
    151 <LDSCRIPT>"CREATE_OBJECT_SYMBOLS"      { return token::CREATE_OBJECT_SYMBOLS; }
    152 <LDSCRIPT>"CONSTRUCTORS"               { return token::CONSTRUCTORS; }
    153  /* Output Section Attributes */
    154  /* Output Section Type */
    155 <LDSCRIPT,EXPRESSION>"NOLOAD"          { return token::NOLOAD; }
    156 <LDSCRIPT,EXPRESSION>"DSECT"           { return token::DSECT; }
    157 <LDSCRIPT,EXPRESSION>"COPY"            { return token::COPY; }
    158 <LDSCRIPT,EXPRESSION>"INFO"            { return token::INFO; }
    159 <LDSCRIPT,EXPRESSION>"OVERLAY"         { return token::OVERLAY; }
    160  /* Output Section LMA */
    161 <LDSCRIPT>"AT"                         { return token::AT; }
    162  /* Forced Input Alignment */
    163 <LDSCRIPT>"SUBALIGN"                   { return token::SUBALIGN; }
    164  /* Output Section Constraint */
    165 <LDSCRIPT>"ONLY_IF_RO"                 { return token::ONLY_IF_RO; }
    166 <LDSCRIPT>"ONLY_IF_RW"                 { return token::ONLY_IF_RW; }
    167  /* Operators */
    168 <LDSCRIPT,EXPRESSION>"<<"              { return token::LSHIFT; }
    169 <LDSCRIPT,EXPRESSION>">>"              { return token::RSHIFT; }
    170 <LDSCRIPT,EXPRESSION>"=="              { return token::EQ; }
    171 <LDSCRIPT,EXPRESSION>"!="              { return token::NE; }
    172 <LDSCRIPT,EXPRESSION>"<="              { return token::LE; }
    173 <LDSCRIPT,EXPRESSION>">="              { return token::GE; }
    174 <LDSCRIPT,EXPRESSION>"&&"              { return token::LOGICAL_AND; }
    175 <LDSCRIPT,EXPRESSION>"||"              { return token::LOGICAL_OR; }
    176 <LDSCRIPT,EXPRESSION>"+="              { return token::ADD_ASSIGN; }
    177 <LDSCRIPT,EXPRESSION>"-="              { return token::SUB_ASSIGN; }
    178 <LDSCRIPT,EXPRESSION>"*="              { return token::MUL_ASSIGN; }
    179 <LDSCRIPT,EXPRESSION>"/="              { return token::DIV_ASSIGN; }
    180 <LDSCRIPT,EXPRESSION>"&="              { return token::AND_ASSIGN; }
    181 <LDSCRIPT,EXPRESSION>"|="              { return token::OR_ASSIGN; }
    182 <LDSCRIPT,EXPRESSION>"<<="             { return token::LS_ASSIGN; }
    183 <LDSCRIPT,EXPRESSION>">>="             { return token::RS_ASSIGN; }
    184 <LDSCRIPT,EXPRESSION>","               { return static_cast<token_type>(*yytext); }
    185 <LDSCRIPT,EXPRESSION>"="               { return static_cast<token_type>(*yytext); }
    186 <LDSCRIPT,EXPRESSION>"?"               { return static_cast<token_type>(*yytext); }
    187 <LDSCRIPT,EXPRESSION>":"               { return static_cast<token_type>(*yytext); }
    188 <LDSCRIPT,EXPRESSION>"|"               { return static_cast<token_type>(*yytext); }
    189 <LDSCRIPT,EXPRESSION>"^"               { return static_cast<token_type>(*yytext); }
    190 <LDSCRIPT,EXPRESSION>"&"               { return static_cast<token_type>(*yytext); }
    191 <LDSCRIPT,EXPRESSION>"<"               { return static_cast<token_type>(*yytext); }
    192 <LDSCRIPT,EXPRESSION>">"               { return static_cast<token_type>(*yytext); }
    193 <LDSCRIPT,EXPRESSION>"+"               { return static_cast<token_type>(*yytext); }
    194 <LDSCRIPT,EXPRESSION>"-"               { return static_cast<token_type>(*yytext); }
    195 <LDSCRIPT,EXPRESSION>"*"               { return static_cast<token_type>(*yytext); }
    196 <LDSCRIPT,EXPRESSION>"/"               { return static_cast<token_type>(*yytext); }
    197 <LDSCRIPT,EXPRESSION>"%"               { return static_cast<token_type>(*yytext); }
    198 <LDSCRIPT,EXPRESSION>"!"               { return static_cast<token_type>(*yytext); }
    199 <LDSCRIPT,EXPRESSION>"~"               { return static_cast<token_type>(*yytext); }
    200 <LDSCRIPT,EXPRESSION>";"               { return static_cast<token_type>(*yytext); }
    201 <LDSCRIPT,EXPRESSION>"("               { return static_cast<token_type>(*yytext); }
    202 <LDSCRIPT,EXPRESSION>")"               { return static_cast<token_type>(*yytext); }
    203 <LDSCRIPT,EXPRESSION>"{"               { return static_cast<token_type>(*yytext); }
    204 <LDSCRIPT,EXPRESSION>"}"               { return static_cast<token_type>(*yytext); }
    205 
    206  /* Numbers */
    207 <LDSCRIPT,EXPRESSION>((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? {
    208   llvm::StringRef str(yytext, yyleng);
    209   switch (str.back()) {
    210   case 'k':
    211   case 'K':
    212     str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
    213     yylval->integer *= 1024;
    214     break;
    215   case 'm':
    216   case 'M':
    217     str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
    218     yylval->integer *= 1024 * 1024;
    219     break;
    220   default:
    221     str.getAsInteger(0, yylval->integer);
    222     break;
    223   }
    224   return token::INTEGER;
    225 }
    226 
    227  /* Expression string */
    228 <EXPRESSION>{FILENAMECHAR1}{SYMBOLCHARN}* {
    229   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    230   yylval->string = &str;
    231   return token::STRING;
    232 }
    233 
    234  /* String */
    235 <LDSCRIPT>{FILENAMECHAR1}{NOCFILENAMECHAR}* {
    236   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    237   yylval->string = &str;
    238   return token::STRING;
    239 }
    240 
    241 <LDSCRIPT,EXPRESSION>\"(\\.|[^\\"])*\" {
    242   /*" c string literal */
    243   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    244   yylval->string = &str;
    245   return token::STRING;
    246 }
    247 
    248  /* -l namespec */
    249 <LDSCRIPT>"-l"{FILENAMECHAR1}{NOCFILENAMECHAR}* {
    250   const std::string& str = pScriptFile.createParserStr(yytext + 2, yyleng - 2);
    251   yylval->string = &str;
    252   return token::LNAMESPEC;
    253 }
    254 
    255  /* WILDCHAR String */
    256 <LDSCRIPT>{WILDCHAR}* {
    257   if (yytext[0] == '/' && yytext[1] == '*') {
    258     yyless (2);
    259     enterComments(*yylloc);
    260   } else {
    261     const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    262     yylval->string = &str;
    263     return token::STRING;
    264   }
    265 }
    266 
    267  /* gobble up C comments */
    268 <LDSCRIPT,EXPRESSION>"/*" {
    269   enterComments(*yylloc);
    270   yylloc->step();
    271 }
    272 
    273  /* gobble up white-spaces */
    274 <LDSCRIPT,EXPRESSION>{WS}+ {
    275   yylloc->step();
    276 }
    277 
    278  /* gobble up end-of-lines */
    279 <LDSCRIPT,EXPRESSION>\n {
    280   yylloc->lines(1);
    281   yylloc->step();
    282 }
    283 
    284 %% /* Additional Code */
    285 
    286 namespace mcld {
    287 
    288 ScriptScanner::ScriptScanner(std::istream* yyin, std::ostream* yyout)
    289   : yyFlexLexer(yyin, yyout), m_Kind(ScriptFile::Unknown)
    290 {
    291 }
    292 
    293 ScriptScanner::~ScriptScanner()
    294 {
    295 }
    296 
    297 void ScriptScanner::enterComments(ScriptParser::location_type& pLocation)
    298 {
    299   const int start_line = pLocation.begin.line;
    300   const int start_col  = pLocation.begin.column;
    301 
    302   int ch = 0;
    303 
    304   while (true) {
    305     ch = yyinput();
    306     pLocation.columns(1);
    307 
    308     while (ch != '*' && ch != EOF) {
    309       if (ch == '\n') {
    310         pLocation.lines(1);
    311       }
    312 
    313       ch = yyinput();
    314       pLocation.columns(1);
    315     }
    316 
    317     if (ch == '*') {
    318       ch = yyinput();
    319       pLocation.columns(1);
    320 
    321       while (ch == '*') {
    322         ch = yyinput();
    323         pLocation.columns(1);
    324       }
    325 
    326       if (ch == '/')
    327         break;
    328     }
    329 
    330     if (ch == '\n')
    331       pLocation.lines(1);
    332 
    333     if (ch == EOF) {
    334       error(diag::err_unterminated_comment) << pLocation.begin.filename
    335                                             << start_line
    336                                             << start_col;
    337       break;
    338     }
    339   }
    340 }
    341 
    342 void ScriptScanner::setLexState(ScriptFile::Kind pKind)
    343 {
    344   /* push the state into the top of stach */
    345   m_StateStack.push(pKind);
    346 
    347   switch (pKind) {
    348   case ScriptFile::LDScript:
    349     BEGIN(LDSCRIPT);
    350     break;
    351   case ScriptFile::Expression:
    352     BEGIN(EXPRESSION);
    353     break;
    354   case ScriptFile::VersionScript:
    355   case ScriptFile::DynamicList:
    356   default:
    357     assert(0 && "Unsupported script type!");
    358     break;
    359   }
    360 }
    361 
    362 void ScriptScanner::popLexState()
    363 {
    364   /* pop the last state */
    365   m_StateStack.pop();
    366 
    367   /* resume the appropriate state */
    368   if (!m_StateStack.empty()) {
    369     switch (m_StateStack.top()) {
    370     case ScriptFile::LDScript:
    371       BEGIN(LDSCRIPT);
    372       break;
    373     case ScriptFile::Expression:
    374       BEGIN(EXPRESSION);
    375       break;
    376     case ScriptFile::VersionScript:
    377     case ScriptFile::DynamicList:
    378     default:
    379       assert(0 && "Unsupported script type!");
    380       break;
    381     }
    382   }
    383 }
    384 
    385 } /* namespace mcld */
    386 
    387 #ifdef __clang__
    388 #pragma clang diagnostic pop
    389 #endif
    390 
    391 #ifdef yylex
    392 #undef yylex
    393 #endif
    394 
    395 int yyFlexLexer::yylex()
    396 {
    397   return 0;
    398 }
    399 
    400