Home | History | Annotate | Download | only in Script
      1 /*===- ScriptScanner.ll ---------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===*/
      9 
     10 %{
     11 /* C/C++ Declarations */
     12 
     13 #include <mcld/Script/ScriptScanner.h>
     14 #include <mcld/Script/ScriptFile.h>
     15 #include <mcld/Support/MsgHandling.h>
     16 #include <llvm/ADT/StringRef.h>
     17 #include <string>
     18 
     19 typedef mcld::ScriptParser::token token;
     20 typedef mcld::ScriptParser::token_type token_type;
     21 
     22 #define yyterminate() return token::END
     23 #define YY_NO_UNISTD_H
     24 %}
     25 
/* Flex Declarations and Options */
/* c++      - generate a C++ scanner class instead of a C yylex() function.
 * batch    - generate a batch (non-interactive) scanner.
 * noyywrap - do not call yywrap() at end of file; assume no more input.
 * nounput  - do not generate the yyunput() routine.
 * stack    - enable start-condition stack support.
 */
%option c++
%option batch
%option noyywrap
%option nounput
%option stack

%{
/* Runs before the action of every matched rule: extend the end of the
 * current location by the length of the matched text. */
#define YY_USER_ACTION  yylloc->columns(yyleng);
%}

/* Regular-expression abbreviations, modelled on binutils ld/ldlex.l.
 * FILENAMECHAR1   - characters allowed as the first character of a name.
 * SYMBOLCHARN     - FILENAMECHAR1 plus the decimal digits.
 * NOCFILENAMECHAR - letters, digits and  / . - _ + $ [ ] \ ~
 * WILDCHAR        - NOCFILENAMECHAR plus  , ? * ^ !
 * WS              - space, tab and carriage return (newline is separate).
 */
FILENAMECHAR1   [_a-zA-Z\/\.\\\$\_\~]
SYMBOLCHARN     [_a-zA-Z\/\.\\\$\_\~0-9]
NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\~]
WILDCHAR        [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\,\~\?\*\^\!]
WS [ \t\r]

/* Start conditions (inclusive). Every rule below names the condition(s) in
 * which it is active; the scanner switches between them with BEGIN(). */
%s LDSCRIPT
%s EXPRESSION
     47 
     48 %% /* Regular Expressions */
     49 
 /* code to place at the beginning of yylex() */
%{
  /* Executed on every call to yylex(): make the location of the new token
     begin where the previous one ended. */
  yylloc->step();

  /* While the script kind has not been recorded yet, record it and return a
     LINKER_SCRIPT token without scanning any input. Linker scripts and
     expressions both produce this same token. */
  if (m_Kind == ScriptFile::Unknown) {
    m_Kind = pScriptFile.getKind();
    switch (pScriptFile.getKind()) {
    case ScriptFile::LDScript:
    case ScriptFile::Expression:
      return token::LINKER_SCRIPT;
    case ScriptFile::VersionScript:
    case ScriptFile::DynamicList:
    default:
      /* These kinds are not handled here. When assert() is compiled out,
         execution continues into the normal scanning code that follows. */
      assert(0 && "Unsupported script type!");
      break;
    }
  }
%}
     70 
 /* Keyword rules. The start-condition prefix names where a rule is active. */
 /* flex takes the longest match and, on a tie, the rule listed first, so */
 /* these rules must stay ahead of the generic string rules further down. */
 /* Entry Point */
<LDSCRIPT>"ENTRY"                      { return token::ENTRY; }
 /* File Commands */
<LDSCRIPT>"INCLUDE"                    { return token::INCLUDE; }
<LDSCRIPT>"INPUT"                      { return token::INPUT; }
<LDSCRIPT>"GROUP"                      { return token::GROUP; }
<LDSCRIPT>"AS_NEEDED"                  { return token::AS_NEEDED; }
<LDSCRIPT>"OUTPUT"                     { return token::OUTPUT; }
<LDSCRIPT>"SEARCH_DIR"                 { return token::SEARCH_DIR; }
<LDSCRIPT>"STARTUP"                    { return token::STARTUP; }
 /* Format Commands */
<LDSCRIPT>"OUTPUT_FORMAT"              { return token::OUTPUT_FORMAT; }
<LDSCRIPT>"TARGET"                     { return token::TARGET; }
 /* Misc Commands */
<LDSCRIPT>"ASSERT"                     { return token::ASSERT; }
<LDSCRIPT>"EXTERN"                     { return token::EXTERN; }
<LDSCRIPT>"FORCE_COMMON_ALLOCATION"    { return token::FORCE_COMMON_ALLOCATION; }
<LDSCRIPT>"INHIBIT_COMMON_ALLOCATION"  { return token::INHIBIT_COMMON_ALLOCATION; }
<LDSCRIPT>"INSERT"                     { return token::INSERT; }
<LDSCRIPT>"NOCROSSREFS"                { return token::NOCROSSREFS; }
<LDSCRIPT>"OUTPUT_ARCH"                { return token::OUTPUT_ARCH; }
<LDSCRIPT>"LD_FEATURE"                 { return token::LD_FEATURE; }
 /* Assignments */
<LDSCRIPT,EXPRESSION>"HIDDEN"          { return token::HIDDEN; }
<LDSCRIPT,EXPRESSION>"PROVIDE"         { return token::PROVIDE; }
<LDSCRIPT,EXPRESSION>"PROVIDE_HIDDEN"  { return token::PROVIDE_HIDDEN; }
 /* SECTIONS Command */
<LDSCRIPT>"SECTIONS"                   { return token::SECTIONS; }
 /* MEMORY Command */
<LDSCRIPT>"MEMORY"                     { return token::MEMORY; }
 /* PHDRS Command */
<LDSCRIPT>"PHDRS"                      { return token::PHDRS; }
 /* Builtin Functions */
<EXPRESSION>"ABSOLUTE"                 { return token::ABSOLUTE; }
<EXPRESSION>"ADDR"                     { return token::ADDR; }
<LDSCRIPT,EXPRESSION>"ALIGN"           { return token::ALIGN; }
<EXPRESSION>"ALIGNOF"                  { return token::ALIGNOF; }
<EXPRESSION>"BLOCK"                    { return token::BLOCK; }
<EXPRESSION>"DATA_SEGMENT_ALIGN"       { return token::DATA_SEGMENT_ALIGN; }
<EXPRESSION>"DATA_SEGMENT_END"         { return token::DATA_SEGMENT_END; }
<EXPRESSION>"DATA_SEGMENT_RELRO_END"   { return token::DATA_SEGMENT_RELRO_END; }
<EXPRESSION>"DEFINED"                  { return token::DEFINED; }
<EXPRESSION>"LENGTH"                   { return token::LENGTH; }
<EXPRESSION>"LOADADDR"                 { return token::LOADADDR; }
<EXPRESSION>"MAX"                      { return token::MAX; }
<EXPRESSION>"MIN"                      { return token::MIN; }
<EXPRESSION>"NEXT"                     { return token::NEXT; }
<EXPRESSION>"ORIGIN"                   { return token::ORIGIN; }
<EXPRESSION>"SEGMENT_START"            { return token::SEGMENT_START; }
<EXPRESSION>"SIZEOF"                   { return token::SIZEOF; }
<EXPRESSION>"SIZEOF_HEADERS"           { return token::SIZEOF_HEADERS; }
<EXPRESSION>"CONSTANT"                 { return token::CONSTANT; }
 /* Symbolic Constants */
<EXPRESSION>"MAXPAGESIZE"              { return token::MAXPAGESIZE; }
<EXPRESSION>"COMMONPAGESIZE"           { return token::COMMONPAGESIZE; }
 /* Input Section Description */
<LDSCRIPT>"EXCLUDE_FILE"               { return token::EXCLUDE_FILE; }
<LDSCRIPT>"KEEP"                       { return token::KEEP; }
 /* SORT yields the same token as SORT_BY_NAME */
<LDSCRIPT>"SORT"                       { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_NAME"               { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_ALIGNMENT"          { return token::SORT_BY_ALIGNMENT; }
<LDSCRIPT>"SORT_NONE"                  { return token::SORT_NONE; }
<LDSCRIPT>"SORT_BY_INIT_PRIORITY"      { return token::SORT_BY_INIT_PRIORITY; }
 /* Output Section Data */
<LDSCRIPT>"BYTE"                       { return token::BYTE; }
<LDSCRIPT>"SHORT"                      { return token::SHORT; }
<LDSCRIPT>"LONG"                       { return token::LONG; }
<LDSCRIPT>"QUAD"                       { return token::QUAD; }
<LDSCRIPT>"SQUAD"                      { return token::SQUAD; }
<LDSCRIPT>"FILL"                       { return token::FILL; }
 /* Output Section Discarding */
<LDSCRIPT>"DISCARD"                    { return token::DISCARD; }
 /* Output Section Keywords */
<LDSCRIPT>"CREATE_OBJECT_SYMBOLS"      { return token::CREATE_OBJECT_SYMBOLS; }
<LDSCRIPT>"CONSTRUCTORS"               { return token::CONSTRUCTORS; }
 /* Output Section Attributes */
 /* Output Section Type */
<LDSCRIPT,EXPRESSION>"NOLOAD"          { return token::NOLOAD; }
<LDSCRIPT,EXPRESSION>"DSECT"           { return token::DSECT; }
<LDSCRIPT,EXPRESSION>"COPY"            { return token::COPY; }
<LDSCRIPT,EXPRESSION>"INFO"            { return token::INFO; }
<LDSCRIPT,EXPRESSION>"OVERLAY"         { return token::OVERLAY; }
 /* Output Section LMA */
<LDSCRIPT>"AT"                         { return token::AT; }
 /* Forced Input Alignment */
<LDSCRIPT>"SUBALIGN"                   { return token::SUBALIGN; }
 /* Output Section Constraint */
<LDSCRIPT>"ONLY_IF_RO"                 { return token::ONLY_IF_RO; }
<LDSCRIPT>"ONLY_IF_RW"                 { return token::ONLY_IF_RW; }
 /* Operators */
 /* Multi-character operators map to named tokens; the longest match wins, */
 /* so e.g. a shift-assign is preferred over a shift followed by "=". */
<LDSCRIPT,EXPRESSION>"<<"              { return token::LSHIFT; }
<LDSCRIPT,EXPRESSION>">>"              { return token::RSHIFT; }
<LDSCRIPT,EXPRESSION>"=="              { return token::EQ; }
<LDSCRIPT,EXPRESSION>"!="              { return token::NE; }
<LDSCRIPT,EXPRESSION>"<="              { return token::LE; }
<LDSCRIPT,EXPRESSION>">="              { return token::GE; }
<LDSCRIPT,EXPRESSION>"&&"              { return token::LOGICAL_AND; }
<LDSCRIPT,EXPRESSION>"||"              { return token::LOGICAL_OR; }
<LDSCRIPT,EXPRESSION>"+="              { return token::ADD_ASSIGN; }
<LDSCRIPT,EXPRESSION>"-="              { return token::SUB_ASSIGN; }
<LDSCRIPT,EXPRESSION>"*="              { return token::MUL_ASSIGN; }
<LDSCRIPT,EXPRESSION>"/="              { return token::DIV_ASSIGN; }
<LDSCRIPT,EXPRESSION>"&="              { return token::AND_ASSIGN; }
<LDSCRIPT,EXPRESSION>"|="              { return token::OR_ASSIGN; }
<LDSCRIPT,EXPRESSION>"<<="             { return token::LS_ASSIGN; }
<LDSCRIPT,EXPRESSION>">>="             { return token::RS_ASSIGN; }
 /* Single-character operators and punctuation are returned as the */
 /* character itself, converted to the parser's token type. */
<LDSCRIPT,EXPRESSION>","               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"="               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"?"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>":"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"|"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"^"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"&"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"<"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>">"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"+"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"-"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"*"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"/"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"%"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"!"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"~"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>";"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"("               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>")"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"{"               { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"}"               { return static_cast<token_type>(*yytext); }
    198 
    199  /* Numbers */
    200 <LDSCRIPT,EXPRESSION>((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? {
    201   llvm::StringRef str(yytext, yyleng);
    202   switch (str.back()) {
    203   case 'k':
    204   case 'K':
    205     str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
    206     yylval->integer *= 1024;
    207     break;
    208   case 'm':
    209   case 'M':
    210     str.substr(0, yyleng - 1).getAsInteger(0, yylval->integer);
    211     yylval->integer *= 1024 * 1024;
    212     break;
    213   default:
    214     str.getAsInteger(0, yylval->integer);
    215     break;
    216   }
    217   return token::INTEGER;
    218 }
    219 
    220  /* Expression string */
    221 <EXPRESSION>{FILENAMECHAR1}{SYMBOLCHARN}* {
    222   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    223   yylval->string = &str;
    224   return token::STRING;
    225 }
    226 
    227  /* String */
    228 <LDSCRIPT>{FILENAMECHAR1}{NOCFILENAMECHAR}* {
    229   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    230   yylval->string = &str;
    231   return token::STRING;
    232 }
    233 
    234 <LDSCRIPT,EXPRESSION>\"(\\.|[^\\"])*\" {
    235   /*" c string literal */
    236   const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    237   yylval->string = &str;
    238   return token::STRING;
    239 }
    240 
    241  /* -l namespec */
    242 <LDSCRIPT>"-l"{FILENAMECHAR1}{NOCFILENAMECHAR}* {
    243   const std::string& str = pScriptFile.createParserStr(yytext + 2, yyleng - 2);
    244   yylval->string = &str;
    245   return token::LNAMESPEC;
    246 }
    247 
    248  /* WILDCHAR String */
    249 <LDSCRIPT>{WILDCHAR}* {
    250   if (yytext[0] == '/' && yytext[1] == '*') {
    251     yyless (2);
    252     enterComments(*yylloc);
    253   } else {
    254     const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    255     yylval->string = &str;
    256     return token::STRING;
    257   }
    258 }
    259 
 /* gobble up C comments */
<LDSCRIPT,EXPRESSION>"/*" {
  /* consume the comment body, then restart the location so that the next
     token begins after the comment; no token is returned */
  enterComments(*yylloc);
  yylloc->step();
}

 /* gobble up white-spaces */
<LDSCRIPT,EXPRESSION>{WS}+ {
  /* blanks produce no token; just move the location start past them */
  yylloc->step();
}

 /* gobble up end-of-lines */
<LDSCRIPT,EXPRESSION>\n {
  /* advance the location by one line, then start the next token there */
  yylloc->lines(1);
  yylloc->step();
}
    276 
    277 %% /* Additional Code */
    278 
    279 namespace mcld {
    280 
    281 ScriptScanner::ScriptScanner(std::istream* yyin, std::ostream* yyout)
    282   : yyFlexLexer(yyin, yyout), m_Kind(ScriptFile::Unknown)
    283 {
    284 }
    285 
    286 ScriptScanner::~ScriptScanner()
    287 {
    288 }
    289 
    290 void ScriptScanner::enterComments(ScriptParser::location_type& pLocation)
    291 {
    292   const int start_line = pLocation.begin.line;
    293   const int start_col  = pLocation.begin.column;
    294 
    295   int ch = 0;
    296 
    297   while (true) {
    298     ch = yyinput();
    299     pLocation.columns(1);
    300 
    301     while (ch != '*' && ch != EOF) {
    302       if (ch == '\n') {
    303         pLocation.lines(1);
    304       }
    305 
    306       ch = yyinput();
    307       pLocation.columns(1);
    308     }
    309 
    310     if (ch == '*') {
    311       ch = yyinput();
    312       pLocation.columns(1);
    313 
    314       while (ch == '*') {
    315         ch = yyinput();
    316         pLocation.columns(1);
    317       }
    318 
    319       if (ch == '/')
    320         break;
    321     }
    322 
    323     if (ch == '\n')
    324       pLocation.lines(1);
    325 
    326     if (ch == EOF) {
    327       error(diag::err_unterminated_comment) << pLocation.begin.filename
    328                                             << start_line
    329                                             << start_col;
    330       break;
    331     }
    332   }
    333 }
    334 
    335 void ScriptScanner::setLexState(ScriptFile::Kind pKind)
    336 {
    337   /* push the state into the top of stach */
    338   m_StateStack.push(pKind);
    339 
    340   switch (pKind) {
    341   case ScriptFile::LDScript:
    342     BEGIN(LDSCRIPT);
    343     break;
    344   case ScriptFile::Expression:
    345     BEGIN(EXPRESSION);
    346     break;
    347   case ScriptFile::VersionScript:
    348   case ScriptFile::DynamicList:
    349   default:
    350     assert(0 && "Unsupported script type!");
    351     break;
    352   }
    353 }
    354 
    355 void ScriptScanner::popLexState()
    356 {
    357   /* pop the last state */
    358   m_StateStack.pop();
    359 
    360   /* resume the appropriate state */
    361   if (!m_StateStack.empty()) {
    362     switch (m_StateStack.top()) {
    363     case ScriptFile::LDScript:
    364       BEGIN(LDSCRIPT);
    365       break;
    366     case ScriptFile::Expression:
    367       BEGIN(EXPRESSION);
    368       break;
    369     case ScriptFile::VersionScript:
    370     case ScriptFile::DynamicList:
    371     default:
    372       assert(0 && "Unsupported script type!");
    373       break;
    374     }
    375   }
    376 }
    377 
    378 } /* namespace of mcld */
    379 
    380 #ifdef yylex
    381 #undef yylex
    382 #endif
    383 
    384 int yyFlexLexer::yylex()
    385 {
    386   return 0;
    387 }
    388 
    389