/*===- ScriptScanner.ll ---------------------------------------------------===//
//
//                     The MCLinker Project
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===*/

%{
/* C/C++ prologue: copied verbatim to the top of the generated scanner. */

#include "mcld/Script/ScriptScanner.h"
#include "mcld/Script/ScriptFile.h"
#include "mcld/Support/MsgHandling.h"
#include <llvm/ADT/StringRef.h>
#include <string>

/* Short names for the parser's token namespace and token type. */
typedef mcld::ScriptParser::token token;
typedef mcld::ScriptParser::token_type token_type;

/* At end of input hand token::END to the parser. */
#define yyterminate() return token::END
#define YY_NO_UNISTD_H

/* Silence clang's complaint about the 'register' keyword in generated code.
 * The matching 'pop' is at the end of this file. */
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-register"
#endif
%}

/* Flex options: generate a C++ scanner class, non-interactive (batch) input,
 * no yywrap(), no unput(), and start-condition stack support. */
%option c++
%option batch
%option noyywrap
%option nounput
%option stack

%{
/* Runs before every rule action: advance the end of the current location by
 * the length of the matched text. */
#define YY_USER_ACTION yylloc->columns(yyleng);
%}

/* Character-class abbreviations, after binutils ld/ldlex.l */
FILENAMECHAR1 [_a-zA-Z\/\.\\\$\_\~]
SYMBOLCHARN [_a-zA-Z\/\.\\\$\_\~0-9]
NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\~]
WILDCHAR [_a-zA-Z0-9\/\.\-\_\+\$\[\]\\\,\~\?\*\^\!]
WS [ \t\r]

/* Start conditions: one per supported script flavour */
%s LDSCRIPT
%s EXPRESSION

%% /* Regular Expressions */

  /* Code placed at the beginning of yylex(); it runs on every call. */
%{
  /* start a fresh location for the token about to be scanned */
  yylloc->step();

  /* On the first call the scanner kind is still Unknown: latch the kind of
   * the script file and, for the supported kinds, emit the LINKER_SCRIPT
   * marker token before any real token. */
  if (m_Kind == ScriptFile::Unknown) {
    m_Kind = pScriptFile.getKind();
    switch (pScriptFile.getKind()) {
    case ScriptFile::LDScript:
    case ScriptFile::Expression:
      return token::LINKER_SCRIPT;
    case ScriptFile::VersionScript:
    case ScriptFile::DynamicList:
    default:
      assert(0 && "Unsupported script type!");
      break;
    }
  }
%}

  /* Entry Point */
<LDSCRIPT>"ENTRY"                       { return token::ENTRY; }
  /* File Commands */
<LDSCRIPT>"INCLUDE"                     { return token::INCLUDE; }
<LDSCRIPT>"INPUT"                       { return token::INPUT; }
<LDSCRIPT>"GROUP"                       { return token::GROUP; }
<LDSCRIPT>"AS_NEEDED"                   { return token::AS_NEEDED; }
<LDSCRIPT>"OUTPUT"                      { return token::OUTPUT; }
<LDSCRIPT>"SEARCH_DIR"                  { return token::SEARCH_DIR; }
<LDSCRIPT>"STARTUP"                     { return token::STARTUP; }
  /* Format Commands */
<LDSCRIPT>"OUTPUT_FORMAT"               { return token::OUTPUT_FORMAT; }
<LDSCRIPT>"TARGET"                      { return token::TARGET; }
  /* Misc Commands */
<LDSCRIPT>"ASSERT"                      { return token::ASSERT; }
<LDSCRIPT>"EXTERN"                      { return token::EXTERN; }
<LDSCRIPT>"FORCE_COMMON_ALLOCATION"     { return token::FORCE_COMMON_ALLOCATION; }
<LDSCRIPT>"INHIBIT_COMMON_ALLOCATION"   { return token::INHIBIT_COMMON_ALLOCATION; }
<LDSCRIPT>"INSERT"                      { return token::INSERT; }
<LDSCRIPT>"NOCROSSREFS"                 { return token::NOCROSSREFS; }
<LDSCRIPT>"OUTPUT_ARCH"                 { return token::OUTPUT_ARCH; }
<LDSCRIPT>"LD_FEATURE"                  { return token::LD_FEATURE; }
  /* Assignments */
<LDSCRIPT,EXPRESSION>"HIDDEN"           { return token::HIDDEN; }
<LDSCRIPT,EXPRESSION>"PROVIDE"          { return token::PROVIDE; }
<LDSCRIPT,EXPRESSION>"PROVIDE_HIDDEN"   { return token::PROVIDE_HIDDEN; }
  /* SECTIONS Command */
<LDSCRIPT>"SECTIONS"                    { return token::SECTIONS; }
  /* MEMORY Command */
<LDSCRIPT>"MEMORY"                      { return token::MEMORY; }
  /* PHDRS Command */
<LDSCRIPT>"PHDRS"                       { return token::PHDRS; }
  /* Builtin Functions */
<EXPRESSION>"ABSOLUTE"                  { return token::ABSOLUTE; }
<EXPRESSION>"ADDR"                      { return token::ADDR; }
<LDSCRIPT,EXPRESSION>"ALIGN"            { return token::ALIGN; }
<EXPRESSION>"ALIGNOF"                   { return token::ALIGNOF; }
<EXPRESSION>"BLOCK"                     { return token::BLOCK; }
<EXPRESSION>"DATA_SEGMENT_ALIGN"        { return token::DATA_SEGMENT_ALIGN; }
<EXPRESSION>"DATA_SEGMENT_END"          { return token::DATA_SEGMENT_END; }
<EXPRESSION>"DATA_SEGMENT_RELRO_END"    { return token::DATA_SEGMENT_RELRO_END; }
<EXPRESSION>"DEFINED"                   { return token::DEFINED; }
<EXPRESSION>"LENGTH"                    { return token::LENGTH; }
<EXPRESSION>"LOADADDR"                  { return token::LOADADDR; }
<EXPRESSION>"MAX"                       { return token::MAX; }
<EXPRESSION>"MIN"                       { return token::MIN; }
<EXPRESSION>"NEXT"                      { return token::NEXT; }
<EXPRESSION>"ORIGIN"                    { return token::ORIGIN; }
<EXPRESSION>"SEGMENT_START"             { return token::SEGMENT_START; }
<EXPRESSION>"SIZEOF"                    { return token::SIZEOF; }
<EXPRESSION>"SIZEOF_HEADERS"            { return token::SIZEOF_HEADERS; }
<EXPRESSION>"CONSTANT"                  { return token::CONSTANT; }
  /* Symbolic Constants */
<EXPRESSION>"MAXPAGESIZE"               { return token::MAXPAGESIZE; }
<EXPRESSION>"COMMONPAGESIZE"            { return token::COMMONPAGESIZE; }
  /* Input Section Description ("SORT" is an alias of "SORT_BY_NAME") */
<LDSCRIPT>"EXCLUDE_FILE"                { return token::EXCLUDE_FILE; }
<LDSCRIPT>"KEEP"                        { return token::KEEP; }
<LDSCRIPT>"SORT"                        { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_NAME"                { return token::SORT_BY_NAME; }
<LDSCRIPT>"SORT_BY_ALIGNMENT"           { return token::SORT_BY_ALIGNMENT; }
<LDSCRIPT>"SORT_NONE"                   { return token::SORT_NONE; }
<LDSCRIPT>"SORT_BY_INIT_PRIORITY"       { return token::SORT_BY_INIT_PRIORITY; }
  /* Output Section Data */
<LDSCRIPT>"BYTE"                        { return token::BYTE; }
<LDSCRIPT>"SHORT"                       { return token::SHORT; }
<LDSCRIPT>"LONG"                        { return token::LONG; }
<LDSCRIPT>"QUAD"                        { return token::QUAD; }
<LDSCRIPT>"SQUAD"                       { return token::SQUAD; }
<LDSCRIPT>"FILL"                        { return token::FILL; }
  /* Output Section Discarding */
<LDSCRIPT>"DISCARD"                     { return token::DISCARD; }
  /* Output Section Keywords */
<LDSCRIPT>"CREATE_OBJECT_SYMBOLS"       { return token::CREATE_OBJECT_SYMBOLS; }
<LDSCRIPT>"CONSTRUCTORS"                { return token::CONSTRUCTORS; }
  /* Output Section Attributes */
  /* Output Section Type */
<LDSCRIPT,EXPRESSION>"NOLOAD"           { return token::NOLOAD; }
<LDSCRIPT,EXPRESSION>"DSECT"            { return token::DSECT; }
<LDSCRIPT,EXPRESSION>"COPY"             { return token::COPY; }
<LDSCRIPT,EXPRESSION>"INFO"             { return token::INFO; }
<LDSCRIPT,EXPRESSION>"OVERLAY"          { return token::OVERLAY; }
  /* Output Section LMA */
<LDSCRIPT>"AT"                          { return token::AT; }
  /* Forced Input Alignment */
<LDSCRIPT>"SUBALIGN"                    { return token::SUBALIGN; }
  /* Output Section Constraint */
<LDSCRIPT>"ONLY_IF_RO"                  { return token::ONLY_IF_RO; }
<LDSCRIPT>"ONLY_IF_RW"                  { return token::ONLY_IF_RW; }
  /* Operators (multi-character ones first; flex prefers the longest match) */
<LDSCRIPT,EXPRESSION>"<<"               { return token::LSHIFT; }
<LDSCRIPT,EXPRESSION>">>"               { return token::RSHIFT; }
<LDSCRIPT,EXPRESSION>"=="               { return token::EQ; }
<LDSCRIPT,EXPRESSION>"!="               { return token::NE; }
<LDSCRIPT,EXPRESSION>"<="               { return token::LE; }
<LDSCRIPT,EXPRESSION>">="               { return token::GE; }
<LDSCRIPT,EXPRESSION>"&&"               { return token::LOGICAL_AND; }
<LDSCRIPT,EXPRESSION>"||"               { return token::LOGICAL_OR; }
<LDSCRIPT,EXPRESSION>"+="               { return token::ADD_ASSIGN; }
<LDSCRIPT,EXPRESSION>"-="               { return token::SUB_ASSIGN; }
<LDSCRIPT,EXPRESSION>"*="               { return token::MUL_ASSIGN; }
<LDSCRIPT,EXPRESSION>"/="               { return token::DIV_ASSIGN; }
<LDSCRIPT,EXPRESSION>"&="               { return token::AND_ASSIGN; }
<LDSCRIPT,EXPRESSION>"|="               { return token::OR_ASSIGN; }
<LDSCRIPT,EXPRESSION>"<<="              { return token::LS_ASSIGN; }
<LDSCRIPT,EXPRESSION>">>="              { return token::RS_ASSIGN; }
  /* Single-character operators and punctuation: the token value is the
   * character itself. */
<LDSCRIPT,EXPRESSION>","                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"="                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"?"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>":"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"|"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"^"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"&"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"<"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>">"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"+"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"-"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"*"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"/"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"%"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"!"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"~"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>";"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"("                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>")"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"{"                { return static_cast<token_type>(*yytext); }
<LDSCRIPT,EXPRESSION>"}"                { return static_cast<token_type>(*yytext); }

  /* Numbers: decimal, "0x"/"0X" hexadecimal or "$" hexadecimal, optionally
   * followed by a K (x1024) or M (x1024*1024) suffix. */
<LDSCRIPT,EXPRESSION>((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? {
  llvm::StringRef digits(yytext, yyleng);

  /* Peel off the optional size suffix and remember its multiplier.  Neither
   * 'k' nor 'm' is a hex digit, so a trailing one is always a suffix. */
  int multiplier = 1;
  switch (digits.back()) {
  case 'k':
  case 'K':
    multiplier = 1024;
    digits = digits.substr(0, digits.size() - 1);
    break;
  case 'm':
  case 'M':
    multiplier = 1024 * 1024;
    digits = digits.substr(0, digits.size() - 1);
    break;
  default:
    break;
  }

  /* Radix 0 lets getAsInteger() auto-detect "0x" and friends, but it does
   * not understand the "$" hexadecimal prefix the pattern accepts, so strip
   * it and force base 16. */
  unsigned radix = 0;
  if (digits[0] == '$') {
    digits = digits.substr(1);
    radix = 16;
  }

  /* getAsInteger() returns true on failure (e.g. overflow); make sure the
   * parser never sees a stale semantic value in that case.
   * NOTE(review): consider emitting a diagnostic here instead. */
  if (digits.getAsInteger(radix, yylval->integer))
    yylval->integer = 0;
  yylval->integer *= multiplier;
  return token::INTEGER;
}

  /* Expression string */
<EXPRESSION>{FILENAMECHAR1}{SYMBOLCHARN}* {
  const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &str;
  return token::STRING;
}

  /* String */
<LDSCRIPT>{FILENAMECHAR1}{NOCFILENAMECHAR}* {
  const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &str;
  return token::STRING;
}

<LDSCRIPT,EXPRESSION>\"(\\.|[^\\"])*\" {
  /*" c string literal; the surrounding quotes are kept in the token text */
  const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
  yylval->string = &str;
  return token::STRING;
}

  /* -l namespec: the leading "-l" is dropped from the token text */
<LDSCRIPT>"-l"{FILENAMECHAR1}{NOCFILENAMECHAR}* {
  const std::string& str = pScriptFile.createParserStr(yytext + 2, yyleng - 2);
  yylval->string = &str;
  return token::LNAMESPEC;
}

  /* WILDCHAR String.  WILDCHAR contains both '/' and '*', so this pattern can
   * swallow the start of a C comment as part of a longer match; detect that,
   * give everything after the opening delimiter back, and skip the comment. */
<LDSCRIPT>{WILDCHAR}* {
  if (yytext[0] == '/' && yytext[1] == '*') {
    /* NOTE(review): YY_USER_ACTION has already advanced the location by the
     * full match length, and enterComments() counts the returned characters
     * again, so the end column is over-counted on this path. */
    yyless (2);
    enterComments(*yylloc);
    /* same as the dedicated comment rule: the next token starts here */
    yylloc->step();
  } else {
    const std::string& str = pScriptFile.createParserStr(yytext, yyleng);
    yylval->string = &str;
    return token::STRING;
  }
}

  /* gobble up C comments */
<LDSCRIPT,EXPRESSION>"/*" {
  enterComments(*yylloc);
  yylloc->step();
}

  /* gobble up white-spaces */
<LDSCRIPT,EXPRESSION>{WS}+ {
  yylloc->step();
}

  /* gobble up end-of-lines */
<LDSCRIPT,EXPRESSION>\n {
  yylloc->lines(1);
  yylloc->step();
}

%% /* Additional Code */

namespace mcld {
ScriptScanner::ScriptScanner(std::istream* yyin, std::ostream* yyout) 289 : yyFlexLexer(yyin, yyout), m_Kind(ScriptFile::Unknown) 290 { 291 } 292 293 ScriptScanner::~ScriptScanner() 294 { 295 } 296 297 void ScriptScanner::enterComments(ScriptParser::location_type& pLocation) 298 { 299 const int start_line = pLocation.begin.line; 300 const int start_col = pLocation.begin.column; 301 302 int ch = 0; 303 304 while (true) { 305 ch = yyinput(); 306 pLocation.columns(1); 307 308 while (ch != '*' && ch != EOF) { 309 if (ch == '\n') { 310 pLocation.lines(1); 311 } 312 313 ch = yyinput(); 314 pLocation.columns(1); 315 } 316 317 if (ch == '*') { 318 ch = yyinput(); 319 pLocation.columns(1); 320 321 while (ch == '*') { 322 ch = yyinput(); 323 pLocation.columns(1); 324 } 325 326 if (ch == '/') 327 break; 328 } 329 330 if (ch == '\n') 331 pLocation.lines(1); 332 333 if (ch == EOF) { 334 error(diag::err_unterminated_comment) << pLocation.begin.filename 335 << start_line 336 << start_col; 337 break; 338 } 339 } 340 } 341 342 void ScriptScanner::setLexState(ScriptFile::Kind pKind) 343 { 344 /* push the state into the top of stach */ 345 m_StateStack.push(pKind); 346 347 switch (pKind) { 348 case ScriptFile::LDScript: 349 BEGIN(LDSCRIPT); 350 break; 351 case ScriptFile::Expression: 352 BEGIN(EXPRESSION); 353 break; 354 case ScriptFile::VersionScript: 355 case ScriptFile::DynamicList: 356 default: 357 assert(0 && "Unsupported script type!"); 358 break; 359 } 360 } 361 362 void ScriptScanner::popLexState() 363 { 364 /* pop the last state */ 365 m_StateStack.pop(); 366 367 /* resume the appropriate state */ 368 if (!m_StateStack.empty()) { 369 switch (m_StateStack.top()) { 370 case ScriptFile::LDScript: 371 BEGIN(LDSCRIPT); 372 break; 373 case ScriptFile::Expression: 374 BEGIN(EXPRESSION); 375 break; 376 case ScriptFile::VersionScript: 377 case ScriptFile::DynamicList: 378 default: 379 assert(0 && "Unsupported script type!"); 380 break; 381 } 382 } 383 } 384 385 } /* namespace mcld */ 
386 387 #ifdef __clang__ 388 #pragma clang diagnostic pop 389 #endif 390 391 #ifdef yylex 392 #undef yylex 393 #endif 394 395 int yyFlexLexer::yylex() 396 { 397 return 0; 398 } 399 400