// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package lex implements lexical analysis for the assembler.
package lex

import (
	"fmt"
	"log"
	"os"
	"strings"
	"text/scanner"

	"cmd/internal/obj"
)

// A ScanToken represents an input item. It is a simple wrapping of rune, as
// returned by text/scanner.Scanner, plus a couple of extra values.
type ScanToken rune

const (
	// Asm defines some two-character lexemes. We make up
	// a rune/ScanToken value for them - ugly but simple.
	LSH       ScanToken = -1000 - iota // << Left shift.
	RSH                                // >> Logical right shift.
	ARR                                // -> Used on ARM for shift type 3, arithmetic right shift.
	ROT                                // @> Used on ARM for shift type 4, rotate right.
	macroName                          // name of macro that should not be expanded
)

// IsRegisterShift reports whether the token is one of the ARM register shift operators.
func IsRegisterShift(r ScanToken) bool {
	return ROT <= r && r <= LSH // Order looks backwards because these are negative.
}
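
// With the iota scheme above, LSH == -1000, RSH == -1001, ARR == -1002 and
// ROT == -1003, so the ARM register shift tokens occupy the contiguous range
// [ROT, LSH]. The function below is an illustrative sketch, not part of the
// original file.
func isRegisterShiftExample() {
	fmt.Println(IsRegisterShift(ARR))       // true: -1003 <= -1002 <= -1000
	fmt.Println(IsRegisterShift(macroName)) // false: macroName (-1004) sits below ROT
	fmt.Println(IsRegisterShift('<'))       // false: ordinary runes are positive
}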

func (t ScanToken) String() string {
	switch t {
	case scanner.EOF:
		return "EOF"
	case scanner.Ident:
		return "identifier"
	case scanner.Int:
		return "integer constant"
	case scanner.Float:
		return "float constant"
	case scanner.Char:
		return "rune constant"
	case scanner.String:
		return "string constant"
	case scanner.RawString:
		return "raw string constant"
	case scanner.Comment:
		return "comment"
	default:
		return fmt.Sprintf("%q", rune(t))
	}
}
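
// The helper below is an illustrative sketch, not part of the original file:
// the named scanner tokens map to fixed descriptions, while any other rune
// falls through to the %q-quoted default case.
func scanTokenStringExample() {
	fmt.Println(ScanToken(scanner.Ident).String()) // identifier
	fmt.Println(ScanToken('+').String())           // '+'
}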

var (
	// It might be nice if these weren't global.
	linkCtxt *obj.Link     // The link context for all instructions.
	histLine int       = 1 // The cumulative count of lines processed.
)

// HistLine reports the cumulative source line number of the token,
// for use in the Prog structure for the linker. (It's always handling the
// instruction from the current lex line.)
// It returns int32 because that's what type ../asm prefers.
func HistLine() int32 {
	return int32(histLine)
}

// NewLexer returns a lexer for the named file and the given link context.
func NewLexer(name string, ctxt *obj.Link) TokenReader {
	linkCtxt = ctxt
	input := NewInput(name)
	fd, err := os.Open(name)
	if err != nil {
		log.Fatalf("%s\n", err)
	}
	input.Push(NewTokenizer(name, fd, fd))
	return input
}

// InitHist sets the line count to 1, for reproducible testing.
func InitHist() {
	histLine = 1
}

// The other files in this directory each contain an implementation of TokenReader.

// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what
// the text of the most recently returned token is, and where it was found.
// The underlying scanner elides all spaces except newline, so the input looks like a stream of
// Tokens; original spacing is lost but we don't need it.
type TokenReader interface {
	// Next returns the next token.
	Next() ScanToken
	// The following methods all refer to the most recent token returned by Next.
	// Text returns the original string representation of the token.
	Text() string
	// File reports the source file name of the token.
	File() string
	// Line reports the source line number of the token.
	Line() int
	// Col reports the source column number of the token.
	Col() int
	// SetPos sets the file and line number.
	SetPos(line int, file string)
	// Close does any teardown required.
	Close()
}
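
// drainTokens is an assumed usage sketch, not part of the original file: it
// shows the typical way to consume a TokenReader, calling Next until EOF and
// using the accessor methods to describe the most recent token.
func drainTokens(r TokenReader) {
	for tok := r.Next(); tok != scanner.EOF; tok = r.Next() {
		fmt.Printf("%s:%d: %s %q\n", r.File(), r.Line(), tok, r.Text())
	}
	r.Close()
}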

// A Token is a scan token plus its string value.
// A macro is stored as a sequence of Tokens with spaces stripped.
type Token struct {
	ScanToken
	text string
}

// Make returns a Token with the given rune (ScanToken) and text representation.
func Make(token ScanToken, text string) Token {
	// If the symbol starts with center dot, as in ·x, rewrite it as ""·x.
	if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") {
		text = `""` + text
	}
	// Substitute the substitutes for . and /.
	text = strings.Replace(text, "\u00B7", ".", -1)
	text = strings.Replace(text, "\u2215", "/", -1)
	return Token{ScanToken: token, text: text}
}
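
// makeExample is an illustrative sketch, not part of the original file. It
// traces Make through its two rewrites: an identifier with a leading center
// dot gains the `""` prefix, and the substitute characters · (U+00B7) and
// ∕ (U+2215) are replaced by . and /.
func makeExample() {
	fmt.Println(Make(scanner.Ident, "\u00B7abs"))               // "".abs
	fmt.Println(Make(scanner.Ident, "math\u2215rand\u00B7Int")) // math/rand.Int
}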

func (l Token) String() string {
	return l.text
}

// A Macro represents the definition of a #defined macro.
type Macro struct {
	name   string   // The #define name.
	args   []string // Formal arguments.
	tokens []Token  // Body of macro.
}

// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
func Tokenize(str string) []Token {
	t := NewTokenizer("command line", strings.NewReader(str), nil)
	var tokens []Token
	for {
		tok := t.Next()
		if tok == scanner.EOF {
			break
		}
		tokens = append(tokens, Make(tok, t.Text()))
	}
	return tokens
}
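
// tokenizeExample is an illustrative sketch, not part of the original file:
// it runs Tokenize over a fragment of assembler input and prints each
// token's kind alongside its text.
func tokenizeExample() {
	for _, tok := range Tokenize("MOVQ $255, AX") {
		fmt.Printf("%s %q\n", tok.ScanToken, tok.String())
	}
}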