1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package lex 6 7 import ( 8 "io" 9 "os" 10 "strings" 11 "text/scanner" 12 "unicode" 13 ) 14 15 // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured 16 // for our purposes and made a TokenReader. It forms the lowest level, 17 // turning text from readers into tokens. 18 type Tokenizer struct { 19 tok ScanToken 20 s *scanner.Scanner 21 line int 22 fileName string 23 file *os.File // If non-nil, file descriptor to close. 24 } 25 26 func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer { 27 var s scanner.Scanner 28 s.Init(r) 29 // Newline is like a semicolon; other space characters are fine. 30 s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' 31 // Don't skip comments: we need to count newlines. 32 s.Mode = scanner.ScanChars | 33 scanner.ScanFloats | 34 scanner.ScanIdents | 35 scanner.ScanInts | 36 scanner.ScanStrings | 37 scanner.ScanComments 38 s.Position.Filename = name 39 s.IsIdentRune = isIdentRune 40 if file != nil { 41 linkCtxt.LineHist.Push(histLine, name) 42 } 43 return &Tokenizer{ 44 s: &s, 45 line: 1, 46 fileName: name, 47 file: file, 48 } 49 } 50 51 // We want center dot () and division slash () to work as identifier characters. 52 func isIdentRune(ch rune, i int) bool { 53 if unicode.IsLetter(ch) { 54 return true 55 } 56 switch ch { 57 case '_': // Underscore; traditional. 58 return true 59 case '\u00B7': // Represents the period in runtime.exit. U+00B7 '' middle dot 60 return true 61 case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '' division slash 62 return true 63 } 64 // Digits are OK only after the first character. 65 return i > 0 && unicode.IsDigit(ch) 66 } 67 68 func (t *Tokenizer) Text() string { 69 switch t.tok { 70 case LSH: 71 return "<<" 72 case RSH: 73 return ">>" 74 case ARR: 75 return "->" 76 case ROT: 77 return "@>" 78 } 79 return t.s.TokenText() 80 } 81 82 func (t *Tokenizer) File() string { 83 return t.fileName 84 } 85 86 func (t *Tokenizer) Line() int { 87 return t.line 88 } 89 90 func (t *Tokenizer) Col() int { 91 return t.s.Pos().Column 92 } 93 94 func (t *Tokenizer) SetPos(line int, file string) { 95 t.line = line 96 t.fileName = file 97 } 98 99 func (t *Tokenizer) Next() ScanToken { 100 s := t.s 101 for { 102 t.tok = ScanToken(s.Scan()) 103 if t.tok != scanner.Comment { 104 break 105 } 106 length := strings.Count(s.TokenText(), "\n") 107 t.line += length 108 histLine += length 109 // TODO: If we ever have //go: comments in assembly, will need to keep them here. 110 // For now, just discard all comments. 111 } 112 switch t.tok { 113 case '\n': 114 if t.file != nil { 115 histLine++ 116 } 117 t.line++ 118 case '-': 119 if s.Peek() == '>' { 120 s.Next() 121 t.tok = ARR 122 return ARR 123 } 124 case '@': 125 if s.Peek() == '>' { 126 s.Next() 127 t.tok = ROT 128 return ROT 129 } 130 case '<': 131 if s.Peek() == '<' { 132 s.Next() 133 t.tok = LSH 134 return LSH 135 } 136 case '>': 137 if s.Peek() == '>' { 138 s.Next() 139 t.tok = RSH 140 return RSH 141 } 142 } 143 return t.tok 144 } 145 146 func (t *Tokenizer) Close() { 147 if t.file != nil { 148 t.file.Close() 149 // It's an open file, so pop the line history. 150 linkCtxt.LineHist.Pop(histLine) 151 } 152 } 153