Home | History | Annotate | Download | only in lex
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package lex
      6 
      7 import (
      8 	"io"
      9 	"os"
     10 	"strings"
     11 	"text/scanner"
     12 	"unicode"
     13 )
     14 
     15 // A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
     16 // for our purposes and made a TokenReader. It forms the lowest level,
     17 // turning text from readers into tokens.
type Tokenizer struct {
	tok      ScanToken        // most recent token produced by Next.
	s        *scanner.Scanner // underlying scanner doing the actual tokenizing.
	line     int              // current line number, maintained across newlines and comments.
	fileName string           // file name reported by File; may be overridden by SetPos.
	file     *os.File         // If non-nil, file descriptor to close.
}
     25 
     26 func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
     27 	var s scanner.Scanner
     28 	s.Init(r)
     29 	// Newline is like a semicolon; other space characters are fine.
     30 	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
     31 	// Don't skip comments: we need to count newlines.
     32 	s.Mode = scanner.ScanChars |
     33 		scanner.ScanFloats |
     34 		scanner.ScanIdents |
     35 		scanner.ScanInts |
     36 		scanner.ScanStrings |
     37 		scanner.ScanComments
     38 	s.Position.Filename = name
     39 	s.IsIdentRune = isIdentRune
     40 	if file != nil {
     41 		linkCtxt.LineHist.Push(histLine, name)
     42 	}
     43 	return &Tokenizer{
     44 		s:        &s,
     45 		line:     1,
     46 		fileName: name,
     47 		file:     file,
     48 	}
     49 }
     50 
     51 // We want center dot () and division slash () to work as identifier characters.
     52 func isIdentRune(ch rune, i int) bool {
     53 	if unicode.IsLetter(ch) {
     54 		return true
     55 	}
     56 	switch ch {
     57 	case '_': // Underscore; traditional.
     58 		return true
     59 	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '' middle dot
     60 		return true
     61 	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '' division slash
     62 		return true
     63 	}
     64 	// Digits are OK only after the first character.
     65 	return i > 0 && unicode.IsDigit(ch)
     66 }
     67 
     68 func (t *Tokenizer) Text() string {
     69 	switch t.tok {
     70 	case LSH:
     71 		return "<<"
     72 	case RSH:
     73 		return ">>"
     74 	case ARR:
     75 		return "->"
     76 	case ROT:
     77 		return "@>"
     78 	}
     79 	return t.s.TokenText()
     80 }
     81 
// File returns the name of the file being tokenized, as set at
// construction or overridden by SetPos.
func (t *Tokenizer) File() string {
	return t.fileName
}
     85 
// Line returns the current line number, which Next keeps up to date as
// it consumes newlines and comments.
func (t *Tokenizer) Line() int {
	return t.line
}
     89 
// Col returns the current input column, as reported by the underlying
// scanner's position.
func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}
     93 
// SetPos overrides the tokenizer's recorded line number and file name.
// NOTE(review): presumably used to honor line directives — confirm with callers.
func (t *Tokenizer) SetPos(line int, file string) {
	t.line = line
	t.fileName = file
}
     98 
     99 func (t *Tokenizer) Next() ScanToken {
    100 	s := t.s
    101 	for {
    102 		t.tok = ScanToken(s.Scan())
    103 		if t.tok != scanner.Comment {
    104 			break
    105 		}
    106 		length := strings.Count(s.TokenText(), "\n")
    107 		t.line += length
    108 		histLine += length
    109 		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
    110 		// For now, just discard all comments.
    111 	}
    112 	switch t.tok {
    113 	case '\n':
    114 		if t.file != nil {
    115 			histLine++
    116 		}
    117 		t.line++
    118 	case '-':
    119 		if s.Peek() == '>' {
    120 			s.Next()
    121 			t.tok = ARR
    122 			return ARR
    123 		}
    124 	case '@':
    125 		if s.Peek() == '>' {
    126 			s.Next()
    127 			t.tok = ROT
    128 			return ROT
    129 		}
    130 	case '<':
    131 		if s.Peek() == '<' {
    132 			s.Next()
    133 			t.tok = LSH
    134 			return LSH
    135 		}
    136 	case '>':
    137 		if s.Peek() == '>' {
    138 			s.Next()
    139 			t.tok = RSH
    140 			return RSH
    141 		}
    142 	}
    143 	return t.tok
    144 }
    145 
    146 func (t *Tokenizer) Close() {
    147 	if t.file != nil {
    148 		t.file.Close()
    149 		// It's an open file, so pop the line history.
    150 		linkCtxt.LineHist.Pop(histLine)
    151 	}
    152 }
    153