from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.voltLib.error import VoltLibError


class Lexer(object):
    """Iterator that splits a text into NUMBER, STRING and NAME tokens.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``location`` is ``(filename, line, column)`` with
    1-based line and column numbers.  NEWLINE tokens are produced by
    ``next_()`` for line bookkeeping but are filtered out by the iterator
    protocol methods.

    Lexical errors are reported by raising ``VoltLibError`` with the
    location of the offending token.
    """

    # Token types.
    NUMBER = "NUMBER"
    STRING = "STRING"
    NAME = "NAME"
    NEWLINE = "NEWLINE"

    # Character classes used while scanning.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_DIGIT_ = "0123456789"
    CHAR_UC_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    CHAR_LC_LETTER_ = "abcdefghijklmnopqrstuvwxyz"
    CHAR_UNDERSCORE_ = "_"
    CHAR_PERIOD_ = "."
    CHAR_NAME_START_ = CHAR_UC_LETTER_ + CHAR_LC_LETTER_ + CHAR_PERIOD_ + \
        CHAR_UNDERSCORE_
    CHAR_NAME_CONTINUATION_ = CHAR_NAME_START_ + CHAR_DIGIT_

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        ``filename`` is only used as the first element of the location
        tuples attached to tokens and errors.
        """
        self.filename_ = filename
        self.line_ = 1  # 1-based number of the line being scanned
        self.pos_ = 0  # index into text_ of the next unread character
        self.line_start_ = 0  # index into text_ where the current line starts
        self.text_ = text
        self.text_length_ = len(text)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def next(self):  # Python 2
        """Python 2 spelling of ``__next__``."""
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE.

        Raises ``StopIteration`` at the end of the text and
        ``VoltLibError`` on malformed input (both propagated from
        ``next_()``).
        """
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def next_(self):
        """Scan and return the next raw token, including NEWLINE tokens.

        Returns a ``(token_type, token, location)`` tuple.  ``token`` is
        ``None`` for NEWLINE, an ``int`` for NUMBER, the text between the
        quotes for STRING, and the identifier text for NAME.

        Raises ``StopIteration`` when the text is exhausted, and
        ``VoltLibError`` for an unterminated string or an unexpected
        character.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        column = self.pos_ - self.line_start_ + 1
        location = (self.filename_, self.line_, column)
        start = self.pos_
        text = self.text_
        limit = self.text_length_
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # One character of lookahead; None when cur_char is the last one.
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break, a lone "\r" likewise.
            self.pos_ += (2 if next_char == "\n" else 1)
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == '"':
            self.pos_ += 1
            # Strings may not span lines: stop at the closing quote or at
            # the end of the line, whichever comes first.
            self.scan_until_('"\r\n')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # The token value excludes the surrounding quotes.
                return (Lexer.STRING, text[start + 1:self.pos_ - 1],
                        location)
            else:
                raise VoltLibError("Expected '\"' to terminate string",
                                   location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start:self.pos_]
            return (Lexer.NAME, token, location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        # next_char is None when "-" is the very last character; testing
        # `None in <str>` would raise TypeError, so guard explicitly and let
        # the lone "-" fall through to the "Unexpected character" error.
        if (cur_char == "-" and next_char is not None
                and next_char in Lexer.CHAR_DIGIT_):
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.NUMBER, int(text[start:self.pos_], 10), location)
        raise VoltLibError("Unexpected character: '%s'" % cur_char,
                           location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the first character in ``stop_at``.

        If no such character occurs, ``pos_`` ends up at the end of the
        text.
        """
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p