Home | History | Annotate | Download | only in simplejson
      1 """
      2 Iterator based sre token scanner
      3 """
      4 import sre_parse, sre_compile, sre_constants
      5 from sre_constants import BRANCH, SUBPATTERN
      6 from re import VERBOSE, MULTILINE, DOTALL
      7 import re
      8 
# Names exported by ``from <module> import *``.
__all__ = ['Scanner', 'pattern']

# Default regex flags; used as the default ``flags`` argument of both
# ``Scanner.__init__`` and ``pattern`` below.
FLAGS = (VERBOSE | MULTILINE | DOTALL)
     12 class Scanner(object):
     13     def __init__(self, lexicon, flags=FLAGS):
     14         self.actions = [None]
     15         # combine phrases into a compound pattern
     16         s = sre_parse.Pattern()
     17         s.flags = flags
     18         p = []
     19         for idx, token in enumerate(lexicon):
     20             phrase = token.pattern
     21             try:
     22                 subpattern = sre_parse.SubPattern(s,
     23                     [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
     24             except sre_constants.error:
     25                 raise
     26             p.append(subpattern)
     27             self.actions.append(token)
     28 
     29         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
     30         self.scanner = sre_compile.compile(p)
     31 
     32 
     33     def iterscan(self, string, idx=0, context=None):
     34         """
     35         Yield match, end_idx for each match
     36         """
     37         match = self.scanner.scanner(string, idx).match
     38         actions = self.actions
     39         lastend = idx
     40         end = len(string)
     41         while True:
     42             m = match()
     43             if m is None:
     44                 break
     45             matchbegin, matchend = m.span()
     46             if lastend == matchend:
     47                 break
     48             action = actions[m.lastindex]
     49             if action is not None:
     50                 rval, next_pos = action(m, context)
     51                 if next_pos is not None and next_pos != matchend:
     52                     # "fast forward" the scanner
     53                     matchend = next_pos
     54                     match = self.scanner.scanner(string, matchend).match
     55                 yield rval, matchend
     56             lastend = matchend
     57             
     58 def pattern(pattern, flags=FLAGS):
     59     def decorator(fn):
     60         fn.pattern = pattern
     61         fn.regex = re.compile(pattern, flags)
     62         return fn
     63     return decorator
     64