# Module 'panelparser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            # Skip whitespace between tokens.
            s = s[1:]
        elif c == ';':
            # Comment: discard the rest of the line.
            s = ''
        elif c == '"':
            # String literal: scan to the closing quote, honoring
            # backslash escapes; the quotes are kept in the token.
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            # '(', ')' and quote are single-character tokens.
            tokens.append(c)
            s = s[1:]
        else:
            # Atom: everything up to the next separator.
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
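

# A minimal usage sketch (not part of the original module); the sample
# line is hypothetical and the expected result is shown in the comment.
# Note how the string token keeps its quotes and escapes, and how the
# ';' comment swallows the rest of the line.
def _demo_tokenize_string():
    sample = '(label "hello \\"world\\"") ; trailing comment'
    return tokenize_string(sample)
    # -> ['(', 'label', '"hello \\"world\\""', ')']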


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        tokens.extend(tokenize_string(line))
    return tokens
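

# A minimal usage sketch (not part of the original module), driving
# tokenize_file() with an in-memory file via the standard StringIO
# module; the input text is hypothetical.
def _demo_tokenize_file():
    from StringIO import StringIO
    fp = StringIO('(hip\n (hop hur-ray))\n')
    return tokenize_file(fp)
    # -> ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']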


# Exception raised by parse_expr.
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            # Nested list: recurse.
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            # Atom: kept as its token string.
            expr.append(tokens[0])
            tokens = tokens[1:]
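

# A minimal usage sketch (not part of the original module), parsing the
# token list produced for '(hip (hop hur-ray))'; expected values are
# shown in the comment.
def _demo_parse_expr():
    tokens = tokenize_string('(hip (hop hur-ray))')
    expr, rest = parse_expr(tokens)
    return expr, rest
    # -> (['hip', ['hop', 'hur-ray']], [])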


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist
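

# A minimal usage sketch (not part of the original module); the input
# text is the worked example discussed in the notes below, wrapped in
# an in-memory file.
def _demo_parse_file():
    from StringIO import StringIO
    return parse_file(StringIO('(hip (hop hur-ray))\n'))
    # -> [['hip', ['hop', 'hur-ray']]]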


# EXAMPLE:
#
# The input
#       '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#       ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#       [['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#       ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
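

# A small self-test (not part of the original module): running the file
# directly checks the worked example from the notes above against the
# _demo_* helpers defined alongside each function.
if __name__ == '__main__':
    assert _demo_tokenize_string() == ['(', 'label', '"hello \\"world\\""', ')']
    assert _demo_tokenize_file() == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
    assert _demo_parse_expr() == (['hip', ['hop', 'hur-ray']], [])
    assert _demo_parse_file() == [['hip', ['hop', 'hur-ray']]]
    print 'panelparser: all examples behave as documented'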
    132