# (source-viewer navigation residue: Home | History | Annotate | Download | only in idlelib)
"""
HyperParser
===========
This module defines the HyperParser class, which provides advanced parsing
abilities for the ParenMatch and other extensions.
The HyperParser uses PyParse. PyParse is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of the code, which extensions use to help the user.
"""
     10 
     11 import string
     12 import keyword
     13 from idlelib import PyParse
     14 
     15 class HyperParser:
     16 
     17     def __init__(self, editwin, index):
     18         """Initialize the HyperParser to analyze the surroundings of the given
     19         index.
     20         """
     21 
     22         self.editwin = editwin
     23         self.text = text = editwin.text
     24 
     25         parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
     26 
     27         def index2line(index):
     28             return int(float(index))
     29         lno = index2line(text.index(index))
     30 
     31         if not editwin.context_use_ps1:
     32             for context in editwin.num_context_lines:
     33                 startat = max(lno - context, 1)
     34                 startatindex = repr(startat) + ".0"
     35                 stopatindex = "%d.end" % lno
     36                 # We add the newline because PyParse requires a newline at end.
     37                 # We add a space so that index won't be at end of line, so that
     38                 # its status will be the same as the char before it, if should.
     39                 parser.set_str(text.get(startatindex, stopatindex)+' \n')
     40                 bod = parser.find_good_parse_start(
     41                           editwin._build_char_in_string_func(startatindex))
     42                 if bod is not None or startat == 1:
     43                     break
     44             parser.set_lo(bod or 0)
     45         else:
     46             r = text.tag_prevrange("console", index)
     47             if r:
     48                 startatindex = r[1]
     49             else:
     50                 startatindex = "1.0"
     51             stopatindex = "%d.end" % lno
     52             # We add the newline because PyParse requires a newline at end.
     53             # We add a space so that index won't be at end of line, so that
     54             # its status will be the same as the char before it, if should.
     55             parser.set_str(text.get(startatindex, stopatindex)+' \n')
     56             parser.set_lo(0)
     57 
     58         # We want what the parser has, except for the last newline and space.
     59         self.rawtext = parser.str[:-2]
     60         # As far as I can see, parser.str preserves the statement we are in,
     61         # so that stopatindex can be used to synchronize the string with the
     62         # text box indices.
     63         self.stopatindex = stopatindex
     64         self.bracketing = parser.get_last_stmt_bracketing()
     65         # find which pairs of bracketing are openers. These always correspond
     66         # to a character of rawtext.
     67         self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1]
     68                          for i in range(len(self.bracketing))]
     69 
     70         self.set_index(index)
     71 
     72     def set_index(self, index):
     73         """Set the index to which the functions relate. Note that it must be
     74         in the same statement.
     75         """
     76         indexinrawtext = \
     77             len(self.rawtext) - len(self.text.get(index, self.stopatindex))
     78         if indexinrawtext < 0:
     79             raise ValueError("The index given is before the analyzed statement")
     80         self.indexinrawtext = indexinrawtext
     81         # find the rightmost bracket to which index belongs
     82         self.indexbracket = 0
     83         while self.indexbracket < len(self.bracketing)-1 and \
     84               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext:
     85             self.indexbracket += 1
     86         if self.indexbracket < len(self.bracketing)-1 and \
     87            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \
     88            not self.isopener[self.indexbracket+1]:
     89             self.indexbracket += 1
     90 
     91     def is_in_string(self):
     92         """Is the index given to the HyperParser is in a string?"""
     93         # The bracket to which we belong should be an opener.
     94         # If it's an opener, it has to have a character.
     95         return self.isopener[self.indexbracket] and \
     96                self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'")
     97 
     98     def is_in_code(self):
     99         """Is the index given to the HyperParser is in a normal code?"""
    100         return not self.isopener[self.indexbracket] or \
    101                self.rawtext[self.bracketing[self.indexbracket][0]] not in \
    102                                                                 ('#', '"', "'")
    103 
    104     def get_surrounding_brackets(self, openers='([{', mustclose=False):
    105         """If the index given to the HyperParser is surrounded by a bracket
    106         defined in openers (or at least has one before it), return the
    107         indices of the opening bracket and the closing bracket (or the
    108         end of line, whichever comes first).
    109         If it is not surrounded by brackets, or the end of line comes before
    110         the closing bracket and mustclose is True, returns None.
    111         """
    112         bracketinglevel = self.bracketing[self.indexbracket][1]
    113         before = self.indexbracket
    114         while not self.isopener[before] or \
    115               self.rawtext[self.bracketing[before][0]] not in openers or \
    116               self.bracketing[before][1] > bracketinglevel:
    117             before -= 1
    118             if before < 0:
    119                 return None
    120             bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
    121         after = self.indexbracket + 1
    122         while after < len(self.bracketing) and \
    123               self.bracketing[after][1] >= bracketinglevel:
    124             after += 1
    125 
    126         beforeindex = self.text.index("%s-%dc" %
    127             (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
    128         if after >= len(self.bracketing) or \
    129            self.bracketing[after][0] > len(self.rawtext):
    130             if mustclose:
    131                 return None
    132             afterindex = self.stopatindex
    133         else:
    134             # We are after a real char, so it is a ')' and we give the index
    135             # before it.
    136             afterindex = self.text.index("%s-%dc" %
    137                 (self.stopatindex,
    138                  len(self.rawtext)-(self.bracketing[after][0]-1)))
    139 
    140         return beforeindex, afterindex
    141 
    142     # This string includes all chars that may be in a white space
    143     _whitespace_chars = " \t\n\\"
    144     # This string includes all chars that may be in an identifier
    145     _id_chars = string.ascii_letters + string.digits + "_"
    146     # This string includes all chars that may be the first char of an identifier
    147     _id_first_chars = string.ascii_letters + "_"
    148 
    149     # Given a string and pos, return the number of chars in the identifier
    150     # which ends at pos, or 0 if there is no such one. Saved words are not
    151     # identifiers.
    152     def _eat_identifier(self, str, limit, pos):
    153         i = pos
    154         while i > limit and str[i-1] in self._id_chars:
    155             i -= 1
    156         if i < pos and (str[i] not in self._id_first_chars or \
    157                         keyword.iskeyword(str[i:pos])):
    158             i = pos
    159         return pos - i
    160 
    161     def get_expression(self):
    162         """Return a string with the Python expression which ends at the given
    163         index, which is empty if there is no real one.
    164         """
    165         if not self.is_in_code():
    166             raise ValueError("get_expression should only be called if index "\
    167                              "is inside a code.")
    168 
    169         rawtext = self.rawtext
    170         bracketing = self.bracketing
    171 
    172         brck_index = self.indexbracket
    173         brck_limit = bracketing[brck_index][0]
    174         pos = self.indexinrawtext
    175 
    176         last_identifier_pos = pos
    177         postdot_phase = True
    178 
    179         while 1:
    180             # Eat whitespaces, comments, and if postdot_phase is False - one dot
    181             while 1:
    182                 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
    183                     # Eat a whitespace
    184                     pos -= 1
    185                 elif not postdot_phase and \
    186                      pos > brck_limit and rawtext[pos-1] == '.':
    187                     # Eat a dot
    188                     pos -= 1
    189                     postdot_phase = True
    190                 # The next line will fail if we are *inside* a comment, but we
    191                 # shouldn't be.
    192                 elif pos == brck_limit and brck_index > 0 and \
    193                      rawtext[bracketing[brck_index-1][0]] == '#':
    194                     # Eat a comment
    195                     brck_index -= 2
    196                     brck_limit = bracketing[brck_index][0]
    197                     pos = bracketing[brck_index+1][0]
    198                 else:
    199                     # If we didn't eat anything, quit.
    200                     break
    201 
    202             if not postdot_phase:
    203                 # We didn't find a dot, so the expression end at the last
    204                 # identifier pos.
    205                 break
    206 
    207             ret = self._eat_identifier(rawtext, brck_limit, pos)
    208             if ret:
    209                 # There is an identifier to eat
    210                 pos = pos - ret
    211                 last_identifier_pos = pos
    212                 # Now, in order to continue the search, we must find a dot.
    213                 postdot_phase = False
    214                 # (the loop continues now)
    215 
    216             elif pos == brck_limit:
    217                 # We are at a bracketing limit. If it is a closing bracket,
    218                 # eat the bracket, otherwise, stop the search.
    219                 level = bracketing[brck_index][1]
    220                 while brck_index > 0 and bracketing[brck_index-1][1] > level:
    221                     brck_index -= 1
    222                 if bracketing[brck_index][0] == brck_limit:
    223                     # We were not at the end of a closing bracket
    224                     break
    225                 pos = bracketing[brck_index][0]
    226                 brck_index -= 1
    227                 brck_limit = bracketing[brck_index][0]
    228                 last_identifier_pos = pos
    229                 if rawtext[pos] in "([":
    230                     # [] and () may be used after an identifier, so we
    231                     # continue. postdot_phase is True, so we don't allow a dot.
    232                     pass
    233                 else:
    234                     # We can't continue after other types of brackets
    235                     if rawtext[pos] in "'\"":
    236                         # Scan a string prefix
    237                         while pos > 0 and rawtext[pos - 1] in "rRbBuU":
    238                             pos -= 1
    239                         last_identifier_pos = pos
    240                     break
    241 
    242             else:
    243                 # We've found an operator or something.
    244                 break
    245 
    246         return rawtext[last_identifier_pos:self.indexinrawtext]
    247