Home | History | Annotate | Download | only in idlelib
      1 """Provide advanced parsing abilities for ParenMatch and other extensions.
      2 
      3 HyperParser uses PyParser.  PyParser mostly gives information on the
      4 proper indentation of code.  HyperParser gives additional information on
      5 the structure of code.
      6 """
      7 
      8 import string
      9 import keyword
     10 from idlelib import PyParse
     11 
     12 class HyperParser:
     13 
     14     def __init__(self, editwin, index):
     15         "To initialize, analyze the surroundings of the given index."
     16 
     17         self.editwin = editwin
     18         self.text = text = editwin.text
     19 
     20         parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
     21 
     22         def index2line(index):
     23             return int(float(index))
     24         lno = index2line(text.index(index))
     25 
     26         if not editwin.context_use_ps1:
     27             for context in editwin.num_context_lines:
     28                 startat = max(lno - context, 1)
     29                 startatindex = repr(startat) + ".0"
     30                 stopatindex = "%d.end" % lno
     31                 # We add the newline because PyParse requires a newline
     32                 # at end. We add a space so that index won't be at end
     33                 # of line, so that its status will be the same as the
     34                 # char before it, if should.
     35                 parser.set_str(text.get(startatindex, stopatindex)+' \n')
     36                 bod = parser.find_good_parse_start(
     37                           editwin._build_char_in_string_func(startatindex))
     38                 if bod is not None or startat == 1:
     39                     break
     40             parser.set_lo(bod or 0)
     41         else:
     42             r = text.tag_prevrange("console", index)
     43             if r:
     44                 startatindex = r[1]
     45             else:
     46                 startatindex = "1.0"
     47             stopatindex = "%d.end" % lno
     48             # We add the newline because PyParse requires it. We add a
     49             # space so that index won't be at end of line, so that its
     50             # status will be the same as the char before it, if should.
     51             parser.set_str(text.get(startatindex, stopatindex)+' \n')
     52             parser.set_lo(0)
     53 
     54         # We want what the parser has, minus the last newline and space.
     55         self.rawtext = parser.str[:-2]
     56         # Parser.str apparently preserves the statement we are in, so
     57         # that stopatindex can be used to synchronize the string with
     58         # the text box indices.
     59         self.stopatindex = stopatindex
     60         self.bracketing = parser.get_last_stmt_bracketing()
     61         # find which pairs of bracketing are openers. These always
     62         # correspond to a character of rawtext.
     63         self.isopener = [i>0 and self.bracketing[i][1] >
     64                          self.bracketing[i-1][1]
     65                          for i in range(len(self.bracketing))]
     66 
     67         self.set_index(index)
     68 
     69     def set_index(self, index):
     70         """Set the index to which the functions relate.
     71 
     72         The index must be in the same statement.
     73         """
     74         indexinrawtext = (len(self.rawtext) -
     75                           len(self.text.get(index, self.stopatindex)))
     76         if indexinrawtext < 0:
     77             raise ValueError("Index %s precedes the analyzed statement"
     78                              % index)
     79         self.indexinrawtext = indexinrawtext
     80         # find the rightmost bracket to which index belongs
     81         self.indexbracket = 0
     82         while (self.indexbracket < len(self.bracketing)-1 and
     83                self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
     84             self.indexbracket += 1
     85         if (self.indexbracket < len(self.bracketing)-1 and
     86             self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
     87            not self.isopener[self.indexbracket+1]):
     88             self.indexbracket += 1
     89 
     90     def is_in_string(self):
     91         """Is the index given to the HyperParser in a string?"""
     92         # The bracket to which we belong should be an opener.
     93         # If it's an opener, it has to have a character.
     94         return (self.isopener[self.indexbracket] and
     95                 self.rawtext[self.bracketing[self.indexbracket][0]]
     96                 in ('"', "'"))
     97 
     98     def is_in_code(self):
     99         """Is the index given to the HyperParser in normal code?"""
    100         return (not self.isopener[self.indexbracket] or
    101                 self.rawtext[self.bracketing[self.indexbracket][0]]
    102                 not in ('#', '"', "'"))
    103 
    104     def get_surrounding_brackets(self, openers='([{', mustclose=False):
    105         """Return bracket indexes or None.
    106 
    107         If the index given to the HyperParser is surrounded by a
    108         bracket defined in openers (or at least has one before it),
    109         return the indices of the opening bracket and the closing
    110         bracket (or the end of line, whichever comes first).
    111 
    112         If it is not surrounded by brackets, or the end of line comes
    113         before the closing bracket and mustclose is True, returns None.
    114         """
    115 
    116         bracketinglevel = self.bracketing[self.indexbracket][1]
    117         before = self.indexbracket
    118         while (not self.isopener[before] or
    119               self.rawtext[self.bracketing[before][0]] not in openers or
    120               self.bracketing[before][1] > bracketinglevel):
    121             before -= 1
    122             if before < 0:
    123                 return None
    124             bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
    125         after = self.indexbracket + 1
    126         while (after < len(self.bracketing) and
    127               self.bracketing[after][1] >= bracketinglevel):
    128             after += 1
    129 
    130         beforeindex = self.text.index("%s-%dc" %
    131             (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
    132         if (after >= len(self.bracketing) or
    133            self.bracketing[after][0] > len(self.rawtext)):
    134             if mustclose:
    135                 return None
    136             afterindex = self.stopatindex
    137         else:
    138             # We are after a real char, so it is a ')' and we give the
    139             # index before it.
    140             afterindex = self.text.index(
    141                 "%s-%dc" % (self.stopatindex,
    142                  len(self.rawtext)-(self.bracketing[after][0]-1)))
    143 
    144         return beforeindex, afterindex
    145 
    146     # Ascii chars that may be in a white space
    147     _whitespace_chars = " \t\n\\"
    148     # Ascii chars that may be in an identifier
    149     _id_chars = string.ascii_letters + string.digits + "_"
    150     # Ascii chars that may be the first char of an identifier
    151     _id_first_chars = string.ascii_letters + "_"
    152 
    153     # Given a string and pos, return the number of chars in the
    154     # identifier which ends at pos, or 0 if there is no such one. Saved
    155     # words are not identifiers.
    156     def _eat_identifier(self, str, limit, pos):
    157         i = pos
    158         while i > limit and str[i-1] in self._id_chars:
    159             i -= 1
    160         if (i < pos and (str[i] not in self._id_first_chars or
    161             keyword.iskeyword(str[i:pos]))):
    162             i = pos
    163         return pos - i
    164 
    165     def get_expression(self):
    166         """Return a string with the Python expression which ends at the
    167         given index, which is empty if there is no real one.
    168         """
    169         if not self.is_in_code():
    170             raise ValueError("get_expression should only be called"
    171                              "if index is inside a code.")
    172 
    173         rawtext = self.rawtext
    174         bracketing = self.bracketing
    175 
    176         brck_index = self.indexbracket
    177         brck_limit = bracketing[brck_index][0]
    178         pos = self.indexinrawtext
    179 
    180         last_identifier_pos = pos
    181         postdot_phase = True
    182 
    183         while 1:
    184             # Eat whitespaces, comments, and if postdot_phase is False - a dot
    185             while 1:
    186                 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
    187                     # Eat a whitespace
    188                     pos -= 1
    189                 elif (not postdot_phase and
    190                       pos > brck_limit and rawtext[pos-1] == '.'):
    191                     # Eat a dot
    192                     pos -= 1
    193                     postdot_phase = True
    194                 # The next line will fail if we are *inside* a comment,
    195                 # but we shouldn't be.
    196                 elif (pos == brck_limit and brck_index > 0 and
    197                       rawtext[bracketing[brck_index-1][0]] == '#'):
    198                     # Eat a comment
    199                     brck_index -= 2
    200                     brck_limit = bracketing[brck_index][0]
    201                     pos = bracketing[brck_index+1][0]
    202                 else:
    203                     # If we didn't eat anything, quit.
    204                     break
    205 
    206             if not postdot_phase:
    207                 # We didn't find a dot, so the expression end at the
    208                 # last identifier pos.
    209                 break
    210 
    211             ret = self._eat_identifier(rawtext, brck_limit, pos)
    212             if ret:
    213                 # There is an identifier to eat
    214                 pos = pos - ret
    215                 last_identifier_pos = pos
    216                 # Now, to continue the search, we must find a dot.
    217                 postdot_phase = False
    218                 # (the loop continues now)
    219 
    220             elif pos == brck_limit:
    221                 # We are at a bracketing limit. If it is a closing
    222                 # bracket, eat the bracket, otherwise, stop the search.
    223                 level = bracketing[brck_index][1]
    224                 while brck_index > 0 and bracketing[brck_index-1][1] > level:
    225                     brck_index -= 1
    226                 if bracketing[brck_index][0] == brck_limit:
    227                     # We were not at the end of a closing bracket
    228                     break
    229                 pos = bracketing[brck_index][0]
    230                 brck_index -= 1
    231                 brck_limit = bracketing[brck_index][0]
    232                 last_identifier_pos = pos
    233                 if rawtext[pos] in "([":
    234                     # [] and () may be used after an identifier, so we
    235                     # continue. postdot_phase is True, so we don't allow a dot.
    236                     pass
    237                 else:
    238                     # We can't continue after other types of brackets
    239                     if rawtext[pos] in "'\"":
    240                         # Scan a string prefix
    241                         while pos > 0 and rawtext[pos - 1] in "rRbBuU":
    242                             pos -= 1
    243                         last_identifier_pos = pos
    244                     break
    245 
    246             else:
    247                 # We've found an operator or something.
    248                 break
    249 
    250         return rawtext[last_identifier_pos:self.indexinrawtext]
    251 
    252 
if __name__ == '__main__':
    # When this file is run as a script, run the unit tests named
    # 'idlelib.idle_test.test_hyperparser' with verbose output.
    import unittest
    unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)
    256