Home | History | Annotate | Download | only in webchecker
      1 #! /usr/bin/env python

      2 
      3 """GUI interface to webchecker.
      4 
      5 This works as a Grail applet too!  E.g.
      6 
      7   <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
      8 
      9 Checkpoints are not (yet???  ever???) supported.
     10 
     11 User interface:
     12 
     13 Enter a root to check in the text entry box.  To enter more than one root,
     14 enter them one at a time and press <Return> for each one.
     15 
     16 Command buttons Run, Stop and "Check one" govern the checking process in
     17 the obvious way.  Run and "Check one" also enter the root from the text
     18 entry box if one is present.  There's also a check box (enabled by default)
     19 to decide whether to actually follow external links (since this can slow
     20 the checking down considerably).  Finally there's a Quit button.
     21 
     22 A series of checkbuttons determines whether the corresponding output panel
     23 is shown.  List panels are also automatically shown or hidden when their
     24 status changes between empty and non-empty.  There are six panels:
     25 
     26 Log        -- raw output from the checker (-v, -q affect this)
     27 To check   -- links discovered but not yet checked
     28 Checked    -- links that have been checked
     29 Bad links  -- links that failed upon checking
     30 Errors     -- pages containing at least one bad link
     31 Details    -- details about one URL; double click on a URL in any of
     32               the above list panels (not in Log) will show details
     33               for that URL
     34 
     35 Use your window manager's Close command to quit.
     36 
     37 Command line options:
     38 
     39 -m bytes  -- skip HTML pages larger than this size (default %(MAXPAGE)d)
     40 -q        -- quiet operation (also suppresses external links report)
     41 -v        -- verbose operation; repeating -v will increase verbosity
     42 -t root   -- specify root dir which should be treated as internal (can repeat)
     43 -a        -- don't check name anchors
     44 
     45 Command line arguments:
     46 
     47 rooturl   -- URL to start checking
     48              (default %(DEFROOT)s)
     49 
     50 XXX The command line options (-m, -q, -v) should be GUI accessible.
     51 
     52 XXX The roots should be visible as a list (?).
     53 
     54 XXX The multipanel user interface is clumsy.
     55 
     56 """
     57 
     58 # ' Emacs bait

     59 
     60 
     61 import sys
     62 import getopt
     63 from Tkinter import *
     64 import tktools
     65 import webchecker
     66 
     67 def main():
     68     try:
     69         opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
     70     except getopt.error, msg:
     71         sys.stdout = sys.stderr
     72         print msg
     73         print __doc__%vars(webchecker)
     74         sys.exit(2)
     75     webchecker.verbose = webchecker.VERBOSE
     76     webchecker.nonames = webchecker.NONAMES
     77     webchecker.maxpage = webchecker.MAXPAGE
     78     extra_roots = []
     79     for o, a in opts:
     80         if o == '-m':
     81             webchecker.maxpage = int(a)
     82         if o == '-q':
     83             webchecker.verbose = 0
     84         if o == '-v':
     85             webchecker.verbose = webchecker.verbose + 1
     86         if o == '-t':
     87             extra_roots.append(a)
     88         if o == '-a':
     89             webchecker.nonames = not webchecker.nonames
     90     root = Tk(className='Webchecker')
     91     root.protocol("WM_DELETE_WINDOW", root.quit)
     92     c = CheckerWindow(root)
     93     c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
     94                nonames=webchecker.nonames)
     95     if args:
     96         for arg in args[:-1]:
     97             c.addroot(arg)
     98         c.suggestroot(args[-1])
     99     # Usually conditioned on whether external links

    100     # will be checked, but since that's not a command

    101     # line option, just toss them in.

    102     for url_root in extra_roots:
    103         # Make sure it's terminated by a slash,

    104         # so that addroot doesn't discard the last

    105         # directory component.

    106         if url_root[-1] != "/":
    107             url_root = url_root + "/"
    108         c.addroot(url_root, add_to_do = 0)
    109     root.mainloop()
    110 
    111 
    112 class CheckerWindow(webchecker.Checker):
    113 
    114     def __init__(self, parent, root=webchecker.DEFROOT):
    115         self.__parent = parent
    116 
    117         self.__topcontrols = Frame(parent)
    118         self.__topcontrols.pack(side=TOP, fill=X)
    119         self.__label = Label(self.__topcontrols, text="Root URL:")
    120         self.__label.pack(side=LEFT)
    121         self.__rootentry = Entry(self.__topcontrols, width=60)
    122         self.__rootentry.pack(side=LEFT)
    123         self.__rootentry.bind('<Return>', self.enterroot)
    124         self.__rootentry.focus_set()
    125 
    126         self.__controls = Frame(parent)
    127         self.__controls.pack(side=TOP, fill=X)
    128         self.__running = 0
    129         self.__start = Button(self.__controls, text="Run", command=self.start)
    130         self.__start.pack(side=LEFT)
    131         self.__stop = Button(self.__controls, text="Stop", command=self.stop,
    132                              state=DISABLED)
    133         self.__stop.pack(side=LEFT)
    134         self.__step = Button(self.__controls, text="Check one",
    135                              command=self.step)
    136         self.__step.pack(side=LEFT)
    137         self.__cv = BooleanVar(parent)
    138         self.__cv.set(self.checkext)
    139         self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
    140                                       command=self.update_checkext,
    141                                       text="Check nonlocal links",)
    142         self.__checkext.pack(side=LEFT)
    143         self.__reset = Button(self.__controls, text="Start over", command=self.reset)
    144         self.__reset.pack(side=LEFT)
    145         if __name__ == '__main__': # No Quit button under Grail!

    146             self.__quit = Button(self.__controls, text="Quit",
    147                                  command=self.__parent.quit)
    148             self.__quit.pack(side=RIGHT)
    149 
    150         self.__status = Label(parent, text="Status: initial", anchor=W)
    151         self.__status.pack(side=TOP, fill=X)
    152         self.__checking = Label(parent, text="Idle", anchor=W)
    153         self.__checking.pack(side=TOP, fill=X)
    154         self.__mp = mp = MultiPanel(parent)
    155         sys.stdout = self.__log = LogPanel(mp, "Log")
    156         self.__todo = ListPanel(mp, "To check", self, self.showinfo)
    157         self.__done = ListPanel(mp, "Checked", self, self.showinfo)
    158         self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
    159         self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo)
    160         self.__details = LogPanel(mp, "Details")
    161         self.root_seed = None
    162         webchecker.Checker.__init__(self)
    163         if root:
    164             root = str(root).strip()
    165             if root:
    166                 self.suggestroot(root)
    167         self.newstatus()
    168 
    169     def reset(self):
    170         webchecker.Checker.reset(self)
    171         for p in self.__todo, self.__done, self.__bad, self.__errors:
    172             p.clear()
    173         if self.root_seed:
    174             self.suggestroot(self.root_seed)
    175 
    176     def suggestroot(self, root):
    177         self.__rootentry.delete(0, END)
    178         self.__rootentry.insert(END, root)
    179         self.__rootentry.select_range(0, END)
    180         self.root_seed = root
    181 
    182     def enterroot(self, event=None):
    183         root = self.__rootentry.get()
    184         root = root.strip()
    185         if root:
    186             self.__checking.config(text="Adding root "+root)
    187             self.__checking.update_idletasks()
    188             self.addroot(root)
    189             self.__checking.config(text="Idle")
    190             try:
    191                 i = self.__todo.items.index(root)
    192             except (ValueError, IndexError):
    193                 pass
    194             else:
    195                 self.__todo.list.select_clear(0, END)
    196                 self.__todo.list.select_set(i)
    197                 self.__todo.list.yview(i)
    198         self.__rootentry.delete(0, END)
    199 
    200     def start(self):
    201         self.__start.config(state=DISABLED, relief=SUNKEN)
    202         self.__stop.config(state=NORMAL)
    203         self.__step.config(state=DISABLED)
    204         self.enterroot()
    205         self.__running = 1
    206         self.go()
    207 
    208     def stop(self):
    209         self.__stop.config(state=DISABLED, relief=SUNKEN)
    210         self.__running = 0
    211 
    212     def step(self):
    213         self.__start.config(state=DISABLED)
    214         self.__step.config(state=DISABLED, relief=SUNKEN)
    215         self.enterroot()
    216         self.__running = 0
    217         self.dosomething()
    218 
    219     def go(self):
    220         if self.__running:
    221             self.__parent.after_idle(self.dosomething)
    222         else:
    223             self.__checking.config(text="Idle")
    224             self.__start.config(state=NORMAL, relief=RAISED)
    225             self.__stop.config(state=DISABLED, relief=RAISED)
    226             self.__step.config(state=NORMAL, relief=RAISED)
    227 
    228     __busy = 0
    229 
    230     def dosomething(self):
    231         if self.__busy: return
    232         self.__busy = 1
    233         if self.todo:
    234             l = self.__todo.selectedindices()
    235             if l:
    236                 i = l[0]
    237             else:
    238                 i = 0
    239                 self.__todo.list.select_set(i)
    240             self.__todo.list.yview(i)
    241             url = self.__todo.items[i]
    242             self.__checking.config(text="Checking "+self.format_url(url))
    243             self.__parent.update()
    244             self.dopage(url)
    245         else:
    246             self.stop()
    247         self.__busy = 0
    248         self.go()
    249 
    250     def showinfo(self, url):
    251         d = self.__details
    252         d.clear()
    253         d.put("URL:    %s\n" % self.format_url(url))
    254         if self.bad.has_key(url):
    255             d.put("Error:  %s\n" % str(self.bad[url]))
    256         if url in self.roots:
    257             d.put("Note:   This is a root URL\n")
    258         if self.done.has_key(url):
    259             d.put("Status: checked\n")
    260             o = self.done[url]
    261         elif self.todo.has_key(url):
    262             d.put("Status: to check\n")
    263             o = self.todo[url]
    264         else:
    265             d.put("Status: unknown (!)\n")
    266             o = []
    267         if (not url[1]) and self.errors.has_key(url[0]):
    268             d.put("Bad links from this page:\n")
    269             for triple in self.errors[url[0]]:
    270                 link, rawlink, msg = triple
    271                 d.put("  HREF  %s" % self.format_url(link))
    272                 if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink)
    273                 d.put("\n")
    274                 d.put("  error %s\n" % str(msg))
    275         self.__mp.showpanel("Details")
    276         for source, rawlink in o:
    277             d.put("Origin: %s" % source)
    278             if rawlink != self.format_url(url):
    279                 d.put(" (%s)" % rawlink)
    280             d.put("\n")
    281         d.text.yview("1.0")
    282 
    283     def setbad(self, url, msg):
    284         webchecker.Checker.setbad(self, url, msg)
    285         self.__bad.insert(url)
    286         self.newstatus()
    287 
    288     def setgood(self, url):
    289         webchecker.Checker.setgood(self, url)
    290         self.__bad.remove(url)
    291         self.newstatus()
    292 
    293     def newlink(self, url, origin):
    294         webchecker.Checker.newlink(self, url, origin)
    295         if self.done.has_key(url):
    296             self.__done.insert(url)
    297         elif self.todo.has_key(url):
    298             self.__todo.insert(url)
    299         self.newstatus()
    300 
    301     def markdone(self, url):
    302         webchecker.Checker.markdone(self, url)
    303         self.__done.insert(url)
    304         self.__todo.remove(url)
    305         self.newstatus()
    306 
    307     def seterror(self, url, triple):
    308         webchecker.Checker.seterror(self, url, triple)
    309         self.__errors.insert((url, ''))
    310         self.newstatus()
    311 
    312     def newstatus(self):
    313         self.__status.config(text="Status: "+self.status())
    314         self.__parent.update()
    315 
    316     def update_checkext(self):
    317         self.checkext = self.__cv.get()
    318 
    319 
    320 class ListPanel:
    321 
    322     def __init__(self, mp, name, checker, showinfo=None):
    323         self.mp = mp
    324         self.name = name
    325         self.showinfo = showinfo
    326         self.checker = checker
    327         self.panel = mp.addpanel(name)
    328         self.list, self.frame = tktools.make_list_box(
    329             self.panel, width=60, height=5)
    330         self.list.config(exportselection=0)
    331         if showinfo:
    332             self.list.bind('<Double-Button-1>', self.doubleclick)
    333         self.items = []
    334 
    335     def clear(self):
    336         self.items = []
    337         self.list.delete(0, END)
    338         self.mp.hidepanel(self.name)
    339 
    340     def doubleclick(self, event):
    341         l = self.selectedindices()
    342         if l:
    343             self.showinfo(self.items[l[0]])
    344 
    345     def selectedindices(self):
    346         l = self.list.curselection()
    347         if not l: return []
    348         return map(int, l)
    349 
    350     def insert(self, url):
    351         if url not in self.items:
    352             if not self.items:
    353                 self.mp.showpanel(self.name)
    354             # (I tried sorting alphabetically, but the display is too jumpy)

    355             i = len(self.items)
    356             self.list.insert(i, self.checker.format_url(url))
    357             self.list.yview(i)
    358             self.items.insert(i, url)
    359 
    360     def remove(self, url):
    361         try:
    362             i = self.items.index(url)
    363         except (ValueError, IndexError):
    364             pass
    365         else:
    366             was_selected = i in self.selectedindices()
    367             self.list.delete(i)
    368             del self.items[i]
    369             if not self.items:
    370                 self.mp.hidepanel(self.name)
    371             elif was_selected:
    372                 if i >= len(self.items):
    373                     i = len(self.items) - 1
    374                 self.list.select_set(i)
    375 
    376 
    377 class LogPanel:
    378 
    379     def __init__(self, mp, name):
    380         self.mp = mp
    381         self.name = name
    382         self.panel = mp.addpanel(name)
    383         self.text, self.frame = tktools.make_text_box(self.panel, height=10)
    384         self.text.config(wrap=NONE)
    385 
    386     def clear(self):
    387         self.text.delete("1.0", END)
    388         self.text.yview("1.0")
    389 
    390     def put(self, s):
    391         self.text.insert(END, s)
    392         if '\n' in s:
    393             self.text.yview(END)
    394 
    395     def write(self, s):
    396         self.text.insert(END, s)
    397         if '\n' in s:
    398             self.text.yview(END)
    399             self.panel.update()
    400 
    401 
    402 class MultiPanel:
    403 
    404     def __init__(self, parent):
    405         self.parent = parent
    406         self.frame = Frame(self.parent)
    407         self.frame.pack(expand=1, fill=BOTH)
    408         self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
    409         self.topframe.pack(fill=X)
    410         self.botframe = Frame(self.frame)
    411         self.botframe.pack(expand=1, fill=BOTH)
    412         self.panelnames = []
    413         self.panels = {}
    414 
    415     def addpanel(self, name, on=0):
    416         v = StringVar(self.parent)
    417         if on:
    418             v.set(name)
    419         else:
    420             v.set("")
    421         check = Checkbutton(self.topframe, text=name,
    422                             offvalue="", onvalue=name, variable=v,
    423                             command=self.checkpanel)
    424         check.pack(side=LEFT)
    425         panel = Frame(self.botframe)
    426         label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
    427         label.pack(side=TOP, fill=X)
    428         t = v, check, panel
    429         self.panelnames.append(name)
    430         self.panels[name] = t
    431         if on:
    432             panel.pack(expand=1, fill=BOTH)
    433         return panel
    434 
    435     def showpanel(self, name):
    436         v, check, panel = self.panels[name]
    437         v.set(name)
    438         panel.pack(expand=1, fill=BOTH)
    439 
    440     def hidepanel(self, name):
    441         v, check, panel = self.panels[name]
    442         v.set("")
    443         panel.pack_forget()
    444 
    445     def checkpanel(self):
    446         for name in self.panelnames:
    447             v, check, panel = self.panels[name]
    448             panel.pack_forget()
    449         for name in self.panelnames:
    450             v, check, panel = self.panels[name]
    451             if v.get():
    452                 panel.pack(expand=1, fill=BOTH)
    453 
    454 
    455 if __name__ == '__main__':
    456     main()
    457