#!/usr/bin/python
"""Verify that local Markdown links in documentation files point at existing paths.

Usage: <root_dir> <doc_files>...

Every inline link ``[text](target)`` found in the given files is collected.
Targets starting with ``http`` or ``#`` are skipped; for the rest, any
``#fragment`` or ``?query`` suffix is dropped and the remaining path is
checked with ``os.path.exists``. Targets starting with ``/`` are resolved
against <root_dir>; all others against <root_dir> joined with the directory
part of the linking file's path as given on the command line.

Exit status: 0 if every checked link resolves, 1 on bad usage, 2 if at
least one link is broken.
"""

import os
import re
import sys

# Inline Markdown link ``[text](target)``; the single capture group is the
# target. Neither the text nor the target may contain its own bracket type.
link_re = re.compile(r'\[[^\[\]]+\]\(([^\(\)]+)\)')

# Directory that links are resolved against. Set by main() from argv[1];
# check_link() falls back to it when no explicit root_dir is passed.
root = None


def filter_link(item):
    """Return True if the ``(doc, link)`` pair should be checked on disk.

    Links starting with ``http`` and pure in-page anchors (``#...``) are
    skipped.
    """
    link = item[1]
    # NOTE: the 'http' prefix match is loose on purpose (kept from the
    # original); it also skips local files whose name begins with 'http'.
    if link.startswith('http'):
        return False
    if link.startswith('#'):
        return False
    return True


def fix_link(item):
    """Return ``(doc, link)`` with any ``#fragment`` / ``?query`` removed."""
    doc, link = item
    link = link.split('#')[0]
    link = link.split('?')[0]
    return (doc, link)


def check_link(item, root_dir=None):
    """Return True if the target of the ``(doc, link)`` pair exists.

    ``root_dir`` overrides the module-level ``root`` when given. A link with
    a leading ``/`` is resolved against the root directory only; any other
    link is resolved against the root joined with the directory of ``doc``.
    """
    doc, link = item
    base = root if root_dir is None else root_dir
    # startswith() instead of link[0]: the link can be empty once fix_link()
    # has stripped a query-only target such as "?x=1", and indexing an empty
    # string raises IndexError. An empty link then resolves to the linking
    # document's own directory.
    if link.startswith('/'):
        full_link = os.path.join(base, link[1:])
    else:
        full_link = os.path.join(base, os.path.dirname(doc), link)
    return os.path.exists(full_link)


def main(argv):
    """Run the link check for ``argv`` (same layout as ``sys.argv``).

    Returns the process exit status: 0 when all links resolve, 1 on bad
    usage, 2 when at least one link is broken.
    """
    global root

    if len(argv) < 3:
        print('Usage: <root_dir> <doc_files>...')
        return 1

    root = argv[1]
    docs = argv[2:]

    links = []
    for doc in docs:
        with open(doc) as f:
            data = f.read()
        links.extend((doc, link) for link in link_re.findall(data))

    # Real lists (not filter()/map() objects) so len() works on Python 3 too.
    links = [fix_link(item) for item in links if filter_link(item)]
    errors = [item for item in links if not check_link(item)]

    if not errors:
        print('%d links checked: OK' % (len(links),))
        return 0

    for doc, link in errors:
        print('File %s linked from %s not found' % (link, doc))
    return 2


if __name__ == '__main__':
    sys.exit(main(sys.argv))