Home | History | Annotate | Download | only in tools

Lines Matching refs:_url

88     self._url = url
165 link = urlparse.urljoin(self._url, link)
171 self._url)
177 self._url)
199 comment = etree.Comment(FORM_LOCATION_COMMENT % self._url)
218 link = urlparse.urljoin(self._url, link)
233 self._curl_object.setopt(pycurl.URL, self._url)
263 """Downloads the self._url page.
277 self.logger.error('Error: %s, url: %s', e, self._url)
279 self._url = urlparse.urljoin(
280 self._url, self._curl_object.getinfo(pycurl.EFFECTIVE_URL))
287 self.logger.error('Error: %s, url: %s', e, self._url)
291 self.logger.info('\tSkipping: Not an HTML page <<< %s', self._url)
304 url_parsed = urlparse.urlparse(self._url)
352 self._url = url
469 self.logger.error('Error: %s, url: %s', e, self._url)
476 r._url = urlparse.urljoin(r._url, r._curl_object.getinfo(
484 self.logger.info('\tSkipping: Not an HTML page <<< %s', r._url)
489 self.logger.error('Error: %s, url: %s', e, self._url)
493 self.logger.info('Downloaded: %s', r._url)
505 self.logger.info('\t%s', retriever._url)
557 r = Retriever(self._url, self._domain, self._cookie_file)
562 self._url = r._url
564 self.logger.info('url to crawl: %s', self._url)
566 self._links_visited.append(r._url)
585 self.logger.info('parsed: %s', r._url)
598 """Creates _url and page_found attri to populate urls_with_no_reg_page file.
607 self._url = url
617 self.page_found = Crawler(self._url).Run()
702 for t in sorted(allThreads, key=lambda t: t._url):
706 fnot.write('%s' % t._url)