%PDF- %PDF-
Direktori : /lib/python3/dist-packages/mechanize/ |
Current File : //lib/python3/dist-packages/mechanize/_opener.py |
"""URL opener. Copyright 2004-2006 John J Lee <jjl@pobox.com> This code is free software; you can redistribute it and/or modify it under the terms of the BSD or ZPL 2.1 licenses (see the file LICENSE included with the distribution). """ from __future__ import absolute_import import bisect import os import tempfile import threading from . import _response from . import _rfc3986 from . import _sockettimeout from . import _urllib2_fork from ._request import Request from ._util import isstringlike from .polyglot import HTTPError, URLError, iteritems, is_class open_file = open class ContentTooShortError(URLError): def __init__(self, reason, result): URLError.__init__(self, reason) self.result = result def set_request_attr(req, name, value, default): try: getattr(req, name) except AttributeError: setattr(req, name, default) if value is not default: setattr(req, name, value) class OpenerDirector(_urllib2_fork.OpenerDirector): def __init__(self): _urllib2_fork.OpenerDirector.__init__(self) # really none of these are (sanely) public -- the lack of initial # underscore on some is just due to following urllib2 self.process_response = {} self.process_request = {} self._any_request = {} self._any_response = {} self._handler_index_valid = True self._tempfiles = [] def add_handler(self, handler): if not hasattr(handler, "add_parent"): raise TypeError("expected BaseHandler instance, got %r" % type(handler)) if handler in self.handlers: return # XXX why does self.handlers need to be sorted? bisect.insort(self.handlers, handler) handler.add_parent(self) self._handler_index_valid = False def _maybe_reindex_handlers(self): if self._handler_index_valid: return handle_error = {} handle_open = {} process_request = {} process_response = {} any_request = set() any_response = set() unwanted = [] for handler in self.handlers: added = False for meth in dir(handler): if meth in ["redirect_request", "do_open", "proxy_open"]: # oops, coincidental match continue if meth == "any_request": any_request.add(handler) added = True continue elif meth == "any_response": any_response.add(handler) added = True continue ii = meth.find("_") scheme = meth[:ii] condition = meth[ii + 1:] if condition.startswith("error"): jj = meth[ii + 1:].find("_") + ii + 1 kind = meth[jj + 1:] try: kind = int(kind) except ValueError: pass lookup = handle_error.setdefault(scheme, {}) elif condition == "open": kind = scheme lookup = handle_open elif condition == "request": kind = scheme lookup = process_request elif condition == "response": kind = scheme lookup = process_response else: continue lookup.setdefault(kind, set()).add(handler) added = True if not added: unwanted.append(handler) for handler in unwanted: self.handlers.remove(handler) # sort indexed methods # XXX could be cleaned up for lookup in [process_request, process_response]: for scheme, handlers in iteritems(lookup): lookup[scheme] = handlers for scheme, lookup in iteritems(handle_error): for code, handlers in iteritems(lookup): handlers = list(handlers) handlers.sort() lookup[code] = handlers for scheme, handlers in iteritems(handle_open): handlers = list(handlers) handlers.sort() handle_open[scheme] = handlers # cache the indexes self.handle_error = handle_error self.handle_open = handle_open self.process_request = process_request self.process_response = process_response self._any_request = any_request self._any_response = any_response def _request(self, url_or_req, data, visit, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): if isstringlike(url_or_req): req = Request(url_or_req, data, visit=visit, timeout=timeout) else: # already a mechanize.Request instance req = url_or_req if data is not None: req.add_data(data) # XXX yuck set_request_attr(req, "visit", visit, None) set_request_attr(req, "timeout", timeout, _sockettimeout._GLOBAL_DEFAULT_TIMEOUT) return req def open(self, fullurl, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): req = self._request(fullurl, data, None, timeout) req_scheme = req.get_type() self._maybe_reindex_handlers() # pre-process request # XXX should we allow a Processor to change the URL scheme # of the request? request_processors = set(self.process_request.get(req_scheme, [])) request_processors.update(self._any_request) request_processors = list(request_processors) request_processors.sort() for processor in request_processors: for meth_name in ["any_request", req_scheme + "_request"]: meth = getattr(processor, meth_name, None) if meth: req = meth(req) # In Python >= 2.4, .open() supports processors already, so we must # call ._open() instead. urlopen = _urllib2_fork.OpenerDirector._open response = urlopen(self, req, data) # post-process response response_processors = set(self.process_response.get(req_scheme, [])) response_processors.update(self._any_response) response_processors = list(response_processors) response_processors.sort() for processor in response_processors: for meth_name in ["any_response", req_scheme + "_response"]: meth = getattr(processor, meth_name, None) if meth: response = meth(req, response) return response def error(self, proto, *args): if proto in ['http', 'https']: # XXX http[s] protocols are special-cased # https is not different than http dict = self.handle_error['http'] proto = args[2] # YUCK! meth_name = 'http_error_%s' % proto http_err = 1 orig_args = args else: dict = self.handle_error meth_name = proto + '_error' http_err = 0 args = (dict, proto, meth_name) + args result = self._call_chain(*args) if result: return result if http_err: args = (dict, 'default', 'http_error_default') + orig_args return self._call_chain(*args) BLOCK_SIZE = 1024 * 8 def retrieve(self, fullurl, filename=None, reporthook=None, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT, open=open_file): """Returns (filename, headers). For remote objects, the default filename will refer to a temporary file. Temporary files are removed when the OpenerDirector.close() method is called. For file: URLs, at present the returned filename is None. This may change in future. If the actual number of bytes read is less than indicated by the Content-Length header, raises ContentTooShortError (a URLError subclass). The exception's .result attribute contains the (filename, headers) that would have been returned. """ req = self._request(fullurl, data, False, timeout) scheme = req.get_type() fp = self.open(req) try: headers = fp.info() if filename is None and scheme == 'file': # XXX req.get_selector() seems broken here, return None, # pending sanity :-/ return None, headers # return urllib.url2pathname(req.get_selector()), headers if filename: tfp = open(filename, 'wb') else: path = _rfc3986.urlsplit(req.get_full_url())[2] suffix = os.path.splitext(path)[1] fd, filename = tempfile.mkstemp(suffix) self._tempfiles.append(filename) tfp = os.fdopen(fd, 'wb') try: result = filename, headers bs = self.BLOCK_SIZE size = -1 read = 0 blocknum = 0 if reporthook: if "content-length" in headers: size = int(headers["content-length"]) reporthook(blocknum, bs, size) while 1: block = fp.read(bs) if not block: break read += len(block) tfp.write(block) blocknum += 1 if reporthook: reporthook(blocknum, bs, size) finally: tfp.close() finally: fp.close() # raise exception if actual size does not match content-length header if size >= 0 and read < size: raise ContentTooShortError( "retrieval incomplete: " "got only %i out of %i bytes" % (read, size), result ) return result def close(self): _urllib2_fork.OpenerDirector.close(self) # make it very obvious this object is no longer supposed to be used self.open = self.error = self.retrieve = self.add_handler = None if self._tempfiles: for filename in self._tempfiles: try: os.unlink(filename) except OSError: pass del self._tempfiles[:] def wrapped_open(urlopen, process_response_object, fullurl, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): success = True try: response = urlopen(fullurl, data, timeout) except HTTPError as error: success = False if error.fp is None: # not a response raise response = error if response is not None: response = process_response_object(response) if not success: raise response return response class ResponseProcessingOpener(OpenerDirector): def open(self, fullurl, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): def bound_open(fullurl, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): return OpenerDirector.open(self, fullurl, data, timeout) return wrapped_open( bound_open, self.process_response_object, fullurl, data, timeout) def process_response_object(self, response): return response class SeekableResponseOpener(ResponseProcessingOpener): def process_response_object(self, response): return _response.seek_wrapped_response(response) class OpenerFactory: """This class's interface is quite likely to change.""" default_classes = [ # handlers _urllib2_fork.ProxyHandler, _urllib2_fork.UnknownHandler, _urllib2_fork.HTTPHandler, _urllib2_fork.HTTPDefaultErrorHandler, _urllib2_fork.HTTPRedirectHandler, _urllib2_fork.FTPHandler, _urllib2_fork.FileHandler, # processors _urllib2_fork.HTTPCookieProcessor, _urllib2_fork.HTTPErrorProcessor, ] default_classes.append(_urllib2_fork.HTTPSHandler) handlers = [] replacement_handlers = [] def __init__(self, klass=OpenerDirector): self.klass = klass def build_opener(self, *handlers): """Create an opener object from a list of handlers and processors. The opener will use several default handlers and processors, including support for HTTP and FTP. If any of the handlers passed as arguments are subclasses of the default handlers, the default handlers will not be used. """ opener = self.klass() default_classes = list(self.default_classes) skip = set() for klass in default_classes: for check in handlers: if is_class(check): if issubclass(check, klass): skip.add(klass) elif isinstance(check, klass): skip.add(klass) for klass in skip: default_classes.remove(klass) for klass in default_classes: opener.add_handler(klass()) for h in handlers: if is_class(h): h = h() opener.add_handler(h) return opener build_opener = OpenerFactory().build_opener thread_local = threading.local() thread_local.opener = None def get_thread_local_opener(): try: ans = thread_local.opener except AttributeError: # threading module is broken, use a single global instance ans = getattr(get_thread_local_opener, 'ans', None) if ans is None: ans = get_thread_local_opener.ans = build_opener() if ans is None: ans = thread_local.opener = build_opener() return ans def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): return get_thread_local_opener().open(url, data, timeout) def urlretrieve(url, filename=None, reporthook=None, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT): return get_thread_local_opener().retrieve( url, filename, reporthook, data, timeout) def install_opener(opener): get_thread_local_opener.ans = opener try: thread_local.opener = opener except AttributeError: pass