%PDF- %PDF-
Direktori : /lib/calibre/calibre/srv/ |
Current File : //lib/calibre/calibre/srv/http_request.py |
#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

import re
from io import BytesIO, DEFAULT_BUFFER_SIZE

from calibre import as_unicode, force_unicode
from calibre.ptempfile import SpooledTemporaryFile
from calibre.srv.errors import HTTPSimpleResponse
from calibre.srv.loop import Connection, READ, WRITE
from calibre.srv.utils import MultiDict, HTTP1, HTTP11, Accumulator
from polyglot import http_client, reprlib
from polyglot.urllib import unquote
from polyglot.builtins import error_message

# Maps a parsed (major, minor) request version to the protocol constant
protocol_map = {(1, 0):HTTP1, (1, 1):HTTP11}
# Matches a percent-encoded forward slash, in either case, in a bytes path
quoted_slash = re.compile(br'%2[fF]')
HTTP_METHODS = {'HEAD', 'GET', 'PUT', 'POST', 'TRACE', 'DELETE', 'OPTIONS'}

# Parse URI {{{


def parse_request_uri(uri):
    """Parse a Request-URI into (scheme, authority, path).

    Note that Request-URI's must be one of::

        Request-URI    = "*" | absoluteURI | abs_path | authority

    Therefore, a Request-URI which starts with a double forward-slash
    cannot be a "net_path"::

        net_path      = "//" authority [ abs_path ]

    Instead, it must be interpreted as an "abs_path" with an empty first
    path segment::

        abs_path      = "/"  path_segments
        path_segments = segment *( "/" segment )
        segment       = *pchar *( ";" param )
        param         = *pchar

    :param uri: The Request-URI as bytes.
    :return: A 3-tuple ``(scheme, authority, path)`` of bytes objects; any
        component that is not present in ``uri`` is ``None``.
    """
    if uri == b'*':
        return None, None, uri

    i = uri.find(b'://')
    if i > 0 and b'?' not in uri[:i]:
        # An absoluteURI.
        # If there's a scheme (and it must be http or https), then:
        # http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query
        # ]]
        scheme, remainder = uri[:i].lower(), uri[i + 3:]
        authority, path = remainder.partition(b'/')[::2]
        # partition() consumed the separating slash, put it back
        path = b'/' + path
        return scheme, authority, path

    if uri.startswith(b'/'):
        # An abs_path.
        return None, None, uri
    else:
        # An authority.
        return None, uri, None


def parse_uri(uri, parse_query=True, unquote_func=unquote):
    """Split a Request-URI into a decoded scheme, path tuple and query.

    :param uri: The Request-URI as bytes.
    :param parse_query: When true the query string is parsed with
        ``MultiDict.create_from_query_string``, otherwise the returned
        query is ``None``.
    :param unquote_func: Callable used to percent-decode each piece of the
        path; its result is decoded as UTF-8.
    :return: ``(scheme, path, query)`` where ``scheme`` is a str or
        ``None``, ``path`` is a tuple of the non-empty path segments as str
        and ``query`` is the parsed query or ``None``.
    :raises HTTPSimpleResponse: With BAD_REQUEST if the URI has no path,
        contains a fragment, or its scheme, query or path cannot be decoded.
    """
    scheme, authority, path = parse_request_uri(uri)
    if path is None:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "No path component")
    if b'#' in path:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, "Illegal #fragment in Request-URI.")

    if scheme:
        try:
            scheme = scheme.decode('ascii')
        except ValueError:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Un-decodeable scheme')

    path, qs = path.partition(b'?')[::2]
    if parse_query:
        try:
            query = MultiDict.create_from_query_string(qs)
        except Exception:
            raise HTTPSimpleResponse(http_client.BAD_REQUEST, 'Unparseable query string')
    else:
        query = None

    try:
        # Encoded slashes are split out before unquoting and re-inserted as
        # the literal text %2F, so that they are not treated as segment
        # separators by the split('/') below.
        path = '%2F'.join(unquote_func(x).decode('utf-8') for x in quoted_slash.split(path))
    except ValueError as e:
        raise HTTPSimpleResponse(http_client.BAD_REQUEST, as_unicode(e))
    # Only now, inside each individual segment, turn %2F into a real slash
    path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/'))))

    return scheme, path, query
# }}}

# HTTP Header parsing {{{


# Headers whose repeated occurrences are folded into one comma separated value
comma_separated_headers = {
    'Accept', 'Accept-Charset', 'Accept-Encoding',
    'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control',
    'Connection', 'Content-Encoding', 'Content-Language', 'Expect',
    'If-Match', 'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'TE',
    'Trailer', 'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning',
}

# Headers whose values must be valid UTF-8; for all other headers an
# undecodable value is kept as raw bytes (see HTTPHeaderParser.__call__)
decoded_headers = {
    'Transfer-Encoding', 'Keep-Alive', 'Expect', 'WWW-Authenticate', 'Authorization',
    'Sec-WebSocket-Key', 'Sec-WebSocket-Version', 'Sec-WebSocket-Protocol',
} | comma_separated_headers

# Leading header name parts that are written fully upper-cased
uppercase_headers = {'WWW', 'TE'}


def normalize_header_name(name):
    """Return ``name`` with each dash separated part capitalized.

    A first part listed in ``uppercase_headers`` is upper-cased instead and,
    in three-part names, a middle part of ``Websocket`` is spelled
    ``WebSocket``.
    """
    parts = [x.capitalize() for x in name.split('-')]
    q = parts[0].upper()
    if q in uppercase_headers:
        parts[0] = q
    if len(parts) == 3 and parts[1] == 'Websocket':
        parts[1] = 'WebSocket'
    return '-'.join(parts)


class HTTPHeaderParser:

    '''
    Parse HTTP headers. Use this class by repeatedly calling the created object
    with a single line at a time and checking the finished attribute. Can raise ValueError
    for malformed headers, in which case you should probably return BAD_REQUEST.

    Headers which are repeated are folded together using a comma if their
    specification so dictates.
    '''
    __slots__ = ('hdict', 'lines', 'finished')

    def __init__(self):
        # Parsed headers, keyed by normalized header name
        self.hdict = MultiDict()
        # Raw lines of the header currently being accumulated, continuation
        # lines included
        self.lines = []
        # Set to True once the blank line ending the header block is seen
        self.finished = False

    def push(self, *lines):
        'Process several lines, in order, as if each was passed to a call'
        for line in lines:
            self(line)

    def __call__(self, line):
        '''Process a single line

        ``line`` is the raw bytes of one header line, including its
        terminator. Raises ValueError if the line is malformed or if the
        header block has already been terminated.
        '''

        def safe_decode(hname, value):
            # Undecodable values are fatal only for headers that are
            # required to be text, otherwise the raw bytes are kept
            try:
                return value.decode('utf-8')
            except UnicodeDecodeError:
                if hname in decoded_headers:
                    raise
            return value

        def commit():
            # Turn the accumulated lines into a single header in hdict
            if not self.lines:
                return
            line = b' '.join(self.lines)
            del self.lines[:]

            k, v = line.partition(b':')[::2]
            key = normalize_header_name(k.strip().decode('ascii'))
            val = safe_decode(key, v.strip())
            if not key or not val:
                raise ValueError('Malformed header line: %s' % reprlib.repr(line))
            if key in comma_separated_headers:
                existing = self.hdict.pop(key)
                if existing is not None:
                    val = existing + ', ' + val
            self.hdict[key] = val

        if self.finished:
            raise ValueError('Header block already terminated')

        if line == b'\r\n':
            # Normal end of headers
            commit()
            self.finished = True
            return

        if line and line[0] in b' \t':
            # It's a continuation line.
            if not self.lines:
                raise ValueError('Orphaned continuation line')
            self.lines.append(line.lstrip())
        else:
            # A new header starts, so the previous one is complete
            commit()
            self.lines.append(line)


def read_headers(readline):
    """Parse a header block by calling ``readline()`` until it is finished.

    :param readline: Callable returning the next raw header line as bytes.
    :return: The ``MultiDict`` of parsed headers.
    :raises ValueError: If a header line is malformed.
    """
    p = HTTPHeaderParser()
    while not p.finished:
        p(readline())
    return p.hdict
# }}}


class HTTPRequest(Connection):

    '''
    Incrementally read and parse an HTTP request: the request line, the
    headers and then the request body (sized or chunked). Each parsing stage
    is a method registered with ``set_state()``. The ``write()``,
    ``simple_response()`` and ``prepare_response()`` methods must be provided
    by a subclass.
    '''

    request_handler = None
    static_cache = None
    translator_cache = None

    def __init__(self, *args, **kwargs):
        Connection.__init__(self, *args, **kwargs)
        # The option values are multiplied out into byte counts
        self.max_header_line_size = int(1024 * self.opts.max_header_line_size)
        self.max_request_body_size = int(1024 * 1024 * self.opts.max_request_body_size)
        self.forwarded_for = None
        self.request_original_uri = None

    def read(self, buf, endpos):
        '''Receive data into ``buf`` until its position reaches ``endpos``.

        Return True when no more data is needed, False if more has to be
        read in a later call.
        '''
        size = endpos - buf.tell()
        if size > 0:
            data = self.recv(size)
            if data:
                buf.write(data)
                return len(data) >= size
            else:
                return False
        else:
            return True

    def readline(self, buf):
        '''Accumulate data from the read buffer into ``buf`` until a full line is available.

        Return the complete line, including its CRLF terminator. Return None
        if the line is not yet complete, or if an error response was sent
        because the line is too long or is not terminated by CRLF.
        '''
        line = self.read_buffer.readline()
        buf.append(line)
        if buf.total_length > self.max_header_line_size:
            self.simple_response(self.header_line_too_long_error_code)
            return
        if line.endswith(b'\n'):
            line = buf.getvalue()
            if not line.endswith(b'\r\n'):
                self.simple_response(http_client.BAD_REQUEST, 'HTTP requires CRLF line terminators')
                return
            return line
        if not line:
            # read buffer is empty, fill it
            self.fill_read_buffer()

    def connection_ready(self):
        'Become ready to read an HTTP request'
        self.method = self.request_line = None
        self.response_protocol = self.request_protocol = HTTP1
        self.forwarded_for = None
        self.path = self.query = None
        self.close_after_response = False
        # While the request line is being read an over long line means the
        # URI is too long; this is changed once the headers are reached
        self.header_line_too_long_error_code = http_client.REQUEST_URI_TOO_LONG
        self.response_started = False
        self.set_state(READ, self.parse_request_line, Accumulator(), first=True)

    def parse_request_line(self, buf, event, first=False):  # {{{
        '''Read and parse the Request-Line, then move on to the headers.

        ``first`` is True only for the initial attempt, during which a
        single leading empty line is tolerated.
        '''
        line = self.readline(buf)
        if line is None:
            return
        self.request_line = line.rstrip()
        if line == b'\r\n':
            # Ignore a single leading empty line, as per RFC 2616 sec 4.1
            if first:
                return self.set_state(READ, self.parse_request_line, Accumulator())
            return self.simple_response(http_client.BAD_REQUEST, 'Multiple leading empty lines not allowed')

        try:
            method, uri, req_protocol = line.strip().split(b' ', 2)
            req_protocol = req_protocol.decode('ascii')
            # The version digits of a string of the form HTTP/x.y
            rp = int(req_protocol[5]), int(req_protocol[7])
            self.method = method.decode('ascii').upper()
        except Exception:
            return self.simple_response(http_client.BAD_REQUEST, "Malformed Request-Line")

        if self.method not in HTTP_METHODS:
            return self.simple_response(http_client.BAD_REQUEST, "Unknown HTTP method")

        try:
            self.request_protocol = protocol_map[rp]
        except KeyError:
            return self.simple_response(http_client.HTTP_VERSION_NOT_SUPPORTED)
        # Respond with the lower of the request version and HTTP/1.1
        self.response_protocol = protocol_map[min((1, 1), rp)]
        self.request_original_uri = uri
        try:
            self.scheme, self.path, self.query = parse_uri(uri)
        except HTTPSimpleResponse as e:
            return self.simple_response(e.http_code, error_message(e), close_after_response=False)
        self.header_line_too_long_error_code = http_client.REQUEST_ENTITY_TOO_LARGE
        self.set_state(READ, self.parse_header_line, HTTPHeaderParser(), Accumulator())
    # }}}

    @property
    def state_description(self):
        'A one line summary of the current state, client address and request line'
        return 'State: {} Client: {}:{} Request: {}'.format(
            getattr(self.handle_event, '__name__', None),
            self.remote_addr, self.remote_port,
            force_unicode(getattr(self, 'request_line', 'WebSocketConnection'), 'utf-8'))

    def parse_header_line(self, parser, buf, event):
        'Feed one header line to ``parser``, finalizing the headers once the block is complete'
        line = self.readline(buf)
        if line is None:
            return
        try:
            parser(line)
        except ValueError:
            self.simple_response(http_client.BAD_REQUEST, 'Failed to parse header line')
            return
        if parser.finished:
            self.finalize_headers(parser.hdict)

    def finalize_headers(self, inheaders):
        '''Validate the request headers and start reading the request body.

        Sends an error response and returns if Content-Length is invalid or
        too large, or if an unsupported Transfer-Encoding is requested.
        '''
        try:
            request_content_length = int(inheaders.get('Content-Length', 0))
            if request_content_length < 0:
                raise ValueError('Negative Content-Length')
        except ValueError:
            # The value is supplied by the client, so a malformed one must
            # result in an error response, not an unhandled exception
            return self.simple_response(http_client.BAD_REQUEST, 'Invalid Content-Length header')
        if request_content_length > self.max_request_body_size:
            return self.simple_response(http_client.REQUEST_ENTITY_TOO_LARGE,
                "The entity sent with the request exceeds the maximum "
                "allowed bytes (%d)." % self.max_request_body_size)
        # Persistent connection support
        if self.response_protocol is HTTP11:
            # Both server and client are HTTP/1.1
            if inheaders.get("Connection", "") == "close":
                self.close_after_response = True
        else:
            # Either the server or client (or both) are HTTP/1.0
            if inheaders.get("Connection", "") != "Keep-Alive":
                self.close_after_response = True

        # Transfer-Encoding support
        te = ()
        if self.response_protocol is HTTP11:
            rte = inheaders.get("Transfer-Encoding")
            if rte:
                te = [x.strip().lower() for x in rte.split(",") if x.strip()]
        chunked_read = False
        if te:
            for enc in te:
                if enc == "chunked":
                    chunked_read = True
                else:
                    # Note that, even if we see "chunked", we must reject
                    # if there is an extension we don't recognize.
                    return self.simple_response(http_client.NOT_IMPLEMENTED, "Unknown transfer encoding: %r" % enc)

        # Must be recorded before the 100-continue branch below, which
        # returns early
        self.forwarded_for = inheaders.get('X-Forwarded-For')

        if inheaders.get("Expect", '').lower() == "100-continue":
            buf = BytesIO((HTTP11 + " 100 Continue\r\n\r\n").encode('ascii'))
            return self.set_state(WRITE, self.write_continue, buf, inheaders, request_content_length, chunked_read)

        self.read_request_body(inheaders, request_content_length, chunked_read)

    def write_continue(self, buf, inheaders, request_content_length, chunked_read, event):
        'Send the 100 Continue response in ``buf``, then start reading the request body'
        if self.write(buf):
            self.read_request_body(inheaders, request_content_length, chunked_read)

    def read_request_body(self, inheaders, request_content_length, chunked_read):
        'Start reading the request body, or prepare the response at once if there is no body'
        buf = SpooledTemporaryFile(prefix='rq-body-', max_size=DEFAULT_BUFFER_SIZE, dir=self.tdir)
        if chunked_read:
            # bytes_read is a one element list so that the running total can
            # be updated in place as it is passed between the chunk states
            self.set_state(READ, self.read_chunk_length, inheaders, Accumulator(), buf, [0])
        else:
            if request_content_length > 0:
                self.set_state(READ, self.sized_read, inheaders, buf, request_content_length)
            else:
                self.prepare_response(inheaders, BytesIO())

    def sized_read(self, inheaders, buf, request_content_length, event):
        'Read a body of known length into ``buf``, preparing the response once it is complete'
        if self.read(buf, request_content_length):
            self.prepare_response(inheaders, buf)

    def read_chunk_length(self, inheaders, line_buf, buf, bytes_read, event):
        'Read the size line of the next chunk and then read either the chunk or the final separator'
        line = self.readline(line_buf)
        if line is None:
            return
        bytes_read[0] += len(line)
        try:
            chunk_size = int(line.strip(), 16)
            if chunk_size < 0:
                # int() accepts a leading minus sign. A negative size would
                # decrement bytes_read, letting a client get around
                # max_request_body_size.
                raise ValueError('Negative chunk size')
        except Exception:
            return self.simple_response(http_client.BAD_REQUEST, '%s is not a valid chunk size' % reprlib.repr(line.strip()))
        # The extra 2 bytes are for the CRLF that follows the chunk data
        if bytes_read[0] + chunk_size + 2 > self.max_request_body_size:
            return self.simple_response(http_client.REQUEST_ENTITY_TOO_LARGE,
                          'Chunked request is larger than %d bytes' % self.max_request_body_size)
        if chunk_size == 0:
            # A zero sized chunk marks the end of the body
            self.set_state(READ, self.read_chunk_separator, inheaders, Accumulator(), buf, bytes_read, last=True)
        else:
            self.set_state(READ, self.read_chunk, inheaders, buf, chunk_size, buf.tell() + chunk_size, bytes_read)

    def read_chunk(self, inheaders, buf, chunk_size, end, bytes_read, event):
        'Read the data of one chunk into ``buf``, then read the separator that follows it'
        if not self.read(buf, end):
            return
        bytes_read[0] += chunk_size
        self.set_state(READ, self.read_chunk_separator, inheaders, Accumulator(), buf, bytes_read)

    def read_chunk_separator(self, inheaders, line_buf, buf, bytes_read, event, last=False):
        '''Read the CRLF that follows a chunk.

        If ``last`` is True the body is complete and the response is
        prepared, otherwise the next chunk size line is read.
        '''
        line = self.readline(line_buf)
        if line is None:
            return
        if line != b'\r\n':
            return self.simple_response(http_client.BAD_REQUEST, 'Chunk does not have trailing CRLF')
        bytes_read[0] += len(line)
        if bytes_read[0] > self.max_request_body_size:
            return self.simple_response(http_client.REQUEST_ENTITY_TOO_LARGE,
                          'Chunked request is larger than %d bytes' % self.max_request_body_size)
        if last:
            self.prepare_response(inheaders, buf)
        else:
            self.set_state(READ, self.read_chunk_length, inheaders, Accumulator(), buf, bytes_read)

    def handle_timeout(self):
        'Send a REQUEST_TIMEOUT response if possible. Return True if it was sent, False otherwise'
        if not hasattr(self, 'response_protocol') or self.response_started:
            # Either connection is not ready or a response has already been
            # started
            return False
        self.simple_response(http_client.REQUEST_TIMEOUT)
        return True

    def write(self, buf, end=None):
        'Must be implemented in a subclass'
        raise NotImplementedError()

    def simple_response(self, status_code, msg='', close_after_response=True):
        'Must be implemented in a subclass'
        raise NotImplementedError()

    def prepare_response(self, inheaders, request_body_file):
        'Must be implemented in a subclass'
        raise NotImplementedError()