# mitmproxy/net/http/http1/read.py — HTTP/1.x message reading and parsing.
import re
import sys
import time
import typing
from mitmproxy import exceptions
from mitmproxy.net.http import headers
from mitmproxy.net.http import request
from mitmproxy.net.http import response
from mitmproxy.net.http import url
def get_header_tokens(headers, key):
    """
    Retrieve all tokens for a header key. A number of different headers
    follow a pattern where each header line can contain comma-separated
    tokens, and headers can be set multiple times.
    """
    try:
        raw = headers[key]
    except KeyError:
        return []
    return [token.strip() for token in raw.split(",")]
def read_request(rfile, body_size_limit=None):
    """
    Read a complete HTTP request (head and body) from an input stream.

    Args:
        rfile: The input stream
        body_size_limit: Maximum body size in bytes, or None for no limit.
    Returns:
        The HTTP request object, with content read and timestamp_end set.
    """
    req = read_request_head(rfile)
    body_size = expected_http_body_size(req)
    req.data.content = b"".join(
        read_body(rfile, body_size, limit=body_size_limit)
    )
    req.timestamp_end = time.time()
    return req
def read_request_head(rfile):
    """
    Parse an HTTP request head (request line + headers) from an input stream.

    Args:
        rfile: The input stream
    Returns:
        The HTTP request object (without body)
    Raises:
        exceptions.HttpReadDisconnect: No bytes can be read from rfile.
        exceptions.HttpSyntaxException: The input is malformed HTTP.
        exceptions.HttpException: Any other error occurred.
    """
    timestamp_start = time.time()
    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    host, port, method, scheme, authority, path, http_version = _read_request_line(rfile)
    parsed_headers = _read_headers(rfile)

    if hasattr(rfile, "first_byte_timestamp"):
        # Prefer the arrival time of the first byte as a more accurate start.
        timestamp_start = rfile.first_byte_timestamp

    return request.Request(
        host, port, method, scheme, authority, path, http_version,
        parsed_headers, None, None, timestamp_start, None
    )
def read_response(rfile, request, body_size_limit=None):
    """
    Read a complete HTTP response (head and body) from an input stream.

    Args:
        rfile: The input stream
        request: The request this response answers (needed to size the body).
        body_size_limit: Maximum body size in bytes, or None for no limit.
    Returns:
        The HTTP response object, with content read and timestamp_end set.
    """
    resp = read_response_head(rfile)
    body_size = expected_http_body_size(request, resp)
    resp.data.content = b"".join(read_body(rfile, body_size, body_size_limit))
    resp.timestamp_end = time.time()
    return resp
def read_response_head(rfile):
    """
    Parse an HTTP response head (response line + headers) from an input stream.

    Args:
        rfile: The input stream
    Returns:
        The HTTP response object (without body)
    Raises:
        exceptions.HttpReadDisconnect: No bytes can be read from rfile.
        exceptions.HttpSyntaxException: The input is malformed HTTP.
        exceptions.HttpException: Any other error occurred.
    """
    timestamp_start = time.time()
    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    http_version, status_code, message = _read_response_line(rfile)
    parsed_headers = _read_headers(rfile)

    if hasattr(rfile, "first_byte_timestamp"):
        # Prefer the arrival time of the first byte as a more accurate start.
        timestamp_start = rfile.first_byte_timestamp

    return response.Response(
        http_version, status_code, message, parsed_headers,
        None, None, timestamp_start, None
    )
def read_body(rfile, expected_size, limit=None, max_chunk_size=4096):
    """
    Read an HTTP message body.

    Args:
        rfile: The input stream
        expected_size: The expected body size (see :py:meth:`expected_body_size`)
        limit: Maximum body size
        max_chunk_size: Maximum chunk size that gets yielded
    Returns:
        A generator that yields byte chunks of the content.
    Raises:
        exceptions.HttpException, if an error occurs
    Caveats:
        max_chunk_size is not considered if the transfer encoding is chunked.
    """
    if not limit or limit < 0:
        limit = sys.maxsize
    if not max_chunk_size:
        max_chunk_size = limit

    if expected_size is None:
        # Chunked transfer encoding: chunk boundaries come from the stream.
        yield from _read_chunked(rfile, limit)
    elif expected_size >= 0:
        # Known size from Content-Length.
        if limit is not None and expected_size > limit:
            raise exceptions.HttpException(
                "HTTP Body too large. "
                "Limit is {}, content length was advertised as {}".format(limit, expected_size)
            )
        remaining = expected_size
        while remaining:
            step = min(remaining, max_chunk_size)
            data = rfile.read(step)
            if len(data) < step:
                raise exceptions.HttpException("Unexpected EOF")
            yield data
            remaining -= step
    else:
        # Unknown size: read until EOF, but never more than `limit` bytes.
        remaining = limit
        while remaining:
            step = min(remaining, max_chunk_size)
            data = rfile.read(step)
            if not data:
                return
            yield data
            remaining -= step
        if rfile.read(1):
            raise exceptions.HttpException(f"HTTP body too large. Limit is {limit}.")
def connection_close(http_version, headers):
    """
    Checks the message to see if the client connection should be closed
    according to RFC 2616 Section 8.1.
    If we don't have a Connection header, HTTP 1.1 connections are assumed
    to be persistent.
    """
    if "connection" in headers:
        tokens = get_header_tokens(headers, "connection")
        if "close" in tokens:
            return True
        if "keep-alive" in tokens:
            return False
    # No explicit directive: only HTTP/1.1 and HTTP/2.0 default to persistent.
    persistent_versions = ("HTTP/1.1", b"HTTP/1.1", "HTTP/2.0", b"HTTP/2.0")
    return http_version not in persistent_versions
def expected_http_body_size(
    request: request.Request,
    response: typing.Optional[response.Response] = None,
    expect_continue_as_0: bool = True
):
    """
    Args:
        - expect_continue_as_0: If true, incorrectly predict a body size of 0 for requests which are waiting
          for a 100 Continue response.
    Returns:
        The expected body length:
        - a positive integer, if the size is known in advance
        - None, if the size in unknown in advance (chunked encoding)
        - -1, if all data should be read until end of stream.
    Raises:
        exceptions.HttpSyntaxException, if the content length header is invalid
    """
    # Determine response size according to
    # http://tools.ietf.org/html/rfc7230#section-3.3
    if not response:
        headers = request.headers
        if expect_continue_as_0 and headers.get("expect", "").lower() == "100-continue":
            return 0
    else:
        headers = response.headers
        method = request.method.upper()
        if method == "HEAD":
            return 0
        if 100 <= response.status_code <= 199:
            return 0
        if response.status_code == 200 and method == "CONNECT":
            return 0
        if response.status_code in (204, 304):
            return 0

    if "chunked" in headers.get("transfer-encoding", "").lower():
        return None

    if "content-length" in headers:
        sizes = headers.get_all("content-length")
        # All Content-Length headers must agree; conflicting values are a
        # request-smuggling vector and must be rejected.
        if any(s != sizes[0] for s in sizes):
            raise exceptions.HttpSyntaxException("Conflicting Content Length Headers")
        try:
            size = int(sizes[0])
            if size < 0:
                raise ValueError()
            return size
        except ValueError as e:
            raise exceptions.HttpSyntaxException("Unparseable Content Length") from e

    if not response:
        # Requests without a body indicator have no body.
        return 0
    # Responses without a body indicator: read until end of stream.
    return -1
def _get_first_line(rfile):
try:
line = rfile.readline()
if line == b"\r\n" or line == b"\n":
# Possible leftover from previous message
line = rfile.readline()
except (exceptions.TcpDisconnect, exceptions.TlsException):
raise exceptions.HttpReadDisconnect("Remote disconnected")
if not line:
raise exceptions.HttpReadDisconnect("Remote disconnected")
return line.strip()
def _read_request_line(rfile):
    """
    Parse an HTTP request line into its components.

    Returns:
        A (host, port, method, scheme, authority, path, http_version) tuple.
    Raises:
        exceptions.HttpReadDisconnect: if the client closed the connection.
        exceptions.HttpSyntaxException: if the request line is malformed.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected")

    try:
        method, target, http_version = line.split()

        if target == b"*" or target.startswith(b"/"):
            # origin-form or asterisk-form: no scheme/authority present.
            scheme, authority, path = b"", b"", target
            host, port = "", 0
        elif method == b"CONNECT":
            # authority-form: the target is the authority itself.
            scheme, authority, path = b"", target, b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            # absolute-form: scheme://authority/path
            scheme, rest = target.split(b"://", maxsplit=1)
            authority, path_ = rest.split(b"/", maxsplit=1)
            path = b"/" + path_
            host, port = url.parse_authority(authority, check=True)
            port = port or url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        _check_http_version(http_version)
    except ValueError:
        raise exceptions.HttpSyntaxException(f"Bad HTTP request line: {line}")

    return host, port, method, scheme, authority, path, http_version
def _read_response_line(rfile):
    """
    Parse an HTTP status line into its components.

    Returns:
        A (http_version, status_code, message) tuple.
    Raises:
        exceptions.HttpReadDisconnect: if the server closed the connection.
        exceptions.HttpSyntaxException: if the status line is malformed.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Server disconnected")

    try:
        fields = line.split(None, 2)
        if len(fields) == 2:
            # handle missing message gracefully
            fields.append(b"")
        http_version, raw_status, message = fields
        status_code = int(raw_status)
        _check_http_version(http_version)
    except ValueError:
        raise exceptions.HttpSyntaxException(f"Bad HTTP response line: {line}")

    return http_version, status_code, message
def _check_http_version(http_version):
if not re.match(br"^HTTP/\d\.\d$", http_version):
raise exceptions.HttpSyntaxException(f"Unknown HTTP version: {http_version}")
def _read_headers(rfile):
    """
    Read a set of headers.
    Stop once a blank line is reached.

    Returns:
        A headers object
    Raises:
        exceptions.HttpSyntaxException
    """
    fields = []
    while True:
        line = rfile.readline()
        if not line or line in (b"\r\n", b"\n"):
            # we do have coverage of this, but coverage.py does not detect it.
            break  # pragma: no cover
        if line[0] in b" \t":
            # Obsolete line folding: continuation of the previous header.
            if not fields:
                raise exceptions.HttpSyntaxException("Invalid headers")
            prev_name, prev_value = fields[-1]
            fields[-1] = (prev_name, prev_value + b'\r\n ' + line.strip())
        else:
            try:
                name, value = line.split(b":", 1)
                value = value.strip()
                if not name:
                    raise ValueError()
                fields.append((name, value))
            except ValueError:
                raise exceptions.HttpSyntaxException(
                    "Invalid header line: %s" % repr(line)
                )
    return headers.Headers(fields)
def _read_chunked(rfile, limit=sys.maxsize):
"""
Read a HTTP body with chunked transfer encoding.
Args:
rfile: the input file
limit: A positive integer
"""
total = 0
while True:
line = rfile.readline(128)
if line == b"":
raise exceptions.HttpException("Connection closed prematurely")
if line != b"\r\n" and line != b"\n":
try:
length = int(line, 16)
except ValueError:
raise exceptions.HttpSyntaxException(f"Invalid chunked encoding length: {line}")
total += length
if total > limit:
raise exceptions.HttpException(
"HTTP Body too large. Limit is {}, "
"chunked content longer than {}".format(limit, total)
)
chunk = rfile.read(length)
suffix = rfile.readline(5)
if suffix != b"\r\n":
raise exceptions.HttpSyntaxException("Malformed chunked body")
if length == 0:
return
yield chunk