%PDF- %PDF-
| Direktori : /lib/python3/dist-packages/mitmproxy/proxy/protocol/ |
| Current File : //lib/python3/dist-packages/mitmproxy/proxy/protocol/http.py |
import textwrap
import h2.exceptions
import time
import enum
from mitmproxy import connections # noqa
from mitmproxy import exceptions
from mitmproxy import http
from mitmproxy import flow
from mitmproxy.net.http import url
from mitmproxy.proxy.protocol import base
from mitmproxy.proxy.protocol.websocket import WebSocketLayer
from mitmproxy.net import websocket
class _HttpTransmissionLayer(base.Layer):
def read_request_headers(self, flow):
raise NotImplementedError()
def read_request_body(self, request):
raise NotImplementedError()
def read_request_trailers(self, request):
raise NotImplementedError()
def send_request(self, request):
raise NotImplementedError()
def read_response_headers(self):
raise NotImplementedError()
def read_response_body(self, request, response):
raise NotImplementedError()
yield "this is a generator" # pragma: no cover
def read_response_trailers(self, request, response):
raise NotImplementedError()
def read_response(self, request):
response = self.read_response_headers()
response.data.content = b"".join(
self.read_response_body(request, response)
)
response.data.trailers = self.read_response_trailers(request, response)
return response
def send_response(self, response):
if response.data.content is None:
raise exceptions.HttpException("Cannot assemble flow with missing content")
self.send_response_headers(response)
self.send_response_body(response, [response.data.content])
self.send_response_trailers(response)
def send_response_headers(self, response):
raise NotImplementedError()
def send_response_body(self, response, chunks):
raise NotImplementedError()
def send_response_trailers(self, response, chunks):
raise NotImplementedError()
def check_close_connection(self, f):
raise NotImplementedError()
class ConnectServerConnection:
"""
"Fake" ServerConnection to represent state after a CONNECT request to an upstream proxy.
"""
def __init__(self, address, ctx):
self.address = address
self._ctx = ctx
@property
def via(self):
return self._ctx.server_conn
def __getattr__(self, item):
return getattr(self.via, item)
def connected(self):
return self.via.connected()
class UpstreamConnectLayer(base.Layer):
def __init__(self, ctx, connect_request):
super().__init__(ctx)
self.connect_request = connect_request
self.server_conn = ConnectServerConnection(
(connect_request.host, connect_request.port),
self.ctx
)
def __call__(self):
layer = self.ctx.next_layer(self)
layer()
def _send_connect_request(self):
self.log("Sending CONNECT request", "debug", [
f"Proxy Server: {self.ctx.server_conn.address}",
f"Connect to: {self.connect_request.host}:{self.connect_request.port}"
])
self.send_request(self.connect_request)
resp = self.read_response(self.connect_request)
if resp.status_code != 200:
raise exceptions.ProtocolException("Reconnect: Upstream server refuses CONNECT request")
def connect(self):
if not self.server_conn.connected():
self.ctx.connect()
self._send_connect_request()
else:
pass # swallow the message
def change_upstream_proxy_server(self, address):
self.log("Changing upstream proxy to {} (CONNECTed)".format(repr(address)), "debug")
if address != self.server_conn.via.address:
self.ctx.set_server(address)
def set_server(self, address):
if self.ctx.server_conn.connected():
self.ctx.disconnect()
self.connect_request.host = address[0]
self.connect_request.port = address[1]
self.server_conn.address = address
def is_ok(status):
return 200 <= status < 300
class HTTPMode(enum.Enum):
regular = 1
transparent = 2
upstream = 3
# At this point, we see only a subset of the proxy modes
MODE_REQUEST_FORMS = {
HTTPMode.regular: ("authority", "absolute"),
HTTPMode.transparent: ("relative",),
HTTPMode.upstream: ("authority", "absolute"),
}
def validate_request_form(mode, request):
if request.first_line_format == "absolute" and request.scheme not in ("http", "https"):
raise exceptions.HttpException(
"Invalid request scheme: %s" % request.scheme
)
allowed_request_forms = MODE_REQUEST_FORMS[mode]
if request.first_line_format not in allowed_request_forms:
if request.is_http2 and mode is HTTPMode.transparent and request.first_line_format == "absolute":
return # dirty hack: h2 may have authority info. will be fixed properly with sans-io.
if mode == HTTPMode.transparent:
err_message = textwrap.dedent(
"""
Mitmproxy received an {} request even though it is not running
in regular mode. This usually indicates a misconfiguration,
please see the mitmproxy mode documentation for details.
"""
).strip().format("HTTP CONNECT" if request.first_line_format == "authority" else "absolute-form")
else:
err_message = "Invalid HTTP request form (expected: {}, got: {})".format(
" or ".join(allowed_request_forms), request.first_line_format
)
raise exceptions.HttpException(err_message)
class HttpLayer(base.Layer):
if False:
# mypy type hints
server_conn: connections.ServerConnection = None
def __init__(self, ctx, mode):
super().__init__(ctx)
self.mode = mode
self.__initial_server_address: tuple = None
"Contains the original destination in transparent mode, which needs to be restored"
"if an inline script modified the target server for a single http request"
# We cannot rely on server_conn.tls_established,
# see https://github.com/mitmproxy/mitmproxy/issues/925
self.__initial_server_tls = None
# Requests happening after CONNECT do not need Proxy-Authorization headers.
self.connect_request = False
def __call__(self):
if self.mode == HTTPMode.transparent:
self.__initial_server_tls = self.server_tls
self.__initial_server_address = self.server_conn.address
while True:
flow = http.HTTPFlow(
self.client_conn,
self.server_conn,
live=self,
mode=self.mode.name
)
if not self._process_flow(flow):
return
def handle_regular_connect(self, f):
self.connect_request = True
try:
self.set_server((f.request.host, f.request.port))
if f.response:
resp = f.response
else:
resp = http.make_connect_response(f.request.data.http_version)
self.send_response(resp)
if is_ok(resp.status_code):
layer = self.ctx.next_layer(self)
layer()
except (
exceptions.ProtocolException, exceptions.NetlibException
) as e:
# HTTPS tasting means that ordinary errors like resolution
# and connection errors can happen here.
self.send_error_response(502, repr(e))
f.error = flow.Error(str(e))
self.channel.ask("error", f)
return False
return False
def handle_upstream_connect(self, f):
# if the user specifies a response in the http_connect hook, we do not connect upstream here.
# https://github.com/mitmproxy/mitmproxy/pull/2473
if not f.response:
self.establish_server_connection(
f.request.host,
f.request.port,
f.request.scheme
)
self.send_request(f.request)
f.response = self.read_response_headers()
f.response.data.content = b"".join(
self.read_response_body(f.request, f.response)
)
self.send_response(f.response)
if is_ok(f.response.status_code):
layer = UpstreamConnectLayer(self, f.request)
return layer()
return False
def _process_flow(self, f):
try:
try:
request: http.HTTPRequest = self.read_request_headers(f)
except exceptions.HttpReadDisconnect:
# don't throw an error for disconnects that happen
# before/between requests.
return False
f.request = request
if request.first_line_format == "authority":
# The standards are silent on what we should do with a CONNECT
# request body, so although it's not common, it's allowed.
f.request.data.content = b"".join(
self.read_request_body(f.request)
)
f.request.data.trailers = self.read_request_trailers(f.request)
f.request.timestamp_end = time.time()
self.channel.ask("http_connect", f)
if self.mode is HTTPMode.regular:
return self.handle_regular_connect(f)
elif self.mode is HTTPMode.upstream:
return self.handle_upstream_connect(f)
else:
msg = "Unexpected CONNECT request."
self.send_error_response(400, msg)
return False
if not self.config.options.relax_http_form_validation:
validate_request_form(self.mode, request)
self.channel.ask("requestheaders", f)
# Re-validate request form in case the user has changed something.
if not self.config.options.relax_http_form_validation:
validate_request_form(self.mode, request)
if request.headers.get("expect", "").lower() == "100-continue":
# TODO: We may have to use send_response_headers for HTTP2
# here.
self.send_response(http.make_expect_continue_response())
request.headers.pop("expect")
if f.request.stream:
f.request.data.content = None
else:
f.request.data.content = b"".join(self.read_request_body(request))
f.request.data.trailers = self.read_request_trailers(f.request)
request.timestamp_end = time.time()
except exceptions.HttpException as e:
# We optimistically guess there might be an HTTP client on the
# other end
self.send_error_response(400, repr(e))
# Request may be malformed at this point, so we unset it.
f.request = None
f.error = flow.Error(str(e))
self.channel.ask("error", f)
self.log(
"request",
"warn",
[f"HTTP protocol error in client request: {e}"]
)
return False
self.log("request", "debug", [repr(request)])
# set first line format to relative in regular mode,
# see https://github.com/mitmproxy/mitmproxy/issues/1759
if self.mode is HTTPMode.regular and request.first_line_format == "absolute":
request.authority = ""
# update host header in reverse proxy mode
if self.config.options.mode.startswith("reverse:") and not self.config.options.keep_host_header:
f.request.host_header = url.hostport(
self.config.upstream_server.scheme,
*self.config.upstream_server.address
)
# Determine .scheme, .host and .port attributes for inline scripts. For
# absolute-form requests, they are directly given in the request. For
# authority-form requests, we only need to determine the request
# scheme. For relative-form requests, we need to determine host and
# port as well.
if self.mode is HTTPMode.transparent:
# Setting request.host also updates the host header, which we want
# to preserve
f.request.data.host = self.__initial_server_address[0]
f.request.data.port = self.__initial_server_address[1]
f.request.data.scheme = b"https" if self.__initial_server_tls else b"http"
self.channel.ask("request", f)
try:
valid = (
websocket.check_handshake(request.headers) and
websocket.check_client_version(request.headers)
)
if valid:
f.metadata['websocket'] = True
# We only support RFC6455 with WebSocket version 13
# allow inline scripts to manipulate the client handshake
self.channel.ask("websocket_handshake", f)
if not f.response:
self.establish_server_connection(
f.request.host,
f.request.port,
f.request.scheme
)
def get_response():
self.send_request_headers(f.request)
if f.request.stream:
chunks = self.read_request_body(f.request)
if callable(f.request.stream):
chunks = f.request.stream(chunks)
self.send_request_body(f.request, chunks)
else:
self.send_request_body(f.request, [f.request.data.content])
self.send_request_trailers(f.request)
f.response = self.read_response_headers()
try:
get_response()
except exceptions.NetlibException as e:
self.log(
"server communication error: %s" % repr(e),
level="debug"
)
# In any case, we try to reconnect at least once. This is
# necessary because it might be possible that we already
# initiated an upstream connection after clientconnect that
# has already been expired, e.g consider the following event
# log:
# > clientconnect (transparent mode destination known)
# > serverconnect (required for client tls handshake)
# > read n% of large request
# > server detects timeout, disconnects
# > read (100-n)% of large request
# > send large request upstream
if isinstance(e, exceptions.Http2ProtocolException):
# do not try to reconnect for HTTP2
raise exceptions.ProtocolException(
"First and only attempt to get response via HTTP2 failed."
)
elif f.request.stream:
# We may have already consumed some request chunks already,
# so all we can do is signal downstream that upstream closed the connection.
self.send_error_response(408, "Request Timeout")
f.error = flow.Error(repr(e))
self.channel.ask("error", f)
return False
self.disconnect()
self.connect()
get_response()
# call the appropriate script hook - this is an opportunity for
# an inline script to set f.stream = True
self.channel.ask("responseheaders", f)
if f.response.stream:
f.response.data.content = None
else:
f.response.data.content = b"".join(
self.read_response_body(f.request, f.response)
)
f.response.timestamp_end = time.time()
# no further manipulation of self.server_conn beyond this point
# we can safely set it as the final attribute value here.
f.server_conn = self.server_conn
else:
# response was set by an inline script.
# we now need to emulate the responseheaders hook.
self.channel.ask("responseheaders", f)
f.response.data.trailers = self.read_response_trailers(f.request, f.response)
self.log("response", "debug", [repr(f.response)])
self.channel.ask("response", f)
if not f.response.stream:
# no streaming:
# we already received the full response from the server and can
# send it to the client straight away.
self.send_response(f.response)
else:
# streaming:
# First send the headers and then transfer the response incrementally
self.send_response_headers(f.response)
chunks = self.read_response_body(
f.request,
f.response
)
if callable(f.response.stream):
chunks = f.response.stream(chunks)
self.send_response_body(f.response, chunks)
f.response.timestamp_end = time.time()
if self.check_close_connection(f):
return False
# Handle 101 Switching Protocols
if f.response.status_code == 101:
# Handle a successful HTTP 101 Switching Protocols Response,
# received after e.g. a WebSocket upgrade request.
# Check for WebSocket handshake
is_websocket = (
websocket.check_handshake(f.request.headers) and
websocket.check_handshake(f.response.headers)
)
if is_websocket and not self.config.options.websocket:
self.log(
"Client requested WebSocket connection, but the protocol is disabled.",
"info"
)
if is_websocket and self.config.options.websocket:
layer = WebSocketLayer(self, f)
else:
layer = self.ctx.next_layer(self)
layer()
return False # should never be reached
except (exceptions.ProtocolException, exceptions.NetlibException) as e:
if not f.response:
self.send_error_response(502, repr(e))
f.error = flow.Error(str(e))
self.channel.ask("error", f)
return False
else:
raise exceptions.ProtocolException(
"Error in HTTP connection: %s" % repr(e)
)
finally:
if f:
f.live = False
return True
def send_error_response(self, code, message, headers=None) -> None:
try:
response = http.make_error_response(code, message, headers)
self.send_response(response)
except (exceptions.NetlibException, h2.exceptions.H2Error, exceptions.Http2ProtocolException):
self.log(f"Failed to send error response to client: {message}", "debug")
def change_upstream_proxy_server(self, address):
# Make set_upstream_proxy_server always available,
# even if there's no UpstreamConnectLayer
if hasattr(self.ctx, "change_upstream_proxy_server"):
self.ctx.change_upstream_proxy_server(address)
elif address != self.server_conn.address:
self.log("Changing upstream proxy to {} (not CONNECTed)".format(repr(address)), "debug")
self.set_server(address)
def establish_server_connection(self, host: str, port: int, scheme: str):
tls = (scheme == "https")
if self.mode is HTTPMode.regular or self.mode is HTTPMode.transparent:
# If there's an existing connection that doesn't match our expectations, kill it.
address = (host, port)
if address != self.server_conn.address or tls != self.server_tls:
self.set_server(address)
self.set_server_tls(tls, address[0])
# Establish connection is necessary.
if not self.server_conn.connected():
self.connect()
else:
if not self.server_conn.connected():
self.connect()
if tls:
raise exceptions.HttpProtocolException("Cannot change scheme in upstream proxy mode.")