%PDF- %PDF-
Direktori : /data/old/usr/lib64/python3.4/site-packages/aiohttp/ |
Current File : //data/old/usr/lib64/python3.4/site-packages/aiohttp/multipart.py |
import asyncio import base64 import binascii import json import re import uuid import warnings import zlib from collections import Mapping, Sequence, deque from urllib.parse import parse_qsl, unquote, urlencode from multidict import CIMultiDict from .hdrs import (CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING, CONTENT_TYPE) from .helpers import CHAR, PY_35, PY_352, TOKEN, parse_mimetype, reify from .http import HttpParser from .payload import (BytesPayload, LookupError, Payload, StringPayload, get_payload, payload_type) __all__ = ('MultipartReader', 'MultipartWriter', 'BodyPartReader', 'BadContentDispositionHeader', 'BadContentDispositionParam', 'parse_content_disposition', 'content_disposition_filename') class BadContentDispositionHeader(RuntimeWarning): pass class BadContentDispositionParam(RuntimeWarning): pass def parse_content_disposition(header): def is_token(string): return string and TOKEN >= set(string) def is_quoted(string): return string[0] == string[-1] == '"' def is_rfc5987(string): return is_token(string) and string.count("'") == 2 def is_extended_param(string): return string.endswith('*') def is_continuous_param(string): pos = string.find('*') + 1 if not pos: return False substring = string[pos:-1] if string.endswith('*') else string[pos:] return substring.isdigit() def unescape(text, *, chars=''.join(map(re.escape, CHAR))): return re.sub('\\\\([{}])'.format(chars), '\\1', text) if not header: return None, {} disptype, *parts = header.split(';') if not is_token(disptype): warnings.warn(BadContentDispositionHeader(header)) return None, {} params = {} while parts: item = parts.pop(0) if '=' not in item: warnings.warn(BadContentDispositionHeader(header)) return None, {} key, value = item.split('=', 1) key = key.lower().strip() value = value.lstrip() if key in params: warnings.warn(BadContentDispositionHeader(header)) return None, {} if not is_token(key): warnings.warn(BadContentDispositionParam(item)) continue elif is_continuous_param(key): if is_quoted(value): value = unescape(value[1:-1]) elif not is_token(value): warnings.warn(BadContentDispositionParam(item)) continue elif is_extended_param(key): if is_rfc5987(value): encoding, _, value = value.split("'", 2) encoding = encoding or 'utf-8' else: warnings.warn(BadContentDispositionParam(item)) continue try: value = unquote(value, encoding, 'strict') except UnicodeDecodeError: # pragma: nocover warnings.warn(BadContentDispositionParam(item)) continue else: failed = True if is_quoted(value): failed = False value = unescape(value[1:-1].lstrip('\\/')) elif is_token(value): failed = False elif parts: # maybe just ; in filename, in any case this is just # one case fix, for proper fix we need to redesign parser _value = '%s;%s' % (value, parts[0]) if is_quoted(_value): parts.pop(0) value = unescape(_value[1:-1].lstrip('\\/')) failed = False if failed: warnings.warn(BadContentDispositionHeader(header)) return None, {} params[key] = value return disptype.lower(), params def content_disposition_filename(params, name='filename'): name_suf = '%s*' % name if not params: return None elif name_suf in params: return params[name_suf] elif name in params: return params[name] else: parts = [] fnparams = sorted((key, value) for key, value in params.items() if key.startswith(name_suf)) for num, (key, value) in enumerate(fnparams): _, tail = key.split('*', 1) if tail.endswith('*'): tail = tail[:-1] if tail == str(num): parts.append(value) else: break if not parts: return None value = ''.join(parts) if "'" in value: encoding, _, value = value.split("'", 2) encoding = encoding or 'utf-8' return unquote(value, encoding, 'strict') return value class MultipartResponseWrapper(object): """Wrapper around the :class:`MultipartBodyReader` to take care about underlying connection and close it when it needs in.""" def __init__(self, resp, stream): self.resp = resp self.stream = stream if PY_35: def __aiter__(self): return self if not PY_352: # pragma: no cover __aiter__ = asyncio.coroutine(__aiter__) @asyncio.coroutine def __anext__(self): part = yield from self.next() if part is None: raise StopAsyncIteration # NOQA return part def at_eof(self): """Returns ``True`` when all response data had been read. :rtype: bool """ return self.resp.content.at_eof() @asyncio.coroutine def next(self): """Emits next multipart reader object.""" item = yield from self.stream.next() if self.stream.at_eof(): yield from self.release() return item @asyncio.coroutine def release(self): """Releases the connection gracefully, reading all the content to the void.""" yield from self.resp.release() class BodyPartReader(object): """Multipart reader for single body part.""" chunk_size = 8192 def __init__(self, boundary, headers, content): self.headers = headers self._boundary = boundary self._content = content self._at_eof = False length = self.headers.get(CONTENT_LENGTH, None) self._length = int(length) if length is not None else None self._read_bytes = 0 self._unread = deque() self._prev_chunk = None self._content_eof = 0 self._cache = {} if PY_35: def __aiter__(self): return self if not PY_352: # pragma: no cover __aiter__ = asyncio.coroutine(__aiter__) @asyncio.coroutine def __anext__(self): part = yield from self.next() if part is None: raise StopAsyncIteration # NOQA return part @asyncio.coroutine def next(self): item = yield from self.read() if not item: return None return item @asyncio.coroutine def read(self, *, decode=False): """Reads body part data. :param bool decode: Decodes data following by encoding method from `Content-Encoding` header. If it missed data remains untouched :rtype: bytearray """ if self._at_eof: return b'' data = bytearray() while not self._at_eof: data.extend((yield from self.read_chunk(self.chunk_size))) if decode: return self.decode(data) return data @asyncio.coroutine def read_chunk(self, size=chunk_size): """Reads body part content chunk of the specified size. :param int size: chunk size :rtype: bytearray """ if self._at_eof: return b'' if self._length: chunk = yield from self._read_chunk_from_length(size) else: chunk = yield from self._read_chunk_from_stream(size) self._read_bytes += len(chunk) if self._read_bytes == self._length: self._at_eof = True if self._at_eof: clrf = yield from self._content.readline() assert b'\r\n' == clrf, \ 'reader did not read all the data or it is malformed' return chunk @asyncio.coroutine def _read_chunk_from_length(self, size): """Reads body part content chunk of the specified size. The body part must has `Content-Length` header with proper value. :param int size: chunk size :rtype: bytearray """ assert self._length is not None, \ 'Content-Length required for chunked read' chunk_size = min(size, self._length - self._read_bytes) chunk = yield from self._content.read(chunk_size) return chunk @asyncio.coroutine def _read_chunk_from_stream(self, size): """Reads content chunk of body part with unknown length. The `Content-Length` header for body part is not necessary. :param int size: chunk size :rtype: bytearray """ assert size >= len(self._boundary) + 2, \ 'Chunk size must be greater or equal than boundary length + 2' first_chunk = self._prev_chunk is None if first_chunk: self._prev_chunk = yield from self._content.read(size) chunk = yield from self._content.read(size) self._content_eof += int(self._content.at_eof()) assert self._content_eof < 3, "Reading after EOF" window = self._prev_chunk + chunk sub = b'\r\n' + self._boundary if first_chunk: idx = window.find(sub) else: idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub))) if idx >= 0: # pushing boundary back to content self._content.unread_data(window[idx:]) if size > idx: self._prev_chunk = self._prev_chunk[:idx] chunk = window[len(self._prev_chunk):idx] if not chunk: self._at_eof = True result = self._prev_chunk self._prev_chunk = chunk return result @asyncio.coroutine def readline(self): """Reads body part by line by line. :rtype: bytearray """ if self._at_eof: return b'' if self._unread: line = self._unread.popleft() else: line = yield from self._content.readline() if line.startswith(self._boundary): # the very last boundary may not come with \r\n, # so set single rules for everyone sline = line.rstrip(b'\r\n') boundary = self._boundary last_boundary = self._boundary + b'--' # ensure that we read exactly the boundary, not something alike if sline == boundary or sline == last_boundary: self._at_eof = True self._unread.append(line) return b'' else: next_line = yield from self._content.readline() if next_line.startswith(self._boundary): line = line[:-2] # strip CRLF but only once self._unread.append(next_line) return line @asyncio.coroutine def release(self): """Like :meth:`read`, but reads all the data to the void. :rtype: None """ if self._at_eof: return while not self._at_eof: yield from self.read_chunk(self.chunk_size) @asyncio.coroutine def text(self, *, encoding=None): """Like :meth:`read`, but assumes that body part contains text data. :param str encoding: Custom text encoding. Overrides specified in charset param of `Content-Type` header :rtype: str """ data = yield from self.read(decode=True) # see https://www.w3.org/TR/html5/forms.html#multipart/form-data-encoding-algorithm # NOQA # and https://dvcs.w3.org/hg/xhr/raw-file/tip/Overview.html#dom-xmlhttprequest-send # NOQA encoding = encoding or self.get_charset(default='utf-8') return data.decode(encoding) @asyncio.coroutine def json(self, *, encoding=None): """Like :meth:`read`, but assumes that body parts contains JSON data. :param str encoding: Custom JSON encoding. Overrides specified in charset param of `Content-Type` header """ data = yield from self.read(decode=True) if not data: return None encoding = encoding or self.get_charset(default='utf-8') return json.loads(data.decode(encoding)) @asyncio.coroutine def form(self, *, encoding=None): """Like :meth:`read`, but assumes that body parts contains form urlencoded data. :param str encoding: Custom form encoding. Overrides specified in charset param of `Content-Type` header """ data = yield from self.read(decode=True) if not data: return None encoding = encoding or self.get_charset(default='utf-8') return parse_qsl(data.rstrip().decode(encoding), keep_blank_values=True, encoding=encoding) def at_eof(self): """Returns ``True`` if the boundary was reached or ``False`` otherwise. :rtype: bool """ return self._at_eof def decode(self, data): """Decodes data according the specified `Content-Encoding` or `Content-Transfer-Encoding` headers value. Supports ``gzip``, ``deflate`` and ``identity`` encodings for `Content-Encoding` header. Supports ``base64``, ``quoted-printable``, ``binary`` encodings for `Content-Transfer-Encoding` header. :param bytearray data: Data to decode. :raises: :exc:`RuntimeError` - if encoding is unknown. :rtype: bytes """ if CONTENT_TRANSFER_ENCODING in self.headers: data = self._decode_content_transfer(data) if CONTENT_ENCODING in self.headers: return self._decode_content(data) return data def _decode_content(self, data): encoding = self.headers[CONTENT_ENCODING].lower() if encoding == 'deflate': return zlib.decompress(data, -zlib.MAX_WBITS) elif encoding == 'gzip': return zlib.decompress(data, 16 + zlib.MAX_WBITS) elif encoding == 'identity': return data else: raise RuntimeError('unknown content encoding: {}'.format(encoding)) def _decode_content_transfer(self, data): encoding = self.headers[CONTENT_TRANSFER_ENCODING].lower() if encoding == 'base64': return base64.b64decode(data) elif encoding == 'quoted-printable': return binascii.a2b_qp(data) elif encoding in ('binary', '8bit', '7bit'): return data else: raise RuntimeError('unknown content transfer encoding: {}' ''.format(encoding)) def get_charset(self, default=None): """Returns charset parameter from ``Content-Type`` header or default. """ ctype = self.headers.get(CONTENT_TYPE, '') *_, params = parse_mimetype(ctype) return params.get('charset', default) @reify def name(self): """Returns filename specified in Content-Disposition header or ``None`` if missed or header is malformed.""" _, params = parse_content_disposition( self.headers.get(CONTENT_DISPOSITION)) return content_disposition_filename(params, 'name') @reify def filename(self): """Returns filename specified in Content-Disposition header or ``None`` if missed or header is malformed.""" _, params = parse_content_disposition( self.headers.get(CONTENT_DISPOSITION)) return content_disposition_filename(params, 'filename') @payload_type(BodyPartReader) class BodyPartReaderPayload(Payload): def __init__(self, value, *args, **kwargs): super().__init__(value, *args, **kwargs) params = {} if value.name is not None: params['name'] = value.name if value.filename is not None: params['filename'] = value.name if params: self.set_content_disposition('attachment', **params) @asyncio.coroutine def write(self, writer): field = self._value chunk = yield from field.read_chunk(size=2**16) while chunk: writer.write(field.decode(chunk)) chunk = yield from field.read_chunk(size=2**16) class MultipartReader(object): """Multipart body reader.""" #: Response wrapper, used when multipart readers constructs from response. response_wrapper_cls = MultipartResponseWrapper #: Multipart reader class, used to handle multipart/* body parts. #: None points to type(self) multipart_reader_cls = None #: Body part reader class for non multipart/* content types. part_reader_cls = BodyPartReader def __init__(self, headers, content): self.headers = headers self._boundary = ('--' + self._get_boundary()).encode() self._content = content self._last_part = None self._at_eof = False self._at_bof = True self._unread = [] if PY_35: def __aiter__(self): return self if not PY_352: # pragma: no cover __aiter__ = asyncio.coroutine(__aiter__) @asyncio.coroutine def __anext__(self): part = yield from self.next() if part is None: raise StopAsyncIteration # NOQA return part @classmethod def from_response(cls, response): """Constructs reader instance from HTTP response. :param response: :class:`~aiohttp.client.ClientResponse` instance """ obj = cls.response_wrapper_cls(response, cls(response.headers, response.content)) return obj def at_eof(self): """Returns ``True`` if the final boundary was reached or ``False`` otherwise. :rtype: bool """ return self._at_eof @asyncio.coroutine def next(self): """Emits the next multipart body part.""" # So, if we're at BOF, we need to skip till the boundary. if self._at_eof: return yield from self._maybe_release_last_part() if self._at_bof: yield from self._read_until_first_boundary() self._at_bof = False else: yield from self._read_boundary() if self._at_eof: # we just read the last boundary, nothing to do there return self._last_part = yield from self.fetch_next_part() return self._last_part @asyncio.coroutine def release(self): """Reads all the body parts to the void till the final boundary.""" while not self._at_eof: item = yield from self.next() if item is None: break yield from item.release() @asyncio.coroutine def fetch_next_part(self): """Returns the next body part reader.""" headers = yield from self._read_headers() return self._get_part_reader(headers) def _get_part_reader(self, headers): """Dispatches the response by the `Content-Type` header, returning suitable reader instance. :param dict headers: Response headers """ ctype = headers.get(CONTENT_TYPE, '') mtype, *_ = parse_mimetype(ctype) if mtype == 'multipart': if self.multipart_reader_cls is None: return type(self)(headers, self._content) return self.multipart_reader_cls(headers, self._content) else: return self.part_reader_cls(self._boundary, headers, self._content) def _get_boundary(self): mtype, *_, params = parse_mimetype(self.headers[CONTENT_TYPE]) assert mtype == 'multipart', 'multipart/* content type expected' if 'boundary' not in params: raise ValueError('boundary missed for Content-Type: %s' % self.headers[CONTENT_TYPE]) boundary = params['boundary'] if len(boundary) > 70: raise ValueError('boundary %r is too long (70 chars max)' % boundary) return boundary @asyncio.coroutine def _readline(self): if self._unread: return self._unread.pop() return (yield from self._content.readline()) @asyncio.coroutine def _read_until_first_boundary(self): while True: chunk = yield from self._readline() if chunk == b'': raise ValueError("Could not find starting boundary %r" % (self._boundary)) chunk = chunk.rstrip() if chunk == self._boundary: return elif chunk == self._boundary + b'--': self._at_eof = True return @asyncio.coroutine def _read_boundary(self): chunk = (yield from self._readline()).rstrip() if chunk == self._boundary: pass elif chunk == self._boundary + b'--': self._at_eof = True epilogue = yield from self._readline() next_line = yield from self._readline() # the epilogue is expected and then either the end of input or the # parent multipart boundary, if the parent boundary is found then # it should be marked as unread and handed to the parent for # processing if next_line[:2] == b'--': self._unread.append(next_line) # otherwise the request is likely missing an epilogue and both # lines should be passed to the parent for processing # (this handles the old behavior gracefully) else: self._unread.extend([next_line, epilogue]) else: raise ValueError('Invalid boundary %r, expected %r' % (chunk, self._boundary)) @asyncio.coroutine def _read_headers(self): lines = [b''] while True: chunk = yield from self._content.readline() chunk = chunk.strip() lines.append(chunk) if not chunk: break parser = HttpParser() headers, *_ = parser.parse_headers(lines) return headers @asyncio.coroutine def _maybe_release_last_part(self): """Ensures that the last read body part is read completely.""" if self._last_part is not None: if not self._last_part.at_eof(): yield from self._last_part.release() self._unread.extend(self._last_part._unread) self._last_part = None class MultipartWriter(Payload): """Multipart body writer.""" def __init__(self, subtype='mixed', boundary=None): boundary = boundary if boundary is not None else uuid.uuid4().hex try: self._boundary = boundary.encode('us-ascii') except UnicodeEncodeError: raise ValueError('boundary should contains ASCII only chars') ctype = 'multipart/{}; boundary="{}"'.format(subtype, boundary) super().__init__(None, content_type=ctype) self._parts = [] self._headers = CIMultiDict() self._headers[CONTENT_TYPE] = self.content_type def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): pass def __iter__(self): return iter(self._parts) def __len__(self): return len(self._parts) @property def boundary(self): return self._boundary def append(self, obj, headers=None): if headers is None: headers = CIMultiDict() if isinstance(obj, Payload): if obj.headers is not None: obj.headers.update(headers) else: obj._headers = headers self.append_payload(obj) else: try: self.append_payload(get_payload(obj, headers=headers)) except LookupError: raise TypeError def append_payload(self, payload): """Adds a new body part to multipart writer.""" # content-type if CONTENT_TYPE not in payload.headers: payload.headers[CONTENT_TYPE] = payload.content_type # compression encoding = payload.headers.get(CONTENT_ENCODING, '').lower() if encoding and encoding not in ('deflate', 'gzip', 'identity'): raise RuntimeError('unknown content encoding: {}'.format(encoding)) if encoding == 'identity': encoding = None # te encoding te_encoding = payload.headers.get( CONTENT_TRANSFER_ENCODING, '').lower() if te_encoding not in ('', 'base64', 'quoted-printable', 'binary'): raise RuntimeError('unknown content transfer encoding: {}' ''.format(te_encoding)) if te_encoding == 'binary': te_encoding = None # size size = payload.size if size is not None and not (encoding or te_encoding): payload.headers[CONTENT_LENGTH] = str(size) # render headers headers = ''.join( [k + ': ' + v + '\r\n' for k, v in payload.headers.items()] ).encode('utf-8') + b'\r\n' self._parts.append((payload, headers, encoding, te_encoding)) def append_json(self, obj, headers=None): """Helper to append JSON part.""" if headers is None: headers = CIMultiDict() data = json.dumps(obj).encode('utf-8') self.append_payload( BytesPayload( data, headers=headers, content_type='application/json')) def append_form(self, obj, headers=None): """Helper to append form urlencoded part.""" assert isinstance(obj, (Sequence, Mapping)) if headers is None: headers = CIMultiDict() if isinstance(obj, Mapping): obj = list(obj.items()) data = urlencode(obj, doseq=True) return self.append_payload( StringPayload(data, headers=headers, content_type='application/x-www-form-urlencoded')) @property def size(self): """Size of the payload.""" if not self._parts: return 0 total = 0 for part, headers, encoding, te_encoding in self._parts: if encoding or te_encoding or part.size is None: return None total += ( 2 + len(self._boundary) + 2 + # b'--'+self._boundary+b'\r\n' part.size + len(headers) + 2 # b'\r\n' ) total += 2 + len(self._boundary) + 4 # b'--'+self._boundary+b'--\r\n' return total @asyncio.coroutine def write(self, writer): """Write body.""" if not self._parts: return for part, headers, encoding, te_encoding in self._parts: yield from writer.write(b'--' + self._boundary + b'\r\n') yield from writer.write(headers) if encoding or te_encoding: w = MultipartPayloadWriter(writer) if encoding: w.enable_compression(encoding) if te_encoding: w.enable_encoding(te_encoding) yield from part.write(w) yield from w.write_eof() else: yield from part.write(writer) yield from writer.write(b'\r\n') yield from writer.write(b'--' + self._boundary + b'--\r\n') class MultipartPayloadWriter: def __init__(self, writer): self._writer = writer self._encoding = None self._compress = None def enable_encoding(self, encoding): if encoding == 'base64': self._encoding = encoding self._encoding_buffer = bytearray() elif encoding == 'quoted-printable': self._encoding = 'quoted-printable' def enable_compression(self, encoding='deflate'): zlib_mode = (16 + zlib.MAX_WBITS if encoding == 'gzip' else -zlib.MAX_WBITS) self._compress = zlib.compressobj(wbits=zlib_mode) @asyncio.coroutine def write_eof(self): if self._compress is not None: chunk = self._compress.flush() if chunk: self._compress = None yield from self.write(chunk) if self._encoding == 'base64': if self._encoding_buffer: yield from self._writer.write(base64.b64encode( self._encoding_buffer)) @asyncio.coroutine def write(self, chunk): if self._compress is not None: if chunk: chunk = self._compress.compress(chunk) if not chunk: return if self._encoding == 'base64': self._encoding_buffer.extend(chunk) if self._encoding_buffer: buffer = self._encoding_buffer div, mod = divmod(len(buffer), 3) enc_chunk, self._encoding_buffer = ( buffer[:div * 3], buffer[div * 3:]) if enc_chunk: enc_chunk = base64.b64encode(enc_chunk) yield from self._writer.write(enc_chunk) elif self._encoding == 'quoted-printable': yield from self._writer.write(binascii.b2a_qp(chunk)) else: yield from self._writer.write(chunk)