# %PDF- %PDF-
# Directory    : /lib/python3/dist-packages/mechanize/
# Current file : //lib/python3/dist-packages/mechanize/_gzip.py
from __future__ import absolute_import
import struct
import zlib
from io import DEFAULT_BUFFER_SIZE
from ._urllib2_fork import BaseHandler
from .polyglot import is_py2
# 32-bit mask applied to CRC-32 values before they are compared or packed.
# On Python 2 the constant is converted to ``long``; ``long`` is only looked
# up when ``is_py2`` is true.
CRC_MASK = long(0xffffffff) if is_py2 else 0xffffffff
def gzip_prefix():
    """Return the fixed 10-byte gzip header written before compressed data.

    Field layout follows http://www.gzip.org/zlib/rfc-gzip.html
    """
    header = (
        b'\x1f\x8b'   # ID1 and ID2: gzip marker
        b'\x08'       # CM: compression method
        b'\x00'       # FLG: none set
        b'\0\0\0\0'   # MTIME: zeroed so as not to leak timezone information
        b'\x02'       # XFL: max compression, slowest algo
        b'\xff'       # OS: unknown
    )
    return header
def compress_readable_output(src_file, compress_level=6):
    """Yield the contents of *src_file* as a gzip stream, chunk by chunk.

    :param src_file: object with a ``read(n)`` method returning bytes; it is
        read in ``DEFAULT_BUFFER_SIZE`` pieces until it returns nothing.
    :param compress_level: level passed to ``zlib.compressobj``.
    :return: generator of byte strings. The first yielded chunk starts with
        the header from ``gzip_prefix()``; the last one ends with the
        trailer (CRC-32 and input size, both little-endian 32-bit).

    The gzip header is always emitted, even when *src_file* is empty, so the
    output is a well-formed gzip stream for any input.
    """
    crc = zlib.crc32(b"")
    size = 0
    zobj = zlib.compressobj(compress_level, zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, zlib.Z_DEFAULT_STRATEGY)
    # Header still waiting to be sent; cleared once it has been yielded.
    pending_prefix = gzip_prefix()
    while True:
        data = src_file.read(DEFAULT_BUFFER_SIZE)
        if not data:
            break
        size += len(data)
        crc = zlib.crc32(data, crc)
        yield pending_prefix + zobj.compress(data)
        pending_prefix = b''
    # For empty input the loop never ran, so the header goes out here.
    # The size field is 32 bits wide and holds the input length modulo 2**32;
    # masking keeps struct.pack from failing on inputs of 4 GiB or more.
    yield pending_prefix + zobj.flush() + struct.pack(
        b"<LL", crc & CRC_MASK, size & CRC_MASK)
def read_amt(f, amt):
    """Read *amt* bytes from *f*, repeating short reads until satisfied.

    :param f: object with a ``read(n)`` method returning bytes.
    :param amt: number of bytes wanted; zero or less returns ``b''``.
    :raises EOFError: if ``f.read`` returns nothing before *amt* bytes
        have been collected.
    """
    pieces = []
    remaining = amt
    while remaining > 0:
        piece = f.read(remaining)
        if not piece:
            raise EOFError('Unexpected end of compressed stream')
        pieces.append(piece)
        remaining -= len(piece)
    return b''.join(pieces)
class UnzipWrapper:
    """File-like reader that inflates the body of a gzip stream.

    *fp* is expected to be positioned at the start of the deflate data
    (``create_gzip_decompressor`` consumes the gzip header before building
    this wrapper). ``read`` returns decompressed bytes; when the deflate
    stream ends, the 8-byte trailer is read and its CRC-32 is checked
    against the CRC of everything decompressed so far.
    """

    def __init__(self, fp):
        # Negative wbits: per the zlib docs this means raw deflate data, so
        # the gzip header and trailer are handled by this module instead.
        self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
        # Decompressed bytes produced but not yet handed to the caller.
        self.__data = b''
        self.__crc = zlib.crc32(self.__data) & CRC_MASK
        self.__fp = fp
        self.__size = 0
        self.__is_fully_read = False

    def read(self, sz=-1):
        """Return decompressed bytes.

        :param sz: maximum number of bytes to return; a negative value
            reads until the end of the compressed stream.
        :raises ValueError: if the underlying file ends before the gzip
            trailer, or if the trailer CRC does not match.
        :raises EOFError: (from ``read_amt``) if the trailer is truncated.
        """
        amt_read = 0
        ans = []
        if self.__data:
            if sz < 0 or len(self.__data) < sz:
                # Buffered bytes are not enough: take them all and go on.
                ans.append(self.__data)
                amt_read += len(self.__data)
                self.__data = b''
            else:
                # The request can be served entirely from the buffer.
                self.__data, ret = self.__data[sz:], self.__data[:sz]
                return ret
        if not self.__is_fully_read:
            # unused_data becomes non-empty once the decoder has seen bytes
            # beyond the end of the deflate stream (i.e. the trailer).
            while not self.__decoder.unused_data and (
                    sz < 0 or amt_read < sz):
                chunk = self.__fp.read(1024)
                if chunk:
                    if self.__decoder.unconsumed_tail:
                        chunk = self.__decoder.unconsumed_tail + chunk
                    chunk = self.__decoder.decompress(chunk)
                    ans.append(chunk)
                    amt_read += len(chunk)
                    self.__size += len(chunk)
                    self.__crc = zlib.crc32(chunk, self.__crc)
                else:
                    if not self.__decoder.unused_data:
                        raise ValueError(
                            'unexpected end of compressed gzip data,'
                            ' before reading trailer')
                    break
            if self.__decoder.unused_data:
                # End of compressed stream reached
                tail = self.__decoder.unused_data
                if len(tail) < 8:
                    tail += read_amt(self.__fp, 8 - len(tail))
                # ignore any extra bytes after end of compressed stream
                self.__fp.read()
                # check CRC, ignore size mismatch. Only the first 8 bytes
                # are the trailer; unused_data may already hold more than
                # that, and struct.unpack needs an exact-length buffer.
                crc, _size = struct.unpack(b'<LL', tail[:8])
                if (crc & CRC_MASK) != (self.__crc & CRC_MASK):
                    raise ValueError(
                        'gzip stream is corrupted, CRC does not match')
                self.__is_fully_read = True
        ans = b''.join(ans)
        if len(ans) > sz and sz > -1:
            # Decompression overshot the request; keep the surplus buffered.
            ans, self.__data = ans[:sz], ans[sz:]
        return ans

    def readline(self, sz=-1):
        """Return the next line, including its trailing newline if present.

        The whole remaining stream is read into memory first; whatever
        follows the returned line is kept buffered for later calls.

        :param sz: if non-negative, return at most *sz* bytes.
        :return: the line, or ``b''`` at end of stream.
        """
        # Dont care about making this efficient
        data = self.read()
        idx = data.find(b'\n')
        # find() returns -1 when absent; 0 is a valid position (empty line).
        if idx >= 0:
            if sz < 0 or idx < sz:
                line, self.__data = data[:idx + 1], data[idx + 1:]
            else:
                line, self.__data = data[:sz], data[sz:]
        else:
            if sz > -1:
                line, self.__data = data[:sz], data[sz:]
            else:
                line = data
        return line

    def close(self):
        """Close the underlying file object."""
        self.__fp.close()

    def fileno(self):
        """Return the file descriptor of the underlying file object."""
        return self.__fp.fileno()

    def __iter__(self):
        """Yield lines until ``readline`` returns an empty result."""
        while True:
            line = self.readline()
            if not line:
                return
            yield line

    def next(self):
        """Return the next line; raise ``StopIteration`` at end of stream."""
        ans = self.readline()
        if not ans:
            raise StopIteration()
        return ans

    # Python 3 spelling of the iterator protocol method.
    __next__ = next
def create_gzip_decompressor(zipped_file):
    """Consume the gzip header of *zipped_file* and return an UnzipWrapper.

    :param zipped_file: object with a ``read(n)`` method, positioned at the
        first byte of a gzip stream.
    :return: an ``UnzipWrapper`` reading the compressed data that follows
        the header.
    :raises ValueError: if the magic bytes or compression method are not
        the expected ones.
    :raises EOFError: (from ``read_amt``) if the header is truncated.
    """
    prefix = read_amt(zipped_file, 10)
    if prefix[:2] != b'\x1f\x8b':
        raise ValueError('gzip stream has incorrect magic bytes: %r' %
                         prefix[:2])
    if prefix[2:3] != b'\x08':
        # Slice rather than index so the value is bytes on Python 3 as well.
        raise ValueError('gzip stream has unknown compression method: %r' %
                         prefix[2:3])
    flag = ord(prefix[3:4])
    if flag & 4:  # extra
        # 2-byte little-endian length. struct is used because indexing bytes
        # gives ints on Python 3, where ord() on them raises TypeError.
        extra_amt = struct.unpack(b'<H', read_amt(zipped_file, 2))[0]
        if extra_amt:
            read_amt(zipped_file, extra_amt)
    if flag & 8:  # filename
        # NUL-terminated string: skip up to and including the terminator.
        while read_amt(zipped_file, 1) != b'\0':
            continue
    if flag & 16:  # comment
        while read_amt(zipped_file, 1) != b'\0':
            continue
    if flag & 2:  # crc
        read_amt(zipped_file, 2)
    return UnzipWrapper(zipped_file)
class HTTPGzipProcessor(BaseHandler):
    """Handler that decodes gzip response bodies and can request gzip.

    :param request_gzip: when true, ``gzip`` is added to the
        ``Accept-Encoding`` header of outgoing requests.
    """

    handler_order = 200  # response processing before HTTPEquivProcessor

    def __init__(self, request_gzip=False):
        self.request_gzip = request_gzip

    def __copy__(self):
        return self.__class__(self.request_gzip)

    def http_request(self, request):
        """Add ``gzip`` to Accept-Encoding if enabled; return *request*."""
        if self.request_gzip:
            existing = [
                x.strip().lower()
                for x in request.get_header('Accept-Encoding', '').split(',')
            ]
            if 'gzip' not in existing:
                existing.append('gzip')
            # filter(None, ...) drops the empty entry produced when the
            # header was absent or empty.
            request.add_header("Accept-Encoding",
                               ', '.join(filter(None, existing)))
        return request

    def http_response(self, request, response):
        """Wrap the body of a gzip-encoded *response* in a decompressor.

        For each ``Content-encoding`` header mentioning gzip, the response
        file object is replaced by a gzip decompressor and the
        ``Content-encoding`` and ``Content-length`` headers are removed.
        """
        # post-process response
        h = response.info()
        enc_hdrs = h.getheaders("Content-encoding")
        for enc_hdr in enc_hdrs:
            # Content-coding names are case-insensitive, so compare in
            # lower case ("GZIP" and "Gzip" must also be decoded).
            if "gzip" in enc_hdr.lower():
                response._set_fp(create_gzip_decompressor(response.fp))
                del h['Content-encoding']
                del h['Content-length']
        return response

    https_response = http_response
    https_request = http_request