%PDF- %PDF-
| Direktori : /lib/calibre/calibre/utils/ |
| Current File : //lib/calibre/calibre/utils/imghdr.py |
#!/usr/bin/env python3
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from struct import unpack, error
import os
from calibre.utils.speedups import ReadOnlyFileBuffer
from polyglot.builtins import string_or_bytes
""" Recognize image file formats and sizes based on their first few bytes."""
HSIZE = 120
def what(file, h=None):
' Recognize image headers '
if h is None:
if isinstance(file, string_or_bytes):
with lopen(file, 'rb') as f:
h = f.read(HSIZE)
else:
location = file.tell()
h = file.read(HSIZE)
file.seek(location)
if isinstance(h, bytes):
h = memoryview(h)
for tf in tests:
res = tf(h)
if res:
return res
# There exist some jpeg files with no headers, only the starting two bits
# If we cannot identify as anything else, identify as jpeg.
if h[:2] == b'\xff\xd8':
return 'jpeg'
return None
def identify(src):
''' Recognize file format and sizes. Returns format, width, height. width
and height will be -1 if not found and fmt will be None if the image is not
recognized. '''
needs_close = False
if isinstance(src, str):
stream = lopen(src, 'rb')
needs_close = True
elif isinstance(src, bytes):
stream = ReadOnlyFileBuffer(src)
else:
stream = src
try:
return _identify(stream)
finally:
if needs_close:
stream.close()
def _identify(stream):
width = height = -1
pos = stream.tell()
head = stream.read(HSIZE)
stream.seek(pos)
fmt = what(None, head)
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
size = len(head)
if fmt == 'png':
# PNG
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
try:
width, height = unpack(b">LL", s)
except error:
return fmt, width, height
elif fmt == 'jpeg':
# JPEG
pos = stream.tell()
try:
height, width = jpeg_dimensions(stream)
except Exception:
return fmt, width, height
finally:
stream.seek(pos)
elif fmt == 'gif':
# GIF
try:
width, height = unpack(b"<HH", head[6:10])
except error:
return fmt, width, height
elif size >= 56 and fmt == 'jpeg2000':
# JPEG2000
try:
height, width = unpack(b'>LL', head[48:56])
except error:
return fmt, width, height
return fmt, width, height
# ---------------------------------#
# Subroutines per image file type #
# ---------------------------------#
tests = []
def test(f):
tests.append(f)
return f
@test
def jpeg(h):
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
the original code was failing with some jpegs that included ICC_PROFILE
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
if h[6:10] in (b'JFIF', b'Exif'):
return 'jpeg'
if h[:2] == b'\xff\xd8':
q = h[:32].tobytes()
if b'JFIF' in q or b'8BIM' in q:
return 'jpeg'
def jpeg_dimensions(stream):
# A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
# See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
# We read the dimensions from the first SOFn section we come across
stream.seek(2, os.SEEK_CUR)
def read(n):
ans = stream.read(n)
if len(ans) != n:
raise ValueError('Truncated JPEG data')
return ans
def read_byte():
return read(1)[0]
x = None
while True:
# Find next marker
while x != 0xff:
x = read_byte()
# Soak up padding
marker = 0xff
while marker == 0xff:
marker = read_byte()
q = marker
if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
# SOFn marker
stream.seek(3, os.SEEK_CUR)
return unpack(b'>HH', read(4))
elif 0xd8 <= q <= 0xda:
break # start of image, end of image, start of scan, no point
elif q == 0:
return -1, -1 # Corrupted JPEG
elif q == 0x01 or 0xd0 <= q <= 0xd7:
# Standalone marker
continue
else:
# skip this section
size = unpack(b'>H', read(2))[0]
stream.seek(size - 2, os.SEEK_CUR)
# standalone marker, keep going
return -1, -1
@test
def png(h):
if h[:8] == b"\211PNG\r\n\032\n":
return 'png'
@test
def gif(h):
"""GIF ('87 and '89 variants)"""
if h[:6] in (b'GIF87a', b'GIF89a'):
return 'gif'
@test
def tiff(h):
"""TIFF (can be in Motorola or Intel byte order)"""
if h[:2] in (b'MM', b'II'):
if h[2:4] == b'\xbc\x01':
return 'jxr'
return 'tiff'
@test
def webp(h):
if h[:4] == b'RIFF' and h[8:12] == b'WEBP':
return 'webp'
@test
def rgb(h):
"""SGI image library"""
if h[:2] == b'\001\332':
return 'rgb'
@test
def pbm(h):
"""PBM (portable bitmap)"""
if len(h) >= 3 and \
h[0] == b'P' and h[1] in b'14' and h[2] in b' \t\n\r':
return 'pbm'
@test
def pgm(h):
"""PGM (portable graymap)"""
if len(h) >= 3 and \
h[0] == b'P' and h[1] in b'25' and h[2] in b' \t\n\r':
return 'pgm'
@test
def ppm(h):
"""PPM (portable pixmap)"""
if len(h) >= 3 and \
h[0] == b'P' and h[1] in b'36' and h[2] in b' \t\n\r':
return 'ppm'
@test
def rast(h):
"""Sun raster file"""
if h[:4] == b'\x59\xA6\x6A\x95':
return 'rast'
@test
def xbm(h):
"""X bitmap (X10 or X11)"""
s = b'#define '
if h[:len(s)] == s:
return 'xbm'
@test
def bmp(h):
if h[:2] == b'BM':
return 'bmp'
@test
def emf(h):
if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF':
return 'emf'
@test
def jpeg2000(h):
if h[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n':
return 'jpeg2000'
@test
def svg(h):
if h[:4] == b'<svg' or (h[:2] == b'<?' and h[2:5].tobytes().lower() == b'xml' and b'<svg' in h.tobytes()):
return 'svg'
tests = tuple(tests)