%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/varak.net/wiki.varak.net/includes/utils/
Upload File :
Create Path :
Current File : /www/varak.net/wiki.varak.net/includes/utils/ZipDirectoryReader.php

<?php
/**
 * ZIP file directories reader, for the purposes of upload verification.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * A class for reading ZIP file directories, for the purposes of upload
 * verification.
 *
 * Only a functional interface is provided: ZipDirectoryReader::read(). No
 * access is given to object instances.
 */
class ZipDirectoryReader {
	/**
	 * Scan the central directory of a ZIP archive and invoke a callback once
	 * for every entry listed in it.
	 *
	 * The reader exists for verification rather than extraction, so input that
	 * is suspicious, or that could be interpreted in more than one way, is
	 * reported as an error instead of being read the way some standard tool
	 * would read it.
	 *
	 * @param string $fileName Path of the archive to inspect
	 * @param callable $callback Invoked once per entry with a single
	 *   associative array holding these members:
	 *
	 *      - name: The entry's file name. By convention, directory names end
	 *        with a slash.
	 *
	 *      - mtime: The modification time, in MediaWiki 14-char format
	 *
	 *      - size: The uncompressed size of the entry
	 *
	 * @param array $options Read options, keyed by option name. Only one is
	 *    currently recognised:
	 *
	 *      - zip64: When true, behave like a ZIP64-aware library such as
	 *        OpenJDK 7. When false, behave like a library which has no
	 *        knowledge of ZIP64.
	 *
	 *        NOTE: The ZIP64 code path is untested and probably does not
	 *        work. Rejecting ZIP64 uploads outright turned out to be simpler,
	 *        as such archives should be very rare, and proving one safe is
	 *        fairly complex: a valid ZIP64 file normally looks ambiguous and
	 *        broken to a non-ZIP64 reader, and predicting what such a reader
	 *        will make of it is not trivial.
	 *
	 * @return Status A Status object. These fatal errors may be reported:
	 *
	 *      - zip-file-open-error: The file could not be opened.
	 *
	 *      - zip-wrong-format: The file does not look like a ZIP file.
	 *
	 *      - zip-bad: Something in the file data was wrong or ambiguous.
	 *
	 *      - zip-unsupported: The archive relies on a feature which
	 *        ZipDirectoryReader does not support.
	 *
	 * The default messages for these errors are phrased for the upload
	 * verification use case. Whenever a fatal error is returned, further
	 * details are written to the debug log.
	 *
	 * The callback may already have been called any number of times by the
	 * time a fatal error is returned. In that case whatever was passed to the
	 * callback should be thrown away.
	 */
	public static function read( $fileName, $callback, $options = [] ) {
		$reader = new self( $fileName, $callback, $options );
		$status = $reader->execute();

		return $status;
	}

	/** The file name */
	protected $fileName;

	/** The opened file resource */
	protected $file;

	/** The cached length of the file, or null if it has not been loaded yet. */
	protected $fileLength;

	/** A segmented cache of the file contents */
	protected $buffer;

	/** The file data callback */
	protected $callback;

	/** The ZIP64 mode */
	protected $zip64 = false;

	/** Stored headers */
	protected $eocdr, $eocdr64, $eocdr64Locator;

	protected $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size for the file contents cache */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Remember the archive path, the per-entry callback and the read options.
	 * The constructor is not public; instances are created through read().
	 *
	 * @param string $fileName
	 * @param callable $callback
	 * @param array $options Only the 'zip64' key is consulted. When it is
	 *   missing or null, the property keeps its declared default.
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->callback = $callback;
		$this->fileName = $fileName;
		$this->zip64 = isset( $options['zip64'] ) ? $options['zip64'] : $this->zip64;
	}

	/**
	 * Open the archive, locate its central directory and pass every entry to
	 * the callback, according to the settings stored in $this.
	 *
	 * A ZipDirectoryReaderError raised while parsing is turned into a fatal
	 * Status carrying the error code. The file handle is released in a
	 * finally block, so it is also closed when some other exception escapes
	 * (for instance one thrown by the user callback).
	 *
	 * @return Status
	 */
	function execute() {
		$this->data = [];
		$this->file = fopen( $this->fileName, 'r' );
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
			} else {
				// Saturated fields are the marker a ZIP64 writer leaves in the
				// legacy record; in legacy mode such a file must be rejected.
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff
				) {
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
			}
			$this->readCentralDirectory( $offset, $size );
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} finally {
			// Runs on success, on a handled parse error, and while any other
			// exception propagates, so the handle never leaks.
			fclose( $this->file );
		}

		return $status;
	}

	/**
	 * Write a message to the debug log, then abort parsing by throwing.
	 *
	 * @param mixed $code Error code handed to the thrown exception
	 * @param string $debugMessage Detail text for the debug log only
	 * @throws ZipDirectoryReaderError Always
	 */
	function error( $code, $debugMessage ) {
		$logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
		wfDebug( $logLine );

		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Locate and parse the "end of central directory record" (EOCDR), the
	 * header which the ZIP format places after the central directory.
	 *
	 * The result is stored in $this->eocdr. Besides the unpacked fields it
	 * receives the keys 'EOCDR size' (fixed part plus comment), 'file comment'
	 * and 'position' (absolute file offset of the signature).
	 */
	function readEndOfCentralDirectoryRecord() {
		$layout = [
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		];
		$fixedLength = $this->getStructSize( $layout );
		$fileLength = $this->getFileLength();

		if ( $fileLength === 0 ) {
			$this->error( 'zip-wrong-format', "The file is empty." );
		}

		// Only the tail of the file is searched: the fixed record plus a
		// 65536 byte allowance for the trailing comment.
		$searchStart = max( 0, $fileLength - 65536 - $fixedLength );
		$tail = $this->getBlock( $searchStart );

		// Use the last occurrence of the signature within the tail.
		$sigOffset = strrpos( $tail, "PK\x05\x06" );
		if ( $sigOffset === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
		$commentLength = $this->eocdr['file comment length'];
		$this->eocdr['EOCDR size'] = $fixedLength + $commentLength;

		// The record, including its comment, has to end exactly at the end
		// of the file.
		$bytesAfterSignature = strlen( $tail ) - $sigOffset;
		if ( $fixedLength + $commentLength != $bytesAfterSignature ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}

		$onFirstDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
		if ( !$onFirstDisk ) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}

		$comment = $this->unpack(
			$tail,
			[ 'file comment' => [ 'string', $commentLength ] ],
			$sigOffset + $fixedLength
		);
		$this->eocdr += $comment;
		$this->eocdr['position'] = $searchStart + $sigOffset;
	}

	/**
	 * Parse the "ZIP64 end of central directory locator" into
	 * $this->eocdr64Locator. An error is raised when the expected signature
	 * is not found at the computed position.
	 */
	function readZip64EndOfCentralDirectoryLocator() {
		$layout = [
			'signature' => [ 'string', 4 ],
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		];
		$layoutSize = $this->getStructSize( $layout );

		// The locator is read from the bytes directly in front of the EOCDR,
		// which itself ends at the end of the file.
		$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $layoutSize;
		$locator = $this->unpack( $this->getBlock( $locatorPos, $layoutSize ), $layout );
		$this->eocdr64Locator = $locator;

		if ( $locator['signature'] !== "PK\x06\x07" ) {
			// Java tolerates a bad signature here and still goes on to read
			// the EOCDR64. Falling back to the plain EOCDR would therefore be
			// unsafe, so the upload has to be rejected.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Parse the "ZIP64 end of central directory record" into $this->eocdr64.
	 * In ZIP64 files it may take the place of the regular "end of central
	 * directory record". The locator must have been read beforehand, since
	 * it supplies the position of this record.
	 */
	function readZip64EndOfCentralDirectoryRecord() {
		$locator = $this->eocdr64Locator;
		$locatorIsSingleDisk = $locator['eocdr64 start disk'] == 0
			&& $locator['number of disks'] == 0;
		if ( !$locatorIsSingleDisk ) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$layout = [
			'signature' => [ 'string', 4 ],
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		];
		$raw = $this->getBlock( $locator['eocdr64 offset'], $this->getStructSize( $layout ) );
		$record = $this->unpack( $raw, $layout );
		$this->eocdr64 = $record;

		if ( $record['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}

		$recordIsSingleDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
		if ( !$recordIsSingleDisk ) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Work out where the central directory is, from the point of view of a
	 * reader without ZIP64 support.
	 *
	 * @return array Two-element list: the directory offset and its size.
	 */
	function findOldCentralDirectory() {
		$eocdr = $this->eocdr;
		$cdOffset = $eocdr['CD offset'];
		$cdSize = $eocdr['CD size'];

		// Readers differ: some trust the offset field, others assume the
		// directory ends where the EOCDR starts. Accept the file only when
		// both approaches lead to the same place.
		$directoryEnd = $cdOffset + $cdSize;
		if ( $directoryEnd != $eocdr['position'] ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return [ $cdOffset, $cdSize ];
	}

	/**
	 * Work out where the central directory is, from the point of view of a
	 * ZIP64-compliant reader.
	 *
	 * @return array Two-element list: the directory offset and its size.
	 */
	function findZip64CentralDirectory() {
		// Precedence between the ZIP64 headers and the original ones is not
		// pinned down by the spec. The rules applied here are those of
		// zip_util.c in OpenJDK 7.
		$cdSize = $this->eocdr['CD size'];
		$cdOffset = $this->eocdr['CD offset'];
		$expectedEnd = $this->eocdr['position'];

		$legacyFieldsSaturated = $cdSize == 0xffffffff
			|| $cdOffset == 0xffffffff
			|| $this->eocdr['CD entries total'] == 0xffff;

		if ( $legacyFieldsSaturated ) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					// Take the ZIP64 values; the directory must now end
					// where the EOCDR64 begins.
					$cdSize = $this->eocdr64['CD size'];
					$cdOffset = $this->eocdr64['CD offset'];
					$expectedEnd = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}

		// Readers differ: some trust the offset field, others derive the
		// directory location from the position of the end record. Accept the
		// file only when both approaches lead to the same place.
		if ( $cdOffset + $cdSize != $expectedEnd ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return [ $cdOffset, $cdSize ];
	}

	/**
	 * Walk through the central directory stored at the given location and
	 * report each entry to the callback as an array with the keys 'name',
	 * 'mtime' and 'size'.
	 *
	 * @param int $offset Absolute file position of the directory
	 * @param int $size Length of the directory in bytes
	 */
	function readCentralDirectory( $offset, $size ) {
		$directory = $this->getBlock( $offset, $size );

		$headerLayout = [
			'signature' => [ 'string', 4 ],
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		];
		$headerSize = $this->getStructSize( $headerLayout );

		for ( $cursor = 0; $cursor < $size; ) {
			// Fixed-size part of the entry
			$entry = $this->unpack( $directory, $headerLayout, $cursor );
			$cursor += $headerSize;

			if ( $entry['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// Variable-size part, whose lengths were given in the fixed part
			$tailLayout = [
				'name' => [ 'string', $entry['name length'] ],
				'extra field' => [ 'string', $entry['extra field length'] ],
				'comment' => [ 'string', $entry['comment length'] ],
			];
			$entry += $this->unpack( $directory, $tailLayout, $cursor );
			$cursor += $this->getStructSize( $tailLayout );

			// In ZIP64 mode a saturated 32-bit field means the real value
			// is looked up in the ZIP64 extra field.
			$saturated = $entry['compressed size'] == 0xffffffff
				|| $entry['uncompressed size'] == 0xffffffff
				|| $entry['local header offset'] == 0xffffffff;
			if ( $this->zip64 && $saturated ) {
				$wideFields = $this->unpackZip64Extra( $entry['extra field'] );
				if ( $wideFields ) {
					// Array union: the ZIP64 values win over the 32-bit ones.
					$entry = $wideFields + $entry;
				}
			}

			if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Turn the packed MS-DOS date and time into MediaWiki format.
			// The bit layout is described in the MS-DOS 2.0 Programmer's
			// Reference, pages 3-5 and 3-6.
			$dosDate = $entry['mod date'];
			$dosTime = $entry['mod time'];
			$mtime = sprintf( "%04d%02d%02d%02d%02d%02d",
				1980 + ( $dosDate >> 9 ),
				( $dosDate >> 5 ) & 15,
				$dosDate & 31,
				( $dosTime >> 11 ) & 31,
				( $dosTime >> 5 ) & 63,
				( $dosTime & 31 ) * 2
			);

			// Names flagged as UTF-8 are passed through; all others are
			// converted from CP437.
			$isUtf8 = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 );
			$fileName = $isUtf8
				? $entry['name']
				: iconv( 'CP437', 'UTF-8', $entry['name'] );

			// Hand a compact, sensibly formatted summary to the user
			call_user_func( $this->callback, [
				'name' => $fileName,
				'mtime' => $mtime,
				'size' => $entry['uncompressed size'],
			] );
		}
	}

	/**
	 * Search an "extra field" blob for the ZIP64 record and decode it.
	 *
	 * The blob is treated as a sequence of records, each introduced by a
	 * two-byte id and a two-byte payload size.
	 *
	 * @param string $extraField Raw extra field data of a directory entry
	 * @return array|bool The decoded ZIP64 fields of the first record with
	 *   the ZIP64 id, or false when there is no such record
	 */
	function unpackZip64Extra( $extraField ) {
		$recordHeader = [
			'id' => 2,
			'size' => 2,
		];
		$recordHeaderSize = $this->getStructSize( $recordHeader );

		$zip64Layout = [
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		];

		$fieldLength = strlen( $extraField );
		$cursor = 0;
		while ( $cursor < $fieldLength ) {
			$header = $this->unpack( $extraField, $recordHeader, $cursor );
			$payloadStart = $cursor + $recordHeaderSize;
			$payload = $this->unpack( $extraField,
				[ 'data' => [ 'string', $header['size'] ] ],
				$payloadStart );

			if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $payload['data'], $zip64Layout );
			}

			// Not the record we are after; continue behind its payload.
			$cursor = $payloadStart + $header['size'];
		}

		return false;
	}

	/**
	 * Return the size of the opened file in bytes. The value is obtained
	 * with fstat() on first use and served from $this->fileLength afterwards.
	 *
	 * @return int
	 */
	function getFileLength() {
		if ( $this->fileLength !== null ) {
			return $this->fileLength;
		}

		$info = fstat( $this->file );
		$this->fileLength = $info['size'];

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If the request cannot be
	 * satisfied from the file (negative position or length, or a range that
	 * reaches past the end of the file), a 'zip-bad' error is raised.
	 *
	 * @param int $start The byte offset of the start of the block.
	 * @param int|null $length The number of bytes to return. If omitted, the
	 *    remainder of the file will be returned.
	 *
	 * @throws ZipDirectoryReaderError
	 * @return string
	 */
	function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		// Positions are computed from untrusted header fields and can end up
		// below zero on short files. Reject them here explicitly instead of
		// relying on a later seek failure.
		if ( $start < 0 ) {
			$this->error( 'zip-bad', "getBlock() requested negative position $start" );
		}
		if ( $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		// substr() gives negative lengths a special meaning (trim from the
		// end), so they must never get that far.
		if ( $length < 0 ) {
			$this->error( 'zip-bad', "getBlock() requested negative length $length" );
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		if ( $length == 0 ) {
			// Nothing to read
			return '';
		}

		// Segments covering [$start, $end): the first one contains $start,
		// and $endSeg is the first segment lying entirely at or after $end.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		// Cut the requested range out of the concatenated segments
		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Helper for getBlock(): return the slice of the file which begins at
	 * $segIndex * self::SEGSIZE and is at most self::SEGSIZE bytes long.
	 * Slices are kept in $this->buffer so each one is read only once.
	 *
	 * A slice which extends past the end of the file comes back shortened,
	 * and one which starts at or past the end of the file comes back as an
	 * empty string.
	 *
	 * @param int $segIndex
	 *
	 * @return string
	 */
	function getSegment( $segIndex ) {
		if ( isset( $this->buffer[$segIndex] ) ) {
			// Already cached
			return $this->buffer[$segIndex];
		}

		$bytePos = $segIndex * self::SEGSIZE;
		if ( $bytePos >= $this->getFileLength() ) {
			$this->buffer[$segIndex] = '';

			return '';
		}

		$seekFailed = (bool)fseek( $this->file, $bytePos );
		if ( $seekFailed ) {
			$this->error( 'zip-bad', "seek to $bytePos failed" );
		}

		$contents = fread( $this->file, self::SEGSIZE );
		if ( $contents === false ) {
			$this->error( 'zip-bad', "read from $bytePos failed" );
		}
		$this->buffer[$segIndex] = $contents;

		return $contents;
	}

	/**
	 * Add up the byte widths of all members of a structure description. The
	 * description format is the one documented for unpack(): a member is
	 * either a plain integer width, or an array whose second element is the
	 * width.
	 *
	 * @param array $struct
	 * @return int
	 */
	function getStructSize( $struct ) {
		$total = 0;
		foreach ( $struct as $member ) {
			$total += is_array( $member ) ? $member[1] : $member;
		}

		return $total;
	}

	/**
	 * Decode a binary structure into an associative array. A friendlier take
	 * on what the built-in unpack() function does.
	 *
	 * @param string $string The binary input
	 *
	 * @param array $struct Structure description: member name => type. A type
	 *    which is an integer denotes an unsigned little-endian integer of that
	 *    many bytes. A type which is an array starts with a type name,
	 *    followed by type-dependent parameters. The only type name defined is:
	 *       - "string": The second array element is the string length.
	 *          The string is not null terminated.
	 *
	 * @param int $offset Position within $string at which decoding starts
	 *
	 * @throws MWException When an unknown type name is used
	 * @return array Decoded members. Large integers may come back as floating
	 *    point numbers, so comparing them weakly is advised.
	 */
	function unpack( $string, $struct, $offset = 0 ) {
		if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$result = [];
		$cursor = $offset;
		foreach ( $struct as $field => $spec ) {
			if ( !is_array( $spec ) ) {
				// Unsigned little-endian integer: begin at the most
				// significant byte, which is stored last. The arithmetic
				// switches to floating point by itself when the value
				// outgrows the integer range.
				$width = intval( $spec );
				$number = 0;
				for ( $bytePos = $cursor + $width - 1; $bytePos >= $cursor; $bytePos-- ) {
					$number = $number * 256 + ord( $string[$bytePos] );
				}

				// Refuse values for which precision may have been lost
				if ( $number > 2 ** 52 ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$result[$field] = $number;
				$cursor += $width;
				continue;
			}

			list( $typeName, $width ) = $spec;
			// Loose comparison on purpose, same as a switch statement
			if ( $typeName != 'string' ) {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$result[$field] = substr( $string, $cursor, $width );
			$cursor += $width;
		}

		return $result;
	}

	/**
	 * Tell whether one particular bit of an integer value is set.
	 *
	 * @param int $value
	 * @param int $bitIndex Position of the bit, counting from 0 at the LSB.
	 * @return bool
	 */
	function testBit( $value, $bitIndex ) {
		$shifted = $value >> $bitIndex;

		return ( $shifted & 1 ) === 1;
	}

	/**
	 * Debugging aid: print a string the way "hexdump -C" would, sixteen
	 * bytes per row, with the hexadecimal values on the left and the
	 * printable characters on the right.
	 *
	 * @param string $s
	 */
	function hexDump( $s ) {
		$total = strlen( $s );
		for ( $rowStart = 0; $rowStart < $total; $rowStart += 16 ) {
			$hexColumn = '';
			$textColumn = '';
			for ( $col = 0; $col < 16; $col++ ) {
				// One space in front of each byte, plus an extra one that
				// separates the two groups of eight.
				$hexColumn .= ( $col == 8 ) ? '  ' : ' ';

				$index = $rowStart + $col;
				if ( $index >= $total ) {
					// Past the end of the input: pad both columns
					$hexColumn .= '  ';
					$textColumn .= ' ';
					continue;
				}

				$char = $s[$index];
				$hexColumn .= sprintf( "%02X", ord( $char ) );
				$textColumn .= ctype_print( $char ) ? $char : '.';
			}

			print sprintf( "%08X ", $rowStart ) . $hexColumn . "  |" . $textColumn . "|\n";
		}
	}
}

Zerion Mini Shell 1.0