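# NOTE(review): the next three lines appear to be residue from a web file-manager
# listing, not part of the module; kept as comments so the file parses as Perl.
# %PDF- %PDF-
# Direktori : /data/old/usr/share/perl5/vendor_perl/Net/FTP/ |
# Current File : //data/old/usr/share/perl5/vendor_perl/Net/FTP/RetrHandle.pm |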
package Net::FTP::RetrHandle;
our $VERSION = '0.2';

use warnings;
use strict;

use constant DEFAULT_MAX_SKIPSIZE => 1024 * 1024 * 2;
use constant DEFAULT_BLOCKSIZE    => 10240;    # Net::FTP's default

use base 'IO::Seekable';
# We don't use base 'IO::Handle'; it currently confuses Archive::Zip.

use Carp;
use Scalar::Util;

=head1 NAME

Net::FTP::RetrHandle - Tied or IO::Handle-compatible interface to a file retrieved by FTP

=head1 SYNOPSIS

Provides a file reading interface for reading all or parts of files
located on a remote FTP server, including emulation of C<seek> and
support for downloading only the parts of the file requested.

=head1 DESCRIPTION

Support for skipping the beginning of the file is implemented with the
FTP C<REST> command, which starts a retrieval at any point in the
file.  Support for skipping the end of the file is implemented with
the FTP C<ABOR> command, which stops the transfer.  With these two
commands and some careful tracking of the current file position, we're
able to reliably emulate a C<seek/read> pair, and get only the parts
of the file that are actually read.

This was originally designed for use with
L<Archive::Zip|Archive::Zip>; it's reliable enough that the table of
contents and individual files can be extracted from a remote ZIP
archive without downloading the whole thing.  See L</EXAMPLE> below.

An interface compatible with L<IO::Handle|IO::Handle> is provided,
along with a C<tie>-based interface.

Remember that an FTP server can only do one thing at a time, so make
sure to C<close> your connection before asking the FTP server to do
anything else.

=head1 CONSTRUCTOR

=head2 new ( $ftp, $filename, options... )

Creates a new L<IO::Handle|IO::Handle>-compatible object to fetch all
or parts of C<$filename> using the FTP connection C<$ftp>.

Available options:

=over 4

=item MaxSkipSize => $size

If we need to move forward in a file or close the connection,
sometimes it's faster to just read the bytes we don't need than to
abort the connection and restart.  This setting tells how many
unnecessary bytes we're willing to read rather than abort.  An
appropriate setting depends on the speed of transferring files and the
speed of reconnecting to the server.

=item BlockSize => $size

When doing buffered reads, how many bytes to read at once.  The
default is the same as the default for L<Net::FTP|Net::FTP>, so it's
generally best to leave it alone.

=item AlreadyBinary => $bool

If set to a true value, we assume the server is already in binary
mode, and don't try to set it.

=back

=cut

# The constructor takes positional arguments, so the usage text shows them
# positionally.
use constant USAGE =>
    "Usage: Net::FTP::RetrHandle->new(\$ftp_obj, \$filename, options...)\n";

# new($ftp, $filename, %options)
# Builds the handle object.  Croaks with USAGE when $ftp or $filename is
# missing; returns undef when the server cannot report the file size.
sub new {
    my $class    = shift;
    my $ftp      = shift or croak USAGE;
    my $filename = shift or croak USAGE;

    my $self = {
        MaxSkipSize => DEFAULT_MAX_SKIPSIZE,
        BlockSize   => DEFAULT_BLOCKSIZE,
        @_,
        ftp      => $ftp,
        filename => $filename,
        pos      => 0,     # offset the data connection is currently at
        nextpos  => 0,     # offset the next sysread should start from
    };

    # A size of 0 is a valid answer (empty file); only undef means failure.
    $self->{size} = $self->{ftp}->size($self->{filename});
    defined($self->{size}) or return undef;

    $self->{ftp}->binary() unless ($self->{AlreadyBinary});
    bless $self, $class;
}

=head1 METHODS

Most of the methods implemented behave exactly like those from
L<IO::Handle|IO::Handle>.

These methods are implemented: C<binmode>, C<clearerr>, C<close>, C<eof>,
C<error>, C<getc>, C<getline>, C<getlines>, C<getpos>, C<read>,
C<seek>, C<setpos>, C<sysseek>, C<tell>, C<ungetc>, C<opened>.

=cut

# This handle always reports itself as open.
sub opened {
    1;
}

# Returns a position that is true in boolean context even when it is 0,
# the same way Perl's builtin sysseek does.
sub _true_pos {
    return $_[0] == 0 ? '0 but true' : $_[0];
}

# seek($pos, $whence)
# Buffered seek.  A forward seek that lands inside the read buffer just
# discards the skipped bytes; anything else drops the buffer and defers to
# sysseek.  Returns the new position (true even at offset 0), or 0 when the
# target offset would be negative.
sub seek {
    my $self   = shift;
    my $pos    = shift || 0;
    my $whence = shift || 0;

    warn " SEEK: self=$self, pos=$pos, whence=$whence\n" if ($ENV{DEBUG});

    my $curpos = $self->tell();
    my $newpos = _newpos($curpos, $self->{size}, $pos, $whence);
    return 0 if ($newpos < 0);

    if ($newpos == $curpos) {
        return _true_pos($curpos);
    }

    my $buffered = defined($self->{_buf}) ? length($self->{_buf}) : 0;
    if ($newpos > $curpos and $newpos < ($curpos + $buffered)) {
        # Just seeking within the buffer
        substr($self->{_buf}, 0, $newpos - $curpos, '');
        return $newpos;
    }

    my $ret = $self->sysseek($newpos, 0);
    $self->{_buf} = '';
    return $ret;
}

# Translates ($pos, $whence) into an absolute offset, given the current
# position and the file size.  Dies on an unknown $whence.
sub _newpos {
    my ($curpos, $size, $pos, $whence) = @_;

    if ($whence == 0) {       # seek_set
        return 0 + $pos;
    }
    elsif ($whence == 1) {    # seek_cur
        return $curpos + $pos;
    }
    elsif ($whence == 2) {    # seek_end
        return $size + $pos;
    }
    else {
        die "Invalid value $whence for whence!";
    }
}

# sysseek($pos, $whence)
# Records where the next sysread should start; no FTP traffic happens here.
# Returns the new position (true even at offset 0), or undef when the
# target offset would be negative.
sub sysseek {
    my $self   = shift;
    my $pos    = shift || 0;
    my $whence = shift || 0;

    warn "SYSSEEK: self=$self, pos=$pos, whence=$whence\n" if ($ENV{DEBUG});

    my $newpos = _newpos($self->{nextpos}, $self->{size}, $pos, $whence);
    return undef if ($newpos < 0);

    $self->{eof}     = undef;
    $self->{nextpos} = $newpos;
    return _true_pos($newpos);
}

# Logical position as seen by buffered readers: the unbuffered position
# minus whatever is still waiting in the read buffer.
sub tell {
    my $self = shift;
    return $self->{nextpos}
        - (defined($self->{_buf}) ? length($self->{_buf}) : 0);
}

# WARNING: ASCII mode probably breaks seek.
# binmode($mode)
# Switches the FTP transfer type: ':crlf' selects ASCII, anything else
# binary.  Any running transfer is finished first.  Returns the mode on
# success, or undef (with the error flag set) when the server refuses.
sub binmode {
    my $self = shift;
    my $mode = shift || ':raw';

    # Already in the requested mode: succeed without touching the server.
    return $mode
        if (defined($self->{curmode}) && ($self->{curmode} eq $mode));

    $self->_finish_connection();
    if ($mode eq ':crlf') {
        $self->{ftp}->ascii() or return $self->seterr();
    }
    else {
        $self->{ftp}->binary() or return $self->seterr();
    }
    $self->{curmode} = $mode;
}

sub _min {
    return $_[0] < $_[1] ? $_[0] : $_[1];
}

sub _max {
    return $_[0] > $_[1] ? $_[0] : $_[1];
}

# read($buf, $len, $offset)
# Buffered read.  Refills the internal buffer with at least BlockSize bytes
# when it cannot satisfy the request, then hands out up to $len bytes into
# $buf at $offset.  Returns the number of bytes delivered, 0 at EOF or when
# nothing could be read.
sub read {
    my $self = shift;
    my (undef, $len, $offset) = @_;
    $offset ||= 0;

    warn "READ(buf,$len,$offset)\n" if ($ENV{DEBUG});

    if (!defined($self->{_buf}) || length($self->{_buf}) <= 0) {
        $self->sysread($self->{_buf}, _max($len, $self->{BlockSize}))
            or return 0;
    }
    elsif (length($self->{_buf}) < $len) {
        # Top up the buffer; if this fails we still return what we have.
        $self->sysread(
            $self->{_buf},
            _max($len - length($self->{_buf}), $self->{BlockSize}),
            length($self->{_buf})
        );
    }

    my $ret = _min($len, length($self->{_buf}));

    # Like the builtin read, pad with NULs when $offset is past the end of
    # the target; an lvalue substr would otherwise die.
    $_[0] = '' if (!defined($_[0]));
    $_[0] .= "\0" x ($offset - length($_[0])) if ($offset > length($_[0]));

    substr($_[0], $offset) = substr($self->{_buf}, 0, $len, '');
    $self->{read_count}++;
    return $ret;
}

# sysread($buf, $len, $offset)
# Unbuffered read straight from the FTP data connection.  If the requested
# position differs from where the connection is, either reads and discards
# the gap (forward gaps smaller than MaxSkipSize) or finishes the transfer
# and restarts it at the new offset.  Returns bytes read, 0 at EOF, or
# undef with the error flag set on failure.
sub sysread {
    my $self = shift;
    return 0 if ($self->{eof});

    my (undef, $len, $offset) = @_;
    $offset ||= 0;

    warn "SYSREAD(buf,$len,$offset)\n" if ($ENV{DEBUG});

    if ($self->{nextpos} >= $self->{size}) {
        $self->{eof} = 1;
        $self->{pos} = $self->{nextpos};
        return 0;
    }

    if ($self->{pos} != $self->{nextpos} and $self->{ftp_running}) {
        # They seeked.
        warn "Seek detected, nextpos=$self->{nextpos}, pos=$self->{pos}, MaxSkipSize=$self->{MaxSkipSize}\n"
            if ($ENV{DEBUG});

        my $gap = $self->{nextpos} - $self->{pos};
        if ($gap > 0 and $gap < $self->{MaxSkipSize}) {
            warn "Reading $gap bytes to skip ahead\n" if ($ENV{DEBUG});
            my $junkbuff;
            while ($gap > 0) {
                warn "Trying to read $gap more bytes\n" if ($ENV{DEBUG});
                my $got = $self->{ftp_data}->read($junkbuff, $gap);

                # Test for failure before comparing with 0: undef == 0 is
                # true, which would misreport an error as EOF.
                if (!defined($got) || $got < 0) {
                    return $self->seterr();
                }
                if ($got == 0) {
                    $self->_at_eof();
                    return 0;
                }
                $gap -= $got;
                $self->{pos} += $got;    # keep pos honest if we bail out later
            }
            $self->{pos} = $self->{nextpos};
        }
        else {
            warn "Aborting connection to move to new position\n"
                if ($ENV{DEBUG});
            $self->_finish_connection();
        }
    }

    if (!$self->{ftp_running}) {
        $self->{ftp}->restart($self->{nextpos});
        $self->{ftp_data} = $self->{ftp}->retr($self->{filename})
            or return $self->seterr();
        $self->{ftp_running} = 1;
        $self->{pos}         = $self->{nextpos};
    }

    my $tmpbuf;
    my $rb = $self->{ftp_data}->read($tmpbuf, $len);
    if (!defined($rb) || $rb < 0) {
        return $self->seterr();
    }
    if ($rb == 0) {
        $self->_at_eof();
        return 0;
    }

    $_[0] = '' if (!defined($_[0]));
    $_[0] .= "\0" x ($offset - length($_[0])) if ($offset > length($_[0]));
    substr($_[0], $offset) = $tmpbuf;

    $self->{pos}     += $rb;
    $self->{nextpos} += $rb;
    $self->{sysread_count}++;
    return $rb;
}

# Marks the handle as being at EOF and shuts the data connection down.
sub _at_eof {
    my $self = shift;
    $self->{eof} = 1;
    $self->_finish_connection();
    # $self->{ftp_data}->_close();
    $self->{ftp_running} = $self->{ftp_data} = undef;
}

# Ends the running transfer, if any.  When fewer than MaxSkipSize bytes
# remain, they are read and discarded first.  Returns 1 on success (or when
# nothing was running), undef with the error flag set otherwise.
sub _finish_connection {
    my $self = shift;

    warn "_finish_connection\n" if ($ENV{DEBUG});
    return unless ($self->{ftp_running});

    if ($self->{size} - $self->{pos} < $self->{MaxSkipSize}) {
        warn "Skipping " . ($self->{size} - $self->{pos}) . " bytes\n"
            if ($ENV{DEBUG});
        my $junkbuff;
        my $br;
        while (($br = $self->{ftp_data}->read($junkbuff, 8192))) {
            # Read until EOF or error
        }
        defined($br) or $self->seterr();
    }

    warn "Shutting down existing FTP DATA session...\n" if ($ENV{DEBUG});

    my $closeret;
    CLOSE_ATTEMPT: {
        eval { $closeret = $self->{ftp_data}->close(); };

        # Work around a timeout bug in Net::FTP
        # NOTE(review): this retries without limit while close() keeps
        # timing out.
        if ($@ && $@ =~ /^Timeout /) {
            warn "Timeout closing connection, retrying...\n"
                if ($ENV{DEBUG});
            select(undef, undef, undef, 1);    # wait one second
            redo CLOSE_ATTEMPT;
        }
    }

    $self->{ftp_running} = $self->{ftp_data} = undef;
    return $closeret ? 1 : $self->seterr();
}

sub write {
    die "Only reading currently supported";
}

# Finishes any running transfer; succeeds trivially when none is open.
sub close {
    my $self = shift;
    return $self->{ftp_data} ? $self->_finish_connection() : 1;
}

# True (1) once no more data can be read, undef otherwise.  Bytes still in
# the buffer (for example after ungetc) count as readable data.
sub eof {
    my $self = shift;

    my $buffered = defined($self->{_buf}) ? length($self->{_buf}) : 0;
    return 1 if ($self->{eof} and !$buffered);

    my $c = $self->getc;
    return 1 if (!defined($c));

    $self->ungetc(ord($c));
    return undef;
}

# Returns the next character, or undef when nothing could be read.
sub getc {
    my $self = shift;
    my $c;
    my $rb = $self->read($c, 1);
    return undef if (!$rb || $rb < 1);
    return $c;
}

sub ungetc {
    my $self = shift;
    # Note that $c is the ordinal value of a character, not the
    # character itself (for some reason)
    my ($c) = @_;
    $self->{_buf} = chr($c) . (defined($self->{_buf}) ? $self->{_buf} : '');
}

# Reads one record according to $/: the whole remainder when $/ is undef,
# fixed-size records when $/ is a reference to a number, paragraph mode
# when $/ is '', otherwise up to and including the separator.  Returns
# undef at EOF or on a read error.
sub getline {
    my $self = shift;

    if (!defined($/)) {
        my $buf = '';
        while ($self->read($buf, $self->{BlockSize}, length($buf)) > 0) {
            # Keep going
        }
        return length($buf) ? $buf : undef;
    }
    elsif (ref($/) && Scalar::Util::looks_like_number(${$/})) {
        my $buf;
        $self->read($buf, ${$/}) or return undef;
        return $buf;
    }

    my $rs = ($/ eq '') ? "\n\n" : $/;
    $self->{_buf} = '' if (!defined($self->{_buf}));

    my $eol;
    while (($eol = index($self->{_buf}, $rs)) < 0) {
        if ($self->{eof}) {
            # return what's left
            return undef if (length($self->{_buf}) == 0);
            return substr($self->{_buf}, 0, length($self->{_buf}), '');
        }

        my $got = $self->sysread($self->{_buf}, $self->{BlockSize},
                                 length($self->{_buf}));

        # sysread reports errors as undef without setting eof; stop here
        # rather than retrying forever.
        return undef if (!defined($got));
    }

    # OK, we should have a match.
    my $tmpbuf = substr($self->{_buf}, 0, $eol + length($rs), '');

    # Paragraph mode swallows any extra blank lines after the record.
    while ($/ eq '' and substr($self->{_buf}, 0, 1) eq "\n") {
        substr($self->{_buf}, 0, 1) = '';
    }
    return $tmpbuf;
}

# Returns every remaining record as a list.
sub getlines {
    my $self = shift;
    my @lines;
    my $line;
    while (defined($line = $self->getline())) {
        push(@lines, $line);
    }
    @lines;
}

# True when seterr has been called since the last clearerr.
sub error {
    my $self = shift;
    return $self->{_error};
}

# Sets the error flag and returns undef, so callers can write
# "return $self->seterr()".
sub seterr {
    my $self = shift;
    $self->{_error} = 1;
    return undef;
}

sub clearerr {
    my $self = shift;
    $self->{_error} = undef;
    return 0;
}

sub getpos {
    my $self = shift;
    return $self->tell();
}

sub setpos {
    my $self = shift;
    return $self->seek(@_);
}

# Finishes any transfer still running when the object goes away.
sub DESTROY {
    my $self = shift;

    if (UNIVERSAL::isa($self, 'GLOB')) {
        $self = tied *$self
            or die "$self not tied?...";
    }
    if ($self->{ftp_data}) {
        $self->_finish_connection();
    }
    warn "sysread called " . ($self->{sysread_count} || 0) . " times.\n"
        if ($ENV{DEBUG});
}

=head1 TIED INTERFACE

Instead of a L<IO::Handle|IO::Handle>-compatible interface, you can
use a C<tie>-based interface to use the standard Perl I/O operators.
You can use it like this:

    use Net::FTP::RetrHandle;
    # Create FTP object in $ftp
    # Store filename in $filename
    tie *FH, 'Net::FTP::RetrHandle', $ftp, $filename
      or die "Error in tie!\n";

=cut

sub TIEHANDLE {
    my $class = shift;
    my $obj   = $class->new(@_);
    $obj;
}

sub READ {
    my $self = shift;
    $self->read(@_);
}

sub READLINE {
    my $self = shift;
    return wantarray ? $self->getlines(@_) : $self->getline(@_);
}

sub GETC {
    my $self = shift;
    return $self->getc(@_);
}

sub SEEK {
    my $self = shift;
    return $self->seek(@_);
}

sub SYSSEEK {
    my $self = shift;
    return $self->sysseek(@_);
}

sub TELL {
    my $self = shift;
    return $self->tell();
}

sub CLOSE {
    my $self = shift;
    return $self->close(@_);
}

sub EOF {
    my $self = shift;
    return $self->eof(@_);
}

# Perl passes the tied object itself as the first argument, so close it
# directly.
sub UNTIE {
    my $self = shift;
    return $self->close();
}

=head1 EXAMPLE

Here's an example of listing a Zip file without downloading the whole
thing:

    #!/usr/bin/perl

    use warnings;
    use strict;

    use Net::FTP;
    use Net::FTP::AutoReconnect;
    use Net::FTP::RetrHandle;
    use Archive::Zip;

    my $ftp = Net::FTP::AutoReconnect->new("ftp.info-zip.com", Debug => $ENV{DEBUG})
        or die "connect error\n";
    $ftp->login('anonymous','example@example.com')
        or die "login error\n";
    $ftp->cwd('/pub/infozip/UNIX/LINUX')
        or die "cwd error\n";
    my $fh = Net::FTP::RetrHandle->new($ftp,'unz551x-glibc.zip')
        or die "Couldn't get handle to remote file\n";
    my $zip = Archive::Zip->new($fh)
        or die "Couldn't create Zip object\n";
    foreach my $fn ($zip->memberNames())
    {
      print "unz551-glibc.zip: $fn\n";
    }

=head1 AUTHOR

Scott Gifford <sgifford@suspectclass.com>

=head1 BUGS

The distinction between tied filehandles and C<IO::Handle>-compatible
filehandles should be blurrier.  It seems like other file handle
objects you can freely mix method calls and traditional Perl
operations, but I can't figure out how to do it.

Many FTP servers don't like frequent connection aborts.  If that's the
case, try L<Net::FTP::AutoReconnect>, which will hide much of that
from you.

If the filehandle is tied and created with C<gensym>, C<readline>
doesn't work with older versions of Perl.  No idea why.

=head1 SEE ALSO

L<Net::FTP>, L<Net::FTP::AutoReconnect>, L<IO::Handle>.

=head1 COPYRIGHT

Copyright (c) 2006 Scott Gifford. All rights reserved.  This program
is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.

=cut

1;