summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Emde <martin.emde@gmail.com>2023-02-16 18:03:41 -0800
committergit <svn-admin@ruby-lang.org>2023-03-07 20:21:43 +0000
commit85a1738ab37b3348fc0b924804ca4b209f34fbf7 (patch)
tree92706ff6ce6f4ae5baa1b06095468678de236f5d
parent719a7726d17f0800b8f8ef42f3f48e7558d5a444 (diff)
[rubygems/rubygems] Add TarReader::Entry#seek to seek within the tar file entry
TarReader#each previously implemented a partial version of seek. This code moved to Entry#seek for use from TarReader#each. Entry#close now returns nil instead of true, like IO#close. Closing an Entry now seeks to the end of the Entry, seeking past any remaining zero byte tar file padding and moving the io to the correcty position to read the next file in the archive. Uses seek for Entry#rewind and #pos=, fixing the tar->gzip->tar nested rewind that would break previous to this change. Add Entry.open that behaves more like File.open. https://github.com/rubygems/rubygems/commit/f5149565d5
-rw-r--r--lib/rubygems/package/tar_reader.rb28
-rw-r--r--lib/rubygems/package/tar_reader/entry.rb85
-rw-r--r--test/rubygems/package/tar_test_case.rb63
-rw-r--r--test/rubygems/test_gem_package.rb25
-rw-r--r--test/rubygems/test_gem_package_tar_reader.rb49
-rw-r--r--test/rubygems/test_gem_package_tar_reader_entry.rb117
6 files changed, 281 insertions, 86 deletions
diff --git a/lib/rubygems/package/tar_reader.rb b/lib/rubygems/package/tar_reader.rb
index cdc3fdc015..9f1d49035b 100644
--- a/lib/rubygems/package/tar_reader.rb
+++ b/lib/rubygems/package/tar_reader.rb
@@ -53,39 +53,11 @@ class Gem::Package::TarReader
def each
return enum_for __method__ unless block_given?
- use_seek = @io.respond_to?(:seek)
-
until @io.eof? do
header = Gem::Package::TarHeader.from @io
return if header.empty?
-
entry = Gem::Package::TarReader::Entry.new header, @io
- size = entry.header.size
-
yield entry
-
- skip = (512 - (size % 512)) % 512
- pending = size - entry.bytes_read
-
- if use_seek
- begin
- # avoid reading if the @io supports seeking
- @io.seek pending, IO::SEEK_CUR
- pending = 0
- rescue Errno::EINVAL
- end
- end
-
- # if seeking isn't supported or failed
- while pending > 0 do
- bytes_read = @io.read([pending, 4096].min).size
- raise UnexpectedEOF if @io.eof?
- pending -= bytes_read
- end
-
- @io.read skip # discard trailing zeros
-
- # make sure nobody can use #read, #getc or #rewind anymore
entry.close
end
end
diff --git a/lib/rubygems/package/tar_reader/entry.rb b/lib/rubygems/package/tar_reader/entry.rb
index e7b0cd0602..9e7b327431 100644
--- a/lib/rubygems/package/tar_reader/entry.rb
+++ b/lib/rubygems/package/tar_reader/entry.rb
@@ -9,6 +9,20 @@
class Gem::Package::TarReader::Entry
##
+ # Creates a new tar entry for +header+ that will be read from +io+
+ # If a block is given, the entry is yielded and then closed.
+
+ def self.open(header, io, &block)
+ entry = new header, io
+ return entry unless block_given?
+ begin
+ yield entry
+ ensure
+ entry.close
+ end
+ end
+
+ ##
# Header for this tar entry
attr_reader :header
@@ -21,6 +35,7 @@ class Gem::Package::TarReader::Entry
@header = header
@io = io
@orig_pos = @io.pos
+ @end_pos = @orig_pos + @header.size
@read = 0
end
@@ -39,7 +54,14 @@ class Gem::Package::TarReader::Entry
# Closes the tar entry
def close
+ return if closed?
+ # Seek to the end of the entry if it wasn't fully read
+ seek(0, IO::SEEK_END)
+ # discard trailing zeros
+ skip = (512 - (@header.size % 512)) % 512
+ @io.read(skip)
@closed = true
+ nil
end
##
@@ -117,6 +139,14 @@ class Gem::Package::TarReader::Entry
bytes_read
end
+ ##
+ # Seek to the position in the tar entry
+
+ def pos=(new_pos)
+ seek(new_pos, IO::SEEK_SET)
+ new_pos
+ end
+
def size
@header.size
end
@@ -158,12 +188,61 @@ class Gem::Package::TarReader::Entry
end
##
+ # Seeks to +offset+ bytes into the tar file entry
+ # +whence+ can be IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END
+
+ def seek(offset, whence = IO::SEEK_SET)
+ check_closed
+
+ new_pos =
+ case whence
+ when IO::SEEK_SET then @orig_pos + offset
+ when IO::SEEK_CUR then @io.pos + offset
+ when IO::SEEK_END then @end_pos + offset
+ else
+ raise ArgumentError, "invalid whence"
+ end
+
+ if new_pos < @orig_pos
+ new_pos = @orig_pos
+ elsif new_pos > @end_pos
+ new_pos = @end_pos
+ end
+
+ pending = new_pos - @io.pos
+
+ if @io.respond_to?(:seek)
+ begin
+ # avoid reading if the @io supports seeking
+ @io.seek new_pos, IO::SEEK_SET
+ pending = 0
+ rescue Errno::EINVAL
+ end
+ end
+
+ # if seeking isn't supported or failed
+ # negative seek requires that we rewind and read
+ if pending < 0
+ @io.rewind
+ pending = new_pos
+ end
+
+ while pending > 0 do
+ size_read = @io.read([pending, 4096].min).size
+ raise UnexpectedEOF if @io.eof?
+ pending -= size_read
+ end
+
+ @read = @io.pos - @orig_pos
+
+ 0
+ end
+
+ ##
# Rewinds to the beginning of the tar file entry
def rewind
check_closed
-
- @io.pos = @orig_pos
- @read = 0
+ seek(0, IO::SEEK_SET)
end
end
diff --git a/test/rubygems/package/tar_test_case.rb b/test/rubygems/package/tar_test_case.rb
index 6cee7f86dc..cb0612eeb9 100644
--- a/test/rubygems/package/tar_test_case.rb
+++ b/test/rubygems/package/tar_test_case.rb
@@ -90,31 +90,36 @@ class Gem::Package::TarTestCase < Gem::TestCase
ASCIIZ("wheel", 32), # char gname[32]; ASCIIZ
Z(to_oct(0, 7)), # char devmajor[8]; 0 padded, octal, null
Z(to_oct(0, 7)), # char devminor[8]; 0 padded, octal, null
- ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
+ ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
]
h = arr.join
- ret = h + "\0" * (512 - h.size)
+ ret = ASCIIZ(h, 512)
assert_equal(512, ret.size)
ret
end
- def tar_dir_header(name, prefix, mode, mtime)
- h = header("5", name, prefix, 0, mode, mtime)
+ def header_with_checksum(type, fname, dname, length, mode, mtime, linkname = "")
+ h = header(type, fname, dname, length, mode, mtime, nil, linkname)
checksum = calc_checksum(h)
- header("5", name, prefix, 0, mode, mtime, checksum)
+ header(type, fname, dname, length, mode, mtime, checksum, linkname)
+ end
+
+ def tar_dir_header(name, prefix, mode, mtime)
+ header_with_checksum("5", name, prefix, 0, mode, mtime)
end
def tar_file_header(fname, dname, mode, length, mtime)
- h = header("0", fname, dname, length, mode, mtime)
- checksum = calc_checksum(h)
- header("0", fname, dname, length, mode, mtime, checksum)
+ header_with_checksum("0", fname, dname, length, mode, mtime)
end
- def tar_symlink_header(fname, prefix, mode, mtime, linkname)
- h = header("2", fname, prefix, 0, mode, mtime, nil, linkname)
- checksum = calc_checksum(h)
- header("2", fname, prefix, 0, mode, mtime, checksum, linkname)
+ def tar_symlink_header(fname, dname, mode, mtime, linkname)
+ header_with_checksum("2", fname, dname, 0, mode, mtime, linkname)
+ end
+
+ def tar_file_contents(content)
+ pad = (512 - (content.size % 512)) % 512
+ content + "\0" * pad
end
def to_oct(n, pad_size)
@@ -122,11 +127,15 @@ class Gem::Package::TarTestCase < Gem::TestCase
end
def util_entry(tar)
- io = TempIO.new tar
+ io = tar.respond_to?(:read) ? tar : TempIO.new(tar)
header = Gem::Package::TarHeader.from io
- Gem::Package::TarReader::Entry.new header, io
+ Gem::Package::TarReader::Entry.open header, io
+ end
+
+ def close_util_entry(entry)
+ entry.instance_variable_get(:@io).close!
end
def util_dir_entry
@@ -136,4 +145,30 @@ class Gem::Package::TarTestCase < Gem::TestCase
def util_symlink_entry
util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link")
end
+
+ def util_tar(&block)
+ tar_io = StringIO.new
+ Gem::Package::TarWriter.new(tar_io, &block)
+ tar_io.rewind
+ tar_io
+ end
+
+ def util_tar_gz(&block)
+ tar_io = util_tar(&block)
+ StringIO.new util_gzip(tar_io.string)
+ end
+
+ def util_gem_data_tar(spec = nil, &block)
+ data_tgz = util_tar_gz(&block)
+ util_tar do |tar|
+ if spec
+ tar.add_file "metadata.gz", 0444 do |io|
+ io.write util_gzip(spec.to_yaml)
+ end
+ end
+ tar.add_file "data.tar.gz", 0644 do |io|
+ io.write data_tgz.string
+ end
+ end
+ end
end
diff --git a/test/rubygems/test_gem_package.rb b/test/rubygems/test_gem_package.rb
index eebe4d86d0..9d6215838d 100644
--- a/test/rubygems/test_gem_package.rb
+++ b/test/rubygems/test_gem_package.rb
@@ -1187,29 +1187,4 @@ class TestGemPackage < Gem::Package::TarTestCase
assert_equal %w[lib/code.rb], package.contents
end
-
- def util_tar
- tar_io = StringIO.new
-
- Gem::Package::TarWriter.new tar_io do |tar|
- yield tar
- end
-
- tar_io.rewind
-
- tar_io
- end
-
- def util_tar_gz(&block)
- tar_io = util_tar(&block)
-
- tgz_io = StringIO.new
-
- # can't wrap TarWriter because it seeks
- Zlib::GzipWriter.wrap tgz_io do |io|
- io.write tar_io.string
- end
-
- StringIO.new tgz_io.string
- end
end
diff --git a/test/rubygems/test_gem_package_tar_reader.rb b/test/rubygems/test_gem_package_tar_reader.rb
index 19860eb7e8..62e60489a6 100644
--- a/test/rubygems/test_gem_package_tar_reader.rb
+++ b/test/rubygems/test_gem_package_tar_reader.rb
@@ -56,12 +56,14 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
io = TempIO.new tar
Gem::Package::TarReader.new io do |tar_reader|
- tar_reader.seek "baz/bar" do |entry|
+ retval = tar_reader.seek "baz/bar" do |entry|
assert_kind_of Gem::Package::TarReader::Entry, entry
assert_equal "baz/bar", entry.full_name
+ entry.read
end
+ assert_equal "", retval
assert_equal 0, io.pos
end
ensure
@@ -84,4 +86,49 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase
ensure
io.close!
end
+
+ def test_read_in_gem_data
+ gem_tar = util_gem_data_tar do |tar|
+ tar.add_file "lib/code.rb", 0444 do |io|
+ io.write "# lib/code.rb"
+ end
+ end
+
+ count = 0
+ Gem::Package::TarReader.new(gem_tar).each do |entry|
+ next unless entry.full_name == "data.tar.gz"
+
+ Zlib::GzipReader.wrap entry do |gzio|
+ Gem::Package::TarReader.new(gzio).each do |contents_entry|
+ assert_equal "# lib/code.rb", contents_entry.read
+ count += 1
+ end
+ end
+ end
+
+ assert_equal 1, count, "should have found one file"
+ end
+
+ def test_seek_in_gem_data
+ gem_tar = util_gem_data_tar do |tar|
+ tar.add_file "lib/code.rb", 0444 do |io|
+ io.write "# lib/code.rb"
+ end
+ tar.add_file "lib/foo.rb", 0444 do |io|
+ io.write "# lib/foo.rb"
+ end
+ end
+
+ count = 0
+ Gem::Package::TarReader.new(gem_tar).seek("data.tar.gz") do |entry|
+ Zlib::GzipReader.wrap entry do |gzio|
+ Gem::Package::TarReader.new(gzio).seek("lib/foo.rb") do |contents_entry|
+ assert_equal "# lib/foo.rb", contents_entry.read
+ count += 1
+ end
+ end
+ end
+
+ assert_equal 1, count, "should have found one file"
+ end
end
diff --git a/test/rubygems/test_gem_package_tar_reader_entry.rb b/test/rubygems/test_gem_package_tar_reader_entry.rb
index 91b0beb86d..64b9a86729 100644
--- a/test/rubygems/test_gem_package_tar_reader_entry.rb
+++ b/test/rubygems/test_gem_package_tar_reader_entry.rb
@@ -10,8 +10,7 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
@tar = String.new
@tar << tar_file_header("lib/foo", "", 0, @contents.size, Time.now)
- @tar << @contents
- @tar << "\0" * (512 - (@tar.size % 512))
+ @tar << tar_file_contents(@contents)
@entry = util_entry @tar
end
@@ -21,8 +20,35 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
super
end
- def close_util_entry(entry)
- entry.instance_variable_get(:@io).close!
+ def test_open
+ io = TempIO.new @tar
+ header = Gem::Package::TarHeader.from io
+ retval = Gem::Package::TarReader::Entry.open header, io do |entry|
+ entry.getc
+ end
+ assert_equal "a", retval
+ assert_equal @tar.size, io.pos, "should have read to end of entry"
+ end
+
+ def test_open_closes_entry
+ io = TempIO.new @tar
+ header = Gem::Package::TarHeader.from io
+ entry = nil
+ Gem::Package::TarReader::Entry.open header, io do |e|
+ entry = e
+ end
+ assert entry.closed?
+ assert_raise(IOError) { entry.getc }
+ end
+
+ def test_open_returns_entry
+ io = TempIO.new @tar
+ header = Gem::Package::TarHeader.from io
+ entry = Gem::Package::TarReader::Entry.open header, io
+ refute entry.closed?
+ assert_equal ?a, entry.getc
+ assert_nil entry.close
+ assert entry.closed?
end
def test_bytes_read
@@ -177,6 +203,21 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal char, @entry.getc
end
+ def test_seek
+ @entry.seek(50)
+ assert_equal 50, @entry.pos
+ assert_equal @contents[50..-1], @entry.read, "read remaining after seek"
+ @entry.seek(-50, IO::SEEK_CUR)
+ assert_equal @contents.size - 50, @entry.pos
+ assert_equal @contents[-50..-1], @entry.read, "read after stepping back 50 from the end"
+ @entry.seek(0, IO::SEEK_SET)
+ assert_equal 0, @entry.pos
+ assert_equal @contents, @entry.read, "read from beginning"
+ @entry.seek(-10, IO::SEEK_END)
+ assert_equal @contents.size - 10, @entry.pos
+ assert_equal @contents[-10..-1], @entry.read, "read from end"
+ end
+
def test_read_zero
expected = StringIO.new("")
assert_equal expected.read(0), @entry.read(0)
@@ -187,28 +228,74 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase
assert_equal expected.readpartial(0), @entry.readpartial(0)
end
- def util_zero_byte_entry
- tar = String.new
- tar << tar_file_header("lib/empty", "", 0, 0, Time.now)
- tar << "\0" * (512 - (tar.size % 512))
- util_entry tar
- end
-
def test_zero_byte_file_read
- zero_entry = util_zero_byte_entry
+ zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("")
-
assert_equal expected.read, zero_entry.read
ensure
close_util_entry(zero_entry) if zero_entry
end
def test_zero_byte_file_readpartial
- zero_entry = util_zero_byte_entry
+ zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now))
expected = StringIO.new("")
-
assert_equal expected.readpartial(0), zero_entry.readpartial(0)
ensure
close_util_entry(zero_entry) if zero_entry
end
+
+ def test_read_from_gzip_io
+ tgz = util_gzip(@tar)
+
+ Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
+ entry = util_entry(gzio)
+ assert_equal @contents, entry.read
+ entry.rewind
+ assert_equal @contents, entry.read, "second read after rewind should read same contents"
+ end
+ end
+
+ def test_read_from_gzip_io_with_non_zero_offset
+ contents2 = ("0".."9").to_a.join * 100
+ @tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
+ @tar << tar_file_contents(contents2)
+
+ tgz = util_gzip(@tar)
+
+ Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
+ util_entry(gzio).close # skip the first entry so io.pos is not 0, preventing easy rewind
+ entry = util_entry(gzio)
+
+ assert_equal contents2, entry.read
+ entry.rewind
+ assert_equal contents2, entry.read, "second read after rewind should read same contents"
+ end
+ end
+
+ def test_seek_in_gzip_io_with_non_zero_offset
+ contents2 = ("0".."9").to_a.join * 100
+ @tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now)
+ @tar << tar_file_contents(contents2)
+
+ tgz = util_gzip(@tar)
+
+ Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio|
+ util_entry(gzio).close # skip the first entry so io.pos is not 0
+ entry = util_entry(gzio)
+
+ entry.seek(50)
+ assert_equal 50, entry.pos
+ assert_equal contents2[50..-1], entry.read, "read remaining after seek"
+ entry.seek(-50, IO::SEEK_CUR)
+ assert_equal contents2.size - 50, entry.pos
+ assert_equal contents2[-50..-1], entry.read, "read after stepping back 50 from the end"
+ entry.seek(0, IO::SEEK_SET)
+ assert_equal 0, entry.pos
+ assert_equal contents2, entry.read, "read from beginning"
+ entry.seek(-10, IO::SEEK_END)
+ assert_equal contents2.size - 10, entry.pos
+ assert_equal contents2[-10..-1], entry.read, "read from end"
+ assert_equal contents2.size, entry.pos
+ end
+ end
end