From 85a1738ab37b3348fc0b924804ca4b209f34fbf7 Mon Sep 17 00:00:00 2001 From: Martin Emde Date: Thu, 16 Feb 2023 18:03:41 -0800 Subject: [rubygems/rubygems] Add TarReader::Entry#seek to seek within the tar file entry TarReader#each previously implemented a partial version of seek. This code moved to Entry#seek for use from TarReader#each. Entry#close now returns nil instead of true, like IO#close. Closing an Entry now seeks to the end of the Entry, seeking past any remaining zero byte tar file padding and moving the io to the correcty position to read the next file in the archive. Uses seek for Entry#rewind and #pos=, fixing the tar->gzip->tar nested rewind that would break previous to this change. Add Entry.open that behaves more like File.open. https://github.com/rubygems/rubygems/commit/f5149565d5 --- lib/rubygems/package/tar_reader.rb | 28 ----- lib/rubygems/package/tar_reader/entry.rb | 85 ++++++++++++++- test/rubygems/package/tar_test_case.rb | 63 ++++++++--- test/rubygems/test_gem_package.rb | 25 ----- test/rubygems/test_gem_package_tar_reader.rb | 49 ++++++++- test/rubygems/test_gem_package_tar_reader_entry.rb | 117 ++++++++++++++++++--- 6 files changed, 281 insertions(+), 86 deletions(-) diff --git a/lib/rubygems/package/tar_reader.rb b/lib/rubygems/package/tar_reader.rb index cdc3fdc015..9f1d49035b 100644 --- a/lib/rubygems/package/tar_reader.rb +++ b/lib/rubygems/package/tar_reader.rb @@ -53,39 +53,11 @@ class Gem::Package::TarReader def each return enum_for __method__ unless block_given? - use_seek = @io.respond_to?(:seek) - until @io.eof? do header = Gem::Package::TarHeader.from @io return if header.empty? - entry = Gem::Package::TarReader::Entry.new header, @io - size = entry.header.size - yield entry - - skip = (512 - (size % 512)) % 512 - pending = size - entry.bytes_read - - if use_seek - begin - # avoid reading if the @io supports seeking - @io.seek pending, IO::SEEK_CUR - pending = 0 - rescue Errno::EINVAL - end - end - - # if seeking isn't supported or failed - while pending > 0 do - bytes_read = @io.read([pending, 4096].min).size - raise UnexpectedEOF if @io.eof? - pending -= bytes_read - end - - @io.read skip # discard trailing zeros - - # make sure nobody can use #read, #getc or #rewind anymore entry.close end end diff --git a/lib/rubygems/package/tar_reader/entry.rb b/lib/rubygems/package/tar_reader/entry.rb index e7b0cd0602..9e7b327431 100644 --- a/lib/rubygems/package/tar_reader/entry.rb +++ b/lib/rubygems/package/tar_reader/entry.rb @@ -8,6 +8,20 @@ # Class for reading entries out of a tar file class Gem::Package::TarReader::Entry + ## + # Creates a new tar entry for +header+ that will be read from +io+ + # If a block is given, the entry is yielded and then closed. + + def self.open(header, io, &block) + entry = new header, io + return entry unless block_given? + begin + yield entry + ensure + entry.close + end + end + ## # Header for this tar entry @@ -21,6 +35,7 @@ class Gem::Package::TarReader::Entry @header = header @io = io @orig_pos = @io.pos + @end_pos = @orig_pos + @header.size @read = 0 end @@ -39,7 +54,14 @@ class Gem::Package::TarReader::Entry # Closes the tar entry def close + return if closed? + # Seek to the end of the entry if it wasn't fully read + seek(0, IO::SEEK_END) + # discard trailing zeros + skip = (512 - (@header.size % 512)) % 512 + @io.read(skip) @closed = true + nil end ## @@ -117,6 +139,14 @@ class Gem::Package::TarReader::Entry bytes_read end + ## + # Seek to the position in the tar entry + + def pos=(new_pos) + seek(new_pos, IO::SEEK_SET) + new_pos + end + def size @header.size end @@ -157,13 +187,62 @@ class Gem::Package::TarReader::Entry outbuf end + ## + # Seeks to +offset+ bytes into the tar file entry + # +whence+ can be IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END + + def seek(offset, whence = IO::SEEK_SET) + check_closed + + new_pos = + case whence + when IO::SEEK_SET then @orig_pos + offset + when IO::SEEK_CUR then @io.pos + offset + when IO::SEEK_END then @end_pos + offset + else + raise ArgumentError, "invalid whence" + end + + if new_pos < @orig_pos + new_pos = @orig_pos + elsif new_pos > @end_pos + new_pos = @end_pos + end + + pending = new_pos - @io.pos + + if @io.respond_to?(:seek) + begin + # avoid reading if the @io supports seeking + @io.seek new_pos, IO::SEEK_SET + pending = 0 + rescue Errno::EINVAL + end + end + + # if seeking isn't supported or failed + # negative seek requires that we rewind and read + if pending < 0 + @io.rewind + pending = new_pos + end + + while pending > 0 do + size_read = @io.read([pending, 4096].min).size + raise UnexpectedEOF if @io.eof? + pending -= size_read + end + + @read = @io.pos - @orig_pos + + 0 + end + ## # Rewinds to the beginning of the tar file entry def rewind check_closed - - @io.pos = @orig_pos - @read = 0 + seek(0, IO::SEEK_SET) end end diff --git a/test/rubygems/package/tar_test_case.rb b/test/rubygems/package/tar_test_case.rb index 6cee7f86dc..cb0612eeb9 100644 --- a/test/rubygems/package/tar_test_case.rb +++ b/test/rubygems/package/tar_test_case.rb @@ -90,31 +90,36 @@ class Gem::Package::TarTestCase < Gem::TestCase ASCIIZ("wheel", 32), # char gname[32]; ASCIIZ Z(to_oct(0, 7)), # char devmajor[8]; 0 padded, octal, null Z(to_oct(0, 7)), # char devminor[8]; 0 padded, octal, null - ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled) + ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled) ] h = arr.join - ret = h + "\0" * (512 - h.size) + ret = ASCIIZ(h, 512) assert_equal(512, ret.size) ret end - def tar_dir_header(name, prefix, mode, mtime) - h = header("5", name, prefix, 0, mode, mtime) + def header_with_checksum(type, fname, dname, length, mode, mtime, linkname = "") + h = header(type, fname, dname, length, mode, mtime, nil, linkname) checksum = calc_checksum(h) - header("5", name, prefix, 0, mode, mtime, checksum) + header(type, fname, dname, length, mode, mtime, checksum, linkname) + end + + def tar_dir_header(name, prefix, mode, mtime) + header_with_checksum("5", name, prefix, 0, mode, mtime) end def tar_file_header(fname, dname, mode, length, mtime) - h = header("0", fname, dname, length, mode, mtime) - checksum = calc_checksum(h) - header("0", fname, dname, length, mode, mtime, checksum) + header_with_checksum("0", fname, dname, length, mode, mtime) end - def tar_symlink_header(fname, prefix, mode, mtime, linkname) - h = header("2", fname, prefix, 0, mode, mtime, nil, linkname) - checksum = calc_checksum(h) - header("2", fname, prefix, 0, mode, mtime, checksum, linkname) + def tar_symlink_header(fname, dname, mode, mtime, linkname) + header_with_checksum("2", fname, dname, 0, mode, mtime, linkname) + end + + def tar_file_contents(content) + pad = (512 - (content.size % 512)) % 512 + content + "\0" * pad end def to_oct(n, pad_size) @@ -122,11 +127,15 @@ class Gem::Package::TarTestCase < Gem::TestCase end def util_entry(tar) - io = TempIO.new tar + io = tar.respond_to?(:read) ? tar : TempIO.new(tar) header = Gem::Package::TarHeader.from io - Gem::Package::TarReader::Entry.new header, io + Gem::Package::TarReader::Entry.open header, io + end + + def close_util_entry(entry) + entry.instance_variable_get(:@io).close! end def util_dir_entry @@ -136,4 +145,30 @@ class Gem::Package::TarTestCase < Gem::TestCase def util_symlink_entry util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link") end + + def util_tar(&block) + tar_io = StringIO.new + Gem::Package::TarWriter.new(tar_io, &block) + tar_io.rewind + tar_io + end + + def util_tar_gz(&block) + tar_io = util_tar(&block) + StringIO.new util_gzip(tar_io.string) + end + + def util_gem_data_tar(spec = nil, &block) + data_tgz = util_tar_gz(&block) + util_tar do |tar| + if spec + tar.add_file "metadata.gz", 0444 do |io| + io.write util_gzip(spec.to_yaml) + end + end + tar.add_file "data.tar.gz", 0644 do |io| + io.write data_tgz.string + end + end + end end diff --git a/test/rubygems/test_gem_package.rb b/test/rubygems/test_gem_package.rb index eebe4d86d0..9d6215838d 100644 --- a/test/rubygems/test_gem_package.rb +++ b/test/rubygems/test_gem_package.rb @@ -1187,29 +1187,4 @@ class TestGemPackage < Gem::Package::TarTestCase assert_equal %w[lib/code.rb], package.contents end - - def util_tar - tar_io = StringIO.new - - Gem::Package::TarWriter.new tar_io do |tar| - yield tar - end - - tar_io.rewind - - tar_io - end - - def util_tar_gz(&block) - tar_io = util_tar(&block) - - tgz_io = StringIO.new - - # can't wrap TarWriter because it seeks - Zlib::GzipWriter.wrap tgz_io do |io| - io.write tar_io.string - end - - StringIO.new tgz_io.string - end end diff --git a/test/rubygems/test_gem_package_tar_reader.rb b/test/rubygems/test_gem_package_tar_reader.rb index 19860eb7e8..62e60489a6 100644 --- a/test/rubygems/test_gem_package_tar_reader.rb +++ b/test/rubygems/test_gem_package_tar_reader.rb @@ -56,12 +56,14 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase io = TempIO.new tar Gem::Package::TarReader.new io do |tar_reader| - tar_reader.seek "baz/bar" do |entry| + retval = tar_reader.seek "baz/bar" do |entry| assert_kind_of Gem::Package::TarReader::Entry, entry assert_equal "baz/bar", entry.full_name + entry.read end + assert_equal "", retval assert_equal 0, io.pos end ensure @@ -84,4 +86,49 @@ class TestGemPackageTarReader < Gem::Package::TarTestCase ensure io.close! end + + def test_read_in_gem_data + gem_tar = util_gem_data_tar do |tar| + tar.add_file "lib/code.rb", 0444 do |io| + io.write "# lib/code.rb" + end + end + + count = 0 + Gem::Package::TarReader.new(gem_tar).each do |entry| + next unless entry.full_name == "data.tar.gz" + + Zlib::GzipReader.wrap entry do |gzio| + Gem::Package::TarReader.new(gzio).each do |contents_entry| + assert_equal "# lib/code.rb", contents_entry.read + count += 1 + end + end + end + + assert_equal 1, count, "should have found one file" + end + + def test_seek_in_gem_data + gem_tar = util_gem_data_tar do |tar| + tar.add_file "lib/code.rb", 0444 do |io| + io.write "# lib/code.rb" + end + tar.add_file "lib/foo.rb", 0444 do |io| + io.write "# lib/foo.rb" + end + end + + count = 0 + Gem::Package::TarReader.new(gem_tar).seek("data.tar.gz") do |entry| + Zlib::GzipReader.wrap entry do |gzio| + Gem::Package::TarReader.new(gzio).seek("lib/foo.rb") do |contents_entry| + assert_equal "# lib/foo.rb", contents_entry.read + count += 1 + end + end + end + + assert_equal 1, count, "should have found one file" + end end diff --git a/test/rubygems/test_gem_package_tar_reader_entry.rb b/test/rubygems/test_gem_package_tar_reader_entry.rb index 91b0beb86d..64b9a86729 100644 --- a/test/rubygems/test_gem_package_tar_reader_entry.rb +++ b/test/rubygems/test_gem_package_tar_reader_entry.rb @@ -10,8 +10,7 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase @tar = String.new @tar << tar_file_header("lib/foo", "", 0, @contents.size, Time.now) - @tar << @contents - @tar << "\0" * (512 - (@tar.size % 512)) + @tar << tar_file_contents(@contents) @entry = util_entry @tar end @@ -21,8 +20,35 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase super end - def close_util_entry(entry) - entry.instance_variable_get(:@io).close! + def test_open + io = TempIO.new @tar + header = Gem::Package::TarHeader.from io + retval = Gem::Package::TarReader::Entry.open header, io do |entry| + entry.getc + end + assert_equal "a", retval + assert_equal @tar.size, io.pos, "should have read to end of entry" + end + + def test_open_closes_entry + io = TempIO.new @tar + header = Gem::Package::TarHeader.from io + entry = nil + Gem::Package::TarReader::Entry.open header, io do |e| + entry = e + end + assert entry.closed? + assert_raise(IOError) { entry.getc } + end + + def test_open_returns_entry + io = TempIO.new @tar + header = Gem::Package::TarHeader.from io + entry = Gem::Package::TarReader::Entry.open header, io + refute entry.closed? + assert_equal ?a, entry.getc + assert_nil entry.close + assert entry.closed? end def test_bytes_read @@ -177,6 +203,21 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase assert_equal char, @entry.getc end + def test_seek + @entry.seek(50) + assert_equal 50, @entry.pos + assert_equal @contents[50..-1], @entry.read, "read remaining after seek" + @entry.seek(-50, IO::SEEK_CUR) + assert_equal @contents.size - 50, @entry.pos + assert_equal @contents[-50..-1], @entry.read, "read after stepping back 50 from the end" + @entry.seek(0, IO::SEEK_SET) + assert_equal 0, @entry.pos + assert_equal @contents, @entry.read, "read from beginning" + @entry.seek(-10, IO::SEEK_END) + assert_equal @contents.size - 10, @entry.pos + assert_equal @contents[-10..-1], @entry.read, "read from end" + end + def test_read_zero expected = StringIO.new("") assert_equal expected.read(0), @entry.read(0) @@ -187,28 +228,74 @@ class TestGemPackageTarReaderEntry < Gem::Package::TarTestCase assert_equal expected.readpartial(0), @entry.readpartial(0) end - def util_zero_byte_entry - tar = String.new - tar << tar_file_header("lib/empty", "", 0, 0, Time.now) - tar << "\0" * (512 - (tar.size % 512)) - util_entry tar - end - def test_zero_byte_file_read - zero_entry = util_zero_byte_entry + zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now)) expected = StringIO.new("") - assert_equal expected.read, zero_entry.read ensure close_util_entry(zero_entry) if zero_entry end def test_zero_byte_file_readpartial - zero_entry = util_zero_byte_entry + zero_entry = util_entry(tar_file_header("foo", "", 0, 0, Time.now)) expected = StringIO.new("") - assert_equal expected.readpartial(0), zero_entry.readpartial(0) ensure close_util_entry(zero_entry) if zero_entry end + + def test_read_from_gzip_io + tgz = util_gzip(@tar) + + Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio| + entry = util_entry(gzio) + assert_equal @contents, entry.read + entry.rewind + assert_equal @contents, entry.read, "second read after rewind should read same contents" + end + end + + def test_read_from_gzip_io_with_non_zero_offset + contents2 = ("0".."9").to_a.join * 100 + @tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now) + @tar << tar_file_contents(contents2) + + tgz = util_gzip(@tar) + + Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio| + util_entry(gzio).close # skip the first entry so io.pos is not 0, preventing easy rewind + entry = util_entry(gzio) + + assert_equal contents2, entry.read + entry.rewind + assert_equal contents2, entry.read, "second read after rewind should read same contents" + end + end + + def test_seek_in_gzip_io_with_non_zero_offset + contents2 = ("0".."9").to_a.join * 100 + @tar << tar_file_header("lib/bar", "", 0, contents2.size, Time.now) + @tar << tar_file_contents(contents2) + + tgz = util_gzip(@tar) + + Zlib::GzipReader.wrap StringIO.new(tgz) do |gzio| + util_entry(gzio).close # skip the first entry so io.pos is not 0 + entry = util_entry(gzio) + + entry.seek(50) + assert_equal 50, entry.pos + assert_equal contents2[50..-1], entry.read, "read remaining after seek" + entry.seek(-50, IO::SEEK_CUR) + assert_equal contents2.size - 50, entry.pos + assert_equal contents2[-50..-1], entry.read, "read after stepping back 50 from the end" + entry.seek(0, IO::SEEK_SET) + assert_equal 0, entry.pos + assert_equal contents2, entry.read, "read from beginning" + entry.seek(-10, IO::SEEK_END) + assert_equal contents2.size - 10, entry.pos + assert_equal contents2[-10..-1], entry.read, "read from end" + assert_equal contents2.size, entry.pos + end + end end -- cgit v1.2.3