summary | refs | log | tree | commit | diff
path: root/lib/bundler/compact_index_client
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bundler/compact_index_client')
-rw-r--r-- lib/bundler/compact_index_client/cache.rb 96
-rw-r--r-- lib/bundler/compact_index_client/cache_file.rb 148
-rw-r--r-- lib/bundler/compact_index_client/parser.rb 87
-rw-r--r-- lib/bundler/compact_index_client/updater.rb 105
4 files changed, 436 insertions, 0 deletions
diff --git a/lib/bundler/compact_index_client/cache.rb b/lib/bundler/compact_index_client/cache.rb
new file mode 100644
index 0000000000..3bae6c9efd
--- /dev/null
+++ b/lib/bundler/compact_index_client/cache.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+require "rubygems/resolver/api_set/gem_parser"
+
+module Bundler
+ class CompactIndexClient
+ class Cache
+ attr_reader :directory
+
+ # Set up a cache rooted at +directory+ (expanded to an absolute path).
+ # An Updater wrapping +fetcher+ is created only when a fetcher is given;
+ # without one #fetch just reads whatever is already on disk.
+ def initialize(directory, fetcher = nil)
+   @directory = Pathname.new(directory).expand_path
+   @mutex = Thread::Mutex.new
+   @endpoints = Set.new # remote paths already seen by #already_fetched?
+   @updater = fetcher ? Updater.new(fetcher) : nil
+
+   # mkdir creates each subdirectory and returns its path
+   @info_root, @special_characters_info_root, @info_etag_root =
+     %w[info info-special-characters info-etags].map {|subdir| mkdir(subdir) }
+ end
+
+ # Contents of the local "names" file after #fetch had a chance to refresh
+ # it; nil when the file does not exist.
+ def names
+   local_path = names_path
+   fetch("names", local_path, names_etag_path)
+ end
+
+ # Contents of the local "versions" file after #fetch had a chance to
+ # refresh it; nil when the file does not exist.
+ def versions
+   local_path = versions_path
+   fetch("versions", local_path, versions_etag_path)
+ end
+
+ # Returns the cached contents of info/<name>, refreshing the file first
+ # when the versions index says it is stale.
+ #
+ # +remote_checksum+ is the MD5 checksum of the info file as listed in the
+ # versions index. The file is re-fetched only when a checksum is given and
+ # it differs from the checksum of the local copy; otherwise the local copy
+ # is returned as-is (nil when it does not exist).
+ def info(name, remote_checksum = nil)
+   path = info_path(name)
+
+   if remote_checksum && remote_checksum != SharedHelpers.checksum_for_file(path, :MD5)
+     fetch("info/#{name}", path, info_etag_path(name))
+   else
+     Bundler::CompactIndexClient.debug do
+       # We only get here without a checksum, or with one that equals the local one.
+       reason = remote_checksum ? "versions index checksum matches local" : "versions index checksum is nil"
+       "update skipped info/#{name} (#{reason})"
+     end
+     read(path)
+   end
+ end
+
+ # Forget which remote paths were already fetched, so the next #fetch of
+ # each path goes through the updater again.
+ def reset!
+   @mutex.synchronize do
+     @endpoints.clear
+   end
+ end
+
+ private
+
+ # Locations of the top-level index files and of the files holding their
+ # etags, all directly inside #directory.
+ def names_path
+   directory.join("names")
+ end
+
+ def names_etag_path
+   directory.join("names.etag")
+ end
+
+ def versions_path
+   directory.join("versions")
+ end
+
+ def versions_etag_path
+   directory.join("versions.etag")
+ end
+
+ # Path of the cached info file for +name+.
+ # Names made only of a-z, 0-9, "_" and "-" live directly under the info
+ # root; any other name goes to the special-characters root with the MD5
+ # hex digest of the name appended.
+ def info_path(name)
+   gem_name = name.to_s
+   return @info_root.join(gem_name) unless /[^a-z0-9_-]/.match?(gem_name)
+
+   # TODO: converge this into the info_root by hashing all filenames like info_etag_path
+   md5 = SharedHelpers.digest(:MD5).hexdigest(gem_name).downcase
+   @special_characters_info_root.join("#{gem_name}-#{md5}")
+ end
+
+ # Path of the etag file for +name+: the name followed by "-" and the MD5
+ # hex digest of the name, under the info-etags root.
+ def info_etag_path(name)
+   gem_name = name.to_s
+   md5 = SharedHelpers.digest(:MD5).hexdigest(gem_name).downcase
+   @info_etag_root.join("#{gem_name}-#{md5}")
+ end
+
+ # Create the subdirectory +name+ of #directory (including missing parents)
+ # inside SharedHelpers.filesystem_access and return its path.
+ def mkdir(name)
+   subdir = directory.join(name)
+   SharedHelpers.filesystem_access(subdir) { FileUtils.mkdir_p(subdir) }
+   subdir
+ end
+
+ # Refresh +path+ from +remote_path+ at most once (until #reset!), then
+ # return the local file contents.
+ # Nothing is downloaded when the cache was built without a fetcher.
+ def fetch(remote_path, path, etag_path)
+   seen = already_fetched?(remote_path)
+   Bundler::CompactIndexClient.debug { seen ? "already fetched #{remote_path}" : "fetching #{remote_path}" }
+   @updater&.update(remote_path, path, etag_path) unless seen
+
+   read(path)
+ end
+
+ # Record +remote_path+ as fetched. Returns true when it had been recorded
+ # before, false when this call recorded it for the first time.
+ def already_fetched?(remote_path)
+   @mutex.synchronize do
+     newly_added = @endpoints.add?(remote_path) # nil when already present
+     newly_added.nil?
+   end
+ end
+
+ # Contents of +path+, read inside SharedHelpers.filesystem_access;
+ # nil when +path+ is not an existing regular file.
+ def read(path)
+   SharedHelpers.filesystem_access(path, :read) {|file| file.read } if path.file?
+ end
+ end
+ end
+end
diff --git a/lib/bundler/compact_index_client/cache_file.rb b/lib/bundler/compact_index_client/cache_file.rb
new file mode 100644
index 0000000000..299d683438
--- /dev/null
+++ b/lib/bundler/compact_index_client/cache_file.rb
@@ -0,0 +1,148 @@
+# frozen_string_literal: true
+
+require_relative "../vendored_fileutils"
+require "rubygems/package"
+
+module Bundler
+ class CompactIndexClient
+ # write cache files in a way that is robust to concurrent modifications
+ # if digests are given, the checksums will be verified
+ class CacheFile
+ DEFAULT_FILE_MODE = 0o644
+ private_constant :DEFAULT_FILE_MODE
+
+ class Error < RuntimeError; end
+ class ClosedError < Error; end
+
+ # Raised when the checksums calculated for the written data differ from
+ # the expected ones; both hashes are included in the message.
+ class DigestMismatchError < Error
+   def initialize(digests, expected_digests)
+     message = "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
+     super(message)
+   end
+ end
+
+ # Create a CacheFile for +path+, fill its temp file with a copy of the
+ # current contents of +path+, then yield the instance.
+ # Digests are initialized before copying so that #open can feed the copied
+ # bytes through them. The temp file is cleaned up by #close when the block
+ # finishes without a commit.
+ def self.copy(path, &block)
+   new(path) do |cache_file|
+     cache_file.initialize_digests
+
+     SharedHelpers.filesystem_access(path, :read) do
+       path.open("rb") {|source| cache_file.open {|dest| IO.copy_stream(source, dest) } }
+     end
+
+     yield cache_file
+   end
+ end
+
+ # Write +data+ to a temp file and replace +path+ with it.
+ # When +digests+ is given the written data is verified against it first
+ # (see #write / #commit!). Does nothing and returns nil when +data+ is nil.
+ def self.write(path, data, digests = nil)
+   return unless data
+
+   new(path) do |cache_file|
+     cache_file.digests = digests
+     cache_file.write(data)
+   end
+ end
+
+ attr_reader :original_path, :path
+
+ # +original_path+ is the file to be replaced. The temp file sits next to
+ # it, named "<original>.<pid>.tmp", and reuses the original's mode when the
+ # original exists (DEFAULT_FILE_MODE otherwise).
+ # With a block, yields self and always calls #close afterwards.
+ def initialize(original_path, &block)
+   @original_path = original_path
+   @path = original_path.sub(/$/, ".#{Process.pid}.tmp")
+   @perm = if original_path.file?
+     original_path.stat.mode
+   else
+     DEFAULT_FILE_MODE
+   end
+
+   if block_given?
+     begin
+       yield self
+     ensure
+       close # removes the temp file unless it was already committed
+     end
+   end
+ end
+
+ # Size of the temp file, as reported by its path object.
+ def size
+   temp_file = path
+   temp_file.size
+ end
+
+ # Build fresh digest objects for every algorithm in
+ # CompactIndexClient::SUPPORTED_DIGESTS, or only for those listed in +keys+.
+ # Returns the new Hash of algorithm name => digest instance.
+ def initialize_digests(keys = nil)
+   @digests = if keys
+     SUPPORTED_DIGESTS.slice(*keys)
+   else
+     SUPPORTED_DIGESTS.dup
+   end
+   @digests.transform_values! do |algo_class|
+     SharedHelpers.digest(algo_class).new
+   end
+ end
+
+ # Reset every digest so none of them contains previously written data.
+ # No-op when no digests are set up.
+ def reset_digests
+   return if @digests.nil?
+
+   @digests.each_value {|digest| digest.reset }
+ end
+
+ # Set the digests that will be verified at the end.
+ # nil disables digest tracking. Otherwise the tracked digests are narrowed
+ # to the expected algorithms, or created for them when none exist yet.
+ def digests=(expected_digests)
+   @expected_digests = expected_digests
+   return @digests = nil if expected_digests.nil?
+
+   algorithms = expected_digests.keys
+   if @digests
+     @digests = @digests.slice(*algorithms)
+   else
+     initialize_digests(algorithms)
+   end
+ end
+
+ # True when at least one digest is being tracked; nil when digests are
+ # not set up at all.
+ def digests?
+   @digests.nil? ? nil : @digests.any?
+ end
+
+ # Open the temp file for writing (binary write mode and the original
+ # file's permissions by default) and yield the IO object.
+ # When digests are tracked the IO is wrapped in Gem::Package::DigestIO so
+ # written data also updates the digests.
+ # Raises ClosedError once the file has been committed or closed.
+ def open(write_mode = "wb", perm = @perm, &block)
+   raise ClosedError, "Cannot reopen closed file" if @closed
+
+   SharedHelpers.filesystem_access(path, :write) do
+     path.open(write_mode, perm) do |io|
+       target = digests? ? Gem::Package::DigestIO.new(io, @digests) : io
+       yield target
+     end
+   end
+ end
+
+ # Append +data+ to the temp file, then verify the digests and commit.
+ # Returns false without appending when no digests are tracked, since
+ # appending is too error prone to do without digests. Also returns false
+ # (leaving the original file untouched) when verification fails.
+ def append(data)
+   return false unless digests?
+   # Binary append, matching the "wb" default of #open: text mode would
+   # translate line endings on Windows, so the bytes on disk would no longer
+   # be the bytes that were digested.
+   open("ab") {|f| f.write data }
+   verify && commit
+ end
+
+ # Overwrite the temp file with +data+ and commit it.
+ # Digests are reset first so they cover only +data+; #commit! raises
+ # DigestMismatchError when they do not match the expected ones.
+ def write(data)
+   reset_digests
+   open do |io|
+     io.write(data)
+   end
+   commit!
+ end
+
+ # Verify the digests and replace the original file with the temp file.
+ # Raises DigestMismatchError when verification fails.
+ def commit!
+   raise DigestMismatchError.new(@base64digests, @expected_digests) unless verify
+
+   commit
+ end
+
+ # Verify the digests, returning true on match, false on mismatch.
+ # Also returns true when there is nothing to verify (no expected digests,
+ # or no tracked digests). Verifying consumes the tracked digests: their
+ # base64 values are kept in @base64digests and @digests is cleared.
+ def verify
+   return true if !@expected_digests || !digests?
+
+   computed = @digests.transform_values!(&:base64digest)
+   @base64digests = computed
+   @digests = nil
+   computed.all? {|algo, digest| @expected_digests[algo] == digest }
+ end
+
+ # Move the temp file over the original file without verifying digests.
+ # The file is permanently closed afterwards; committing a closed file
+ # raises ClosedError.
+ def commit
+   raise ClosedError, "Cannot commit closed file" if @closed
+
+   SharedHelpers.filesystem_access(original_path, :write) { FileUtils.mv(path, original_path) }
+   @closed = true
+ end
+
+ # Delete the temp file (if it exists) without touching the original file.
+ # The file is permanently closed afterwards. Returns nil when it was
+ # already closed or committed.
+ def close
+   unless @closed
+     FileUtils.remove_file(path) if @path&.file?
+     @closed = true
+   end
+ end
+ end
+ end
+end
diff --git a/lib/bundler/compact_index_client/parser.rb b/lib/bundler/compact_index_client/parser.rb
new file mode 100644
index 0000000000..ad0d17ed4a
--- /dev/null
+++ b/lib/bundler/compact_index_client/parser.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+module Bundler
+ class CompactIndexClient
+ class Parser
+ # `compact_index` - an object responding to #names, #versions and
+ # #info(name, checksum), each returning the file contents as a string.
+ # All parsed state starts out empty and is filled in lazily.
+ def initialize(compact_index)
+   @compact_index = compact_index
+   @info_checksums = @versions_by_name = @available = @gem_parser = nil
+ end
+
+ # The entries of the names index, one per line, without the header.
+ def names
+   raw = @compact_index.names
+   lines(raw)
+ end
+
+ # Parse the versions index.
+ #
+ # Returns a Hash of gem name => array of entries, each entry being
+ # [name, version] or [name, version, rest] where +rest+ is whatever follows
+ # the first "-" of the version token (presumably the platform). A token
+ # prefixed with "-" removes the matching entry instead of adding one.
+ # As a side effect @info_checksums is rebuilt from the third column
+ # ("" when a line has no checksum).
+ #
+ # NOTE(review): entries are appended to the memoized hash on every call, so
+ # calling this twice on one instance duplicates them.
+ def versions
+   @versions_by_name ||= Hash.new {|hash, key| hash[key] = [] }
+   @info_checksums = {}
+
+   lines(@compact_index.versions).each do |line|
+     name, versions_string, checksum = line.split(" ", 3)
+     # Blank or truncated lines carry no versions; skip them instead of
+     # failing on nil (parse_version_checksum tolerates such lines too).
+     next unless versions_string
+
+     @info_checksums[name] = checksum || ""
+     versions_string.split(",") do |version|
+       delete = version.delete_prefix!("-")
+       version = version.split("-", 2).unshift(name)
+       if delete
+         @versions_by_name[name].delete(version)
+       else
+         @versions_by_name[name] << version
+       end
+     end
+   end
+
+   @versions_by_name
+ end
+
+ # Parse the info file of +name+: one gem_parser result per line, each with
+ # +name+ prepended. The checksum known for +name+ is handed to the compact
+ # index along with the name.
+ def info(name)
+   checksum = info_checksums[name]
+   data = @compact_index.info(name, checksum)
+   lines(data).map do |line|
+     gem_parser.parse(line).unshift(name)
+   end
+ end
+
+ # True when the versions index yielded at least one checksum.
+ # The answer is computed once and then remembered.
+ def available?
+   @available = !info_checksums.empty? if @available.nil?
+   @available
+ end
+
+ private
+
+ # Hash of gem name => info checksum taken from the versions index,
+ # built on first use and memoized.
+ def info_checksums
+   return @info_checksums if @info_checksums
+
+   checksums = {}
+   lines(@compact_index.versions).each {|line| parse_version_checksum(line, checksums) }
+   @info_checksums = checksums
+ end
+
+ # Split +data+ into lines, dropping everything up to and including the
+ # first "---" separator line when there is one.
+ # Returns [] for nil or empty data.
+ def lines(data)
+   return [] if data.nil? || data.empty?
+
+   rows = data.split("\n")
+   separator_at = rows.index("---")
+   return rows unless separator_at
+
+   rows.drop(separator_at + 1)
+ end
+
+ # Lazily created, shared Gem::Resolver::APISet::GemParser instance.
+ def gem_parser
+   return @gem_parser if @gem_parser
+
+   @gem_parser = Gem::Resolver::APISet::GemParser.new
+ end
+
+ # Store the checksum found on a versions index +line+ in +checksums+,
+ # keyed by gem name. Lines without both a second and a third column are
+ # ignored.
+ #
+ # This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
+ # This method gets called at least once for every gem when parsing versions.
+ # Note that +line+ is frozen as a side effect.
+ def parse_version_checksum(line, checksums)
+   name_end = line.index(" ")
+   return unless name_end # Artifactory bug causes blank lines in Artifactory index files
+
+   versions_end = line.index(" ", name_end + 1)
+   return unless versions_end
+
+   line.freeze # allows slicing into the string to not allocate a copy of the line
+   checksum_start = versions_end + 1
+   name = line[0, name_end].freeze # frozen since it is used as a hash key
+   checksums[name] = line[checksum_start, line.size - checksum_start]
+ end
+ end
+ end
+end
diff --git a/lib/bundler/compact_index_client/updater.rb b/lib/bundler/compact_index_client/updater.rb
new file mode 100644
index 0000000000..6066fdc7c4
--- /dev/null
+++ b/lib/bundler/compact_index_client/updater.rb
@@ -0,0 +1,105 @@
+# frozen_string_literal: true
+
+module Bundler
+ class CompactIndexClient
+ class Updater
+ # Raised by #update when the downloaded data does not match the checksum
+ # sent by the server. +message+ is appended to the standard explanation.
+ class MismatchedChecksumError < Error
+   def initialize(path, message)
+     explanation = "The checksum of /#{path} does not match the checksum provided by the server! Something is wrong. #{message}"
+     super(explanation)
+   end
+ end
+
+ # +fetcher+ must respond to #call(remote_path, headers) and return the
+ # HTTP response for that request.
+ def initialize(fetcher)
+   @fetcher = fetcher
+ end
+
+ # Bring +local_path+ up to date with +remote_path+: first try to append
+ # the missing tail, and fall back to replacing the whole file when that
+ # does not succeed.
+ # Raises MismatchedChecksumError on a digest mismatch and Bundler::HTTPError
+ # when a gzip error surfaces.
+ def update(remote_path, local_path, etag_path)
+   appended = append(remote_path, local_path, etag_path)
+   appended || replace(remote_path, local_path, etag_path)
+ rescue CacheFile::DigestMismatchError => error
+   raise MismatchedChecksumError.new(remote_path, error.message)
+ rescue Zlib::GzipFile::Error
+   raise Bundler::HTTPError
+ end
+
+ private
+
+ # Try to update +local_path+ with a ranged request that only downloads
+ # what was added since the last fetch.
+ # Returns false when there is no non-empty local file to append to, or
+ # when the partial response could not be appended and verified; truthy
+ # when the local file is up to date afterwards.
+ def append(remote_path, local_path, etag_path)
+   return false unless local_path.file? && local_path.size.nonzero?
+
+   CacheFile.copy(local_path) do |file|
+     etag = etag_for_request(etag_path)
+
+     # Subtract a byte to ensure the range won't be empty.
+     # Avoids 416 (Range Not Satisfiable) responses.
+     headers = request_headers(etag, file.size - 1)
+     response = @fetcher.call(remote_path, headers)
+     break true if response.is_a?(Gem::Net::HTTPNotModified)
+
+     file.digests = parse_digests(response)
+     if response.is_a?(Gem::Net::HTTPPartialContent)
+       # the first byte of the range is the last byte we already have
+       tail = response.body.byteslice(1..-1)
+       break false unless tail && file.append(tail)
+     else
+       # server may ignore Range and return the full response
+       file.write(response.body)
+     end
+     CacheFile.write(etag_path, etag_from_response(response))
+     true
+   end
+ end
+
+ # Request without a Range header to get the full file or a 304 Not Modified.
+ # On a full response both the local file (verified against the response
+ # digests, if any) and the stored etag are rewritten.
+ def replace(remote_path, local_path, etag_path)
+   etag = etag_for_request(etag_path)
+   response = @fetcher.call(remote_path, request_headers(etag))
+   return true if response.is_a?(Gem::Net::HTTPNotModified)
+
+   CacheFile.write(local_path, response.body, parse_digests(response))
+   CacheFile.write(etag_path, etag_from_response(response))
+ end
+
+ # Build the request headers: an open-ended Range starting at +range_start+
+ # and a quoted If-None-Match for +etag+, each only when given.
+ def request_headers(etag, range_start = nil)
+   {}.tap do |headers|
+     headers["Range"] = "bytes=#{range_start}-" if range_start
+     headers["If-None-Match"] = %("#{etag}") if etag
+   end
+ end
+
+ # The etag stored at +etag_path+ with its trailing line break removed,
+ # or nil when no such file exists.
+ def etag_for_request(etag_path)
+   return unless etag_path.file?
+
+   etag = etag_path.read
+   etag.chomp!
+   etag
+ end
+
+ # Extract the bare etag from the response's ETag header: a leading "W/"
+ # and the surrounding double quotes are removed.
+ # Returns nil when there is no header or an opening quote is never closed.
+ def etag_from_response(response)
+   header = response["ETag"]
+   return unless header
+
+   etag = header.delete_prefix("W/")
+   if etag.delete_prefix!('"')
+     # an opening quote must be balanced by a closing one
+     return unless etag.delete_suffix!('"')
+   end
+   etag
+ end
+
+ # Unwraps and returns a Hash of digest algorithms and base64 values
+ # according to RFC 8941 Structured Field Values for HTTP.
+ # https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence
+ #
+ # Reads the Repr-Digest header, falling back to Digest. Algorithm names
+ # are stripped and downcased. Unsupported algorithms, empty list members,
+ # members without a value and values with unbalanced wrapping are ignored.
+ # Returns nil when there is no header or nothing usable in it.
+ def parse_digests(response)
+   return unless header = response["Repr-Digest"] || response["Digest"]
+   digests = {}
+   header.split(",") do |param|
+     algorithm, value = param.split("=", 2)
+     # "a,,b" yields an empty member and "md5" has no value; both would
+     # otherwise blow up with NoMethodError on nil below.
+     next unless algorithm && value
+     algorithm.strip!
+     algorithm.downcase!
+     next unless SUPPORTED_DIGESTS.key?(algorithm)
+     next unless value = byte_sequence(value)
+     digests[algorithm] = value
+   end
+   digests.empty? ? nil : digests
+ end
+
+ # Unwrap surrounding colons (byte sequence).
+ # The wrapping characters must be matched or we return nil.
+ # Also handles quotes because right now rubygems.org sends them.
+ # +value+ is modified in place and returned.
+ def byte_sequence(value)
+   [":", '"'].each do |wrapper|
+     next unless value.delete_prefix!(wrapper)
+     return unless value.delete_suffix!(wrapper)
+   end
+   value
+ end
+ end
+ end
+end