diff options
Diffstat (limited to 'lib/bundler/compact_index_client')
| -rw-r--r-- | lib/bundler/compact_index_client/cache.rb | 96 | ||||
| -rw-r--r-- | lib/bundler/compact_index_client/cache_file.rb | 148 | ||||
| -rw-r--r-- | lib/bundler/compact_index_client/parser.rb | 87 | ||||
| -rw-r--r-- | lib/bundler/compact_index_client/updater.rb | 105 |
4 files changed, 436 insertions, 0 deletions
diff --git a/lib/bundler/compact_index_client/cache.rb b/lib/bundler/compact_index_client/cache.rb new file mode 100644 index 0000000000..3bae6c9efd --- /dev/null +++ b/lib/bundler/compact_index_client/cache.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +require "rubygems/resolver/api_set/gem_parser" + +module Bundler + class CompactIndexClient + class Cache + attr_reader :directory + + def initialize(directory, fetcher = nil) + @directory = Pathname.new(directory).expand_path + @updater = Updater.new(fetcher) if fetcher + @mutex = Thread::Mutex.new + @endpoints = Set.new + + @info_root = mkdir("info") + @special_characters_info_root = mkdir("info-special-characters") + @info_etag_root = mkdir("info-etags") + end + + def names + fetch("names", names_path, names_etag_path) + end + + def versions + fetch("versions", versions_path, versions_etag_path) + end + + def info(name, remote_checksum = nil) + path = info_path(name) + + if remote_checksum && remote_checksum != SharedHelpers.checksum_for_file(path, :MD5) + fetch("info/#{name}", path, info_etag_path(name)) + else + Bundler::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? "versions index checksum is nil" : "versions index checksum matches local"})" } + read(path) + end + end + + def reset! + @mutex.synchronize { @endpoints.clear } + end + + private + + def names_path = directory.join("names") + def names_etag_path = directory.join("names.etag") + def versions_path = directory.join("versions") + def versions_etag_path = directory.join("versions.etag") + + def info_path(name) + name = name.to_s + # TODO: converge this into the info_root by hashing all filenames like info_etag_path + if /[^a-z0-9_-]/.match?(name) + name += "-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}" + @special_characters_info_root.join(name) + else + @info_root.join(name) + end + end + + def info_etag_path(name) + name = name.to_s + @info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}") + end + + def mkdir(name) + directory.join(name).tap do |dir| + SharedHelpers.filesystem_access(dir) do + FileUtils.mkdir_p(dir) + end + end + end + + def fetch(remote_path, path, etag_path) + if already_fetched?(remote_path) + Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" } + else + Bundler::CompactIndexClient.debug { "fetching #{remote_path}" } + @updater&.update(remote_path, path, etag_path) + end + + read(path) + end + + def already_fetched?(remote_path) + @mutex.synchronize { !@endpoints.add?(remote_path) } + end + + def read(path) + return unless path.file? + SharedHelpers.filesystem_access(path, :read, &:read) + end + end + end +end diff --git a/lib/bundler/compact_index_client/cache_file.rb b/lib/bundler/compact_index_client/cache_file.rb new file mode 100644 index 0000000000..299d683438 --- /dev/null +++ b/lib/bundler/compact_index_client/cache_file.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +require_relative "../vendored_fileutils" +require "rubygems/package" + +module Bundler + class CompactIndexClient + # write cache files in a way that is robust to concurrent modifications + # if digests are given, the checksums will be verified + class CacheFile + DEFAULT_FILE_MODE = 0o644 + private_constant :DEFAULT_FILE_MODE + + class Error < RuntimeError; end + class ClosedError < Error; end + + class DigestMismatchError < Error + def initialize(digests, expected_digests) + super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}." + end + end + + # Initialize with a copy of the original file, then yield the instance. + def self.copy(path, &block) + new(path) do |file| + file.initialize_digests + + SharedHelpers.filesystem_access(path, :read) do + path.open("rb") do |s| + file.open {|f| IO.copy_stream(s, f) } + end + end + + yield file + end + end + + # Write data to a temp file, then replace the original file with it verifying the digests if given. + def self.write(path, data, digests = nil) + return unless data + new(path) do |file| + file.digests = digests + file.write(data) + end + end + + attr_reader :original_path, :path + + def initialize(original_path, &block) + @original_path = original_path + @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE + @path = original_path.sub(/$/, ".#{$$}.tmp") + return unless block_given? + begin + yield self + ensure + close + end + end + + def size + path.size + end + + # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys. + def initialize_digests(keys = nil) + @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup + @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new } + end + + # reset the digests so they don't contain any previously read data + def reset_digests + @digests&.each_value(&:reset) + end + + # set the digests that will be verified at the end + def digests=(expected_digests) + @expected_digests = expected_digests + + if @expected_digests.nil? + @digests = nil + elsif @digests + @digests = @digests.slice(*@expected_digests.keys) + else + initialize_digests(@expected_digests.keys) + end + end + + def digests? + @digests&.any? + end + + # Open the temp file for writing, reusing original permissions, yielding the IO object. + def open(write_mode = "wb", perm = @perm, &block) + raise ClosedError, "Cannot reopen closed file" if @closed + SharedHelpers.filesystem_access(path, :write) do + path.open(write_mode, perm) do |f| + yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f + end + end + end + + # Returns false without appending when no digests since appending is too error prone to do without digests. + def append(data) + return false unless digests? + open("a") {|f| f.write data } + verify && commit + end + + def write(data) + reset_digests + open {|f| f.write data } + commit! + end + + def commit! + verify || raise(DigestMismatchError.new(@base64digests, @expected_digests)) + commit + end + + # Verify the digests, returning true on match, false on mismatch. + def verify + return true unless @expected_digests && digests? + @base64digests = @digests.transform_values!(&:base64digest) + @digests = nil + @base64digests.all? {|algo, digest| @expected_digests[algo] == digest } + end + + # Replace the original file with the temp file without verifying digests. + # The file is permanently closed. + def commit + raise ClosedError, "Cannot commit closed file" if @closed + SharedHelpers.filesystem_access(original_path, :write) do + FileUtils.mv(path, original_path) + end + @closed = true + end + + # Remove the temp file without replacing the original file. + # The file is permanently closed. + def close + return if @closed + FileUtils.remove_file(path) if @path&.file? + @closed = true + end + end + end +end diff --git a/lib/bundler/compact_index_client/parser.rb b/lib/bundler/compact_index_client/parser.rb new file mode 100644 index 0000000000..ad0d17ed4a --- /dev/null +++ b/lib/bundler/compact_index_client/parser.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +module Bundler + class CompactIndexClient + class Parser + # `compact_index` - an object responding to #names, #versions, #info(name, checksum), + # returning the file contents as a string + def initialize(compact_index) + @compact_index = compact_index + @info_checksums = nil + @versions_by_name = nil + @available = nil + @gem_parser = nil + end + + def names + lines(@compact_index.names) + end + + def versions + @versions_by_name ||= Hash.new {|hash, key| hash[key] = [] } + @info_checksums = {} + + lines(@compact_index.versions).each do |line| + name, versions_string, checksum = line.split(" ", 3) + @info_checksums[name] = checksum || "" + versions_string.split(",") do |version| + delete = version.delete_prefix!("-") + version = version.split("-", 2).unshift(name) + if delete + @versions_by_name[name].delete(version) + else + @versions_by_name[name] << version + end + end + end + + @versions_by_name + end + + def info(name) + data = @compact_index.info(name, info_checksums[name]) + lines(data).map {|line| gem_parser.parse(line).unshift(name) } + end + + def available? + return @available unless @available.nil? + @available = !info_checksums.empty? + end + + private + + def info_checksums + @info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums| + parse_version_checksum(line, checksums) + end + end + + def lines(data) + return [] if data.nil? || data.empty? + lines = data.split("\n") + header = lines.index("---") + header ? lines[header + 1..-1] : lines + end + + def gem_parser + @gem_parser ||= Gem::Resolver::APISet::GemParser.new + end + + # This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects. + # This method gets called at least once for every gem when parsing versions. + def parse_version_checksum(line, checksums) + return unless (name_end = line.index(" ")) # Artifactory bug causes blank lines in artifactor index files + checksum_start = line.index(" ", name_end + 1) + return unless checksum_start + checksum_start += 1 + + checksum_end = line.size - checksum_start + + line.freeze # allows slicing into the string to not allocate a copy of the line + name = line[0, name_end] + checksum = line[checksum_start, checksum_end] + checksums[name.freeze] = checksum # freeze name since it is used as a hash key + end + end + end +end diff --git a/lib/bundler/compact_index_client/updater.rb b/lib/bundler/compact_index_client/updater.rb new file mode 100644 index 0000000000..6066fdc7c4 --- /dev/null +++ b/lib/bundler/compact_index_client/updater.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +module Bundler + class CompactIndexClient + class Updater + class MismatchedChecksumError < Error + def initialize(path, message) + super "The checksum of /#{path} does not match the checksum provided by the server! Something is wrong. #{message}" + end + end + + def initialize(fetcher) + @fetcher = fetcher + end + + def update(remote_path, local_path, etag_path) + append(remote_path, local_path, etag_path) || replace(remote_path, local_path, etag_path) + rescue CacheFile::DigestMismatchError => e + raise MismatchedChecksumError.new(remote_path, e.message) + rescue Zlib::GzipFile::Error + raise Bundler::HTTPError + end + + private + + def append(remote_path, local_path, etag_path) + return false unless local_path.file? && local_path.size.nonzero? + + CacheFile.copy(local_path) do |file| + etag = etag_path.read.tap(&:chomp!) if etag_path.file? + + # Subtract a byte to ensure the range won't be empty. + # Avoids 416 (Range Not Satisfiable) responses. + response = @fetcher.call(remote_path, request_headers(etag, file.size - 1)) + break true if response.is_a?(Gem::Net::HTTPNotModified) + + file.digests = parse_digests(response) + # server may ignore Range and return the full response + if response.is_a?(Gem::Net::HTTPPartialContent) + tail = response.body.byteslice(1..-1) + break false unless tail && file.append(tail) + else + file.write(response.body) + end + CacheFile.write(etag_path, etag_from_response(response)) + true + end + end + + # request without range header to get the full file or a 304 Not Modified + def replace(remote_path, local_path, etag_path) + etag = etag_path.read.tap(&:chomp!) if etag_path.file? + response = @fetcher.call(remote_path, request_headers(etag)) + return true if response.is_a?(Gem::Net::HTTPNotModified) + CacheFile.write(local_path, response.body, parse_digests(response)) + CacheFile.write(etag_path, etag_from_response(response)) + end + + def request_headers(etag, range_start = nil) + headers = {} + headers["Range"] = "bytes=#{range_start}-" if range_start + headers["If-None-Match"] = %("#{etag}") if etag + headers + end + + def etag_for_request(etag_path) + etag_path.read.tap(&:chomp!) if etag_path.file? + end + + def etag_from_response(response) + return unless response["ETag"] + etag = response["ETag"].delete_prefix("W/") + return if etag.delete_prefix!('"') && !etag.delete_suffix!('"') + etag + end + + # Unwraps and returns a Hash of digest algorithms and base64 values + # according to RFC 8941 Structured Field Values for HTTP. + # https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence + # Ignores unsupported algorithms. + def parse_digests(response) + return unless header = response["Repr-Digest"] || response["Digest"] + digests = {} + header.split(",") do |param| + algorithm, value = param.split("=", 2) + algorithm.strip! + algorithm.downcase! + next unless SUPPORTED_DIGESTS.key?(algorithm) + next unless value = byte_sequence(value) + digests[algorithm] = value + end + digests.empty? ? nil : digests + end + + # Unwrap surrounding colons (byte sequence) + # The wrapping characters must be matched or we return nil. + # Also handles quotes because right now rubygems.org sends them. + def byte_sequence(value) + return if value.delete_prefix!(":") && !value.delete_suffix!(":") + return if value.delete_prefix!('"') && !value.delete_suffix!('"') + value + end + end + end +end |
