summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Emde <me@martinemde.com>2024-06-11 12:34:13 -0700
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2024-06-20 15:18:56 +0900
commitaf304ad9529b26d95507071ac1711440b7e4191f (patch)
tree250f120716d91aa6183c1c4752d4a4696f08cb91
parent0e1182f93c29b6bfd43568c80c244c961059942d (diff)
[rubygems/rubygems] Revert to splitting parser due to performance regression
* The string search parser was more memory efficient but in some cases, much slower. Reverting until a better solution is found. * Handle the situation where the line might be blank (Artifactory bug) https://github.com/rubygems/rubygems/commit/222d38737d
-rw-r--r--lib/bundler/compact_index_client/parser.rb73
-rw-r--r--spec/bundler/bundler/compact_index_client/parser_spec.rb178
2 files changed, 101 insertions, 150 deletions
diff --git a/lib/bundler/compact_index_client/parser.rb b/lib/bundler/compact_index_client/parser.rb
index 3a0dec4907..3276abdd68 100644
--- a/lib/bundler/compact_index_client/parser.rb
+++ b/lib/bundler/compact_index_client/parser.rb
@@ -11,7 +11,6 @@ module Bundler
@versions_by_name = nil
@available = nil
@gem_parser = nil
- @versions_data = nil
end
def names
@@ -40,71 +39,45 @@ module Bundler
end
def info(name)
- data = @compact_index.info(name, info_checksum(name))
+ data = @compact_index.info(name, info_checksums[name])
lines(data).map {|line| gem_parser.parse(line).unshift(name) }
end
- # parse the last, most recently updated line of the versions file to determine availability
def available?
return @available unless @available.nil?
- return @available = false unless versions_data&.size&.nonzero?
-
- line_end = versions_data.size - 1
- return @available = false if versions_data[line_end] != "\n"
-
- line_start = versions_data.rindex("\n", line_end - 1)
- line_start ||= -1 # allow a single line versions file
-
- @available = !split_last_word(versions_data, line_start + 1, line_end).nil?
+ @available = !info_checksums.empty?
end
private
- # Search for a line starting with gem name, then return last space-separated word (the checksum)
- def info_checksum(name)
- return unless versions_data
- return unless (line_start = rindex_of_gem(name))
- return unless (line_end = versions_data.index("\n", line_start))
- split_last_word(versions_data, line_start, line_end)
- end
-
- def gem_parser
- @gem_parser ||= GemParser.new
- end
-
- def versions_data
- @versions_data ||= begin
- data = @compact_index.versions
- strip_header!(data) if data
- data.freeze
+ def info_checksums
+ @info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums|
+ parse_version_checksum(line, checksums)
end
end
- def rindex_of_gem(name)
- if (pos = versions_data.rindex("\n#{name} "))
- pos + 1
- elsif versions_data.start_with?("#{name} ")
- 0
- end
+ def lines(data)
+ return [] if data.nil? || data.empty?
+ lines = data.split("\n")
+ header = lines.index("---")
+ header ? lines[header + 1..-1] : lines
end
- # This is similar to `string.split(" ").last` but it avoids allocating extra objects.
- def split_last_word(string, line_start, line_end)
- return unless line_start < line_end && line_start >= 0
- word_start = string.rindex(" ", line_end).to_i + 1
- return if word_start < line_start
- string[word_start, line_end - word_start]
- end
-
- def lines(string)
- return [] if string.nil? || string.empty?
- strip_header!(string)
- string.split("\n")
+ def gem_parser
+ @gem_parser ||= GemParser.new
end
- def strip_header!(string)
- header_end = string.index("---\n")
- string.slice!(0, header_end + 4) if header_end
+ # This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
+ # This method gets called at least once for every gem when parsing versions.
+ def parse_version_checksum(line, checksums)
+ return unless (name_end = line.index(" ")) # Artifactory bug causes blank lines in artifactor index files
+ return unless (checksum_start = line.index(" ", name_end + 1) + 1)
+ checksum_end = line.size - checksum_start
+
+ line.freeze # allows slicing into the string to not allocate a copy of the line
+ name = line[0, name_end]
+ checksum = line[checksum_start, checksum_end]
+ checksums[name.freeze] = checksum # freeze name since it is used as a hash key
end
end
end
diff --git a/spec/bundler/bundler/compact_index_client/parser_spec.rb b/spec/bundler/bundler/compact_index_client/parser_spec.rb
index 45a08fd9ff..1f6b9e593b 100644
--- a/spec/bundler/bundler/compact_index_client/parser_spec.rb
+++ b/spec/bundler/bundler/compact_index_client/parser_spec.rb
@@ -89,11 +89,6 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
compact_index.versions = +""
expect(parser).not_to be_available
end
-
- it "returns false when versions ends improperly without a newline" do
- compact_index.versions = "a 1.0.0 aaa1"
- expect(parser).not_to be_available
- end
end
describe "#names" do
@@ -143,108 +138,78 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
end
describe "#info" do
- it "returns the info for example gem 'a' which has no deps" do
- expect(parser.info("a")).to eq(
+ let(:a_result) do
+ [
[
- [
- "a",
- "1.0.0",
- nil,
- [],
- [
- ["checksum", ["aaa1"]],
- ["ruby", [">= 3.0.0"]],
- ["rubygems", [">= 3.2.3"]],
- ],
- ],
- [
- "a",
- "1.0.1",
- nil,
- [],
- [
- ["checksum", ["aaa2"]],
- ["ruby", [">= 3.0.0"]],
- ["rubygems", [">= 3.2.3"]],
- ],
- ],
- [
- "a",
- "1.1.0",
- nil,
- [],
- [
- ["checksum", ["aaa3"]],
- ["ruby", [">= 3.0.0"]],
- ["rubygems", [">= 3.2.3"]],
- ],
- ],
- ]
- )
+ "a",
+ "1.0.0",
+ nil,
+ [],
+ [["checksum", ["aaa1"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
+ ],
+ [
+ "a",
+ "1.0.1",
+ nil,
+ [],
+ [["checksum", ["aaa2"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
+ ],
+ [
+ "a",
+ "1.1.0",
+ nil,
+ [],
+ [["checksum", ["aaa3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
+ ],
+ ]
+ end
+ let(:b_result) do
+ [
+ [
+ "b",
+ "2.0.0",
+ nil,
+ [["a", ["~> 1.0", "<= 3.0"]]],
+ [["checksum", ["bbb1"]]],
+ ],
+ [
+ "b",
+ "2.0.0",
+ "java",
+ [["a", ["~> 1.0", "<= 3.0"]]],
+ [["checksum", ["bbb2"]]],
+ ],
+ ]
+ end
+ let(:c_result) do
+ [
+ [
+ "c",
+ "3.0.0",
+ nil,
+ [["a", ["= 1.0.0"]], ["b", ["~> 2.0"]]],
+ [["checksum", ["ccc1"]], ["ruby", [">= 2.7.0"]], ["rubygems", [">= 3.0.0"]]],
+ ],
+ [
+ "c",
+ "3.3.3",
+ nil,
+ [["a", [">= 1.1.0"]], ["b", ["~> 2.0"]]],
+ [["checksum", ["ccc3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
+ ],
+ ]
+ end
+
+ it "returns the info for example gem 'a' which has no deps" do
+ expect(parser.info("a")).to eq(a_result)
end
it "returns the info for example gem 'b' which has platform and compound deps" do
- expect(parser.info("b")).to eq(
- [
- [
- "b",
- "2.0.0",
- nil,
- [
- ["a", ["~> 1.0", "<= 3.0"]],
- ],
- [
- ["checksum", ["bbb1"]],
- ],
- ],
- [
- "b",
- "2.0.0",
- "java",
- [
- ["a", ["~> 1.0", "<= 3.0"]],
- ],
- [
- ["checksum", ["bbb2"]],
- ],
- ],
- ]
- )
+ expect(parser.info("b")).to eq(b_result)
end
it "returns the info for example gem 'c' which has deps and yanked version (requires use of correct info checksum)" do
- expect(parser.info("c")).to eq(
- [
- [
- "c",
- "3.0.0",
- nil,
- [
- ["a", ["= 1.0.0"]],
- ["b", ["~> 2.0"]],
- ],
- [
- ["checksum", ["ccc1"]],
- ["ruby", [">= 2.7.0"]],
- ["rubygems", [">= 3.0.0"]],
- ],
- ],
- [
- "c",
- "3.3.3",
- nil,
- [
- ["a", [">= 1.1.0"]],
- ["b", ["~> 2.0"]],
- ],
- [
- ["checksum", ["ccc3"]],
- ["ruby", [">= 3.0.0"]],
- ["rubygems", [">= 3.2.3"]],
- ],
- ],
- ]
- )
+ expect(parser.info("c")).to eq(c_result)
end
it "returns an empty array when the info is empty" do
@@ -255,5 +220,18 @@ RSpec.describe Bundler::CompactIndexClient::Parser do
it "returns an empty array when the info is not readable" do
expect(parser.info("d")).to eq([])
end
+
+ it "handles empty lines in the versions file (Artifactory bug that they have yet to fix)" do
+ compact_index.versions = +<<~VERSIONS
+ created_at: 2024-05-01T00:00:04Z
+ ---
+ a 1.0.0,1.0.1,1.1.0 aaa111
+ b 2.0.0,2.0.0-java bbb222
+
+ c 3.0.0,3.0.3,3.3.3 ccc333
+ c -3.0.3 ccc333yanked
+ VERSIONS
+ expect(parser.info("a")).to eq(a_result)
+ end
end
end