diff options
| author | Martin Emde <me@martinemde.com> | 2024-06-11 12:34:13 -0700 |
|---|---|---|
| committer | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2024-06-20 15:18:56 +0900 |
| commit | af304ad9529b26d95507071ac1711440b7e4191f (patch) | |
| tree | 250f120716d91aa6183c1c4752d4a4696f08cb91 | |
| parent | 0e1182f93c29b6bfd43568c80c244c961059942d (diff) | |
[rubygems/rubygems] Revert to splitting parser due to performance regression
* The string search parser was more memory efficient but
in some cases, much slower. Reverting until a better
solution is found.
* Handle the situation where the line might be blank (Artifactory bug)
https://github.com/rubygems/rubygems/commit/222d38737d
| -rw-r--r-- | lib/bundler/compact_index_client/parser.rb | 73 | ||||
| -rw-r--r-- | spec/bundler/bundler/compact_index_client/parser_spec.rb | 178 |
2 files changed, 101 insertions, 150 deletions
diff --git a/lib/bundler/compact_index_client/parser.rb b/lib/bundler/compact_index_client/parser.rb index 3a0dec4907..3276abdd68 100644 --- a/lib/bundler/compact_index_client/parser.rb +++ b/lib/bundler/compact_index_client/parser.rb @@ -11,7 +11,6 @@ module Bundler @versions_by_name = nil @available = nil @gem_parser = nil - @versions_data = nil end def names @@ -40,71 +39,45 @@ module Bundler end def info(name) - data = @compact_index.info(name, info_checksum(name)) + data = @compact_index.info(name, info_checksums[name]) lines(data).map {|line| gem_parser.parse(line).unshift(name) } end - # parse the last, most recently updated line of the versions file to determine availability def available? return @available unless @available.nil? - return @available = false unless versions_data&.size&.nonzero? - - line_end = versions_data.size - 1 - return @available = false if versions_data[line_end] != "\n" - - line_start = versions_data.rindex("\n", line_end - 1) - line_start ||= -1 # allow a single line versions file - - @available = !split_last_word(versions_data, line_start + 1, line_end).nil? + @available = !info_checksums.empty? end private - # Search for a line starting with gem name, then return last space-separated word (the checksum) - def info_checksum(name) - return unless versions_data - return unless (line_start = rindex_of_gem(name)) - return unless (line_end = versions_data.index("\n", line_start)) - split_last_word(versions_data, line_start, line_end) - end - - def gem_parser - @gem_parser ||= GemParser.new - end - - def versions_data - @versions_data ||= begin - data = @compact_index.versions - strip_header!(data) if data - data.freeze + def info_checksums + @info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums| + parse_version_checksum(line, checksums) end end - def rindex_of_gem(name) - if (pos = versions_data.rindex("\n#{name} ")) - pos + 1 - elsif versions_data.start_with?("#{name} ") - 0 - end + def lines(data) + return [] if data.nil? || data.empty? + lines = data.split("\n") + header = lines.index("---") + header ? lines[header + 1..-1] : lines end - # This is similar to `string.split(" ").last` but it avoids allocating extra objects. - def split_last_word(string, line_start, line_end) - return unless line_start < line_end && line_start >= 0 - word_start = string.rindex(" ", line_end).to_i + 1 - return if word_start < line_start - string[word_start, line_end - word_start] - end - - def lines(string) - return [] if string.nil? || string.empty? - strip_header!(string) - string.split("\n") + def gem_parser + @gem_parser ||= GemParser.new end - def strip_header!(string) - header_end = string.index("---\n") - string.slice!(0, header_end + 4) if header_end + # This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects. + # This method gets called at least once for every gem when parsing versions. + def parse_version_checksum(line, checksums) + return unless (name_end = line.index(" ")) # Artifactory bug causes blank lines in artifactor index files + return unless (checksum_start = line.index(" ", name_end + 1) + 1) + checksum_end = line.size - checksum_start + + line.freeze # allows slicing into the string to not allocate a copy of the line + name = line[0, name_end] + checksum = line[checksum_start, checksum_end] + checksums[name.freeze] = checksum # freeze name since it is used as a hash key end end end diff --git a/spec/bundler/bundler/compact_index_client/parser_spec.rb b/spec/bundler/bundler/compact_index_client/parser_spec.rb index 45a08fd9ff..1f6b9e593b 100644 --- a/spec/bundler/bundler/compact_index_client/parser_spec.rb +++ b/spec/bundler/bundler/compact_index_client/parser_spec.rb @@ -89,11 +89,6 @@ RSpec.describe Bundler::CompactIndexClient::Parser do compact_index.versions = +"" expect(parser).not_to be_available end - - it "returns false when versions ends improperly without a newline" do - compact_index.versions = "a 1.0.0 aaa1" - expect(parser).not_to be_available - end end describe "#names" do @@ -143,108 +138,78 @@ RSpec.describe Bundler::CompactIndexClient::Parser do end describe "#info" do - it "returns the info for example gem 'a' which has no deps" do - expect(parser.info("a")).to eq( + let(:a_result) do + [ [ - [ - "a", - "1.0.0", - nil, - [], - [ - ["checksum", ["aaa1"]], - ["ruby", [">= 3.0.0"]], - ["rubygems", [">= 3.2.3"]], - ], - ], - [ - "a", - "1.0.1", - nil, - [], - [ - ["checksum", ["aaa2"]], - ["ruby", [">= 3.0.0"]], - ["rubygems", [">= 3.2.3"]], - ], - ], - [ - "a", - "1.1.0", - nil, - [], - [ - ["checksum", ["aaa3"]], - ["ruby", [">= 3.0.0"]], - ["rubygems", [">= 3.2.3"]], - ], - ], - ] - ) + "a", + "1.0.0", + nil, + [], + [["checksum", ["aaa1"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]], + ], + [ + "a", + "1.0.1", + nil, + [], + [["checksum", ["aaa2"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]], + ], + [ + "a", + "1.1.0", + nil, + [], + [["checksum", ["aaa3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]], + ], + ] + end + let(:b_result) do + [ + [ + "b", + "2.0.0", + nil, + [["a", ["~> 1.0", "<= 3.0"]]], + [["checksum", ["bbb1"]]], + ], + [ + "b", + "2.0.0", + "java", + [["a", ["~> 1.0", "<= 3.0"]]], + [["checksum", ["bbb2"]]], + ], + ] + end + let(:c_result) do + [ + [ + "c", + "3.0.0", + nil, + [["a", ["= 1.0.0"]], ["b", ["~> 2.0"]]], + [["checksum", ["ccc1"]], ["ruby", [">= 2.7.0"]], ["rubygems", [">= 3.0.0"]]], + ], + [ + "c", + "3.3.3", + nil, + [["a", [">= 1.1.0"]], ["b", ["~> 2.0"]]], + [["checksum", ["ccc3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]], + ], + ] + end + + it "returns the info for example gem 'a' which has no deps" do + expect(parser.info("a")).to eq(a_result) end it "returns the info for example gem 'b' which has platform and compound deps" do - expect(parser.info("b")).to eq( - [ - [ - "b", - "2.0.0", - nil, - [ - ["a", ["~> 1.0", "<= 3.0"]], - ], - [ - ["checksum", ["bbb1"]], - ], - ], - [ - "b", - "2.0.0", - "java", - [ - ["a", ["~> 1.0", "<= 3.0"]], - ], - [ - ["checksum", ["bbb2"]], - ], - ], - ] - ) + expect(parser.info("b")).to eq(b_result) end it "returns the info for example gem 'c' which has deps and yanked version (requires use of correct info checksum)" do - expect(parser.info("c")).to eq( - [ - [ - "c", - "3.0.0", - nil, - [ - ["a", ["= 1.0.0"]], - ["b", ["~> 2.0"]], - ], - [ - ["checksum", ["ccc1"]], - ["ruby", [">= 2.7.0"]], - ["rubygems", [">= 3.0.0"]], - ], - ], - [ - "c", - "3.3.3", - nil, - [ - ["a", [">= 1.1.0"]], - ["b", ["~> 2.0"]], - ], - [ - ["checksum", ["ccc3"]], - ["ruby", [">= 3.0.0"]], - ["rubygems", [">= 3.2.3"]], - ], - ], - ] - ) + expect(parser.info("c")).to eq(c_result) end it "returns an empty array when the info is empty" do @@ -255,5 +220,18 @@ RSpec.describe Bundler::CompactIndexClient::Parser do it "returns an empty array when the info is not readable" do expect(parser.info("d")).to eq([]) end + + it "handles empty lines in the versions file (Artifactory bug that they have yet to fix)" do + compact_index.versions = +<<~VERSIONS + created_at: 2024-05-01T00:00:04Z + --- + a 1.0.0,1.0.1,1.1.0 aaa111 + b 2.0.0,2.0.0-java bbb222 + + c 3.0.0,3.0.3,3.3.3 ccc333 + c -3.0.3 ccc333yanked + VERSIONS + expect(parser.info("a")).to eq(a_result) + end end end |
