summary refs log tree commit diff
path: root/lib/rubygems/text.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rubygems/text.rb')
-rw-r--r-- lib/rubygems/text.rb 66
1 files changed, 37 insertions, 29 deletions
diff --git a/lib/rubygems/text.rb b/lib/rubygems/text.rb
index b944b62c27..0550dc473d 100644
--- a/lib/rubygems/text.rb
+++ b/lib/rubygems/text.rb
@@ -1,16 +1,23 @@
# frozen_string_literal: true
-require 'rubygems'
##
# A collection of text-wrangling methods
module Gem::Text
-
##
# Remove any non-printable characters and make the text suitable for
# printing.
def clean_text(text)
- text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".".freeze)
+ text = text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".")
+
+ # Match C1 control characters (U+0080-U+009F) as codepoints. This requires
+ # a valid UTF-8 string so the regexp does not split a multibyte sequence;
+ # strings in other encodings are left unchanged.
+ if text.encoding == Encoding::UTF_8 && text.valid_encoding?
+ text = text.gsub(/[\u0080-\u009f]/, ".")
+ end
+
+ text
end
def truncate_text(text, description, max_length = 100_000)
@@ -23,12 +30,12 @@ module Gem::Text
# Wraps +text+ to +wrap+ characters and optionally indents by +indent+
# characters
- def format_text(text, wrap, indent=0)
+ def format_text(text, wrap, indent = 0)
result = []
work = clean_text(text)
while work.length > wrap do
- if work =~ /^(.{0,#{wrap}})[ \n]/ then
+ if work =~ /^(.{0,#{wrap}})[ \n]/
result << $1.rstrip
work.slice!(0, $&.length)
else
@@ -40,47 +47,48 @@ module Gem::Text
result.join("\n").gsub(/^/, " " * indent)
end
- def min3 a, b, c # :nodoc:
- if a < b && a < c then
+ def min3(a, b, c) # :nodoc:
+ if a < b && a < c
a
- elsif b < c then
+ elsif b < c
b
else
c
end
end
- # This code is based directly on the Text gem implementation
# Returns a value representing the "cost" of transforming str1 into str2
- def levenshtein_distance str1, str2
- s = str1
- t = str2
- n = s.length
- m = t.length
-
- return m if (0 == n)
- return n if (0 == m)
+ # Vendored version of DidYouMean::Levenshtein.distance from the ruby/did_you_mean gem @ 1.4.0
+ # https://github.com/ruby/did_you_mean/blob/2ddf39b874808685965dbc47d344cf6c7651807c/lib/did_you_mean/levenshtein.rb#L7-L37
+ def levenshtein_distance(str1, str2)
+ n = str1.length
+ m = str2.length
+ return m if n.zero?
+ return n if m.zero?
d = (0..m).to_a
x = nil
- str1.each_char.each_with_index do |char1,i|
- e = i+1
+ # to avoid duplicating an enumerable object, create it outside of the loop
+ str2_codepoints = str2.codepoints
- str2.each_char.each_with_index do |char2,j|
- cost = (char1 == char2) ? 0 : 1
+ str1.each_codepoint.with_index(1) do |char1, i|
+ j = 0
+ while j < m
+ cost = char1 == str2_codepoints[j] ? 0 : 1
x = min3(
- d[j+1] + 1, # insertion
- e + 1, # deletion
- d[j] + cost # substitution
- )
- d[j] = e
- e = x
+ d[j + 1] + 1, # insertion
+ i + 1, # deletion
+ d[j] + cost # substitution
+ )
+ d[j] = i
+ i = x
+
+ j += 1
end
-
d[m] = x
end
- return x
+ x
end
end