diff options
Diffstat (limited to 'lib/rubygems/text.rb')
| -rw-r--r-- | lib/rubygems/text.rb | 66 |
1 files changed, 37 insertions, 29 deletions
diff --git a/lib/rubygems/text.rb b/lib/rubygems/text.rb
index b944b62c27..0550dc473d 100644
--- a/lib/rubygems/text.rb
+++ b/lib/rubygems/text.rb
@@ -1,16 +1,23 @@
 # frozen_string_literal: true
-require 'rubygems'
 
 ##
 # A collection of text-wrangling methods
 
 module Gem::Text
-
   ##
   # Remove any non-printable characters and make the text suitable for
   # printing.
   def clean_text(text)
-    text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".".freeze)
+    text = text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".")
+
+    # Match C1 control characters (U+0080-U+009F) as codepoints. This requires
+    # a valid UTF-8 string so the regexp does not split a multibyte sequence;
+    # strings in other encodings are left unchanged.
+    if text.encoding == Encoding::UTF_8 && text.valid_encoding?
+      text = text.gsub(/[\u0080-\u009f]/, ".")
+    end
+
+    text
   end
 
   def truncate_text(text, description, max_length = 100_000)
@@ -23,12 +30,12 @@ module Gem::Text
   # Wraps +text+ to +wrap+ characters and optionally indents by +indent+
   # characters
 
-  def format_text(text, wrap, indent=0)
+  def format_text(text, wrap, indent = 0)
     result = []
     work = clean_text(text)
 
     while work.length > wrap do
-      if work =~ /^(.{0,#{wrap}})[ \n]/ then
+      if work =~ /^(.{0,#{wrap}})[ \n]/
         result << $1.rstrip
         work.slice!(0, $&.length)
       else
@@ -40,47 +47,48 @@ module Gem::Text
     result.join("\n").gsub(/^/, " " * indent)
   end
 
-  def min3 a, b, c # :nodoc:
-    if a < b && a < c then
+  def min3(a, b, c) # :nodoc:
+    if a < b && a < c
       a
-    elsif b < c then
+    elsif b < c
       b
     else
       c
     end
   end
 
-  # This code is based directly on the Text gem implementation
   # Returns a value representing the "cost" of transforming str1 into str2
-  def levenshtein_distance str1, str2
-    s = str1
-    t = str2
-    n = s.length
-    m = t.length
-
-    return m if (0 == n)
-    return n if (0 == m)
+  # Vendored version of DidYouMean::Levenshtein.distance from the ruby/did_you_mean gem @ 1.4.0
+  # https://github.com/ruby/did_you_mean/blob/2ddf39b874808685965dbc47d344cf6c7651807c/lib/did_you_mean/levenshtein.rb#L7-L37
+  def levenshtein_distance(str1, str2)
+    n = str1.length
+    m = str2.length
+    return m if n.zero?
+    return n if m.zero?
 
     d = (0..m).to_a
     x = nil
 
-    str1.each_char.each_with_index do |char1,i|
-      e = i+1
+    # to avoid duplicating an enumerable object, create it outside of the loop
+    str2_codepoints = str2.codepoints
 
-      str2.each_char.each_with_index do |char2,j|
-        cost = (char1 == char2) ? 0 : 1
+    str1.each_codepoint.with_index(1) do |char1, i|
+      j = 0
+      while j < m
+        cost = char1 == str2_codepoints[j] ? 0 : 1
         x = min3(
-             d[j+1] + 1, # insertion
-             e + 1,      # deletion
-             d[j] + cost # substitution
-            )
-        d[j] = e
-        e = x
+          d[j + 1] + 1, # insertion
+          i + 1,      # deletion
+          d[j] + cost # substitution
+        )
+        d[j] = i
+        i = x
+
+        j += 1
       end
-
       d[m] = x
     end
 
-    return x
+    x
   end
 end
