summaryrefslogtreecommitdiff
path: root/lib/did_you_mean/jaro_winkler.rb
diff options
context:
space:
mode:
authorKevin Deisz <kevin.deisz@gmail.com>2019-10-29 10:08:37 -0400
committerYuki Nishijima <yk.nishijima@gmail.com>2019-11-30 21:08:19 -0500
commit171803d5d34feb1b4244ca81b9db0a7bc2171c85 (patch)
tree664ee644da144f28152097fbe5ea43329bfc0576 /lib/did_you_mean/jaro_winkler.rb
parenta2fc6a51dd2e1a153559038795e1e2509f9c6a94 (diff)
Promote did_you_mean to default gem
At the moment, there are some problems with regard to bundler + did_you_mean because of did_you_mean being a bundled gem. Since the vendored version of thor inside bundler and ruby itself explicitly requires did_you_mean, it can become difficult to load it when using Bundler.setup. See this issue: https://github.com/yuki24/did_you_mean/issues/117#issuecomment-482733159 for more details.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/2689
Diffstat (limited to 'lib/did_you_mean/jaro_winkler.rb')
-rw-r--r--lib/did_you_mean/jaro_winkler.rb87
1 files changed, 87 insertions, 0 deletions
diff --git a/lib/did_you_mean/jaro_winkler.rb b/lib/did_you_mean/jaro_winkler.rb
new file mode 100644
index 0000000000..56db130af4
--- /dev/null
+++ b/lib/did_you_mean/jaro_winkler.rb
@@ -0,0 +1,87 @@
+module DidYouMean
+ module Jaro
+ module_function
+
+ def distance(str1, str2)
+ str1, str2 = str2, str1 if str1.length > str2.length
+ length1, length2 = str1.length, str2.length
+
+ m = 0.0
+ t = 0.0
+ range = (length2 / 2).floor - 1
+ range = 0 if range < 0
+ flags1 = 0
+ flags2 = 0
+
+ # Avoid duplicating enumerable objects
+ str1_codepoints = str1.codepoints
+ str2_codepoints = str2.codepoints
+
+ i = 0
+ while i < length1
+ last = i + range
+ j = (i >= range) ? i - range : 0
+
+ while j <= last
+ if flags2[j] == 0 && str1_codepoints[i] == str2_codepoints[j]
+ flags2 |= (1 << j)
+ flags1 |= (1 << i)
+ m += 1
+ break
+ end
+
+ j += 1
+ end
+
+ i += 1
+ end
+
+ k = i = 0
+ while i < length1
+ if flags1[i] != 0
+ j = index = k
+
+ k = while j < length2
+ index = j
+ break(j + 1) if flags2[j] != 0
+
+ j += 1
+ end
+
+ t += 1 if str1_codepoints[i] != str2_codepoints[index]
+ end
+
+ i += 1
+ end
+ t = (t / 2).floor
+
+ m == 0 ? 0 : (m / length1 + m / length2 + (m - t) / m) / 3
+ end
+ end
+
+ module JaroWinkler
+ WEIGHT = 0.1
+ THRESHOLD = 0.7
+
+ module_function
+
+ def distance(str1, str2)
+ jaro_distance = Jaro.distance(str1, str2)
+
+ if jaro_distance > THRESHOLD
+ codepoints2 = str2.codepoints
+ prefix_bonus = 0
+
+ i = 0
+ str1.each_codepoint do |char1|
+ char1 == codepoints2[i] && i < 4 ? prefix_bonus += 1 : break
+ i += 1
+ end
+
+ jaro_distance + (prefix_bonus * WEIGHT * (1 - jaro_distance))
+ else
+ jaro_distance
+ end
+ end
+ end
+end