summary | refs | log | tree | commit | diff
path: root/lib/did_you_mean/tree_spell_checker.rb
diff options
context:
space:
mode:
author: Yuki Nishijima <yk.nishijima@gmail.com> 2020-05-22 17:17:10 -0400
committer: Yuki Nishijima <yk.nishijima@gmail.com> 2020-06-06 13:02:08 -0400
commit: e5f54465284b4505042fca10ace998e1d29c2313 (patch)
tree: 3770bc320b3cb7fdd7e5406bb1fbcd5c4c705c77 /lib/did_you_mean/tree_spell_checker.rb
parent: 0c00a4176ba353d59d8c991428574ef2c2676674 (diff)
Sync did_you_mean
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/3135
Diffstat (limited to 'lib/did_you_mean/tree_spell_checker.rb')
-rw-r--r-- lib/did_you_mean/tree_spell_checker.rb 150
1 files changed, 61 insertions, 89 deletions
diff --git a/lib/did_you_mean/tree_spell_checker.rb b/lib/did_you_mean/tree_spell_checker.rb
index 6a5b485413..799f07fcf0 100644
--- a/lib/did_you_mean/tree_spell_checker.rb
+++ b/lib/did_you_mean/tree_spell_checker.rb
@@ -1,137 +1,109 @@
+# frozen_string_literal: true
+
module DidYouMean
# spell checker for a dictionary that has a tree
# structure, see doc/tree_spell_checker_api.md
class TreeSpellChecker
- attr_reader :dictionary, :dimensions, :separator, :augment
+ attr_reader :dictionary, :separator, :augment
def initialize(dictionary:, separator: '/', augment: nil)
@dictionary = dictionary
@separator = separator
@augment = augment
- @dimensions = parse_dimensions
end
def correct(input)
- plausibles = plausible_dimensions input
- return no_idea(input) if plausibles.empty?
- suggestions = find_suggestions input, plausibles
- return no_idea(input) if suggestions.empty?
- suggestions
- end
+ plausibles = plausible_dimensions(input)
+ return fall_back_to_normal_spell_check(input) if plausibles.empty?
- private
+ suggestions = find_suggestions(input, plausibles)
+ return fall_back_to_normal_spell_check(input) if suggestions.empty?
- def parse_dimensions
- ParseDimensions.new(dictionary, separator).call
+ suggestions
end
- def find_suggestions(input, plausibles)
- states = plausibles[0].product(*plausibles[1..-1])
- paths = possible_paths states
- leaf = input.split(separator).last
- ideas = find_ideas(paths, leaf)
- ideas.compact.flatten
+ def dictionary_without_leaves
+ @dictionary_without_leaves ||= dictionary.map { |word| word.split(separator)[0..-2] }.uniq
end
- def no_idea(input)
- return [] unless augment
- ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input)
+ def tree_depth
+ @tree_depth ||= dictionary_without_leaves.max { |a, b| a.size <=> b.size }.size
end
- def find_ideas(paths, leaf)
- paths.map do |path|
- names = find_leaves(path)
- ideas = CorrectElement.new.call names, leaf
- ideas_to_paths ideas, leaf, names, path
- end
+ def dimensions
+ @dimensions ||= tree_depth.times.map do |index|
+ dictionary_without_leaves.map { |element| element[index] }.compact.uniq
+ end
end
- def ideas_to_paths(ideas, leaf, names, path)
- return nil if ideas.empty?
- return [path + separator + leaf] if names.include? leaf
- ideas.map { |str| path + separator + str }
+ def find_leaves(path)
+ path_with_separator = "#{path}#{separator}"
+
+ dictionary
+ .select {|str| str.include?(path_with_separator) }
+ .map {|str| str.gsub(path_with_separator, '') }
end
- def find_leaves(path)
- dictionary.map do |str|
- next unless str.include? "#{path}#{separator}"
- str.gsub("#{path}#{separator}", '')
- end.compact
+ def plausible_dimensions(input)
+ input.split(separator)[0..-2]
+ .map
+ .with_index { |element, index| correct_element(dimensions[index], element) if dimensions[index] }
+ .compact
end
def possible_paths(states)
- states.map do |state|
- state.join separator
- end
+ states.map { |state| state.join(separator) }
end
- def plausible_dimensions(input)
- elements = input.split(separator)[0..-2]
- elements.each_with_index.map do |element, i|
- next if dimensions[i].nil?
- CorrectElement.new.call dimensions[i], element
- end.compact
- end
- end
+ private
- # parses the elements in each dimension
- class ParseDimensions
- def initialize(dictionary, separator)
- @dictionary = dictionary
- @separator = separator
+ def find_suggestions(input, plausibles)
+ states = plausibles[0].product(*plausibles[1..-1])
+ paths = possible_paths(states)
+ leaf = input.split(separator).last
+
+ find_ideas(paths, leaf)
end
- def call
- leafless = remove_leaves
- dimensions = find_elements leafless
- dimensions.map do |elements|
- elements.to_set.to_a
- end
+ def fall_back_to_normal_spell_check(input)
+ return [] unless augment
+
+ ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input)
end
- private
+ def find_ideas(paths, leaf)
+ paths.flat_map do |path|
+ names = find_leaves(path)
+ ideas = correct_element(names, leaf)
- def remove_leaves
- dictionary.map do |a|
- elements = a.split(separator)
- elements[0..-2]
- end.to_set.to_a
+ ideas_to_paths(ideas, leaf, names, path)
+ end.compact
end
- def find_elements(leafless)
- max_elements = leafless.map(&:size).max
- dimensions = Array.new(max_elements) { [] }
- (0...max_elements).each do |i|
- leafless.each do |elements|
- dimensions[i] << elements[i] unless elements[i].nil?
- end
+ def ideas_to_paths(ideas, leaf, names, path)
+ if ideas.empty?
+ nil
+ elsif names.include?(leaf)
+ ["#{path}#{separator}#{leaf}"]
+ else
+ ideas.map {|str| "#{path}#{separator}#{str}" }
end
- dimensions
end
- attr_reader :dictionary, :separator
- end
+ def correct_element(names, element)
+ return names if names.size == 1
- # identifies the elements close to element
- class CorrectElement
- def initialize
- end
+ str = normalize(element)
- def call(names, element)
- return names if names.size == 1
- str = normalize element
- return [str] if names.include? str
- checker = ::DidYouMean::SpellChecker.new(dictionary: names)
- checker.correct(str)
- end
+ return [str] if names.include?(str)
- private
+ ::DidYouMean::SpellChecker.new(dictionary: names).correct(str)
+ end
- def normalize(leaf)
- str = leaf.dup
+ def normalize(str)
str.downcase!
- return str unless str.include? '@'
- str.tr!('@', ' ')
+ str.tr!('@', ' ') if str.include?('@')
+ str
end
end
end