diff options
author | Yuki Nishijima <yk.nishijima@gmail.com> | 2020-05-22 17:17:10 -0400 |
---|---|---|
committer | Yuki Nishijima <yk.nishijima@gmail.com> | 2020-06-06 13:02:08 -0400 |
commit | e5f54465284b4505042fca10ace998e1d29c2313 (patch) | |
tree | 3770bc320b3cb7fdd7e5406bb1fbcd5c4c705c77 /lib/did_you_mean/tree_spell_checker.rb | |
parent | 0c00a4176ba353d59d8c991428574ef2c2676674 (diff) |
Sync did_you_mean
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/3135
Diffstat (limited to 'lib/did_you_mean/tree_spell_checker.rb')
-rw-r--r-- | lib/did_you_mean/tree_spell_checker.rb | 150 |
1 files changed, 61 insertions, 89 deletions
diff --git a/lib/did_you_mean/tree_spell_checker.rb b/lib/did_you_mean/tree_spell_checker.rb index 6a5b485413..799f07fcf0 100644 --- a/lib/did_you_mean/tree_spell_checker.rb +++ b/lib/did_you_mean/tree_spell_checker.rb @@ -1,137 +1,109 @@ +# frozen_string_literal: true + module DidYouMean # spell checker for a dictionary that has a tree # structure, see doc/tree_spell_checker_api.md class TreeSpellChecker - attr_reader :dictionary, :dimensions, :separator, :augment + attr_reader :dictionary, :separator, :augment def initialize(dictionary:, separator: '/', augment: nil) @dictionary = dictionary @separator = separator @augment = augment - @dimensions = parse_dimensions end def correct(input) - plausibles = plausible_dimensions input - return no_idea(input) if plausibles.empty? - suggestions = find_suggestions input, plausibles - return no_idea(input) if suggestions.empty? - suggestions - end + plausibles = plausible_dimensions(input) + return fall_back_to_normal_spell_check(input) if plausibles.empty? - private + suggestions = find_suggestions(input, plausibles) + return fall_back_to_normal_spell_check(input) if suggestions.empty? - def parse_dimensions - ParseDimensions.new(dictionary, separator).call + suggestions end - def find_suggestions(input, plausibles) - states = plausibles[0].product(*plausibles[1..-1]) - paths = possible_paths states - leaf = input.split(separator).last - ideas = find_ideas(paths, leaf) - ideas.compact.flatten + def dictionary_without_leaves + @dictionary_without_leaves ||= dictionary.map { |word| word.split(separator)[0..-2] }.uniq end - def no_idea(input) - return [] unless augment - ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input) + def tree_depth + @tree_depth ||= dictionary_without_leaves.max { |a, b| a.size <=> b.size }.size end - def find_ideas(paths, leaf) - paths.map do |path| - names = find_leaves(path) - ideas = CorrectElement.new.call names, leaf - ideas_to_paths ideas, leaf, names, path - end + def dimensions + @dimensions ||= tree_depth.times.map do |index| + dictionary_without_leaves.map { |element| element[index] }.compact.uniq + end end - def ideas_to_paths(ideas, leaf, names, path) - return nil if ideas.empty? - return [path + separator + leaf] if names.include? leaf - ideas.map { |str| path + separator + str } + def find_leaves(path) + path_with_separator = "#{path}#{separator}" + + dictionary + .select {|str| str.include?(path_with_separator) } + .map {|str| str.gsub(path_with_separator, '') } end - def find_leaves(path) - dictionary.map do |str| - next unless str.include? "#{path}#{separator}" - str.gsub("#{path}#{separator}", '') - end.compact + def plausible_dimensions(input) + input.split(separator)[0..-2] + .map + .with_index { |element, index| correct_element(dimensions[index], element) if dimensions[index] } + .compact end def possible_paths(states) - states.map do |state| - state.join separator - end + states.map { |state| state.join(separator) } end - def plausible_dimensions(input) - elements = input.split(separator)[0..-2] - elements.each_with_index.map do |element, i| - next if dimensions[i].nil? - CorrectElement.new.call dimensions[i], element - end.compact - end - end + private - # parses the elements in each dimension - class ParseDimensions - def initialize(dictionary, separator) - @dictionary = dictionary - @separator = separator + def find_suggestions(input, plausibles) + states = plausibles[0].product(*plausibles[1..-1]) + paths = possible_paths(states) + leaf = input.split(separator).last + + find_ideas(paths, leaf) end - def call - leafless = remove_leaves - dimensions = find_elements leafless - dimensions.map do |elements| - elements.to_set.to_a - end + def fall_back_to_normal_spell_check(input) + return [] unless augment + + ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input) end - private + def find_ideas(paths, leaf) + paths.flat_map do |path| + names = find_leaves(path) + ideas = correct_element(names, leaf) - def remove_leaves - dictionary.map do |a| - elements = a.split(separator) - elements[0..-2] - end.to_set.to_a + ideas_to_paths(ideas, leaf, names, path) + end.compact end - def find_elements(leafless) - max_elements = leafless.map(&:size).max - dimensions = Array.new(max_elements) { [] } - (0...max_elements).each do |i| - leafless.each do |elements| - dimensions[i] << elements[i] unless elements[i].nil? - end + def ideas_to_paths(ideas, leaf, names, path) + if ideas.empty? + nil + elsif names.include?(leaf) + ["#{path}#{separator}#{leaf}"] + else + ideas.map {|str| "#{path}#{separator}#{str}" } end - dimensions end - attr_reader :dictionary, :separator - end + def correct_element(names, element) + return names if names.size == 1 - # identifies the elements close to element - class CorrectElement - def initialize - end + str = normalize(element) - def call(names, element) - return names if names.size == 1 - str = normalize element - return [str] if names.include? str - checker = ::DidYouMean::SpellChecker.new(dictionary: names) - checker.correct(str) - end + return [str] if names.include?(str) - private + ::DidYouMean::SpellChecker.new(dictionary: names).correct(str) + end - def normalize(leaf) - str = leaf.dup + def normalize(str) str.downcase! - return str unless str.include? '@' - str.tr!('@', ' ') + str.tr!('@', ' ') if str.include?('@') + str end end end |