summaryrefslogtreecommitdiff
path: root/lib/did_you_mean/tree_spell_checker.rb
blob: 6a5b4854130dda71ca94a14cac2c6c678ce80209 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
module DidYouMean
  # Spell checker for a dictionary that has a tree structure, i.e. whose
  # entries are elements joined by a separator (file paths, for example).
  # See doc/tree_spell_checker_api.md.
  #
  # Every element of an entry except the last is a "dimension"; the last
  # element is the "leaf". The dimensions of the input are corrected first,
  # depth by depth, and the leaf is then corrected against the leaves found
  # under each resulting path.
  class TreeSpellChecker
    attr_reader :dictionary, :dimensions, :separator, :augment

    # dictionary:: array of separator-joined strings to suggest from
    # separator::  string placed between the elements of an entry
    # augment::    when truthy, fall back to a plain
    #              ::DidYouMean::SpellChecker over the whole dictionary
    #              whenever the tree search finds nothing
    def initialize(dictionary:, separator: '/', augment: nil)
      @dictionary = dictionary
      @separator = separator
      @augment = augment
      @dimensions = parse_dimensions
    end

    # Returns an array of suggested paths for +input+. When the tree search
    # yields nothing the result is [] or, with +augment+ set, whatever the
    # plain spell checker suggests.
    def correct(input)
      plausibles = plausible_dimensions input
      return no_idea(input) if plausibles.empty?
      suggestions = find_suggestions input, plausibles
      return no_idea(input) if suggestions.empty?
      suggestions
    end

    private

    # Distinct elements found at each depth of the dictionary.
    def parse_dimensions
      ParseDimensions.new(dictionary, separator).call
    end

    # Combines the plausible elements of every depth into candidate paths
    # and corrects the leaf of +input+ under each of them.
    def find_suggestions(input, plausibles)
      states = plausibles[0].product(*plausibles[1..-1])
      paths = possible_paths states
      leaf = input.split(separator).last
      ideas = find_ideas(paths, leaf)
      ideas.compact.flatten
    end

    # Fallback used when the tree search comes up empty.
    def no_idea(input)
      return [] unless augment
      ::DidYouMean::SpellChecker.new(dictionary: dictionary).correct(input)
    end

    # For every candidate path: the full paths suggested for +leaf+, or nil
    # when that path offers no suggestion.
    def find_ideas(paths, leaf)
      paths.map do |path|
        names = find_leaves(path)
        ideas = CorrectElement.new.call names, leaf
        ideas_to_paths ideas, leaf, names, path
      end
    end

    # Turns corrected leaf names back into full paths under +path+. An exact
    # match of +leaf+ among +names+ wins over the other ideas.
    def ideas_to_paths(ideas, leaf, names, path)
      return nil if ideas.empty?
      return [path + separator + leaf] if names.include? leaf
      ideas.map { |str| path + separator + str }
    end

    # Returns what follows "path + separator" in every dictionary entry that
    # starts with it.
    #
    # The match is anchored at the start of the entry and only that leading
    # prefix is removed: +path+ is always built from the root of the tree, so
    # an entry that merely contains the same text further in is not under
    # +path+ and must not contribute a leaf.
    def find_leaves(path)
      prefix = "#{path}#{separator}"
      dictionary.each_with_object([]) do |str, leaves|
        leaves << str[prefix.size..-1] if str.start_with?(prefix)
      end
    end

    # Joins each combination of elements into a path string.
    def possible_paths(states)
      states.map do |state|
        state.join separator
      end
    end

    # Corrects every non-leaf element of +input+ against the dictionary
    # elements of the same depth. Depths the dictionary does not have are
    # skipped.
    def plausible_dimensions(input)
      elements = input.split(separator)[0..-2]
      elements.each_with_index.map do |element, i|
        next if dimensions[i].nil?
        CorrectElement.new.call dimensions[i], element
      end.compact
    end
  end

  # Parses the elements found at each depth ("dimension") of a dictionary
  # whose entries are elements joined by a separator.
  class ParseDimensions
    # dictionary:: array of separator-joined strings
    # separator::  string that splits an entry into its elements
    def initialize(dictionary, separator)
      @dictionary = dictionary
      @separator = separator
    end

    # Returns an array with one entry per depth; entry +i+ lists the distinct
    # elements seen at depth +i+, in order of first appearance. The last
    # element of every dictionary entry (its leaf) is not counted.
    # An empty dictionary, or one whose entries are all bare leaves, gives [].
    def call
      find_elements(remove_leaves).map(&:uniq)
    end

    private

    attr_reader :dictionary, :separator

    # Every entry split into its elements with the leaf dropped; duplicate
    # element lists are removed.
    def remove_leaves
      dictionary.map { |entry| entry.split(separator)[0..-2] }.uniq
    end

    # Groups the elements of the leafless entries by depth.
    def find_elements(leafless)
      # max is nil when there are no entries at all: that means zero depths,
      # not an error
      depth = leafless.map(&:size).max || 0
      Array.new(depth) do |i|
        leafless.map { |elements| elements[i] }.compact
      end
    end
  end

  # Identifies the names that are close to a given element.
  class CorrectElement
    # names::   candidate strings to choose from
    # element:: the string to correct
    #
    # Returns +names+ itself when it holds exactly one candidate, the
    # normalized element alone when that is one of the candidates, and
    # otherwise the corrections ::DidYouMean::SpellChecker picks from +names+.
    def call(names, element)
      return names if names.size == 1
      str = normalize element
      return [str] if names.include? str
      ::DidYouMean::SpellChecker.new(dictionary: names).correct(str)
    end

    private

    # Returns a lower-cased copy of +leaf+ with every '@' replaced by a
    # space; +leaf+ itself is not modified.
    def normalize(leaf)
      # non-destructive calls: the bang variants return nil when nothing
      # changes, which is an easy trap for a method's return value
      leaf.downcase.tr('@', ' ')
    end
  end
end