summaryrefslogtreecommitdiff
path: root/lib/abbrev.rb
blob: d4f8196f49121d7994d1c5f5d4d3bacb9e848b0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env ruby
#--
# Copyright (c) 2001,2003 Akinori MUSHA <knu@iDaemons.org>
#
# All rights reserved.  You can redistribute and/or modify it under
# the same terms as Ruby.
#
# $Idaemons: /home/cvs/rb/abbrev.rb,v 1.2 2001/05/30 09:37:45 knu Exp $
# $RoughId: abbrev.rb,v 1.4 2003/10/14 19:45:42 knu Exp $
# $Id$
#++

##
# Calculates the set of unique abbreviations for a given set of strings.
#
#   require 'abbrev'
#   require 'pp'
#
#   pp Abbrev.abbrev(['ruby', 'rules'])
#
# Generates:
#
#   { "rub"   =>  "ruby",
#     "ruby"  =>  "ruby",
#     "rul"   =>  "rules",
#     "rule"  =>  "rules",
#     "rules" =>  "rules" }
#
# It also provides an array core extension, Array#abbrev.
#
#   pp %w{summer winter}.abbrev
#   #=> {"summe"=>"summer",
#        "summ"=>"summer",
#        "sum"=>"summer",
#        "su"=>"summer",
#        "s"=>"summer",
#        "winte"=>"winter",
#        "wint"=>"winter",
#        "win"=>"winter",
#        "wi"=>"winter",
#        "w"=>"winter",
#        "summer"=>"summer",
#        "winter"=>"winter"}

module Abbrev

  # Given a set of strings, calculate the set of unambiguous
  # abbreviations for those strings, and return a hash where the keys
  # are all the possible abbreviations and the values are the full
  # strings.
  #
  # Thus, given input of "car" and "cone", the keys pointing to "car" would be
  # "ca" and "car", while those pointing to "cone" would be "co", "con", and
  # "cone".
  #
  #   require 'abbrev'
  #
  #   Abbrev.abbrev(['car', 'cone'])
  #   #=> {"ca"=>"car", "con"=>"cone", "co"=>"cone", "car"=>"car", "cone"=>"cone"}
  #
  # A word that appears more than once in +words+ is treated as a single
  # word, so repeating it does not make its own abbreviations ambiguous.
  #
  #   Abbrev.abbrev(['car', 'car'])
  #   #=> {"car"=>"car", "ca"=>"car", "c"=>"car"}
  #
  # The optional +pattern+ parameter is a pattern or a string. A string is
  # treated as a literal prefix. The pattern is tested against every
  # candidate abbreviation as well as every full word, so only keys that
  # match the pattern (or start with the string) appear in the output hash.
  #
  #   Abbrev.abbrev(%w{car box cone}, /b/)
  #   #=> {"bo"=>"box", "b"=>"box", "box"=>"box"}
  #
  # Every full word that passes the pattern is always present as a key
  # mapping to itself, even when it is also a prefix of another word.
  def abbrev(words, pattern = nil)
    table = {}
    seen = Hash.new(0)  # prefix => number of distinct words sharing it
    counted = {}        # words whose prefixes have already been tallied

    if pattern.is_a?(String)
      pattern = /\A#{Regexp.quote(pattern)}/  # regard as a prefix
    end

    words.each do |word|
      next if word.empty?

      # Tally each distinct word only once; otherwise a repeated word
      # would collide with itself and lose all of its abbreviations.
      next if counted.key?(word)
      counted[word] = true

      word.size.downto(1) do |len|
        prefix = word[0...len]

        next if pattern && pattern !~ prefix

        case seen[prefix] += 1
        when 1
          # First word to use this prefix: tentatively unambiguous.
          table[prefix] = word
        when 2
          # Second word to use this prefix: it is ambiguous, drop it.
          table.delete(prefix)
        else
          # Already dropped; shorter prefixes are shared by the same
          # words and have been dropped too, so stop early.
          break
        end
      end
    end

    # A full word always stands for itself, even if the pass above
    # removed it for being a prefix of another word.
    words.each do |word|
      next if pattern && pattern !~ word

      table[word] = word
    end

    table
  end

  module_function :abbrev
end

class Array
  # Returns a hash of the unambiguous abbreviations for the strings held
  # in this array. Each key is an abbreviation (or a full string) and each
  # value is the full string it stands for.
  #
  #   require 'abbrev'
  #   %w{ car cone }.abbrev
  #   #=> {"ca" => "car", "con"=>"cone", "co" => "cone",
  #        "car"=>"car", "cone" => "cone"}
  #
  # +pattern+ is optional and may be a pattern or a string. When given,
  # the output hash is limited to entries that match the pattern or start
  # with the string.
  #
  #   %w{ fast boat day }.abbrev(/^.a/)
  #   #=> {"fas"=>"fast", "fa"=>"fast", "da"=>"day",
  #        "fast"=>"fast", "day"=>"day"}
  #
  # This is a thin wrapper: the array and +pattern+ are handed straight to
  # Abbrev.abbrev, which does the work.
  def abbrev(pattern = nil)
    Abbrev.abbrev(self, pattern)
  end
end

# When this file is run directly as a script (rather than required), read
# input line by line, split each line into whitespace-separated words, and
# print the abbreviation table for that line sorted by abbreviation.
if $PROGRAM_NAME == __FILE__
  while (line = gets)
    line.split.abbrev.sort.each do |short, full|
      puts "#{short} => #{full}"
    end
  end
end