# == sentence library # # = Features # # * syntax based sentences generation # * sentence operations such as substitution. # # = Example # # Some arithmetic expressions using "+", "-", "*" and "/" are generated as follows. # # require 'sentence' # Sentence.each({ # :exp => [["num"], # [:exp, "+", :exp], # [:exp, "-", :exp], # [:exp, "*", :exp], # [:exp, "/", :exp]] # }, :exp, 2) {|sent| p sent } # #=> # # # # # # # # # # # # # # # ... # # Sentence.each takes 3 arguments. # The first argument is the syntax for the expressions. # The second argument, :exp, is a generating nonterminal. # The third argument, 2, limits derivation to restrict results finitely. # # Some arithmetic expressions including parenthesis can be generated as follows. # # syntax = { # :factor => [["n"], # ["(", :exp, ")"]], # :term => [[:factor], # [:term, "*", :factor], # [:term, "/", :factor]], # :exp => [[:term], # [:exp, "+", :term], # [:exp, "-", :term]] # } # Sentence.each(syntax, :exp, 2) {|sent| p sent } # #=> # # # # # # # # # # # # # # # # # # # ... # # Sentence#to_s can be used to concatenate strings # in a sentence: # # Sentence.each(syntax, :exp, 2) {|sent| p sent.to_s } # #=> # "n" # "(n)" # "((n))" # "(n*n)" # "(n/n)" # "(n+n)" # "(n-n)" # "n*n" # "n*(n)" # ... # # Sentence() instantiates a sentence object. # # Sentence("foo", "bar") # #=> # # # Sentence("foo", ["bar", "baz"]) # #=> # # def Sentence(*ary) Sentence.new(ary) end # Sentence class represents a tree with string leaves. # class Sentence # _ary_ represents a tree. # It should be a possibly nested array which contains strings. # # Note that _ary_ is not copied. # Don't modify _ary_ after the sentence object is instantiated. # # Sentence.new(["a", "pen"]) # # # # Sentence.new(["I", "have", ["a", "pen"]]) # # # def initialize(ary) @sent = ary end # returns a string which is concatenation of all strings. # No separator is used. # # Sentence("2", "+", "3").to_s # "2+3" # # Sentence("2", "+", ["3", "*", "5"]).to_s # "2+3*5" # def to_s @sent.join('') end # returns a string which is concatenation of all strings separated by _sep_. # If _sep_ is not given, single space is used. # # Sentence("I", "have", ["a", "pen"]).join # "I have a pen" # # Sentence("I", "have", ["a", "pen"]).join("/") # "I/have/a/pen" # # Sentence("a", [], "b").join("/") # "a/b" # def join(sep=' ') @sent.flatten.join(sep) end # returns a tree as a nested array. # # Note that the result is not copied. # Don't modify the result. # # Sentence(["foo", "bar"], "baz").to_a # #=> [["foo", "bar"], "baz"] # def to_a @sent end # returns ith element as a sentence or string. # # s = Sentence(["foo", "bar"], "baz") # s #=> # # s[0] #=> # # s[1] #=> "baz" # def [](i) e = @sent[i] e.respond_to?(:to_ary) ? Sentence.new(e) : e end # returns the number of top level elements. # # Sentence.new(%w[foo bar]).length # #=> 2 # # Sentence(%w[2 * 7], "+", %w[3 * 5]).length # #=> 3 # def length @sent.length end # iterates over children. # # Sentence(%w[2 * 7], "+", %w[3 * 5]).each {|v| p v } # #=> # # # "+" # # # def each # :yield: element @sent.each_index {|i| yield self[i] } end include Enumerable def inspect "#<#{self.class}: #{inner_inspect(@sent, '')}>" end # :stopdoc: def inner_inspect(ary, r) first = true ary.each {|obj| r << ' ' if !first first = false if obj.respond_to? :to_ary r << '(' inner_inspect(obj, r) r << ')' else r << obj.inspect end } r end # :startdoc: # returns new sentence object which # _target_ is substituted by the block. # # Sentence#subst invokes _target_ === _string_ for each # string in the sentence. # The strings which === returns true are substituted by the block. # The block is invoked with the substituting string. # # Sentence.new(%w[2 + 3]).subst("+") { "*" } # # # # Sentence.new(%w[2 + 3]).subst(/\A\d+\z/) {|s| ((s.to_i)*2).to_s } # #=> # # def subst(target, &b) # :yield: string Sentence.new(subst_rec(@sent, target, &b)) end # :stopdoc: def subst_rec(obj, target, &b) if obj.respond_to? :to_ary a = [] obj.each {|e| a << subst_rec(e, target, &b) } a elsif target === obj yield obj else obj end end # :startdoc: # find a subsentence and return it. # The block is invoked for each subsentence in preorder manner. # The first subsentence which the block returns true is returned. # # Sentence(%w[2 * 7], "+", %w[3 * 5]).find_subtree {|s| s[1] == "*" } # #=> # # def find_subtree(&b) # :yield: sentence find_subtree_rec(@sent, &b) end # :stopdoc: def find_subtree_rec(obj, &b) if obj.respond_to? :to_ary s = Sentence.new(obj) if b.call s return s else obj.each {|e| r = find_subtree_rec(e, &b) return r if r } end end nil end # :startdoc: # returns a new sentence object which expands according to the condition # given by the block. # # The block is invoked for each subsentence. # The subsentences which the block returns true are # expanded into parent. # # s = Sentence(%w[2 * 7], "+", %w[3 * 5]) # #=> # # # s.expand { true } # #=> # # # s.expand {|s| s[0] == "3" } # #=> # # def expand(&b) # :yield: sentence Sentence.new(expand_rec(@sent, &b)) end # :stopdoc: def expand_rec(obj, r=[], &b) if obj.respond_to? :to_ary obj.each {|o| s = Sentence.new(o) if b.call s expand_rec(o, r, &b) else a = [] expand_rec(o, a, &b) r << a end } else r << obj end r end # :startdoc: # Sentence.each generates sentences # by deriving the start symbol _sym_ using _syntax_. # The derivation is restricted by an positive integer _limit_ to # avoid infinite generation. # # Sentence.each yields the block with a generated sentence. # # Sentence.each({ # :exp => [["n"], # [:exp, "+", :exp], # [:exp, "*", :exp]] # }, :exp, 1) {|sent| p sent } # #=> # # # # # # # # Sentence.each({ # :exp => [["n"], # [:exp, "+", :exp], # [:exp, "*", :exp]] # }, :exp, 2) {|sent| p sent } # #=> # # # # # # # # # # # # # # # # # # # # # # # def Sentence.each(syntax, sym, limit) Gen.new(syntax).each_tree(sym, limit) {|tree| yield Sentence.new(tree) } end # Sentence.expand_syntax returns an expanded syntax: # * No rule derives to empty sequence # * Underivable rule simplified # * No channel rule # * Symbols which has zero or one choices are not appered in rhs. # # Note that the rules which can derive empty and non-empty # sequences are modified to derive only non-empty sequences. # # Sentence.expand_syntax({ # :underivable1 => [], # :underivable2 => [[:underivable1]], # :underivable3 => [[:underivable3]], # :empty_only1 => [[]], # :empty_only2 => [[:just_empty1, :just_empty1]], # :empty_or_not => [[], ["foo"]], # :empty_or_not_2 => [[:empty_or_not, :empty_or_not]], # :empty_or_not_3 => [[:empty_or_not, :empty_or_not, :empty_or_not]], # :empty_or_not_4 => [[:empty_or_not_2, :empty_or_not_2]], # :channel1 => [[:channeled_data]], # :channeled_data => [["a", "b"], ["c", "d"]], # :single_choice => [["single", "choice"]], # :single_choice_2 => [[:single_choice, :single_choice]], # }) # #=> # { # :underivable1=>[], # underivable rules are simplified to []. # :underivable2=>[], # :underivable3=>[], # :empty_only1=>[], # derivation to empty sequence are removed. # :empty_only2=>[], # :empty_or_not=>[["foo"]], # empty sequences are removed too. # :empty_or_not_2=>[["foo"], ["foo", "foo"]], # :empty_or_not_3=>[["foo"], ["foo", "foo"], ["foo", "foo", "foo"]], # :empty_or_not_4=> [["foo"], ["foo", "foo"], [:empty_or_not_2, :empty_or_not_2]], # :channel1=>[["a", "b"], ["c", "d"]], # channel rules are removed. # :channeled_data=>[["a", "b"], ["c", "d"]], # :single_choice=>[["single", "choice"]], # single choice rules are expanded. # :single_choice_2=>[["single", "choice", "single", "choice"]], # } # # Sentence.expand_syntax({ # :factor => [["n"], # ["(", :exp, ")"]], # :term => [[:factor], # [:term, "*", :factor], # [:term, "/", :factor]], # :exp => [[:term], # [:exp, "+", :term], # [:exp, "-", :term]] # }) # #=> # {:exp=> [["n"], # ["(", :exp, ")"], # [:exp, "+", :term], # [:exp, "-", :term], # [:term, "*", :factor], # [:term, "/", :factor]], # :factor=> [["n"], # ["(", :exp, ")"]], # :term=> [["n"], # ["(", :exp, ")"], # [:term, "*", :factor], # [:term, "/", :factor]] # } # def Sentence.expand_syntax(syntax) Sentence::Gen.expand_syntax(syntax) end # :stopdoc: class Gen def Gen.each_tree(syntax, sym, limit, &b) Gen.new(syntax).each_tree(sym, limit, &b) end def Gen.each_string(syntax, sym, limit, &b) Gen.new(syntax).each_string(sym, limit, &b) end def initialize(syntax) @syntax = syntax end def self.expand_syntax(syntax) syntax = simplify_underivable_rules(syntax) syntax = simplify_emptyonly_rules(syntax) syntax = make_rules_no_empseq(syntax) syntax = expand_channel_rules(syntax) syntax = expand_noalt_rules(syntax) syntax = reorder_rules(syntax) end def self.simplify_underivable_rules(syntax) deribable_syms = {} changed = true while changed changed = false syntax.each {|sym, rules| next if deribable_syms[sym] rules.each {|rhs| if rhs.all? {|e| String === e || deribable_syms[e] } deribable_syms[sym] = true changed = true break end } } end result = {} syntax.each {|sym, rules| if deribable_syms[sym] rules2 = [] rules.each {|rhs| rules2 << rhs if rhs.all? {|e| String === e || deribable_syms[e] } } result[sym] = rules2.uniq else result[sym] = [] end } result end def self.simplify_emptyonly_rules(syntax) justempty_syms = {} changed = true while changed changed = false syntax.each {|sym, rules| next if justempty_syms[sym] if !rules.empty? && rules.all? {|rhs| rhs.all? {|e| justempty_syms[e] } } justempty_syms[sym] = true changed = true end } end result = {} syntax.each {|sym, rules| result[sym] = rules.map {|rhs| rhs.reject {|e| justempty_syms[e] } }.uniq } result end def self.expand_emptyable_syms(rhs, emptyable_syms) if rhs.empty? yield [] else first = rhs[0] rest = rhs[1..-1] if emptyable_syms[first] expand_emptyable_syms(rest, emptyable_syms) {|rhs2| yield [first] + rhs2 yield rhs2 } else expand_emptyable_syms(rest, emptyable_syms) {|rhs2| yield [first] + rhs2 } end end end def self.make_rules_no_empseq(syntax) emptyable_syms = {} changed = true while changed changed = false syntax.each {|sym, rules| next if emptyable_syms[sym] rules.each {|rhs| if rhs.all? {|e| emptyable_syms[e] } emptyable_syms[sym] = true changed = true break end } } end result = {} syntax.each {|sym, rules| rules2 = [] rules.each {|rhs| expand_emptyable_syms(rhs, emptyable_syms) {|rhs2| next if rhs2.empty? rules2 << rhs2 } } result[sym] = rules2.uniq } result end def self.expand_channel_rules(syntax) channel_rules = {} syntax.each {|sym, rules| channel_rules[sym] = {sym=>true} rules.each {|rhs| if rhs.length == 1 && Symbol === rhs[0] channel_rules[sym][rhs[0]] = true end } } changed = true while changed changed = false channel_rules.each {|sym, set| n1 = set.size set.keys.each {|s| set.update(channel_rules[s]) } n2 = set.size changed = true if n1 < n2 } end result = {} syntax.each {|sym, rules| rules2 = [] channel_rules[sym].each_key {|s| syntax[s].each {|rhs| unless rhs.length == 1 && Symbol === rhs[0] rules2 << rhs end } } result[sym] = rules2.uniq } result end def self.expand_noalt_rules(syntax) noalt_syms = {} syntax.each {|sym, rules| if rules.length == 1 noalt_syms[sym] = true end } result = {} syntax.each {|sym, rules| rules2 = [] rules.each {|rhs| rhs2 = [] rhs.each {|e| if noalt_syms[e] rhs2.concat syntax[e][0] else rhs2 << e end } rules2 << rhs2 } result[sym] = rules2.uniq } result end def self.reorder_rules(syntax) result = {} syntax.each {|sym, rules| result[sym] = rules.sort_by {|rhs| [rhs.find_all {|e| Symbol === e }.length, rhs.length] } } result end def each_tree(sym, limit) generate_from_sym(sym, limit) {|_, tree| yield tree } nil end def each_string(sym, limit) generate_from_sym(sym, limit) {|_, tree| yield [tree].join('') } nil end def generate_from_sym(sym, limit, &b) return if limit < 0 if String === sym yield limit, sym else rules = @syntax[sym] raise "undefined rule: #{sym}" if !rules rules.each {|rhs| if rhs.length == 1 || rules.length == 1 limit1 = limit else limit1 = limit-1 end generate_from_rhs(rhs, limit1, &b) } end nil end def generate_from_rhs(rhs, limit) return if limit < 0 if rhs.empty? yield limit, [] else generate_from_sym(rhs[0], limit) {|limit1, child| generate_from_rhs(rhs[1..-1], limit1) {|limit2, arr| yield limit2, [child, *arr] } } end nil end end # :startdoc: end