summaryrefslogtreecommitdiff
path: root/tool/lrama/lib/lrama/states.rb
diff options
context:
space:
mode:
Diffstat (limited to 'tool/lrama/lib/lrama/states.rb')
-rw-r--r--tool/lrama/lib/lrama/states.rb832
1 files changed, 832 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb
new file mode 100644
index 0000000000..f907db30cc
--- /dev/null
+++ b/tool/lrama/lib/lrama/states.rb
@@ -0,0 +1,832 @@
+require "forwardable"
+require "lrama/report"
+
+module Lrama
+ class State
+ class Reduce
+ # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
+ attr_reader :item, :look_ahead, :not_selected_symbols
+ attr_accessor :default_reduction
+
+ def initialize(item)
+ @item = item
+ @look_ahead = nil
+ @not_selected_symbols = []
+ end
+
+ def rule
+ @item.rule
+ end
+
+ def look_ahead=(look_ahead)
+ @look_ahead = look_ahead.freeze
+ end
+
+ def add_not_selected_symbol(sym)
+ @not_selected_symbols << sym
+ end
+
+ def selected_look_ahead
+ if @look_ahead
+ @look_ahead - @not_selected_symbols
+ else
+ []
+ end
+ end
+ end
+
+ class Shift
+ attr_reader :next_sym, :next_items
+ attr_accessor :not_selected
+
+ def initialize(next_sym, next_items)
+ @next_sym = next_sym
+ @next_items = next_items
+ end
+ end
+
+ # * symbol: A symbol under discussion
+ # * reduce: A reduce under discussion
+ # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative)
+ ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
+ def report_message
+ s = symbol.display_name
+ r = reduce.rule.precedence_sym.display_name
+ case
+ when which == :shift && same_prec
+ msg = "resolved as #{which} (%right #{s})"
+ when which == :shift
+ msg = "resolved as #{which} (#{r} < #{s})"
+ when which == :reduce && same_prec
+ msg = "resolved as #{which} (%left #{s})"
+ when which == :reduce
+ msg = "resolved as #{which} (#{s} < #{r})"
+ when which == :error
+ msg = "resolved as an #{which} (%nonassoc #{s})"
+ else
+ raise "Unknown direction. #{self}"
+ end
+
+ "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
+ end
+ end
+
+ Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true)
+
+ attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
+ :default_reduction_rule, :closure, :items
+ attr_accessor :shifts, :reduces
+
+ def initialize(id, accessing_symbol, kernels)
+ @id = id
+ @accessing_symbol = accessing_symbol
+ @kernels = kernels.freeze
+ @items = @kernels
+ # Manage relationships between items to state
+ # to resolve next state
+ @items_to_state = {}
+ @conflicts = []
+ @resolved_conflicts = []
+ @default_reduction_rule = nil
+ end
+
+ def closure=(closure)
+ @closure = closure
+ @items = @kernels + @closure
+ end
+
+ def non_default_reduces
+ reduces.select do |reduce|
+ reduce.rule != @default_reduction_rule
+ end
+ end
+
+ def compute_shifts_reduces
+ _shifts = {}
+ reduces = []
+ items.each do |item|
+ # TODO: Consider what should be pushed
+ if item.end_of_rule?
+ reduces << Reduce.new(item)
+ else
+ key = item.next_sym
+ _shifts[key] ||= []
+ _shifts[key] << item.new_by_next_position
+ end
+ end
+
+ # It seems Bison 3.8.2 iterates transitions order by symbol number
+ shifts = _shifts.sort_by do |next_sym, new_items|
+ next_sym.number
+ end.map do |next_sym, new_items|
+ Shift.new(next_sym, new_items.flatten)
+ end
+ self.shifts = shifts.freeze
+ self.reduces = reduces.freeze
+ end
+
+ def set_items_to_state(items, next_state)
+ @items_to_state[items] = next_state
+ end
+
+ #
+ def set_look_ahead(rule, look_ahead)
+ reduce = reduces.find do |r|
+ r.rule == rule
+ end
+
+ reduce.look_ahead = look_ahead
+ end
+
+ # Returns array of [nterm, next_state]
+ def nterm_transitions
+ return @nterm_transitions if @nterm_transitions
+
+ @nterm_transitions = []
+
+ shifts.each do |shift|
+ next if shift.next_sym.term?
+
+ @nterm_transitions << [shift, @items_to_state[shift.next_items]]
+ end
+
+ @nterm_transitions
+ end
+
+ # Returns array of [term, next_state]
+ def term_transitions
+ return @term_transitions if @term_transitions
+
+ @term_transitions = []
+
+ shifts.each do |shift|
+ next if shift.next_sym.nterm?
+
+ @term_transitions << [shift, @items_to_state[shift.next_items]]
+ end
+
+ @term_transitions
+ end
+
+ def selected_term_transitions
+ term_transitions.select do |shift, next_state|
+ !shift.not_selected
+ end
+ end
+
+ # Move to next state by sym
+ def transition(sym)
+ result = nil
+
+ if sym.term?
+ term_transitions.each do |shift, next_state|
+ term = shift.next_sym
+ result = next_state if term == sym
+ end
+ else
+ nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ result = next_state if nterm == sym
+ end
+ end
+
+ raise "Can not transit by #{sym} #{self}" if result.nil?
+
+ result
+ end
+
+ def find_reduce_by_item!(item)
+ reduces.find do |r|
+ r.item == item
+ end || (raise "reduce is not found. #{item}, #{state}")
+ end
+
+ def default_reduction_rule=(default_reduction_rule)
+ @default_reduction_rule = default_reduction_rule
+
+ reduces.each do |r|
+ if r.rule == default_reduction_rule
+ r.default_reduction = true
+ end
+ end
+ end
+
+ def sr_conflicts
+ @conflicts.select do |conflict|
+ conflict.type == :shift_reduce
+ end
+ end
+
+ def rr_conflicts
+ @conflicts.select do |conflict|
+ conflict.type == :reduce_reduce
+ end
+ end
+ end
+
+ # States is passed to a template file
+ #
+ # "Efficient Computation of LALR(1) Look-Ahead Sets"
+ # https://dl.acm.org/doi/pdf/10.1145/69622.357187
+ class States
+ extend Forwardable
+ include Lrama::Report::Duration
+
+ def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
+ :accept_symbol, :eof_symbol, :find_symbol_by_s_value!
+
+ # TODO: Validate position is not over rule rhs
+ Item = Struct.new(:rule, :position, keyword_init: true) do
+ # Optimization for States#setup_state
+ def hash
+ [rule.id, position].hash
+ end
+
+ def rule_id
+ rule.id
+ end
+
+ def next_sym
+ rule.rhs[position]
+ end
+
+ def end_of_rule?
+ rule.rhs.count == position
+ end
+
+ def new_by_next_position
+ Item.new(rule: rule, position: position + 1)
+ end
+
+ def previous_sym
+ rule.rhs[position - 1]
+ end
+
+ def display_name
+ r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
+ "#{r} (rule #{rule.id})"
+ end
+
+ # Right after position
+ def display_rest
+ r = rule.rhs[position..-1].map(&:display_name).join(" ")
+ ". #{r} (rule #{rule.id})"
+ end
+ end
+
+ attr_reader :states, :reads_relation, :includes_relation, :lookback_relation
+
+ def initialize(grammar, warning, trace_state: false)
+ @grammar = grammar
+ @warning = warning
+ @trace_state = trace_state
+
+ @states = []
+
+ # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
+ # where p is state, A is nterm, t is term.
+ #
+ # `@direct_read_sets` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is bitmap of term.
+ @direct_read_sets = {}
+
+ # Reads relation on nonterminal transitions (pair of state and nterm)
+ # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
+ # where p, r are state, A, C are nterm.
+ #
+ # `@reads_relation` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is array of [state.id, nterm.token_id].
+ @reads_relation = {}
+
+ # `@read_sets` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is bitmap of term.
+ @read_sets = {}
+
+ # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
+ # where p, p' are state, A, B are nterm, β, γ is sequence of symbol.
+ #
+ # `@includes_relation` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is array of [state.id, nterm.token_id].
+ @includes_relation = {}
+
+ # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
+ # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol.
+ #
+ # `@lookback_relation` is a hash whose
+ # key is [state.id, rule.id],
+ # value is array of [state.id, nterm.token_id].
+ @lookback_relation = {}
+
+ # `@follow_sets` is a hash whose
+ # key is [state.id, rule.id],
+ # value is bitmap of term.
+ @follow_sets = {}
+
+ # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)`
+ #
+ # `@la` is a hash whose
+ # key is [state.id, rule.id],
+ # value is bitmap of term.
+ @la = {}
+ end
+
+ def compute
+ # Look Ahead Sets
+ report_duration(:compute_lr0_states) { compute_lr0_states }
+ report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
+ report_duration(:compute_reads_relation) { compute_reads_relation }
+ report_duration(:compute_read_sets) { compute_read_sets }
+ report_duration(:compute_includes_relation) { compute_includes_relation }
+ report_duration(:compute_lookback_relation) { compute_lookback_relation }
+ report_duration(:compute_follow_sets) { compute_follow_sets }
+ report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets }
+
+ # Conflicts
+ report_duration(:compute_conflicts) { compute_conflicts }
+
+ report_duration(:compute_default_reduction) { compute_default_reduction }
+
+ check_conflicts
+ end
+
+ def reporter
+ StatesReporter.new(self)
+ end
+
+ def states_count
+ @states.count
+ end
+
+ def direct_read_sets
+ h = {}
+
+ @direct_read_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def read_sets
+ h = {}
+
+ @read_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def follow_sets
+ h = {}
+
+ @follow_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def la
+ h = {}
+
+ @la.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ private
+
+ def sr_conflicts
+ @states.flat_map(&:sr_conflicts)
+ end
+
+ def rr_conflicts
+ @states.flat_map(&:rr_conflicts)
+ end
+
+ def initial_attrs
+ h = {}
+
+ attrs.each do |attr|
+ h[attr.id] = false
+ end
+
+ h
+ end
+
+ def trace_state
+ if @trace_state
+ yield STDERR
+ end
+ end
+
+ def create_state(accessing_symbol, kernels, states_creted)
+ # A item can appear in some states,
+ # so need to use `kernels` (not `kernels.first`) as a key.
+ #
+ # For example...
+ #
+ # %%
+ # program: '+' strings_1
+ # | '-' strings_2
+ # ;
+ #
+ # strings_1: string_1
+ # ;
+ #
+ # strings_2: string_1
+ # | string_2
+ # ;
+ #
+ # string_1: string
+ # ;
+ #
+ # string_2: string '+'
+ # ;
+ #
+ # string: tSTRING
+ # ;
+ # %%
+ #
+ # For these grammar, there are 2 states
+ #
+ # State A
+ # string_1: string •
+ #
+ # State B
+ # string_1: string •
+ # string_2: string • '+'
+ #
+ return [states_creted[kernels], false] if states_creted[kernels]
+
+ state = State.new(@states.count, accessing_symbol, kernels)
+ @states << state
+ states_creted[kernels] = state
+
+ return [state, true]
+ end
+
+ def setup_state(state)
+ # closure
+ closure = []
+ visited = {}
+ queued = {}
+ items = state.kernels.dup
+
+ items.each do |item|
+ queued[item] = true
+ end
+
+ while (item = items.shift) do
+ visited[item] = true
+
+ if (sym = item.next_sym) && sym.nterm?
+ @grammar.find_rules_by_symbol!(sym).each do |rule|
+ i = Item.new(rule: rule, position: 0)
+ next if queued[i]
+ closure << i
+ items << i
+ queued[i] = true
+ end
+ end
+ end
+
+ state.closure = closure.sort_by {|i| i.rule.id }
+
+ # Trace
+ trace_state do |out|
+ out << "Closure: input\n"
+ state.kernels.each do |item|
+ out << " #{item.display_rest}\n"
+ end
+ out << "\n\n"
+ out << "Closure: output\n"
+ state.items.each do |item|
+ out << " #{item.display_rest}\n"
+ end
+ out << "\n\n"
+ end
+
+ # shift & reduce
+ state.compute_shifts_reduces
+ end
+
+ def enqueue_state(states, state)
+ # Trace
+ previous = state.kernels.first.previous_sym
+ trace_state do |out|
+ out << sprintf("state_list_append (state = %d, symbol = %d (%s))",
+ @states.count, previous.number, previous.display_name)
+ end
+
+ states << state
+ end
+
+ def compute_lr0_states
+ # State queue
+ states = []
+ states_creted = {}
+
+ state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_creted)
+ enqueue_state(states, state)
+
+ while (state = states.shift) do
+ # Trace
+ #
+ # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
+ # I think it is not correct...
+ previous = state.kernels.first.previous_sym
+ trace_state do |out|
+ out << "Processing state #{state.id} (reached by #{previous.display_name})\n"
+ end
+
+ setup_state(state)
+
+ state.shifts.each do |shift|
+ new_state, created = create_state(shift.next_sym, shift.next_items, states_creted)
+ state.set_items_to_state(shift.next_items, new_state)
+ enqueue_state(states, new_state) if created
+ end
+ end
+ end
+
+ def nterm_transitions
+ a = []
+
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ a << [state, nterm, next_state]
+ end
+ end
+
+ a
+ end
+
+ def compute_direct_read_sets
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+
+ ary = next_state.term_transitions.map do |shift, _|
+ shift.next_sym.number
+ end
+
+ key = [state.id, nterm.token_id]
+ @direct_read_sets[key] = Bitmap.from_array(ary)
+ end
+ end
+ end
+
+ def compute_reads_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ next_state.nterm_transitions.each do |shift2, _next_state2|
+ nterm2 = shift2.next_sym
+ if nterm2.nullable
+ key = [state.id, nterm.token_id]
+ @reads_relation[key] ||= []
+ @reads_relation[key] << [next_state.id, nterm2.token_id]
+ end
+ end
+ end
+ end
+ end
+
+ def compute_read_sets
+ sets = nterm_transitions.map do |state, nterm, next_state|
+ [state.id, nterm.token_id]
+ end
+
+ @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute
+ end
+
+ # Execute transition of state by symbols
+ # then return final state.
+ def transition(state, symbols)
+ symbols.each do |sym|
+ state = state.transition(sym)
+ end
+
+ state
+ end
+
+ def compute_includes_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ @grammar.find_rules_by_symbol!(nterm).each do |rule|
+ i = rule.rhs.count - 1
+
+ while (i > -1) do
+ sym = rule.rhs[i]
+
+ break if sym.term?
+ state2 = transition(state, rule.rhs[0...i])
+ # p' = state, B = nterm, p = state2, A = sym
+ key = [state2.id, sym.token_id]
+ # TODO: need to omit if state == state2 ?
+ @includes_relation[key] ||= []
+ @includes_relation[key] << [state.id, nterm.token_id]
+ break if !sym.nullable
+ i -= 1
+ end
+ end
+ end
+ end
+ end
+
+ def compute_lookback_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ @grammar.find_rules_by_symbol!(nterm).each do |rule|
+ state2 = transition(state, rule.rhs)
+ # p = state, A = nterm, q = state2, A -> ω = rule
+ key = [state2.id, rule.id]
+ @lookback_relation[key] ||= []
+ @lookback_relation[key] << [state.id, nterm.token_id]
+ end
+ end
+ end
+ end
+
+ def compute_follow_sets
+ sets = nterm_transitions.map do |state, nterm, next_state|
+ [state.id, nterm.token_id]
+ end
+
+ @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute
+ end
+
+ def compute_look_ahead_sets
+ @states.each do |state|
+ rules.each do |rule|
+ ary = @lookback_relation[[state.id, rule.id]]
+ next if !ary
+
+ ary.each do |state2_id, nterm_token_id|
+ # q = state, A -> ω = rule, p = state2, A = nterm
+ follows = @follow_sets[[state2_id, nterm_token_id]]
+
+ next if follows == 0
+
+ key = [state.id, rule.id]
+ @la[key] ||= 0
+ look_ahead = @la[key] | follows
+ @la[key] |= look_ahead
+
+ # No risk of conflict when
+ # * the state only has single reduce
+ # * the state only has nterm_transitions (GOTO)
+ next if state.reduces.count == 1 && state.term_transitions.count == 0
+
+ state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
+ end
+ end
+ end
+ end
+
+ def bitmap_to_terms(bit)
+ ary = Bitmap.to_array(bit)
+ ary.map do |i|
+ @grammar.find_symbol_by_number!(i)
+ end
+ end
+
+ def compute_conflicts
+ compute_shift_reduce_conflicts
+ compute_reduce_reduce_conflicts
+ end
+
+ def compute_shift_reduce_conflicts
+ states.each do |state|
+ state.shifts.each do |shift|
+ state.reduces.each do |reduce|
+ sym = shift.next_sym
+
+ next unless reduce.look_ahead
+ next if !reduce.look_ahead.include?(sym)
+
+ # Shift/Reduce conflict
+ shift_prec = sym.precedence
+ reduce_prec = reduce.item.rule.precedence
+
+ # Can resolve only when both have prec
+ unless shift_prec && reduce_prec
+ state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce)
+ next
+ end
+
+ case
+ when shift_prec < reduce_prec
+ # Reduce is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce)
+ shift.not_selected = true
+ next
+ when shift_prec > reduce_prec
+ # Shift is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift)
+ reduce.add_not_selected_symbol(sym)
+ next
+ end
+
+ # shift_prec == reduce_prec, then check associativity
+ case sym.precedence.type
+ when :right
+ # Shift is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true)
+ reduce.add_not_selected_symbol(sym)
+ next
+ when :left
+ # Reduce is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true)
+ shift.not_selected = true
+ next
+ when :nonassoc
+ # Can not resolve
+ #
+ # nonassoc creates "run-time" error, precedence creates "compile-time" error.
+ # Then omit both the shift and reduce.
+ #
+ # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error)
+ shift.not_selected = true
+ reduce.add_not_selected_symbol(sym)
+ else
+ raise "Unknown precedence type. #{sym}"
+ end
+ end
+ end
+ end
+ end
+
+ def compute_reduce_reduce_conflicts
+ states.each do |state|
+ a = []
+
+ state.reduces.each do |reduce|
+ next if reduce.look_ahead.nil?
+
+ intersection = a & reduce.look_ahead
+ a += reduce.look_ahead
+
+ if !intersection.empty?
+ state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce)
+ end
+ end
+ end
+ end
+
+ def compute_default_reduction
+ states.each do |state|
+ next if state.reduces.empty?
+ # Do not set, if conflict exist
+ next if !state.conflicts.empty?
+ # Do not set, if shift with `error` exists.
+ next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)
+
+ state.default_reduction_rule = state.reduces.map do |r|
+ [r.rule, r.rule.id, (r.look_ahead || []).count]
+ end.sort_by do |rule, rule_id, count|
+ [-count, rule_id]
+ end.first.first
+ end
+ end
+
+ def check_conflicts
+ sr_count = sr_conflicts.count
+ rr_count = rr_conflicts.count
+
+ if @grammar.expect
+
+ expected_sr_conflicts = @grammar.expect
+ expected_rr_conflicts = 0
+
+ if expected_sr_conflicts != sr_count
+ @warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected")
+ end
+
+ if expected_rr_conflicts != rr_count
+ @warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected")
+ end
+ else
+ if sr_count != 0
+ @warning.warn("shift/reduce conflicts: #{sr_count} found")
+ end
+
+ if rr_count != 0
+ @warning.warn("reduce/reduce conflicts: #{rr_count} found")
+ end
+ end
+ end
+ end
+end