Diffstat (limited to 'tool/lrama/lib')
-rw-r--r--  tool/lrama/lib/lrama.rb                   13
-rw-r--r--  tool/lrama/lib/lrama/bitmap.rb            29
-rw-r--r--  tool/lrama/lib/lrama/command.rb          144
-rw-r--r--  tool/lrama/lib/lrama/context.rb          511
-rw-r--r--  tool/lrama/lib/lrama/digraph.rb           53
-rw-r--r--  tool/lrama/lib/lrama/grammar.rb          850
-rw-r--r--  tool/lrama/lib/lrama/lexer.rb            349
-rw-r--r--  tool/lrama/lib/lrama/output.rb           370
-rw-r--r--  tool/lrama/lib/lrama/parser.rb           321
-rw-r--r--  tool/lrama/lib/lrama/report.rb            47
-rw-r--r--  tool/lrama/lib/lrama/states.rb           832
-rw-r--r--  tool/lrama/lib/lrama/states_reporter.rb  310
-rw-r--r--  tool/lrama/lib/lrama/version.rb            3
-rw-r--r--  tool/lrama/lib/lrama/warning.rb           25
14 files changed, 3857 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama.rb b/tool/lrama/lib/lrama.rb
new file mode 100644
index 0000000000..1973cc6646
--- /dev/null
+++ b/tool/lrama/lib/lrama.rb
@@ -0,0 +1,13 @@
+require "lrama/bitmap"
+require "lrama/command"
+require "lrama/context"
+require "lrama/digraph"
+require "lrama/grammar"
+require "lrama/lexer"
+require "lrama/output"
+require "lrama/parser"
+require "lrama/report"
+require "lrama/states"
+require "lrama/states_reporter"
+require "lrama/version"
+require "lrama/warning"
diff --git a/tool/lrama/lib/lrama/bitmap.rb b/tool/lrama/lib/lrama/bitmap.rb
new file mode 100644
index 0000000000..8349a23c34
--- /dev/null
+++ b/tool/lrama/lib/lrama/bitmap.rb
@@ -0,0 +1,29 @@
+module Lrama
+ module Bitmap
+ def self.from_array(ary)
+ bit = 0
+
+ ary.each do |int|
+ bit |= (1 << int)
+ end
+
+ bit
+ end
+
+ def self.to_array(int)
+ a = []
+ i = 0
+
+ while int > 0 do
+ if int & 1 == 1
+ a << i
+ end
+
+ i += 1
+ int >>= 1
+ end
+
+ a
+ end
+ end
+end
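The Bitmap module above packs a set of small non-negative integers into the bits of a single Integer. A minimal usage sketch (values chosen for illustration):

    Lrama::Bitmap.from_array([0, 2, 5])  # => 37 (0b100101)
    Lrama::Bitmap.to_array(37)           # => [0, 2, 5]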
diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb
new file mode 100644
index 0000000000..71369de8ef
--- /dev/null
+++ b/tool/lrama/lib/lrama/command.rb
@@ -0,0 +1,144 @@
+require 'optparse'
+
+module Lrama
+ class Command
+ def run(argv)
+ opt = OptionParser.new
+
+ # opt.on('-h') {|v| p v }
+ opt.on('-V', '--version') {|v| puts Lrama::VERSION ; exit 0 }
+
+ # Tuning the Parser
+ skeleton = "bison/yacc.c"
+
+ opt.on('-S', '--skeleton=FILE') {|v| skeleton = v }
+ opt.on('-t') { } # Do nothing
+
+ # Output Files:
+ header = false
+ header_file = nil
+ report = []
+ report_file = nil
+ outfile = "y.tab.c"
+
+ opt.on('-h', '--header=[FILE]') {|v| header = true; header_file = v }
+ opt.on('-d') { header = true }
+ opt.on('-r', '--report=THINGS') {|v| report = v.split(',') }
+ opt.on('--report-file=FILE') {|v| report_file = v }
+ opt.on('-v') { } # Do nothing
+ opt.on('-o', '--output=FILE') {|v| outfile = v }
+
+ # Hidden
+ trace = []
+ opt.on('--trace=THINGS') {|v| trace = v.split(',') }
+
+ # Error Recovery
+ error_recovery = false
+ opt.on('-e') {|v| error_recovery = true }
+
+ opt.parse!(argv)
+
+ trace_opts = validate_trace(trace)
+ report_opts = validate_report(report)
+
+ grammar_file = argv.shift
+
+ if !report.empty? && report_file.nil? && grammar_file
+ report_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".output"
+ end
+
+ if !header_file && header
+ case
+ when outfile
+ header_file = File.dirname(outfile) + "/" + File.basename(outfile, ".*") + ".h"
+ when grammar_file
+ header_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".h"
+ end
+ end
+
+ if !grammar_file
+ puts "File should be specified\n"
+ exit 1
+ end
+
+ Report::Duration.enable if trace_opts[:time]
+
+ warning = Lrama::Warning.new
+ y = File.read(grammar_file)
+ grammar = Lrama::Parser.new(y).parse
+ states = Lrama::States.new(grammar, warning, trace_state: (trace_opts[:automaton] || trace_opts[:closure]))
+ states.compute
+ context = Lrama::Context.new(states)
+
+ if report_file
+ reporter = Lrama::StatesReporter.new(states)
+ File.open(report_file, "w+") do |f|
+ reporter.report(f, **report_opts)
+ end
+ end
+
+ File.open(outfile, "w+") do |f|
+ Lrama::Output.new(
+ out: f,
+ output_file_path: outfile,
+ template_name: skeleton,
+ grammar_file_path: grammar_file,
+ header_file_path: header_file,
+ context: context,
+ grammar: grammar,
+ ).render
+ end
+
+ if warning.has_error?
+ exit 1
+ end
+ end
+
+ private
+
+ def validate_report(report)
+ bison_list = %w[states itemsets lookaheads solved counterexamples cex all none]
+ others = %w[verbose]
+ list = bison_list + others
+ not_supported = %w[counterexamples cex none]
+ h = { grammar: true }
+
+ report.each do |r|
+ if list.include?(r) && !not_supported.include?(r)
+ h[r.to_sym] = true
+ else
+ raise "Invalid report option \"#{r}\"."
+ end
+ end
+
+ if h[:all]
+ (bison_list - not_supported).each do |r|
+ h[r.to_sym] = true
+ end
+
+ h.delete(:all)
+ end
+
+ return h
+ end
+
+ def validate_trace(trace)
+ list = %w[
+ none locations scan parse automaton bitsets
+ closure grammar resource sets muscles tools
+ m4-early m4 skeleton time ielr cex all
+ ]
+ h = {}
+
+ trace.each do |t|
+ if list.include?(t)
+ h[t.to_sym] = true
+ else
+ raise "Invalid trace option \"#{t}\"."
+ end
+ end
+
+ return h
+ end
+ end
+end
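A hedged sketch of driving the command-line entry point above; the executable wrapper is not part of this diff and the file names are placeholders:

    # Roughly what `lrama -d -o y.tab.c parse.y` boils down to:
    Lrama::Command.new.run(["-d", "-o", "y.tab.c", "parse.y"])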
diff --git a/tool/lrama/lib/lrama/context.rb b/tool/lrama/lib/lrama/context.rb
new file mode 100644
index 0000000000..9086470011
--- /dev/null
+++ b/tool/lrama/lib/lrama/context.rb
@@ -0,0 +1,511 @@
+require "lrama/report"
+
+module Lrama
+ # This is passed to a template
+ class Context
+ include Report::Duration
+
+ ErrorActionNumber = -Float::INFINITY
+ BaseMin = -Float::INFINITY
+
+ # TODO: It might be better to pass `states` to Output directly?
+ attr_reader :states
+
+ def initialize(states)
+ @states = states
+ @yydefact = nil
+ @yydefgoto = nil
+ # Array of array
+ @_actions = []
+
+ compute_tables
+ end
+
+ # enum yytokentype
+ def yytokentype
+ @states.terms.reject do |term|
+ 0 < term.token_id && term.token_id < 128
+ end.map do |term|
+ [term.id.s_value, term.token_id, term.display_name]
+ end.unshift(["YYEMPTY", -2, nil])
+ end
+
+ # enum yysymbol_kind_t
+ def yysymbol_kind_t
+ @states.symbols.map do |sym|
+ [sym.enum_name, sym.number, sym.comment]
+ end.unshift(["YYSYMBOL_YYEMPTY", -2, nil])
+ end
+
+ # State number of final (accepted) state
+ def yyfinal
+ @states.states.find do |state|
+ state.items.find do |item|
+ item.rule.lhs.id.s_value == "$accept" && item.end_of_rule?
+ end
+ end.id
+ end
+
+ def yylast
+ @yylast
+ end
+
+ # Number of terms
+ def yyntokens
+ @states.terms.count
+ end
+
+ # Number of nterms
+ def yynnts
+ @states.nterms.count
+ end
+
+ # Number of rules
+ def yynrules
+ @states.rules.count
+ end
+
+ # Number of states
+ def yynstates
+ @states.states.count
+ end
+
+ # Last token number
+ def yymaxutok
+ @states.terms.map(&:token_id).max
+ end
+
+ # YYTRANSLATE
+ #
+ # yytranslate is a mapping from token id to symbol number
+ def yytranslate
+ # 2 is YYSYMBOL_YYUNDEF
+ a = Array.new(yymaxutok, 2)
+
+ @states.terms.each do |term|
+ a[term.token_id] = term.number
+ end
+
+ return a
+ end
+
+    # Mapping from rule number to the line number where the rule is defined.
+ # Dummy rule is appended as the first element whose value is 0
+ # because 0 means error in yydefact.
+ def yyrline
+ a = [0]
+
+ @states.rules.each do |rule|
+ a << rule.lineno
+ end
+
+ return a
+ end
+
+ # Mapping from symbol number to its name
+ def yytname
+ @states.symbols.sort_by(&:number).map do |sym|
+ sym.display_name
+ end
+ end
+
+ def yypact_ninf
+ @yypact_ninf
+ end
+
+ def yytable_ninf
+ @yytable_ninf
+ end
+
+ def yypact
+ @base[0...yynstates]
+ end
+
+ def yydefact
+ @yydefact
+ end
+
+ def yypgoto
+ @base[yynstates..-1]
+ end
+
+ def yydefgoto
+ @yydefgoto
+ end
+
+ def yytable
+ @table
+ end
+
+ def yycheck
+ @check
+ end
+
+ def yystos
+ @states.states.map do |state|
+ state.accessing_symbol.number
+ end
+ end
+
+ # Mapping from rule number to symbol number of LHS.
+ # Dummy rule is appended as the first element whose value is 0
+ # because 0 means error in yydefact.
+ def yyr1
+ a = [0]
+
+ @states.rules.each do |rule|
+ a << rule.lhs.number
+ end
+
+ return a
+ end
+
+    # Mapping from rule number to the length of the RHS.
+ # Dummy rule is appended as the first element whose value is 0
+ # because 0 means error in yydefact.
+ def yyr2
+ a = [0]
+
+ @states.rules.each do |rule|
+ a << rule.rhs.count
+ end
+
+ return a
+ end
+
+ private
+
+ # Compute these
+ #
+ # See also: "src/tables.c" of Bison.
+ #
+ # * yydefact
+ # * yydefgoto
+ # * yypact and yypgoto
+ # * yytable
+ # * yycheck
+ # * yypact_ninf
+ # * yytable_ninf
+ def compute_tables
+ report_duration(:compute_yydefact) { compute_yydefact }
+ report_duration(:compute_yydefgoto) { compute_yydefgoto }
+ report_duration(:sort_actions) { sort_actions }
+ # debug_sorted_actions
+ report_duration(:compute_packed_table) { compute_packed_table }
+ end
+
+ def vectors_count
+ @states.states.count + @states.nterms.count
+ end
+
+    # In the compressed table, rule 0 is appended as an error case
+    # and a reduce is represented by a negative number.
+ def rule_id_to_action_number(rule_id)
+ (rule_id + 1) * -1
+ end
+
+    # Symbol numbers are assigned to terms first, then nterms.
+ # This method calculates sequence_number for nterm.
+ def nterm_number_to_sequence_number(nterm_number)
+ nterm_number - @states.terms.count
+ end
+
+ # Vector is states + nterms
+ def nterm_number_to_vector_number(nterm_number)
+ @states.states.count + (nterm_number - @states.terms.count)
+ end
+
+ def compute_yydefact
+ # Default action (shift/reduce/error) for each state.
+ # Index is state id, value is `rule id + 1` of a default reduction.
+ @yydefact = Array.new(@states.states.count, 0)
+
+ @states.states.each do |state|
+ # Action number means
+ #
+ # * number = 0, default action
+ # * number = -Float::INFINITY, error by %nonassoc
+ # * number > 0, shift then move to state "number"
+        #   * number < 0, reduce by rule "-number". The rule id is already incremented by 1.
+ actions = Array.new(@states.terms.count, 0)
+
+ if state.reduces.map(&:selected_look_ahead).any? {|la| !la.empty? }
+ # Iterate reduces with reverse order so that first rule is used.
+ state.reduces.reverse.each do |reduce|
+ reduce.look_ahead.each do |term|
+ actions[term.number] = rule_id_to_action_number(reduce.rule.id)
+ end
+ end
+ end
+
+ # Shift is selected when S/R conflict exists.
+ state.selected_term_transitions.each do |shift, next_state|
+ actions[shift.next_sym.number] = next_state.id
+ end
+
+ state.resolved_conflicts.select do |conflict|
+ conflict.which == :error
+ end.each do |conflict|
+ actions[conflict.symbol.number] = ErrorActionNumber
+ end
+
+        # If a default_reduction_rule exists, replace its entries in
+        # actions with zero.
+ if state.default_reduction_rule
+ actions.map! do |e|
+ if e == rule_id_to_action_number(state.default_reduction_rule.id)
+ 0
+ else
+ e
+ end
+ end
+ end
+
+        # If there is no default_reduction_rule, the default behavior is an
+        # error, so replace ErrorActionNumber with zero.
+ if !state.default_reduction_rule
+ actions.map! do |e|
+ if e == ErrorActionNumber
+ 0
+ else
+ e
+ end
+ end
+ end
+
+ s = actions.each_with_index.map do |n, i|
+ [i, n]
+ end.select do |i, n|
+ # Remove default_reduction_rule entries
+ n != 0
+ end
+
+ if s.count != 0
+ # Entry of @_actions is an array of
+ #
+ # * State id
+ # * Array of tuple, [from, to] where from is term number and to is action.
+ # * The number of "Array of tuple" used by sort_actions
+ # * "width" used by sort_actions
+ @_actions << [state.id, s, s.count, s.last[0] - s.first[0] + 1]
+ end
+
+ @yydefact[state.id] = state.default_reduction_rule ? state.default_reduction_rule.id + 1 : 0
+ end
+ end
+
+ def compute_yydefgoto
+ # Default GOTO (nterm transition) for each nterm.
+ # Index is sequence number of nterm, value is state id
+ # of a default nterm transition destination.
+ @yydefgoto = Array.new(@states.nterms.count, 0)
+ h = {}
+ # Mapping from nterm to next_states
+ nterm_to_next_states = {}
+ terms_count = @states.terms.count
+
+ @states.states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ key = shift.next_sym
+ nterm_to_next_states[key] ||= []
+ nterm_to_next_states[key] << [state, next_state] # [from_state, to_state]
+ end
+ end
+
+ @states.nterms.each do |nterm|
+ if !(states = nterm_to_next_states[nterm])
+ default_goto = 0
+ not_default_gotos = []
+ else
+ default_state = states.map(&:last).group_by {|s| s }.max_by {|_, v| v.count }.first
+ default_goto = default_state.id
+ not_default_gotos = []
+ states.each do |from_state, to_state|
+ next if to_state.id == default_goto
+ not_default_gotos << [from_state.id, to_state.id]
+ end
+ end
+
+ k = nterm_number_to_sequence_number(nterm.number)
+ @yydefgoto[k] = default_goto
+
+ if not_default_gotos.count != 0
+ v = nterm_number_to_vector_number(nterm.number)
+
+ # Entry of @_actions is an array of
+ #
+ # * Nterm number as vector number
+ # * Array of tuple, [from, to] where from is state number and to is state number.
+ # * The number of "Array of tuple" used by sort_actions
+ # * "width" used by sort_actions
+ @_actions << [v, not_default_gotos, not_default_gotos.count, not_default_gotos.last[0] - not_default_gotos.first[0] + 1]
+ end
+ end
+ end
+
+ def sort_actions
+      # Note: this does not produce the same order as the commented-out sort_by below.
+ #
+ # @sorted_actions = @_actions.sort_by do |_, _, count, width|
+ # [-width, -count]
+ # end
+
+ @sorted_actions = []
+
+ @_actions.each do |action|
+ if @sorted_actions.empty?
+ @sorted_actions << action
+ next
+ end
+
+ j = @sorted_actions.count - 1
+ state_id, froms_and_tos, count, width = action
+
+ while (j >= 0) do
+ case
+ when @sorted_actions[j][3] < width
+ j -= 1
+ when @sorted_actions[j][3] == width && @sorted_actions[j][2] < count
+ j -= 1
+ else
+ break
+ end
+ end
+
+ @sorted_actions.insert(j + 1, action)
+ end
+ end
+
+ def debug_sorted_actions
+ ary = Array.new
+ @sorted_actions.each do |state_id, froms_and_tos, count, width|
+ ary[state_id] = [state_id, froms_and_tos, count, width]
+ end
+
+ print sprintf("table_print:\n\n")
+
+ print sprintf("order [\n")
+ vectors_count.times do |i|
+ print sprintf("%d, ", @sorted_actions[i] ? @sorted_actions[i][0] : 0)
+ print "\n" if i % 10 == 9
+ end
+ print sprintf("]\n\n")
+
+
+ print sprintf("width [\n")
+ vectors_count.times do |i|
+ print sprintf("%d, ", ary[i] ? ary[i][3] : 0)
+ print "\n" if i % 10 == 9
+ end
+ print sprintf("]\n\n")
+
+
+ print sprintf("tally [\n")
+ vectors_count.times do |i|
+ print sprintf("%d, ", ary[i] ? ary[i][2] : 0)
+ print "\n" if i % 10 == 9
+ end
+ print sprintf("]\n\n")
+ end
+
+ def compute_packed_table
+ # yypact and yypgoto
+ @base = Array.new(vectors_count, BaseMin)
+ # yytable
+ @table = []
+ # yycheck
+ @check = []
+ # Key is froms_and_tos, value is index position
+ pushed = {}
+      used_res = {}
+ lowzero = 0
+ high = 0
+
+ @sorted_actions.each do |state_id, froms_and_tos, _, _|
+ if (res = pushed[froms_and_tos])
+ @base[state_id] = res
+ next
+ end
+
+ res = lowzero - froms_and_tos.first[0]
+
+ while true do
+ ok = true
+
+ froms_and_tos.each do |from, to|
+ loc = res + from
+
+ if @table[loc]
+            # If this table cell is already set, it cannot be used.
+ ok = false
+ break
+ end
+ end
+
+        if ok && used_res[res]
+ ok = false
+ end
+
+ if ok
+ break
+ else
+ res += 1
+ end
+ end
+
+ loc = 0
+
+ froms_and_tos.each do |from, to|
+ loc = res + from
+
+ @table[loc] = to
+ @check[loc] = from
+ end
+
+ while (@table[lowzero]) do
+ lowzero += 1
+ end
+
+ high = loc if high < loc
+
+ @base[state_id] = res
+ pushed[froms_and_tos] = res
+        used_res[res] = true
+ end
+
+ @yylast = high
+
+ # replace_ninf
+ @yypact_ninf = (@base.select {|i| i != BaseMin } + [0]).min - 1
+ @base.map! do |i|
+ case i
+ when BaseMin
+ @yypact_ninf
+ else
+ i
+ end
+ end
+
+ @yytable_ninf = (@table.compact.select {|i| i != ErrorActionNumber } + [0]).min - 1
+ @table.map! do |i|
+ case i
+ when nil
+ 0
+ when ErrorActionNumber
+ @yytable_ninf
+ else
+ i
+ end
+ end
+
+ @check.map! do |i|
+ case i
+ when nil
+ -1
+ else
+ i
+ end
+ end
+ end
+ end
+end
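Given how compute_packed_table stores each vector (@table[base + from] = to and @check[base + from] = from, with base recorded in yypact/yypgoto), a generated parser consults the packed tables roughly as in this simplified, hypothetical sketch (the real skeleton also guards against out-of-range offsets and the ninf sentinels):

    def packed_action(yypact, yytable, yycheck, yydefact, state, sym)
      loc = yypact[state] + sym
      if loc >= 0 && yycheck[loc] == sym
        yytable[loc]        # > 0: shift to that state, < 0: reduce by rule (-action - 1)
      else
        -yydefact[state]    # default reduction; 0 means a syntax error
      end
    end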
diff --git a/tool/lrama/lib/lrama/digraph.rb b/tool/lrama/lib/lrama/digraph.rb
new file mode 100644
index 0000000000..28f26781b1
--- /dev/null
+++ b/tool/lrama/lib/lrama/digraph.rb
@@ -0,0 +1,53 @@
+module Lrama
+ # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625)
+ class Digraph
+ def initialize(sets, relation, base_function)
+ # X in the paper
+ @sets = sets
+ # R in the paper
+ @relation = relation
+ # F' in the paper
+ @base_function = base_function
+ # S in the paper
+ @stack = []
+ # N in the paper
+ @h = Hash.new(0)
+ # F in the paper
+ @result = {}
+ end
+
+ def compute
+ @sets.each do |x|
+ next if @h[x] != 0
+ traverse(x)
+ end
+
+ return @result
+ end
+
+ private
+
+ def traverse(x)
+ @stack.push(x)
+ d = @stack.count
+ @h[x] = d
+ @result[x] = @base_function[x] # F x = F' x
+
+ @relation[x] && @relation[x].each do |y|
+ traverse(y) if @h[y] == 0
+ @h[x] = [@h[x], @h[y]].min
+ @result[x] |= @result[y] # F x = F x + F y
+ end
+
+ if @h[x] == d
+ while true do
+ z = @stack.pop
+ @h[z] = Float::INFINITY
+ @result[z] = @result[x] # F (Top of S) = F x
+
+ break if z == x
+ end
+ end
+ end
+ end
+end
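A small, hypothetical run of the Digraph algorithm above, using Integer bitmaps (as lrama does elsewhere) so that |= unions the sets:

    sets     = [:a, :b]
    relation = { a: [:b] }           # :a depends on :b
    base     = { a: 0b01, b: 0b10 }  # F' in the paper

    Lrama::Digraph.new(sets, relation, base).compute
    # => {:a=>3, :b=>2}   i.e. F(a) = F'(a) | F(b) = 0b11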
diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb
new file mode 100644
index 0000000000..8ca75cb60b
--- /dev/null
+++ b/tool/lrama/lib/lrama/grammar.rb
@@ -0,0 +1,850 @@
+require "forwardable"
+require "lrama/lexer"
+
+module Lrama
+ Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
+ # TODO: Change this to display_name
+ def to_s
+ l = lhs.id.s_value
+ r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
+
+ "#{l} -> #{r}"
+ end
+
+ # Used by #user_actions
+ def as_comment
+ l = lhs.id.s_value
+ r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
+
+ "#{l}: #{r}"
+ end
+
+ def precedence
+ precedence_sym && precedence_sym.precedence
+ end
+
+ def initial_rule?
+ id == 0
+ end
+
+ def translated_code
+ if code
+ code.translated_code
+ else
+ nil
+ end
+ end
+ end
+
+  # Symbol represents both terms and nterms
+  # `number` is assigned to both terms and nterms
+  # `token_id` is the token type for a term, an internal sequence number for an nterm
+ #
+ # TODO: Add validation for ASCII code range for Token::Char
+ Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
+ attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
+
+ def term?
+ term
+ end
+
+ def nterm?
+ !term
+ end
+
+ def eof_symbol?
+ !!@eof_symbol
+ end
+
+ def error_symbol?
+ !!@error_symbol
+ end
+
+ def undef_symbol?
+ !!@undef_symbol
+ end
+
+ def accept_symbol?
+ !!@accept_symbol
+ end
+
+ def display_name
+ if alias_name
+ alias_name
+ else
+ id.s_value
+ end
+ end
+
+ # name for yysymbol_kind_t
+ #
+ # See: b4_symbol_kind_base
+ def enum_name
+ case
+ when accept_symbol?
+ name = "YYACCEPT"
+ when eof_symbol?
+ name = "YYEOF"
+ when term? && id.type == Token::Char
+ if alias_name
+ name = number.to_s + alias_name
+ else
+ name = number.to_s + id.s_value
+ end
+ when term? && id.type == Token::Ident
+ name = id.s_value
+ when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
+ name = number.to_s + id.s_value
+ when nterm?
+ name = id.s_value
+ else
+ raise "Unexpected #{self}"
+ end
+
+ "YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
+ end
+
+ # comment for yysymbol_kind_t
+ def comment
+ case
+ when accept_symbol?
+ # YYSYMBOL_YYACCEPT
+ id.s_value
+ when eof_symbol?
+ # YYEOF
+ alias_name
+ when (term? && 0 < token_id && token_id < 128)
+ # YYSYMBOL_3_backslash_, YYSYMBOL_14_
+ alias_name || id.s_value
+ when id.s_value.include?("$") || id.s_value.include?("@")
+ # YYSYMBOL_21_1
+ id.s_value
+ else
+ # YYSYMBOL_keyword_class, YYSYMBOL_strings_1
+ alias_name || id.s_value
+ end
+ end
+ end
+
+ Type = Struct.new(:id, :tag, keyword_init: true)
+
+ Code = Struct.new(:type, :token_code, keyword_init: true) do
+ extend Forwardable
+
+ def_delegators "token_code", :s_value, :line, :column, :references
+
+ # $$, $n, @$, @n is translated to C code
+ def translated_code
+ case type
+ when :user_code
+ translated_user_code
+ when :initial_action
+ translated_initial_action_code
+ end
+ end
+
+ # * ($1) error
+ # * ($$) *yyvaluep
+ # * (@1) error
+ # * (@$) *yylocationp
+ def translated_printer_code(tag)
+ t_code = s_value.dup
+
+ references.reverse.each do |ref|
+ first_column = ref.first_column
+ last_column = ref.last_column
+
+ case
+ when ref.number == "$" && ref.type == :dollar # $$
+ # Omit "<>"
+ member = tag.s_value[1..-2]
+ str = "((*yyvaluep).#{member})"
+ when ref.number == "$" && ref.type == :at # @$
+ str = "(*yylocationp)"
+ when ref.type == :dollar # $n
+ raise "$#{ref.number} can not be used in %printer."
+ when ref.type == :at # @n
+ raise "@#{ref.number} can not be used in %printer."
+ else
+ raise "Unexpected. #{code}, #{ref}"
+ end
+
+ t_code[first_column..last_column] = str
+ end
+
+ return t_code
+ end
+
+
+ private
+
+ # * ($1) yyvsp[i]
+ # * ($$) yyval
+ # * (@1) yylsp[i]
+ # * (@$) yyloc
+ def translated_user_code
+ t_code = s_value.dup
+
+ references.reverse.each do |ref|
+ first_column = ref.first_column
+ last_column = ref.last_column
+
+ case
+ when ref.number == "$" && ref.type == :dollar # $$
+ # Omit "<>"
+ member = ref.tag.s_value[1..-2]
+ str = "(yyval.#{member})"
+ when ref.number == "$" && ref.type == :at # @$
+ str = "(yyloc)"
+ when ref.type == :dollar # $n
+ i = -ref.position_in_rhs + ref.number
+ # Omit "<>"
+ member = ref.tag.s_value[1..-2]
+ str = "(yyvsp[#{i}].#{member})"
+ when ref.type == :at # @n
+ i = -ref.position_in_rhs + ref.number
+ str = "(yylsp[#{i}])"
+ else
+ raise "Unexpected. #{code}, #{ref}"
+ end
+
+ t_code[first_column..last_column] = str
+ end
+
+ return t_code
+ end
+
+ # * ($1) error
+ # * ($$) yylval
+ # * (@1) error
+ # * (@$) yylloc
+ def translated_initial_action_code
+ t_code = s_value.dup
+
+ references.reverse.each do |ref|
+ first_column = ref.first_column
+ last_column = ref.last_column
+
+ case
+ when ref.number == "$" && ref.type == :dollar # $$
+ str = "yylval"
+ when ref.number == "$" && ref.type == :at # @$
+ str = "yylloc"
+ when ref.type == :dollar # $n
+ raise "$#{ref.number} can not be used in initial_action."
+ when ref.type == :at # @n
+ raise "@#{ref.number} can not be used in initial_action."
+ else
+ raise "Unexpected. #{code}, #{ref}"
+ end
+
+ t_code[first_column..last_column] = str
+ end
+
+ return t_code
+ end
+ end
+
+ # type: :dollar or :at
+ # ex_tag: "$<tag>1" (Optional)
+ Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
+ def tag
+ if ex_tag
+ ex_tag
+ else
+ referring_symbol.tag
+ end
+ end
+ end
+
+ Precedence = Struct.new(:type, :precedence, keyword_init: true) do
+ include Comparable
+
+ def <=>(other)
+ self.precedence <=> other.precedence
+ end
+ end
+
+ Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
+ def translated_code(member)
+ code.translated_printer_code(member)
+ end
+ end
+
+ Union = Struct.new(:code, :lineno, keyword_init: true) do
+ def braces_less_code
+ # Remove braces
+ code.s_value[1..-2]
+ end
+ end
+
+ Token = Lrama::Lexer::Token
+
+ # Grammar is the result of parsing an input grammar file
+ class Grammar
+ # Grammar file information not used by States but by Output
+ Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true)
+
+ attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
+ attr_accessor :union, :expect,
+ :printers,
+ :lex_param, :parse_param, :initial_action,
+ :symbols, :types,
+ :rules, :_rules,
+ :sym_to_rules
+
+ def initialize
+ @printers = []
+ @symbols = []
+ @types = []
+ @_rules = []
+ @rules = []
+ @sym_to_rules = {}
+ @empty_symbol = nil
+ @eof_symbol = nil
+ @error_symbol = nil
+ @undef_symbol = nil
+ @accept_symbol = nil
+ @aux = Aux.new
+
+ append_special_symbols
+ end
+
+ def add_printer(ident_or_tags:, code:, lineno:)
+ @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
+ end
+
+ def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
+ if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
+ if replace
+ sym.id = id
+ sym.alias_name = alias_name
+ sym.tag = tag
+ end
+
+ return sym
+ end
+
+ if sym = @symbols.find {|s| s.id == id }
+ return sym
+ end
+
+ sym = Symbol.new(
+ id: id, alias_name: alias_name, number: nil, tag: tag,
+ term: true, token_id: token_id, nullable: false
+ )
+ @symbols << sym
+ @terms = nil
+
+ return sym
+ end
+
+ def add_nterm(id:, alias_name: nil, tag: nil)
+ return if @symbols.find {|s| s.id == id }
+
+ sym = Symbol.new(
+ id: id, alias_name: alias_name, number: nil, tag: tag,
+ term: false, token_id: nil, nullable: nil,
+ )
+ @symbols << sym
+ @nterms = nil
+
+ return sym
+ end
+
+ def add_type(id:, tag:)
+ @types << Type.new(id: id, tag: tag)
+ end
+
+ def add_nonassoc(sym, precedence)
+ set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence))
+ end
+
+ def add_left(sym, precedence)
+ set_precedence(sym, Precedence.new(type: :left, precedence: precedence))
+ end
+
+ def add_right(sym, precedence)
+ set_precedence(sym, Precedence.new(type: :right, precedence: precedence))
+ end
+
+ def set_precedence(sym, precedence)
+ raise "" if sym.nterm?
+ sym.precedence = precedence
+ end
+
+ def set_union(code, lineno)
+ @union = Union.new(code: code, lineno: lineno)
+ end
+
+ def add_rule(lhs:, rhs:, lineno:)
+ @_rules << [lhs, rhs, lineno]
+ end
+
+ def build_references(token_code)
+ token_code.references.map! do |type, number, tag, first_column, last_column|
+ Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column)
+ end
+
+ token_code
+ end
+
+ def build_code(type, token_code)
+ build_references(token_code)
+ Code.new(type: type, token_code: token_code)
+ end
+
+ def prologue_first_lineno=(prologue_first_lineno)
+ @aux.prologue_first_lineno = prologue_first_lineno
+ end
+
+ def prologue=(prologue)
+ @aux.prologue = prologue
+ end
+
+ def epilogue_first_lineno=(epilogue_first_lineno)
+ @aux.epilogue_first_lineno = epilogue_first_lineno
+ end
+
+ def epilogue=(epilogue)
+ @aux.epilogue = epilogue
+ end
+
+ def prepare
+ normalize_rules
+ collect_symbols
+ replace_token_with_symbol
+ fill_symbol_number
+ fill_default_precedence
+ fill_sym_to_rules
+ fill_nterm_type
+ fill_symbol_printer
+ @symbols.sort_by!(&:number)
+ end
+
+ # TODO: More validation methods
+ def validate!
+ validate_symbol_number_uniqueness!
+ end
+
+ def compute_nullable
+ @rules.each do |rule|
+ case
+ when rule.rhs.empty?
+ rule.nullable = true
+ when rule.rhs.any?(&:term)
+ rule.nullable = false
+ else
+ # noop
+ end
+ end
+
+ while true do
+ rs = @rules.select {|e| e.nullable.nil? }
+ nts = nterms.select {|e| e.nullable.nil? }
+ rule_count_1 = rs.count
+ nterm_count_1 = nts.count
+
+ rs.each do |rule|
+ if rule.rhs.all?(&:nullable)
+ rule.nullable = true
+ end
+ end
+
+ nts.each do |nterm|
+ find_rules_by_symbol!(nterm).each do |rule|
+ if rule.nullable
+ nterm.nullable = true
+ end
+ end
+ end
+
+ rule_count_2 = @rules.count {|e| e.nullable.nil? }
+ nterm_count_2 = nterms.count {|e| e.nullable.nil? }
+
+ if (rule_count_1 == rule_count_2) && (nterm_count_1 == nterm_count_2)
+ break
+ end
+ end
+
+ rules.select {|r| r.nullable.nil? }.each do |rule|
+ rule.nullable = false
+ end
+
+ nterms.select {|r| r.nullable.nil? }.each do |nterm|
+ nterm.nullable = false
+ end
+ end
+
+ def find_symbol_by_s_value(s_value)
+ @symbols.find do |sym|
+ sym.id.s_value == s_value
+ end
+ end
+
+ def find_symbol_by_s_value!(s_value)
+ find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
+ end
+
+ def find_symbol_by_id(id)
+ @symbols.find do |sym|
+ # TODO: validate uniqueness of Token#s_value and Symbol#alias_name
+ sym.id == id || sym.alias_name == id.s_value
+ end
+ end
+
+ def find_symbol_by_id!(id)
+ find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
+ end
+
+ def find_symbol_by_number!(number)
+ sym = @symbols[number]
+
+ raise "Symbol not found: #{number}" unless sym
+ raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
+
+ sym
+ end
+
+ def find_rules_by_symbol!(sym)
+ find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
+ end
+
+ def find_rules_by_symbol(sym)
+ @sym_to_rules[sym.number]
+ end
+
+ def terms_count
+ terms.count
+ end
+
+ def terms
+ @terms ||= @symbols.select(&:term?)
+ end
+
+ def nterms_count
+ nterms.count
+ end
+
+ def nterms
+ @nterms ||= @symbols.select(&:nterm?)
+ end
+
+ private
+
+ def find_nterm_by_id!(id)
+ nterms.find do |nterm|
+ nterm.id == id
+ end || (raise "Nterm not found: #{id}")
+ end
+
+
+ def append_special_symbols
+ # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
+ # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
+ # term.number = -2
+ # @empty_symbol = term
+
+ # YYEOF
+ term = add_term(id: Token.new(type: Token::Ident, s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
+ term.number = 0
+ term.eof_symbol = true
+ @eof_symbol = term
+
+ # YYerror
+ term = add_term(id: Token.new(type: Token::Ident, s_value: "YYerror"), alias_name: "error")
+ term.number = 1
+ term.error_symbol = true
+ @error_symbol = term
+
+ # YYUNDEF
+ term = add_term(id: Token.new(type: Token::Ident, s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
+ term.number = 2
+ term.undef_symbol = true
+ @undef_symbol = term
+
+ # $accept
+ term = add_nterm(id: Token.new(type: Token::Ident, s_value: "$accept"))
+ term.accept_symbol = true
+ @accept_symbol = term
+ end
+
+ # 1. Add $accept rule to the top of rules
+ # 2. Extract precedence and last action
+ # 3. Extract action in the middle of RHS into new Empty rule
+ # 4. Append id and extract action then create Rule
+ #
+ # Bison 3.8.2 uses different orders for symbol number and rule number
+ # when a rule has actions in the middle of a rule.
+ #
+ # For example,
+ #
+ # `program: $@1 top_compstmt`
+ #
+ # Rules are ordered like below,
+ #
+ # 1 $@1: ε
+ # 2 program: $@1 top_compstmt
+ #
+ # Symbols are ordered like below,
+ #
+ # 164 program
+ # 165 $@1
+ #
+ def normalize_rules
+ # 1. Add $accept rule to the top of rules
+ accept = find_symbol_by_s_value!("$accept")
+ eof = find_symbol_by_number!(0)
+ lineno = @_rules.first ? @_rules.first[2] : 0
+ @rules << Rule.new(id: @rules.count, lhs: accept, rhs: [@_rules.first[0], eof], code: nil, lineno: lineno)
+
+ extracted_action_number = 1 # @n as nterm
+
+ @_rules.each do |lhs, rhs, lineno|
+ a = []
+ rhs1 = []
+ code = nil
+ precedence_sym = nil
+
+ # 2. Extract precedence and last action
+ rhs.reverse.each do |r|
+ case
+ when r.is_a?(Symbol) # precedence_sym
+ precedence_sym = r
+ when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
+ code = r
+ else
+ rhs1 << r
+ end
+ end
+ rhs1.reverse!
+
+        # Bison's n-th component is 1-origin
+ (rhs1 + [code]).compact.each.with_index(1) do |token, i|
+ if token.type == Token::User_code
+ token.references.each do |ref|
+ # Need to keep position_in_rhs for actions in the middle of RHS
+ ref.position_in_rhs = i - 1
+ next if ref.type == :at
+ # $$, $n, @$, @n can be used in any actions
+ number = ref.number
+
+ if number == "$"
+ # TODO: Should be postponed after middle actions are extracted?
+ ref.referring_symbol = lhs
+ else
+ raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
+ rhs1[number - 1].referred = true
+ ref.referring_symbol = rhs1[number - 1]
+ end
+ end
+ end
+ end
+
+ rhs2 = rhs1.map do |token|
+ if token.type == Token::User_code
+ prefix = token.referred ? "@" : "$@"
+ new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
+ extracted_action_number += 1
+ a << [new_token, token]
+ new_token
+ else
+ token
+ end
+ end
+
+ # Extract actions in the middle of RHS
+ # into new rules.
+ a.each do |new_token, code|
+ @rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
+ end
+
+ c = code ? Code.new(type: :user_code, token_code: code) : nil
+ @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
+
+ add_nterm(id: lhs)
+ a.each do |new_token, _|
+ add_nterm(id: new_token)
+ end
+ end
+ end
+
+ # Collect symbols from rules
+ def collect_symbols
+ @rules.flat_map(&:rhs).each do |s|
+ case s
+ when Token
+ if s.type == Token::Char
+ add_term(id: s)
+ end
+ when Symbol
+ # skip
+ else
+ raise "Unknown class: #{s}"
+ end
+ end
+ end
+
+ # Fill #number and #token_id
+ def fill_symbol_number
+ # TODO: why start from 256
+ token_id = 256
+
+ # YYEMPTY = -2
+ # YYEOF = 0
+ # YYerror = 1
+ # YYUNDEF = 2
+ number = 3
+
+ nterm_token_id = 0
+ used_numbers = {}
+
+ @symbols.map(&:number).each do |n|
+ used_numbers[n] = true
+ end
+
+ (@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
+ while used_numbers[number] do
+ number += 1
+ end
+
+ if sym.number.nil?
+ sym.number = number
+ number += 1
+ end
+
+ # If id is Token::Char, it uses ASCII code
+ if sym.term? && sym.token_id.nil?
+ if sym.id.type == Token::Char
+            # Ignore ' on both sides
+ case sym.id.s_value[1..-2]
+ when "\\b"
+ sym.token_id = 8
+ when "\\f"
+ sym.token_id = 12
+ when "\\n"
+ sym.token_id = 10
+ when "\\r"
+ sym.token_id = 13
+ when "\\t"
+ sym.token_id = 9
+ when "\\v"
+ sym.token_id = 11
+ when "\""
+ sym.token_id = 34
+ when "\'"
+ sym.token_id = 39
+ when "\\\\"
+ sym.token_id = 92
+ when /\A\\(\d+)\z/
+ sym.token_id = Integer($1, 8)
+ when /\A(.)\z/
+ sym.token_id = $1.bytes.first
+ else
+ raise "Unknown Char s_value #{sym}"
+ end
+ else
+ sym.token_id = token_id
+ token_id += 1
+ end
+ end
+
+ if sym.nterm? && sym.token_id.nil?
+ sym.token_id = nterm_token_id
+ nterm_token_id += 1
+ end
+ end
+ end
+
+ def replace_token_with_symbol
+ @rules.each do |rule|
+ rule.lhs = token_to_symbol(rule.lhs)
+
+ rule.rhs.map! do |t|
+ token_to_symbol(t)
+ end
+
+ if rule.code
+ rule.code.references.each do |ref|
+ next if ref.type == :at
+
+ if ref.referring_symbol.type != Token::User_code
+ ref.referring_symbol = token_to_symbol(ref.referring_symbol)
+ end
+ end
+ end
+ end
+ end
+
+ def token_to_symbol(token)
+ case token
+ when Token
+ find_symbol_by_id!(token)
+ when Symbol
+ token
+ else
+ raise "Unknown class: #{token}"
+ end
+ end
+
+ # Rule inherits precedence from the last term in RHS.
+ #
+ # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
+ def fill_default_precedence
+ @rules.each do |rule|
+ # Explicitly specified precedence has the highest priority
+ next if rule.precedence_sym
+
+ precedence_sym = nil
+ rule.rhs.each do |sym|
+ precedence_sym = sym if sym.term?
+ end
+
+ rule.precedence_sym = precedence_sym
+ end
+ end
+
+ def fill_sym_to_rules
+ @rules.each do |rule|
+ key = rule.lhs.number
+ @sym_to_rules[key] ||= []
+ @sym_to_rules[key] << rule
+ end
+ end
+
+ # Fill nterm's tag defined by %type decl
+ def fill_nterm_type
+ @types.each do |type|
+ nterm = find_nterm_by_id!(type.id)
+ nterm.tag = type.tag
+ end
+ end
+
+ def fill_symbol_printer
+ @symbols.each do |sym|
+ @printers.each do |printer|
+ printer.ident_or_tags.each do |ident_or_tag|
+ case ident_or_tag.type
+ when Token::Ident
+ sym.printer = printer if sym.id == ident_or_tag
+ when Token::Tag
+ sym.printer = printer if sym.tag == ident_or_tag
+ else
+ raise "Unknown token type. #{printer}"
+ end
+ end
+ end
+ end
+ end
+
+ def validate_symbol_number_uniqueness!
+ invalid = @symbols.group_by(&:number).select do |number, syms|
+ syms.count > 1
+ end
+
+ return if invalid.empty?
+
+ raise "Symbol number is dupulicated. #{invalid}"
+ end
+ end
+end
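The Grammar above is normally populated by Lrama::Parser, but its building API can be exercised directly. A rough, hypothetical sketch for a one-rule grammar (expr: NUM):

    grammar = Lrama::Grammar.new
    num  = Lrama::Token.new(type: Lrama::Token::Ident, s_value: "NUM")
    expr = Lrama::Token.new(type: Lrama::Token::Ident, s_value: "expr")

    grammar.add_term(id: num)                           # terminal
    grammar.add_rule(lhs: expr, rhs: [num], lineno: 1)  # expr: NUM
    grammar.prepare                                     # normalize rules, number symbols
    grammar.compute_nullable
    grammar.validate!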
diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb
new file mode 100644
index 0000000000..5c9583327b
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer.rb
@@ -0,0 +1,349 @@
+require "strscan"
+require "lrama/report"
+
+module Lrama
+ # Lexer for parse.y
+ class Lexer
+ include Lrama::Report::Duration
+
+ # s_value is semantic value
+ Token = Struct.new(:type, :s_value, keyword_init: true) do
+ Type = Struct.new(:id, :name, keyword_init: true)
+
+ attr_accessor :line, :column, :referred
+ # For User_code
+ attr_accessor :references
+
+ def to_s
+ "#{super} line: #{line}, column: #{column}"
+ end
+
+ @i = 0
+ @types = []
+
+ def self.define_type(name)
+ type = Type.new(id: @i, name: name.to_s)
+ const_set(name, type)
+ @types << type
+ @i += 1
+ end
+
+ # Token types
+ define_type(:P_expect) # %expect
+ define_type(:P_define) # %define
+ define_type(:P_printer) # %printer
+ define_type(:P_lex_param) # %lex-param
+ define_type(:P_parse_param) # %parse-param
+ define_type(:P_initial_action) # %initial-action
+ define_type(:P_union) # %union
+ define_type(:P_token) # %token
+ define_type(:P_type) # %type
+ define_type(:P_nonassoc) # %nonassoc
+ define_type(:P_left) # %left
+ define_type(:P_right) # %right
+ define_type(:P_prec) # %prec
+ define_type(:User_code) # { ... }
+ define_type(:Tag) # <int>
+ define_type(:Number) # 0
+ define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
+ define_type(:Ident) # api.pure, tNUMBER
+ define_type(:Semicolon) # ;
+ define_type(:Bar) # |
+ define_type(:String) # "str"
+ define_type(:Char) # '+'
+ end
+
+ # States
+ #
+ # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
+ Initial = 0
+ Prologue = 1
+ BisonDeclarations = 2
+ GrammarRules = 3
+ Epilogue = 4
+
+ # Token types
+
+ attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
+ :bison_declarations_tokens, :grammar_rules_tokens
+
+ def initialize(text)
+ @text = text
+ @state = Initial
+ # Array of texts
+ @prologue = []
+ @bison_declarations = []
+ @grammar_rules = []
+ @epilogue = []
+
+ #
+ @bison_declarations_tokens = []
+ @grammar_rules_tokens = []
+
+ @debug = false
+
+ report_duration(:lex) do
+ lex_text
+ lex_bison_declarations_tokens
+ lex_grammar_rules_tokens
+ end
+ end
+
+ private
+
+ def create_token(type, s_value, line, column)
+ t = Token.new(type: type, s_value: s_value)
+ t.line = line
+ t.column = column
+
+ return t
+ end
+
+ # TODO: Remove this
+ def lex_text
+ @text.each_line.with_index(1) do |string, lineno|
+ case @state
+ when Initial
+ # Skip until "%{"
+ if string == "%{\n"
+ @state = Prologue
+ @prologue << ["", lineno]
+ next
+ end
+ when Prologue
+ # Between "%{" and "%}"
+ if string == "%}\n"
+ @state = BisonDeclarations
+ @prologue << ["", lineno]
+ next
+ end
+
+ @prologue << [string, lineno]
+ when BisonDeclarations
+ if string == "%%\n"
+ @state = GrammarRules
+ next
+ end
+
+ @bison_declarations << [string, lineno]
+ when GrammarRules
+ # Between "%%" and "%%"
+ if string == "%%\n"
+ @state = Epilogue
+ next
+ end
+
+ @grammar_rules << [string, lineno]
+ when Epilogue
+ @epilogue << [string, lineno]
+ else
+ raise "Unknown state: #{@state}"
+ end
+ end
+ end
+
+ # See:
+ # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
+ # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+ # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
+ def lex_common(lines, tokens)
+ line = lines.first[1]
+ column = 0
+ ss = StringScanner.new(lines.map(&:first).join)
+
+ while !ss.eos? do
+ case
+ when ss.scan(/\n/)
+ line += 1
+ column = ss.pos
+ when ss.scan(/\s+/)
+ # skip
+ when ss.scan(/;/)
+ tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
+ when ss.scan(/\|/)
+ tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
+ when ss.scan(/(\d+)/)
+ tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
+ when ss.scan(/(<[a-zA-Z0-9_]+>)/)
+ tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
+ tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
+ tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
+ when ss.scan(/%expect/)
+ tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
+ when ss.scan(/%define/)
+ tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
+ when ss.scan(/%printer/)
+ tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
+ when ss.scan(/%lex-param/)
+ tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
+ when ss.scan(/%parse-param/)
+ tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
+ when ss.scan(/%initial-action/)
+ tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
+ when ss.scan(/%union/)
+ tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
+ when ss.scan(/%token/)
+ tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
+ when ss.scan(/%type/)
+ tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
+ when ss.scan(/%nonassoc/)
+ tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
+ when ss.scan(/%left/)
+ tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
+ when ss.scan(/%right/)
+ tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
+ when ss.scan(/%prec/)
+ tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
+ when ss.scan(/{/)
+ token, line = lex_user_code(ss, line, ss.pos - column, lines)
+ tokens << token
+ when ss.scan(/"/)
+ string, line = lex_string(ss, "\"", line, lines)
+ token = create_token(Token::String, string, line, ss.pos - column)
+ tokens << token
+ when ss.scan(/\/\*/)
+ # TODO: Need to keep comment?
+ line = lex_comment(ss, line, lines, "")
+ when ss.scan(/'(.)'/)
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
+ when ss.scan(/'\\(.)'/) # '\\', '\t'
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
+ when ss.scan(/'\\(\d+)'/) # '\13'
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
+ when ss.scan(/%empty/)
+ # skip
+ else
+ l = line - lines.first[1]
+ split = ss.string.split("\n")
+ col = ss.pos - split[0...l].join("\n").length
+ raise "Parse error (unknow token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
+ end
+ end
+ end
+
+ def lex_bison_declarations_tokens
+ lex_common(@bison_declarations, @bison_declarations_tokens)
+ end
+
+ def lex_user_code(ss, line, column, lines)
+ first_line = line
+ first_column = column
+ debug("Enter lex_user_code: #{line}")
+ brace_count = 1
+ str = "{"
+ # Array of [type, $n, tag, first column, last column]
+ # TODO: Is it better to keep string, like "$$", and use gsub?
+ references = []
+
+ while !ss.eos? do
+ case
+ when ss.scan(/\n/)
+ line += 1
+ when ss.scan(/"/)
+ string, line = lex_string(ss, "\"", line, lines)
+ str << string
+ next
+ when ss.scan(/'/)
+ string, line = lex_string(ss, "'", line, lines)
+ str << string
+ next
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
+ references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
+ references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
+ when ss.scan(/@\$/) # @$
+ references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
+ when ss.scan(/@(\d)+/) # @1
+ references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
+ when ss.scan(/{/)
+ brace_count += 1
+ when ss.scan(/}/)
+ brace_count -= 1
+
+ debug("Return lex_user_code: #{line}")
+ if brace_count == 0
+ str << ss[0]
+ user_code = Token.new(type: Token::User_code, s_value: str.freeze)
+ user_code.line = first_line
+ user_code.column = first_column
+ user_code.references = references
+ return [user_code, line]
+ end
+ when ss.scan(/\/\*/)
+ str << ss[0]
+ line = lex_comment(ss, line, lines, str)
+ else
+ # noop, just consume char
+ str << ss.getch
+ next
+ end
+
+ str << ss[0]
+ end
+
+    # Reached the end of input but braces do not match
+ l = line - lines.first[1]
+ raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
+ end
+
+ def lex_string(ss, terminator, line, lines)
+ debug("Enter lex_string: #{line}")
+
+ str = terminator.dup
+
+ while (c = ss.getch) do
+ str << c
+
+ case c
+ when "\n"
+ line += 1
+ when terminator
+ debug("Return lex_string: #{line}")
+ return [str, line]
+ else
+ # noop
+ end
+ end
+
+    # Reached the end of input but the quote is not closed
+ l = line - lines.first[1]
+ raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
+ end
+
+ # TODO: Need to handle // style comment
+ #
+ # /* */ style comment
+ def lex_comment(ss, line, lines, str)
+ while !ss.eos? do
+ case
+ when ss.scan(/\n/)
+ line += 1
+ when ss.scan(/\*\//)
+ return line
+ else
+ str << ss.getch
+ next
+ end
+
+ str << ss[0]
+ end
+
+    # Reached the end of input but the comment is not terminated
+ l = line - lines.first[1]
+ raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
+ end
+
+ def lex_grammar_rules_tokens
+ lex_common(@grammar_rules, @grammar_rules_tokens)
+ end
+
+ def debug(msg)
+ return unless @debug
+ puts "#{msg}\n"
+ end
+ end
+end
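A hedged sketch of running the Lexer on its own; "parse.y" is a placeholder path:

    lexer = Lrama::Lexer.new(File.read("parse.y"))
    lexer.bison_declarations_tokens.first(3).each do |t|
      puts "#{t.type.name} #{t.s_value.inspect} (line #{t.line})"
    end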
diff --git a/tool/lrama/lib/lrama/output.rb b/tool/lrama/lib/lrama/output.rb
new file mode 100644
index 0000000000..eaefbd04dc
--- /dev/null
+++ b/tool/lrama/lib/lrama/output.rb
@@ -0,0 +1,370 @@
+require "erb"
+require "forwardable"
+require "lrama/report"
+
+module Lrama
+ class Output
+ extend Forwardable
+ include Report::Duration
+
+ attr_reader :grammar_file_path, :context, :grammar
+
+ def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
+ :yymaxutok, :yypact_ninf, :yytable_ninf
+
+ def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
+
+ def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
+ @out = out
+ @output_file_path = output_file_path
+ @template_name = template_name
+ @grammar_file_path = grammar_file_path
+ @header_out = header_out
+ @header_file_path = header_file_path
+ @context = context
+ @grammar = grammar
+ end
+
+ if ERB.instance_method(:initialize).parameters.last.first == :key
+ def self.erb(input)
+ ERB.new(input, trim_mode: '-')
+ end
+ else
+ def self.erb(input)
+ ERB.new(input, nil, '-')
+ end
+ end
+
+ def eval_template(file, path)
+ erb = self.class.erb(File.read(file))
+ erb.filename = file
+ tmp = erb.result_with_hash(context: @context, output: self)
+ replace_special_variables(tmp, path)
+ end
+
+ def render
+ report_duration(:render) do
+ tmp = eval_template(template_file, @output_file_path)
+ @out << tmp
+
+ if @header_file_path
+ tmp = eval_template(header_template_file, @header_file_path)
+
+ if @header_out
+ @header_out << tmp
+ else
+ File.open(@header_file_path, "w+") do |f|
+ f << tmp
+ end
+ end
+ end
+ end
+ end
+
+ # A part of b4_token_enums
+ def token_enums
+ str = ""
+
+ @context.yytokentype.each do |s_value, token_id, display_name|
+ s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",")
+
+ if display_name
+ str << sprintf(" %-30s /* %s */\n", s, display_name)
+ else
+ str << sprintf(" %s\n", s)
+ end
+ end
+
+ str
+ end
+
+ # b4_symbol_enum
+ def symbol_enum
+ str = ""
+
+ last_sym_number = @context.yysymbol_kind_t.last[1]
+ @context.yysymbol_kind_t.each do |s_value, sym_number, display_name|
+ s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",")
+
+ if display_name
+ str << sprintf(" %-40s /* %s */\n", s, display_name)
+ else
+ str << sprintf(" %s\n", s)
+ end
+ end
+
+ str
+ end
+
+ def yytranslate
+ int_array_to_string(@context.yytranslate)
+ end
+
+ def yyrline
+ int_array_to_string(@context.yyrline)
+ end
+
+ def yytname
+ string_array_to_string(@context.yytname) + " YY_NULLPTR"
+ end
+
+ # b4_int_type_for
+ def int_type_for(ary)
+ min = ary.min
+ max = ary.max
+
+ case
+ when (-127 <= min && min <= 127) && (-127 <= max && max <= 127)
+ "yytype_int8"
+ when (0 <= min && min <= 255) && (0 <= max && max <= 255)
+ "yytype_uint8"
+ when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767)
+ "yytype_int16"
+ when (0 <= min && min <= 65535) && (0 <= max && max <= 65535)
+ "yytype_uint16"
+ else
+ "int"
+ end
+ end
+
+ def symbol_actions_for_printer
+ str = ""
+
+ @grammar.symbols.each do |sym|
+ next unless sym.printer
+
+ str << <<-STR
+ case #{sym.enum_name}: /* #{sym.comment} */
+#line #{sym.printer.lineno} "#{@grammar_file_path}"
+ #{sym.printer.translated_code(sym.tag)}
+#line [@oline@] [@ofile@]
+ break;
+
+ STR
+ end
+
+ str
+ end
+
+ # b4_user_initial_action
+ def user_initial_action(comment = "")
+ return "" unless @grammar.initial_action
+
+ <<-STR
+ #{comment}
+#line #{@grammar.initial_action.line} "#{@grammar_file_path}"
+ #{@grammar.initial_action.translated_code}
+ STR
+ end
+
+ # b4_user_actions
+ def user_actions
+ str = ""
+
+ @context.states.rules.each do |rule|
+ next unless rule.code
+
+ rule = rule
+ code = rule.code
+ spaces = " " * (code.column - 1)
+
+ str << <<-STR
+ case #{rule.id + 1}: /* #{rule.as_comment} */
+#line #{code.line} "#{@grammar_file_path}"
+#{spaces}#{rule.translated_code}
+#line [@oline@] [@ofile@]
+ break;
+
+ STR
+ end
+
+ str << <<-STR
+
+#line [@oline@] [@ofile@]
+ STR
+
+ str
+ end
+
+ def omit_braces_and_blanks(param)
+ param[1..-2].strip
+ end
+
+ # b4_parse_param
+ def parse_param
+ if @grammar.parse_param
+ omit_braces_and_blanks(@grammar.parse_param)
+ else
+ ""
+ end
+ end
+
+ def lex_param
+ if @grammar.lex_param
+ omit_braces_and_blanks(@grammar.lex_param)
+ else
+ ""
+ end
+ end
+
+ # b4_user_formals
+ def user_formals
+ if @grammar.parse_param
+ ", #{parse_param}"
+ else
+ ""
+ end
+ end
+
+ # b4_user_args
+ def user_args
+ if @grammar.parse_param
+ ", #{parse_param_name}"
+ else
+ ""
+ end
+ end
+
+ def extract_param_name(param)
+ /\A(.)+([a-zA-Z0-9_]+)\z/.match(param)[2]
+ end
+
+ def parse_param_name
+ if @grammar.parse_param
+ extract_param_name(parse_param)
+ else
+ ""
+ end
+ end
+
+ def lex_param_name
+ if @grammar.lex_param
+ extract_param_name(lex_param)
+ else
+ ""
+ end
+ end
+
+ # b4_parse_param_use
+ def parse_param_use(val, loc)
+ str = <<-STR
+ YY_USE (#{val});
+ YY_USE (#{loc});
+ STR
+
+ if @grammar.parse_param
+ str << " YY_USE (#{parse_param_name});"
+ end
+
+ str
+ end
+
+ # b4_yylex_formals
+ def yylex_formals
+ ary = ["&yylval", "&yylloc"]
+
+ if @grammar.lex_param
+ ary << lex_param_name
+ end
+
+ "(#{ary.join(', ')})"
+ end
+
+ # b4_table_value_equals
+ def table_value_equals(table, value, literal, symbol)
+ if literal < table.min || table.max < literal
+ "0"
+ else
+ "((#{value}) == #{symbol})"
+ end
+ end
+
+ # b4_yyerror_args
+ def yyerror_args
+ ary = ["&yylloc"]
+
+ if @grammar.parse_param
+ ary << parse_param_name
+ end
+
+ "#{ary.join(', ')}"
+ end
+
+ def template_basename
+ File.basename(template_file)
+ end
+
+ def aux
+ @grammar.aux
+ end
+
+ def int_array_to_string(ary)
+ last = ary.count - 1
+
+ s = ary.each_with_index.each_slice(10).map do |slice|
+ str = " "
+
+ slice.each do |e, i|
+ str << sprintf("%6d%s", e, (i == last) ? "" : ",")
+ end
+
+ str
+ end
+
+ s.join("\n")
+ end
+
+ def spec_mapped_header_file
+ @header_file_path
+ end
+
+ def b4_cpp_guard__b4_spec_mapped_header_file
+ if @header_file_path
+ "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED"
+ else
+ ""
+ end
+ end
+
+ private
+
+ def template_file
+ File.join(template_dir, @template_name)
+ end
+
+ def header_template_file
+ File.join(template_dir, "bison/yacc.h")
+ end
+
+ def template_dir
+ File.expand_path("../../../template", __FILE__)
+ end
+
+ def string_array_to_string(ary)
+ str = ""
+ tmp = " "
+
+ ary.each do |s|
+ s = s.gsub('\\', '\\\\\\\\')
+ s = s.gsub('"', '\\"')
+
+ if (tmp + s + " \"\",").length > 75
+ str << tmp << "\n"
+ tmp = " \"#{s}\","
+ else
+ tmp << " \"#{s}\","
+ end
+ end
+
+ str << tmp
+ end
+
+ def replace_special_variables(str, ofile)
+ str.each_line.with_index(1).map do |line, i|
+ line.gsub!("[@oline@]", (i + 1).to_s)
+ line.gsub!("[@ofile@]", "\"#{ofile}\"")
+ line
+ end.join
+ end
+ end
+end
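For example, int_type_for above picks the narrowest C type whose range covers every value of a table; assuming `out` is an already constructed Lrama::Output (see command.rb earlier in this diff), the mapping works out as:

    out.int_type_for([-3, 100])    # => "yytype_int8"
    out.int_type_for([0, 300])     # => "yytype_int16"  (the int16 branch is checked before uint16)
    out.int_type_for([0, 40_000])  # => "yytype_uint16"
    out.int_type_for([0, 70_000])  # => "int"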
diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb
new file mode 100644
index 0000000000..e90a94c637
--- /dev/null
+++ b/tool/lrama/lib/lrama/parser.rb
@@ -0,0 +1,321 @@
+require "lrama/report"
+
+module Lrama
+ # Parser for parse.y, generates a grammar
+ class Parser
+ include Lrama::Report::Duration
+
+ T = Lrama::Lexer::Token
+
+ class TokenScanner
+ def initialize(tokens)
+ @tokens = tokens
+ @index = 0
+ end
+
+ def current_token
+ @tokens[@index]
+ end
+
+ def current_type
+ current_token && current_token.type
+ end
+
+ def next
+ token = current_token
+ @index += 1
+ return token
+ end
+
+ def consume(*token_types)
+ if token_types.include?(current_type)
+ token = current_token
+ self.next
+ return token
+ end
+
+ return nil
+ end
+
+ def consume!(*token_types)
+ consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
+ end
+
+ def consume_multi(*token_types)
+ a = []
+
+ while token_types.include?(current_type)
+ a << current_token
+ self.next
+ end
+
+ raise "No token is consumed. #{token_types}" if a.empty?
+
+ return a
+ end
+
+ def eots?
+ current_token.nil?
+ end
+ end
+
+ def initialize(text)
+ @text = text
+ end
+
+ def parse
+ report_duration(:parse) do
+ lexer = Lexer.new(@text)
+ grammar = Grammar.new
+ process_prologue(grammar, lexer)
+ parse_bison_declarations(TokenScanner.new(lexer.bison_declarations_tokens), grammar)
+ parse_grammar_rules(TokenScanner.new(lexer.grammar_rules_tokens), grammar)
+ process_epilogue(grammar, lexer)
+ grammar.prepare
+ grammar.compute_nullable
+ grammar.validate!
+
+ grammar
+ end
+ end
+
+ private
+
+ def process_prologue(grammar, lexer)
+ grammar.prologue_first_lineno = lexer.prologue.first[1] if lexer.prologue.first
+ grammar.prologue = lexer.prologue.map(&:first).join
+ end
+
+ def process_epilogue(grammar, lexer)
+ grammar.epilogue_first_lineno = lexer.epilogue.first[1] if lexer.epilogue.first
+ grammar.epilogue = lexer.epilogue.map(&:first).join
+ end
+
+ def parse_bison_declarations(ts, grammar)
+ precedence_number = 0
+
+ while !ts.eots? do
+ case ts.current_type
+ when T::P_expect
+ ts.next
+ grammar.expect = ts.consume!(T::Number).s_value
+ when T::P_define
+ ts.next
+ # Ignore
+ ts.consume_multi(T::Ident)
+ when T::P_printer
+ lineno = ts.current_token.line
+ ts.next
+ code = ts.consume!(T::User_code)
+ code = grammar.build_code(:printer, code)
+ ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
+ grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
+ when T::P_lex_param
+ ts.next
+ code = ts.consume!(T::User_code)
+ code = grammar.build_code(:lex_param, code)
+ grammar.lex_param = code.token_code.s_value
+ when T::P_parse_param
+ ts.next
+ code = ts.consume!(T::User_code)
+ code = grammar.build_code(:parse_param, code)
+ grammar.parse_param = code.token_code.s_value
+ when T::P_initial_action
+ ts.next
+ code = ts.consume!(T::User_code)
+ code = grammar.build_code(:initial_action, code)
+ ts.consume(T::Semicolon)
+ grammar.initial_action = code
+ when T::P_union
+ lineno = ts.current_token.line
+ ts.next
+ code = ts.consume!(T::User_code)
+ code = grammar.build_code(:union, code)
+ ts.consume(T::Semicolon)
+ grammar.set_union(code, lineno)
+ when T::P_token
+ # %token tag? (ident number? string?)+
+ #
+ # * ident can be char, e.g. '\\', '\t', '\13'
+ # * number is a token_id for term
+ #
+ # These are valid token declaration (from CRuby parse.y)
+ #
+ # %token END_OF_INPUT 0 "end-of-input"
+ # %token <id> '\\' "backslash"
+ # %token tSP "escaped space"
+ # %token tUPLUS 132 "unary+"
+ # %token tCOLON3 ":: at EXPR_BEG"
+ # %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
+ #
+ #
+ # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+ ts.next
+ opt_tag = ts.consume(T::Tag)
+
+ while (id = ts.consume(T::Ident, T::Char)) do
+ opt_number = ts.consume(T::Number)
+ opt_string = ts.consume(T::String)
+ # Can replace 0 (EOF)
+ grammar.add_term(
+ id: id,
+ alias_name: opt_string && opt_string.s_value,
+ token_id: opt_number && opt_number.s_value,
+ tag: opt_tag,
+ replace: true,
+ )
+ end
+ when T::P_type
+ # %type tag? (ident|char|string)+
+ #
+ # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+ ts.next
+ opt_tag = ts.consume(T::Tag)
+
+ while (id = ts.consume(T::Ident, T::Char, T::String)) do
+ grammar.add_type(
+ id: id,
+ tag: opt_tag
+ )
+ end
+ when T::P_nonassoc
+ # %nonassoc (ident|char|string)+
+ ts.next
+ while (id = ts.consume(T::Ident, T::Char, T::String)) do
+ sym = grammar.add_term(id: id)
+ grammar.add_nonassoc(sym, precedence_number)
+ end
+ precedence_number += 1
+ when T::P_left
+ # %left (ident|char|string)+
+ ts.next
+ while (id = ts.consume(T::Ident, T::Char, T::String)) do
+ sym = grammar.add_term(id: id)
+ grammar.add_left(sym, precedence_number)
+ end
+ precedence_number += 1
+ when T::P_right
+ # %right (ident|char|string)+
+ ts.next
+ while (id = ts.consume(T::Ident, T::Char, T::String)) do
+ sym = grammar.add_term(id: id)
+ grammar.add_right(sym, precedence_number)
+ end
+ precedence_number += 1
+ when nil
+ # end of input
+          raise "Reached end of input within declarations"
+ else
+ raise "Unexpected token: #{ts.current_token}"
+ end
+ end
+ end
+
+ def parse_grammar_rules(ts, grammar)
+ while !ts.eots? do
+ parse_grammar_rule(ts, grammar)
+ end
+ end
+
+    # TODO: Handle %prec of a rule.
+    # If %prec exists, the user code right before %prec
+    # is NOT an action. For example "{ code 3 }" is NOT an action.
+ #
+ # keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
+ def parse_grammar_rule(ts, grammar)
+ # LHS
+ lhs = ts.consume!(T::Ident_Colon) # class:
+ lhs.type = T::Ident
+
+ rhs = parse_grammar_rule_rhs(ts, grammar)
+
+ grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)
+
+ while true do
+ case ts.current_type
+ when T::Bar
+ # |
+ bar_lineno = ts.current_token.line
+ ts.next
+ rhs = parse_grammar_rule_rhs(ts, grammar)
+ grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
+ when T::Semicolon
+ # ;
+ ts.next
+ break
+ when T::Ident_Colon
+ # Next lhs can be here because ";" is optional.
+ # Do not consume next token.
+ break
+ when nil
+ # end of input can be here when ";" is omitted
+ break
+ else
+ raise "Unexpected token: #{ts.current_token}"
+ end
+ end
+ end
+
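+    # Collect the rhs of a single alternative: idents, chars, user code
+    # and a %prec symbol, stopping at "|", ";", the next "ident:" or end of input.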
+ def parse_grammar_rule_rhs(ts, grammar)
+ a = []
+ prec_seen = false
+ code_after_prec = false
+
+ while true do
+        # TODO: String can be here
+ case ts.current_type
+ when T::Ident
+ # keyword_class
+
+ raise "Ident after %prec" if prec_seen
+ a << ts.current_token
+ ts.next
+ when T::Char
+ # '!'
+
+ raise "Char after %prec" if prec_seen
+ a << ts.current_token
+ ts.next
+ when T::P_prec
+ # %prec tPLUS
+ #
+ # See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
+
+ ts.next
+ prec_seen = true
+ precedence_id = ts.consume!(T::Ident, T::String, T::Char)
+ precedence_sym = grammar.find_symbol_by_id!(precedence_id)
+ a << precedence_sym
+ when T::User_code
+ # { code } in the middle of rhs
+
+ if prec_seen
+ raise "Multiple User_code after %prec" if code_after_prec
+ code_after_prec = true
+ end
+
+ code = ts.current_token
+ grammar.build_references(code)
+ a << code
+ ts.next
+ when T::Bar
+ # |
+ break
+ when T::Semicolon
+ # ;
+ break
+ when T::Ident_Colon
+ # Next lhs can be here because ";" is optional.
+ break
+ when nil
+ # end of input can be here when ";" is omitted
+ break
+ else
+ raise "Unexpected token: #{ts.current_token}"
+ end
+ end
+
+ return a
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/report.rb b/tool/lrama/lib/lrama/report.rb
new file mode 100644
index 0000000000..7016a45171
--- /dev/null
+++ b/tool/lrama/lib/lrama/report.rb
@@ -0,0 +1,47 @@
+module Lrama
+ class Report
+ module Profile
+ # 1. Wrap target method with Profile.report_profile like below:
+ #
+ # Lrama::Report::Profile.report_profile { method }
+ #
+ # 2. Run lrama command, for example
+ #
+ # $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
+ #
+ # 3. Generate html file
+ #
+ # $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
+ #
+ def self.report_profile
+ require "stackprof"
+
+ StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do
+ yield
+ end
+ end
+ end
+
+ module Duration
+ def self.enable
+ @_report_duration_enabled = true
+ end
+
+ def self.enabled?
+ !!@_report_duration_enabled
+ end
+
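+      # Yield the block, print its wall-clock duration when Duration
+      # is enabled, and return the block's result.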
+ def report_duration(method_name)
+ time1 = Time.now.to_f
+ result = yield
+ time2 = Time.now.to_f
+
+ if Duration.enabled?
+ puts sprintf("%s %10.5f s", method_name, time2 - time1)
+ end
+
+ return result
+ end
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb
new file mode 100644
index 0000000000..f907db30cc
--- /dev/null
+++ b/tool/lrama/lib/lrama/states.rb
@@ -0,0 +1,832 @@
+require "forwardable"
+require "lrama/report"
+
+module Lrama
+ class State
+ class Reduce
+ # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
+ attr_reader :item, :look_ahead, :not_selected_symbols
+ attr_accessor :default_reduction
+
+ def initialize(item)
+ @item = item
+ @look_ahead = nil
+ @not_selected_symbols = []
+ end
+
+ def rule
+ @item.rule
+ end
+
+ def look_ahead=(look_ahead)
+ @look_ahead = look_ahead.freeze
+ end
+
+ def add_not_selected_symbol(sym)
+ @not_selected_symbols << sym
+ end
+
+ def selected_look_ahead
+ if @look_ahead
+ @look_ahead - @not_selected_symbols
+ else
+ []
+ end
+ end
+ end
+
+ class Shift
+ attr_reader :next_sym, :next_items
+ attr_accessor :not_selected
+
+ def initialize(next_sym, next_items)
+ @next_sym = next_sym
+ @next_items = next_items
+ end
+ end
+
+ # * symbol: A symbol under discussion
+ # * reduce: A reduce under discussion
+    # * which: How the conflict is resolved. :shift, :reduce or :error (for %nonassoc)
+ ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
+ def report_message
+ s = symbol.display_name
+ r = reduce.rule.precedence_sym.display_name
+ case
+ when which == :shift && same_prec
+ msg = "resolved as #{which} (%right #{s})"
+ when which == :shift
+ msg = "resolved as #{which} (#{r} < #{s})"
+ when which == :reduce && same_prec
+ msg = "resolved as #{which} (%left #{s})"
+ when which == :reduce
+ msg = "resolved as #{which} (#{s} < #{r})"
+ when which == :error
+ msg = "resolved as an #{which} (%nonassoc #{s})"
+ else
+ raise "Unknown direction. #{self}"
+ end
+
+ "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
+ end
+ end
+
+ Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true)
+
+ attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
+ :default_reduction_rule, :closure, :items
+ attr_accessor :shifts, :reduces
+
+ def initialize(id, accessing_symbol, kernels)
+ @id = id
+ @accessing_symbol = accessing_symbol
+ @kernels = kernels.freeze
+ @items = @kernels
+      # Map each set of shifted items to its next state
+      # so that the next state can be resolved
+ @items_to_state = {}
+ @conflicts = []
+ @resolved_conflicts = []
+ @default_reduction_rule = nil
+ end
+
+ def closure=(closure)
+ @closure = closure
+ @items = @kernels + @closure
+ end
+
+ def non_default_reduces
+ reduces.select do |reduce|
+ reduce.rule != @default_reduction_rule
+ end
+ end
+
+ def compute_shifts_reduces
+ _shifts = {}
+ reduces = []
+ items.each do |item|
+ # TODO: Consider what should be pushed
+ if item.end_of_rule?
+ reduces << Reduce.new(item)
+ else
+ key = item.next_sym
+ _shifts[key] ||= []
+ _shifts[key] << item.new_by_next_position
+ end
+ end
+
+      # It seems Bison 3.8.2 iterates over transitions in symbol number order
+ shifts = _shifts.sort_by do |next_sym, new_items|
+ next_sym.number
+ end.map do |next_sym, new_items|
+ Shift.new(next_sym, new_items.flatten)
+ end
+ self.shifts = shifts.freeze
+ self.reduces = reduces.freeze
+ end
+
+ def set_items_to_state(items, next_state)
+ @items_to_state[items] = next_state
+ end
+
+    # Set the look-ahead tokens of the reduce for the given rule.
+ def set_look_ahead(rule, look_ahead)
+ reduce = reduces.find do |r|
+ r.rule == rule
+ end
+
+ reduce.look_ahead = look_ahead
+ end
+
+ # Returns array of [nterm, next_state]
+ def nterm_transitions
+ return @nterm_transitions if @nterm_transitions
+
+ @nterm_transitions = []
+
+ shifts.each do |shift|
+ next if shift.next_sym.term?
+
+ @nterm_transitions << [shift, @items_to_state[shift.next_items]]
+ end
+
+ @nterm_transitions
+ end
+
+ # Returns array of [term, next_state]
+ def term_transitions
+ return @term_transitions if @term_transitions
+
+ @term_transitions = []
+
+ shifts.each do |shift|
+ next if shift.next_sym.nterm?
+
+ @term_transitions << [shift, @items_to_state[shift.next_items]]
+ end
+
+ @term_transitions
+ end
+
+ def selected_term_transitions
+ term_transitions.select do |shift, next_state|
+ !shift.not_selected
+ end
+ end
+
+ # Move to next state by sym
+ def transition(sym)
+ result = nil
+
+ if sym.term?
+ term_transitions.each do |shift, next_state|
+ term = shift.next_sym
+ result = next_state if term == sym
+ end
+ else
+ nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ result = next_state if nterm == sym
+ end
+ end
+
+      raise "Cannot transition by #{sym} #{self}" if result.nil?
+
+ result
+ end
+
+ def find_reduce_by_item!(item)
+ reduces.find do |r|
+ r.item == item
+      end || (raise "reduce is not found. #{item}, #{self}")
+ end
+
+ def default_reduction_rule=(default_reduction_rule)
+ @default_reduction_rule = default_reduction_rule
+
+ reduces.each do |r|
+ if r.rule == default_reduction_rule
+ r.default_reduction = true
+ end
+ end
+ end
+
+ def sr_conflicts
+ @conflicts.select do |conflict|
+ conflict.type == :shift_reduce
+ end
+ end
+
+ def rr_conflicts
+ @conflicts.select do |conflict|
+ conflict.type == :reduce_reduce
+ end
+ end
+ end
+
+ # States is passed to a template file
+ #
+ # "Efficient Computation of LALR(1) Look-Ahead Sets"
+ # https://dl.acm.org/doi/pdf/10.1145/69622.357187
+ class States
+ extend Forwardable
+ include Lrama::Report::Duration
+
+ def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
+ :accept_symbol, :eof_symbol, :find_symbol_by_s_value!
+
+ # TODO: Validate position is not over rule rhs
+ Item = Struct.new(:rule, :position, keyword_init: true) do
+ # Optimization for States#setup_state
+ def hash
+ [rule.id, position].hash
+ end
+
+ def rule_id
+ rule.id
+ end
+
+ def next_sym
+ rule.rhs[position]
+ end
+
+ def end_of_rule?
+ rule.rhs.count == position
+ end
+
+ def new_by_next_position
+ Item.new(rule: rule, position: position + 1)
+ end
+
+ def previous_sym
+ rule.rhs[position - 1]
+ end
+
+ def display_name
+ r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
+ "#{r} (rule #{rule.id})"
+ end
+
+ # Right after position
+ def display_rest
+ r = rule.rhs[position..-1].map(&:display_name).join(" ")
+ ". #{r} (rule #{rule.id})"
+ end
+ end
+
+ attr_reader :states, :reads_relation, :includes_relation, :lookback_relation
+
+ def initialize(grammar, warning, trace_state: false)
+ @grammar = grammar
+ @warning = warning
+ @trace_state = trace_state
+
+ @states = []
+
+ # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
+ # where p is state, A is nterm, t is term.
+ #
+ # `@direct_read_sets` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is bitmap of term.
+ @direct_read_sets = {}
+
+ # Reads relation on nonterminal transitions (pair of state and nterm)
+ # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
+ # where p, r are state, A, C are nterm.
+ #
+ # `@reads_relation` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is array of [state.id, nterm.token_id].
+ @reads_relation = {}
+
+ # `@read_sets` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is bitmap of term.
+ @read_sets = {}
+
+ # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
+ # where p, p' are state, A, B are nterm, β, γ is sequence of symbol.
+ #
+ # `@includes_relation` is a hash whose
+ # key is [state.id, nterm.token_id],
+ # value is array of [state.id, nterm.token_id].
+ @includes_relation = {}
+
+ # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
+ # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol.
+ #
+ # `@lookback_relation` is a hash whose
+ # key is [state.id, rule.id],
+ # value is array of [state.id, nterm.token_id].
+ @lookback_relation = {}
+
+ # `@follow_sets` is a hash whose
+ # key is [state.id, rule.id],
+ # value is bitmap of term.
+ @follow_sets = {}
+
+      # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)}`
+ #
+ # `@la` is a hash whose
+ # key is [state.id, rule.id],
+ # value is bitmap of term.
+ @la = {}
+ end
+
+ def compute
+ # Look Ahead Sets
+ report_duration(:compute_lr0_states) { compute_lr0_states }
+ report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
+ report_duration(:compute_reads_relation) { compute_reads_relation }
+ report_duration(:compute_read_sets) { compute_read_sets }
+ report_duration(:compute_includes_relation) { compute_includes_relation }
+ report_duration(:compute_lookback_relation) { compute_lookback_relation }
+ report_duration(:compute_follow_sets) { compute_follow_sets }
+ report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets }
+
+ # Conflicts
+ report_duration(:compute_conflicts) { compute_conflicts }
+
+ report_duration(:compute_default_reduction) { compute_default_reduction }
+
+ check_conflicts
+ end
+
+ def reporter
+ StatesReporter.new(self)
+ end
+
+ def states_count
+ @states.count
+ end
+
+ def direct_read_sets
+ h = {}
+
+ @direct_read_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def read_sets
+ h = {}
+
+ @read_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def follow_sets
+ h = {}
+
+ @follow_sets.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ def la
+ h = {}
+
+ @la.each do |k, v|
+ h[k] = bitmap_to_terms(v)
+ end
+
+ return h
+ end
+
+ private
+
+ def sr_conflicts
+ @states.flat_map(&:sr_conflicts)
+ end
+
+ def rr_conflicts
+ @states.flat_map(&:rr_conflicts)
+ end
+
+ def initial_attrs
+ h = {}
+
+ attrs.each do |attr|
+ h[attr.id] = false
+ end
+
+ h
+ end
+
+ def trace_state
+ if @trace_state
+ yield STDERR
+ end
+ end
+
+    def create_state(accessing_symbol, kernels, states_created)
+      # An item can appear in several states,
+      # so we need to use `kernels` (not `kernels.first`) as a key.
+ #
+ # For example...
+ #
+ # %%
+ # program: '+' strings_1
+ # | '-' strings_2
+ # ;
+ #
+ # strings_1: string_1
+ # ;
+ #
+ # strings_2: string_1
+ # | string_2
+ # ;
+ #
+ # string_1: string
+ # ;
+ #
+ # string_2: string '+'
+ # ;
+ #
+ # string: tSTRING
+ # ;
+ # %%
+ #
+      # For this grammar, there are 2 states
+ #
+ # State A
+ # string_1: string •
+ #
+ # State B
+ # string_1: string •
+ # string_2: string • '+'
+ #
+      return [states_created[kernels], false] if states_created[kernels]
+
+ state = State.new(@states.count, accessing_symbol, kernels)
+ @states << state
+      states_created[kernels] = state
+
+ return [state, true]
+ end
+
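+    # Expand the kernel items into the full closure (adding an item with
+    # the dot at position 0 for every rule of a nonterminal that appears
+    # right after the dot), then compute the state's shifts and reduces.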
+ def setup_state(state)
+ # closure
+ closure = []
+ visited = {}
+ queued = {}
+ items = state.kernels.dup
+
+ items.each do |item|
+ queued[item] = true
+ end
+
+ while (item = items.shift) do
+ visited[item] = true
+
+ if (sym = item.next_sym) && sym.nterm?
+ @grammar.find_rules_by_symbol!(sym).each do |rule|
+ i = Item.new(rule: rule, position: 0)
+ next if queued[i]
+ closure << i
+ items << i
+ queued[i] = true
+ end
+ end
+ end
+
+ state.closure = closure.sort_by {|i| i.rule.id }
+
+ # Trace
+ trace_state do |out|
+ out << "Closure: input\n"
+ state.kernels.each do |item|
+ out << " #{item.display_rest}\n"
+ end
+ out << "\n\n"
+ out << "Closure: output\n"
+ state.items.each do |item|
+ out << " #{item.display_rest}\n"
+ end
+ out << "\n\n"
+ end
+
+ # shift & reduce
+ state.compute_shifts_reduces
+ end
+
+ def enqueue_state(states, state)
+ # Trace
+ previous = state.kernels.first.previous_sym
+ trace_state do |out|
+ out << sprintf("state_list_append (state = %d, symbol = %d (%s))",
+ @states.count, previous.number, previous.display_name)
+ end
+
+ states << state
+ end
+
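+    # Build the LR(0) automaton with a work queue: start from the item
+    # "dot at the beginning of the first rule", and for each processed
+    # state create (or reuse) the successor state of every shift.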
+ def compute_lr0_states
+ # State queue
+ states = []
+      states_created = {}
+
+      state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created)
+ enqueue_state(states, state)
+
+ while (state = states.shift) do
+ # Trace
+ #
+ # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
+ # I think it is not correct...
+ previous = state.kernels.first.previous_sym
+ trace_state do |out|
+ out << "Processing state #{state.id} (reached by #{previous.display_name})\n"
+ end
+
+ setup_state(state)
+
+ state.shifts.each do |shift|
+        new_state, created = create_state(shift.next_sym, shift.next_items, states_created)
+ state.set_items_to_state(shift.next_items, new_state)
+ enqueue_state(states, new_state) if created
+ end
+ end
+ end
+
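+    # All nonterminal transitions of all states as [state, nterm, next_state].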
+ def nterm_transitions
+ a = []
+
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ a << [state, nterm, next_state]
+ end
+ end
+
+ a
+ end
+
+ def compute_direct_read_sets
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+
+ ary = next_state.term_transitions.map do |shift, _|
+ shift.next_sym.number
+ end
+
+ key = [state.id, nterm.token_id]
+ @direct_read_sets[key] = Bitmap.from_array(ary)
+ end
+ end
+ end
+
+ def compute_reads_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ next_state.nterm_transitions.each do |shift2, _next_state2|
+ nterm2 = shift2.next_sym
+ if nterm2.nullable
+ key = [state.id, nterm.token_id]
+ @reads_relation[key] ||= []
+ @reads_relation[key] << [next_state.id, nterm2.token_id]
+ end
+ end
+ end
+ end
+ end
+
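+    # Read(p, A) = DR(p, A) ∪ ∪{Read(r, C) | (p, A) reads (r, C)},
+    # solved with the digraph algorithm seeded by the direct read sets.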
+ def compute_read_sets
+ sets = nterm_transitions.map do |state, nterm, next_state|
+ [state.id, nterm.token_id]
+ end
+
+ @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute
+ end
+
+ # Execute transition of state by symbols
+ # then return final state.
+ def transition(state, symbols)
+ symbols.each do |sym|
+ state = state.transition(sym)
+ end
+
+ state
+ end
+
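+    # For each nonterminal transition (p', B) and each rule B -> β A γ,
+    # walk the rhs from the right while the suffix is nullable and record
+    # (p, A) includes (p', B) where p is reached from p' by reading β.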
+ def compute_includes_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ @grammar.find_rules_by_symbol!(nterm).each do |rule|
+ i = rule.rhs.count - 1
+
+ while (i > -1) do
+ sym = rule.rhs[i]
+
+ break if sym.term?
+ state2 = transition(state, rule.rhs[0...i])
+ # p' = state, B = nterm, p = state2, A = sym
+ key = [state2.id, sym.token_id]
+ # TODO: need to omit if state == state2 ?
+ @includes_relation[key] ||= []
+ @includes_relation[key] << [state.id, nterm.token_id]
+ break if !sym.nullable
+ i -= 1
+ end
+ end
+ end
+ end
+ end
+
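+    # For each nonterminal transition (p, A) and each rule A -> ω,
+    # record (q, A -> ω) lookback (p, A) where q is reached from p by reading ω.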
+ def compute_lookback_relation
+ @states.each do |state|
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ @grammar.find_rules_by_symbol!(nterm).each do |rule|
+ state2 = transition(state, rule.rhs)
+ # p = state, A = nterm, q = state2, A -> ω = rule
+ key = [state2.id, rule.id]
+ @lookback_relation[key] ||= []
+ @lookback_relation[key] << [state.id, nterm.token_id]
+ end
+ end
+ end
+ end
+
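+    # Follow(p, A) = Read(p, A) ∪ ∪{Follow(p', B) | (p, A) includes (p', B)},
+    # solved with the digraph algorithm seeded by the read sets.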
+ def compute_follow_sets
+ sets = nterm_transitions.map do |state, nterm, next_state|
+ [state.id, nterm.token_id]
+ end
+
+ @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute
+ end
+
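+    # LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)},
+    # set on each state's reduce unless the state has a single reduce
+    # and no term transitions.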
+ def compute_look_ahead_sets
+ @states.each do |state|
+ rules.each do |rule|
+ ary = @lookback_relation[[state.id, rule.id]]
+ next if !ary
+
+ ary.each do |state2_id, nterm_token_id|
+ # q = state, A -> ω = rule, p = state2, A = nterm
+ follows = @follow_sets[[state2_id, nterm_token_id]]
+
+ next if follows == 0
+
+ key = [state.id, rule.id]
+ @la[key] ||= 0
+ look_ahead = @la[key] | follows
+ @la[key] |= look_ahead
+
+            # No risk of conflict when
+            # * the state has only a single reduce, and
+            # * the state has only nterm transitions (GOTO)
+ next if state.reduces.count == 1 && state.term_transitions.count == 0
+
+ state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
+ end
+ end
+ end
+ end
+
+ def bitmap_to_terms(bit)
+ ary = Bitmap.to_array(bit)
+ ary.map do |i|
+ @grammar.find_symbol_by_number!(i)
+ end
+ end
+
+ def compute_conflicts
+ compute_shift_reduce_conflicts
+ compute_reduce_reduce_conflicts
+ end
+
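+    # For each shift token that is also in a reduce's look-ahead, resolve
+    # by precedence when both sides have one (equal precedence falls back
+    # to associativity: %right shifts, %left reduces, %nonassoc errors);
+    # otherwise record a shift/reduce conflict.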
+ def compute_shift_reduce_conflicts
+ states.each do |state|
+ state.shifts.each do |shift|
+ state.reduces.each do |reduce|
+ sym = shift.next_sym
+
+ next unless reduce.look_ahead
+ next if !reduce.look_ahead.include?(sym)
+
+ # Shift/Reduce conflict
+ shift_prec = sym.precedence
+ reduce_prec = reduce.item.rule.precedence
+
+ # Can resolve only when both have prec
+ unless shift_prec && reduce_prec
+ state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce)
+ next
+ end
+
+ case
+ when shift_prec < reduce_prec
+ # Reduce is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce)
+ shift.not_selected = true
+ next
+ when shift_prec > reduce_prec
+ # Shift is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift)
+ reduce.add_not_selected_symbol(sym)
+ next
+ end
+
+ # shift_prec == reduce_prec, then check associativity
+ case sym.precedence.type
+ when :right
+ # Shift is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true)
+ reduce.add_not_selected_symbol(sym)
+ next
+ when :left
+ # Reduce is selected
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true)
+ shift.not_selected = true
+ next
+ when :nonassoc
+            # Cannot resolve
+            #
+            # %nonassoc creates a "run-time" error, precedence creates a "compile-time" error.
+            # So omit both the shift and the reduce.
+ #
+ # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
+ state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error)
+ shift.not_selected = true
+ reduce.add_not_selected_symbol(sym)
+ else
+ raise "Unknown precedence type. #{sym}"
+ end
+ end
+ end
+ end
+ end
+
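+    # Record a reduce/reduce conflict for look-ahead tokens shared by
+    # two or more reduces of the same state.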
+ def compute_reduce_reduce_conflicts
+ states.each do |state|
+ a = []
+
+ state.reduces.each do |reduce|
+ next if reduce.look_ahead.nil?
+
+ intersection = a & reduce.look_ahead
+ a += reduce.look_ahead
+
+ if !intersection.empty?
+ state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce)
+ end
+ end
+ end
+ end
+
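+    # Pick the reduce with the most look-ahead tokens (ties broken by the
+    # smallest rule id) as the state's default reduction, skipping states
+    # with conflicts or with a shift on the error token.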
+ def compute_default_reduction
+ states.each do |state|
+ next if state.reduces.empty?
+        # Do not set if conflicts exist
+ next if !state.conflicts.empty?
+        # Do not set if a shift with `error` exists.
+ next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)
+
+ state.default_reduction_rule = state.reduces.map do |r|
+ [r.rule, r.rule.id, (r.look_ahead || []).count]
+ end.sort_by do |rule, rule_id, count|
+ [-count, rule_id]
+ end.first.first
+ end
+ end
+
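+    # With %expect, mismatching counts are reported as errors
+    # (the expected number of reduce/reduce conflicts is always 0);
+    # without it, any conflicts are reported as warnings.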
+ def check_conflicts
+ sr_count = sr_conflicts.count
+ rr_count = rr_conflicts.count
+
+ if @grammar.expect
+
+ expected_sr_conflicts = @grammar.expect
+ expected_rr_conflicts = 0
+
+ if expected_sr_conflicts != sr_count
+ @warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected")
+ end
+
+ if expected_rr_conflicts != rr_count
+ @warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected")
+ end
+ else
+ if sr_count != 0
+ @warning.warn("shift/reduce conflicts: #{sr_count} found")
+ end
+
+ if rr_count != 0
+ @warning.warn("reduce/reduce conflicts: #{rr_count} found")
+ end
+ end
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/states_reporter.rb b/tool/lrama/lib/lrama/states_reporter.rb
new file mode 100644
index 0000000000..25893e61be
--- /dev/null
+++ b/tool/lrama/lib/lrama/states_reporter.rb
@@ -0,0 +1,310 @@
+module Lrama
+ class StatesReporter
+ include Lrama::Report::Duration
+
+ def initialize(states)
+ @states = states
+ end
+
+ def report(io, **options)
+ report_duration(:report) do
+ _report(io, **options)
+ end
+ end
+
+ private
+
+ def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
+ # TODO: Unused terms
+ # TODO: Unused rules
+
+ report_conflicts(io)
+ report_grammar(io) if grammar
+ report_states(io, itemsets, lookaheads, solved, verbose)
+ end
+
+ def report_conflicts(io)
+ has_conflict = false
+
+ @states.states.each do |state|
+ messages = []
+ cs = state.conflicts.group_by(&:type)
+ if cs[:shift_reduce]
+ messages << "#{cs[:shift_reduce].count} shift/reduce"
+ end
+
+ if cs[:reduce_reduce]
+ messages << "#{cs[:reduce_reduce].count} reduce/reduce"
+ end
+
+ if !messages.empty?
+ has_conflict = true
+ io << "State #{state.id} conflicts: #{messages.join(', ')}\n"
+ end
+ end
+
+ if has_conflict
+ io << "\n\n"
+ end
+ end
+
+ def report_grammar(io)
+ io << "Grammar\n"
+ last_lhs = nil
+
+ @states.rules.each do |rule|
+ if rule.rhs.empty?
+ r = "ε"
+ else
+ r = rule.rhs.map(&:display_name).join(" ")
+ end
+
+ if rule.lhs == last_lhs
+ io << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r)
+ else
+ io << "\n"
+ io << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r)
+ end
+
+ last_lhs = rule.lhs
+ end
+ io << "\n\n"
+ end
+
+ def report_states(io, itemsets, lookaheads, solved, verbose)
+ @states.states.each do |state|
+ # Report State
+ io << "State #{state.id}\n\n"
+
+ # Report item
+ last_lhs = nil
+ list = itemsets ? state.items : state.kernels
+ list.sort_by {|i| [i.rule_id, i.position] }.each do |item|
+ rule = item.rule
+ position = item.position
+ if rule.rhs.empty?
+ r = "ε •"
+ else
+ r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
+ end
+ if rule.lhs == last_lhs
+ l = " " * rule.lhs.id.s_value.length + "|"
+ else
+ l = rule.lhs.id.s_value + ":"
+ end
+ la = ""
+ if lookaheads && item.end_of_rule?
+ reduce = state.find_reduce_by_item!(item)
+ look_ahead = reduce.selected_look_ahead
+ if !look_ahead.empty?
+ la = " [#{look_ahead.map(&:display_name).join(", ")}]"
+ end
+ end
+ last_lhs = rule.lhs
+
+ io << sprintf("%5i %s %s%s\n", rule.id, l, r, la)
+ end
+ io << "\n"
+
+
+ # Report shifts
+ tmp = state.term_transitions.select do |shift, _|
+ !shift.not_selected
+ end.map do |shift, next_state|
+ [shift.next_sym, next_state.id]
+ end
+ max_len = tmp.map(&:first).map(&:display_name).map(&:length).max
+ tmp.each do |term, state_id|
+ io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n"
+ end
+ io << "\n" if !tmp.empty?
+
+
+ # Report error caused by %nonassoc
+ nl = false
+ tmp = state.resolved_conflicts.select do |resolved|
+ resolved.which == :error
+ end.map do |error|
+ error.symbol.display_name
+ end
+ max_len = tmp.map(&:length).max
+ tmp.each do |name|
+ nl = true
+ io << " #{name.ljust(max_len)} error (nonassociative)\n"
+ end
+ io << "\n" if !tmp.empty?
+
+
+ # Report reduces
+ nl = false
+ max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0
+ max_len = [max_len, "$default".length].max if state.default_reduction_rule
+ ary = []
+
+ state.non_default_reduces.each do |reduce|
+ reduce.look_ahead.each do |term|
+ ary << [term, reduce]
+ end
+ end
+
+ ary.sort_by do |term, reduce|
+ term.number
+ end.each do |term, reduce|
+ rule = reduce.item.rule
+ io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n"
+ nl = true
+ end
+
+ if r = state.default_reduction_rule
+ nl = true
+ s = "$default".ljust(max_len)
+
+ if r.initial_rule?
+ io << " #{s} accept\n"
+ else
+ io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n"
+ end
+ end
+ io << "\n" if nl
+
+
+ # Report nonterminal transitions
+ tmp = []
+ max_len = 0
+ state.nterm_transitions.each do |shift, next_state|
+ nterm = shift.next_sym
+ tmp << [nterm, next_state.id]
+ max_len = [max_len, nterm.id.s_value.length].max
+ end
+ tmp.uniq!
+ tmp.sort_by! do |nterm, state_id|
+ nterm.number
+ end
+ tmp.each do |nterm, state_id|
+ io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n"
+ end
+ io << "\n" if !tmp.empty?
+
+
+ if solved
+ # Report conflict resolutions
+ state.resolved_conflicts.each do |resolved|
+ io << " #{resolved.report_message}\n"
+ end
+ io << "\n" if !state.resolved_conflicts.empty?
+ end
+
+
+ if verbose
+ # Report direct_read_sets
+ io << " [Direct Read sets]\n"
+ direct_read_sets = @states.direct_read_sets
+ @states.nterms.each do |nterm|
+ terms = direct_read_sets[[state.id, nterm.token_id]]
+ next if !terms
+ next if terms.empty?
+
+ str = terms.map {|sym| sym.id.s_value }.join(", ")
+ io << " read #{nterm.id.s_value} shift #{str}\n"
+ end
+ io << "\n"
+
+
+          # Report reads_relation
+ io << " [Reads Relation]\n"
+ @states.nterms.each do |nterm|
+ a = @states.reads_relation[[state.id, nterm.token_id]]
+ next if !a
+
+ a.each do |state_id2, nterm_id2|
+ n = @states.nterms.find {|n| n.token_id == nterm_id2 }
+ io << " (State #{state_id2}, #{n.id.s_value})\n"
+ end
+ end
+ io << "\n"
+
+
+          # Report read_sets
+ io << " [Read sets]\n"
+ read_sets = @states.read_sets
+ @states.nterms.each do |nterm|
+ terms = read_sets[[state.id, nterm.token_id]]
+ next if !terms
+ next if terms.empty?
+
+ terms.each do |sym|
+ io << " #{sym.id.s_value}\n"
+ end
+ end
+ io << "\n"
+
+
+          # Report includes_relation
+ io << " [Includes Relation]\n"
+ @states.nterms.each do |nterm|
+ a = @states.includes_relation[[state.id, nterm.token_id]]
+ next if !a
+
+ a.each do |state_id2, nterm_id2|
+ n = @states.nterms.find {|n| n.token_id == nterm_id2 }
+ io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n"
+ end
+ end
+ io << "\n"
+
+
+ # Report lookback_relation
+ io << " [Lookback Relation]\n"
+ @states.rules.each do |rule|
+ a = @states.lookback_relation[[state.id, rule.id]]
+ next if !a
+
+ a.each do |state_id2, nterm_id2|
+ n = @states.nterms.find {|n| n.token_id == nterm_id2 }
+ io << " (Rule: #{rule.to_s}) -> (State #{state_id2}, #{n.id.s_value})\n"
+ end
+ end
+ io << "\n"
+
+
+          # Report follow_sets
+ io << " [Follow sets]\n"
+ follow_sets = @states.follow_sets
+ @states.nterms.each do |nterm|
+ terms = follow_sets[[state.id, nterm.token_id]]
+
+ next if !terms
+
+ terms.each do |sym|
+ io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n"
+ end
+ end
+ io << "\n"
+
+
+ # Report LA
+ io << " [Look-Ahead Sets]\n"
+ tmp = []
+ max_len = 0
+ @states.rules.each do |rule|
+ syms = @states.la[[state.id, rule.id]]
+ next if !syms
+
+ tmp << [rule, syms]
+ max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max
+ end
+ tmp.each do |rule, syms|
+ syms.each do |sym|
+ io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n"
+ end
+ end
+ io << "\n" if !tmp.empty?
+ end
+
+
+ # End of Report State
+ io << "\n"
+ end
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb
new file mode 100644
index 0000000000..2da384bf73
--- /dev/null
+++ b/tool/lrama/lib/lrama/version.rb
@@ -0,0 +1,3 @@
+module Lrama
+ VERSION = "0.4.0".freeze
+end
diff --git a/tool/lrama/lib/lrama/warning.rb b/tool/lrama/lib/lrama/warning.rb
new file mode 100644
index 0000000000..3c99791ebf
--- /dev/null
+++ b/tool/lrama/lib/lrama/warning.rb
@@ -0,0 +1,25 @@
+module Lrama
+ class Warning
+ attr_reader :errors, :warns
+
+ def initialize(out = STDERR)
+ @out = out
+ @errors = []
+ @warns = []
+ end
+
+ def error(message)
+ @out << message << "\n"
+ @errors << message
+ end
+
+ def warn(message)
+ @out << message << "\n"
+ @warns << message
+ end
+
+ def has_error?
+ !@errors.empty?
+ end
+ end
+end