diff options
Diffstat (limited to 'tool/lrama/lib/lrama')
68 files changed, 7645 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/bitmap.rb b/tool/lrama/lib/lrama/bitmap.rb new file mode 100644 index 0000000000..8349a23c34 --- /dev/null +++ b/tool/lrama/lib/lrama/bitmap.rb @@ -0,0 +1,29 @@ +module Lrama + module Bitmap + def self.from_array(ary) + bit = 0 + + ary.each do |int| + bit |= (1 << int) + end + + bit + end + + def self.to_array(int) + a = [] + i = 0 + + while int > 0 do + if int & 1 == 1 + a << i + end + + i += 1 + int >>= 1 + end + + a + end + end +end diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb new file mode 100644 index 0000000000..12fc4fc7ec --- /dev/null +++ b/tool/lrama/lib/lrama/command.rb @@ -0,0 +1,73 @@ +module Lrama + class Command + LRAMA_LIB = File.realpath(File.join(File.dirname(__FILE__))) + STDLIB_FILE_PATH = File.join(LRAMA_LIB, 'grammar', 'stdlib.y') + + def run(argv) + begin + options = OptionParser.new.parse(argv) + rescue => e + message = e.message + message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? + abort message + end + + Report::Duration.enable if options.trace_opts[:time] + + warning = Lrama::Warning.new + text = options.y.read + options.y.close if options.y != STDIN + begin + grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse + unless grammar.no_stdlib + stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse + grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules) + end + grammar.prepare + grammar.validate! + rescue => e + raise e if options.debug + message = e.message + message = message.gsub(/.+/, "\e[1m\\&\e[m") if Exception.to_tty? + abort message + end + states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure])) + states.compute + context = Lrama::Context.new(states) + + if options.report_file + reporter = Lrama::StatesReporter.new(states) + File.open(options.report_file, "w+") do |f| + reporter.report(f, **options.report_opts) + end + end + + if options.trace_opts && options.trace_opts[:rules] + puts "Grammar rules:" + puts grammar.rules + end + + if options.trace_opts && options.trace_opts[:actions] + puts "Grammar rules with actions:" + grammar.rules.each { |rule| puts rule.with_actions } + end + + File.open(options.outfile, "w+") do |f| + Lrama::Output.new( + out: f, + output_file_path: options.outfile, + template_name: options.skeleton, + grammar_file_path: options.grammar_file, + header_file_path: options.header_file, + context: context, + grammar: grammar, + error_recovery: options.error_recovery, + ).render + end + + if warning.has_error? + exit false + end + end + end +end diff --git a/tool/lrama/lib/lrama/context.rb b/tool/lrama/lib/lrama/context.rb new file mode 100644 index 0000000000..32017e65fc --- /dev/null +++ b/tool/lrama/lib/lrama/context.rb @@ -0,0 +1,497 @@ +require "lrama/report/duration" + +module Lrama + # This is passed to a template + class Context + include Report::Duration + + ErrorActionNumber = -Float::INFINITY + BaseMin = -Float::INFINITY + + # TODO: It might be better to pass `states` to Output directly? + attr_reader :states, :yylast, :yypact_ninf, :yytable_ninf, :yydefact, :yydefgoto + + def initialize(states) + @states = states + @yydefact = nil + @yydefgoto = nil + # Array of array + @_actions = [] + + compute_tables + end + + # enum yytokentype + def yytokentype + @states.terms.reject do |term| + 0 < term.token_id && term.token_id < 128 + end.map do |term| + [term.id.s_value, term.token_id, term.display_name] + end.unshift(["YYEMPTY", -2, nil]) + end + + # enum yysymbol_kind_t + def yysymbol_kind_t + @states.symbols.map do |sym| + [sym.enum_name, sym.number, sym.comment] + end.unshift(["YYSYMBOL_YYEMPTY", -2, nil]) + end + + # State number of final (accepted) state + def yyfinal + @states.states.find do |state| + state.items.find do |item| + item.lhs.accept_symbol? && item.end_of_rule? + end + end.id + end + + # Number of terms + def yyntokens + @states.terms.count + end + + # Number of nterms + def yynnts + @states.nterms.count + end + + # Number of rules + def yynrules + @states.rules.count + end + + # Number of states + def yynstates + @states.states.count + end + + # Last token number + def yymaxutok + @states.terms.map(&:token_id).max + end + + # YYTRANSLATE + # + # yytranslate is a mapping from token id to symbol number + def yytranslate + # 2 is YYSYMBOL_YYUNDEF + a = Array.new(yymaxutok, 2) + + @states.terms.each do |term| + a[term.token_id] = term.number + end + + return a + end + + def yytranslate_inverted + a = Array.new(@states.symbols.count, @states.undef_symbol.token_id) + + @states.terms.each do |term| + a[term.number] = term.token_id + end + + return a + end + + # Mapping from rule number to line number of the rule is defined. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyrline + a = [0] + + @states.rules.each do |rule| + a << rule.lineno + end + + return a + end + + # Mapping from symbol number to its name + def yytname + @states.symbols.sort_by(&:number).map do |sym| + sym.display_name + end + end + + def yypact + @base[0...yynstates] + end + + def yypgoto + @base[yynstates..-1] + end + + def yytable + @table + end + + def yycheck + @check + end + + def yystos + @states.states.map do |state| + state.accessing_symbol.number + end + end + + # Mapping from rule number to symbol number of LHS. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyr1 + a = [0] + + @states.rules.each do |rule| + a << rule.lhs.number + end + + return a + end + + # Mapping from rule number to length of RHS. + # Dummy rule is appended as the first element whose value is 0 + # because 0 means error in yydefact. + def yyr2 + a = [0] + + @states.rules.each do |rule| + a << rule.rhs.count + end + + return a + end + + private + + # Compute these + # + # See also: "src/tables.c" of Bison. + # + # * yydefact + # * yydefgoto + # * yypact and yypgoto + # * yytable + # * yycheck + # * yypact_ninf + # * yytable_ninf + def compute_tables + report_duration(:compute_yydefact) { compute_yydefact } + report_duration(:compute_yydefgoto) { compute_yydefgoto } + report_duration(:sort_actions) { sort_actions } + # debug_sorted_actions + report_duration(:compute_packed_table) { compute_packed_table } + end + + def vectors_count + @states.states.count + @states.nterms.count + end + + # In compressed table, rule 0 is appended as an error case + # and reduce is represented as minus number. + def rule_id_to_action_number(rule_id) + (rule_id + 1) * -1 + end + + # Symbol number is assigned to term first then nterm. + # This method calculates sequence_number for nterm. + def nterm_number_to_sequence_number(nterm_number) + nterm_number - @states.terms.count + end + + # Vector is states + nterms + def nterm_number_to_vector_number(nterm_number) + @states.states.count + (nterm_number - @states.terms.count) + end + + def compute_yydefact + # Default action (shift/reduce/error) for each state. + # Index is state id, value is `rule id + 1` of a default reduction. + @yydefact = Array.new(@states.states.count, 0) + + @states.states.each do |state| + # Action number means + # + # * number = 0, default action + # * number = -Float::INFINITY, error by %nonassoc + # * number > 0, shift then move to state "number" + # * number < 0, reduce by "-number" rule. Rule "number" is already added by 1. + actions = Array.new(@states.terms.count, 0) + + if state.reduces.map(&:selected_look_ahead).any? {|la| !la.empty? } + # Iterate reduces with reverse order so that first rule is used. + state.reduces.reverse_each do |reduce| + reduce.look_ahead.each do |term| + actions[term.number] = rule_id_to_action_number(reduce.rule.id) + end + end + end + + # Shift is selected when S/R conflict exists. + state.selected_term_transitions.each do |shift, next_state| + actions[shift.next_sym.number] = next_state.id + end + + state.resolved_conflicts.select do |conflict| + conflict.which == :error + end.each do |conflict| + actions[conflict.symbol.number] = ErrorActionNumber + end + + # If default_reduction_rule, replace default_reduction_rule in + # actions with zero. + if state.default_reduction_rule + actions.map! do |e| + if e == rule_id_to_action_number(state.default_reduction_rule.id) + 0 + else + e + end + end + end + + # If no default_reduction_rule, default behavior is an + # error then replace ErrorActionNumber with zero. + if !state.default_reduction_rule + actions.map! do |e| + if e == ErrorActionNumber + 0 + else + e + end + end + end + + s = actions.each_with_index.map do |n, i| + [i, n] + end.reject do |i, n| + # Remove default_reduction_rule entries + n == 0 + end + + if s.count != 0 + # Entry of @_actions is an array of + # + # * State id + # * Array of tuple, [from, to] where from is term number and to is action. + # * The number of "Array of tuple" used by sort_actions + # * "width" used by sort_actions + @_actions << [state.id, s, s.count, s.last[0] - s.first[0] + 1] + end + + @yydefact[state.id] = state.default_reduction_rule ? state.default_reduction_rule.id + 1 : 0 + end + end + + def compute_yydefgoto + # Default GOTO (nterm transition) for each nterm. + # Index is sequence number of nterm, value is state id + # of a default nterm transition destination. + @yydefgoto = Array.new(@states.nterms.count, 0) + # Mapping from nterm to next_states + nterm_to_next_states = {} + + @states.states.each do |state| + state.nterm_transitions.each do |shift, next_state| + key = shift.next_sym + nterm_to_next_states[key] ||= [] + nterm_to_next_states[key] << [state, next_state] # [from_state, to_state] + end + end + + @states.nterms.each do |nterm| + if !(states = nterm_to_next_states[nterm]) + default_goto = 0 + not_default_gotos = [] + else + default_state = states.map(&:last).group_by {|s| s }.max_by {|_, v| v.count }.first + default_goto = default_state.id + not_default_gotos = [] + states.each do |from_state, to_state| + next if to_state.id == default_goto + not_default_gotos << [from_state.id, to_state.id] + end + end + + k = nterm_number_to_sequence_number(nterm.number) + @yydefgoto[k] = default_goto + + if not_default_gotos.count != 0 + v = nterm_number_to_vector_number(nterm.number) + + # Entry of @_actions is an array of + # + # * Nterm number as vector number + # * Array of tuple, [from, to] where from is state number and to is state number. + # * The number of "Array of tuple" used by sort_actions + # * "width" used by sort_actions + @_actions << [v, not_default_gotos, not_default_gotos.count, not_default_gotos.last[0] - not_default_gotos.first[0] + 1] + end + end + end + + def sort_actions + # This is not same with #sort_actions + # + # @sorted_actions = @_actions.sort_by do |_, _, count, width| + # [-width, -count] + # end + + @sorted_actions = [] + + @_actions.each do |action| + if @sorted_actions.empty? + @sorted_actions << action + next + end + + j = @sorted_actions.count - 1 + _state_id, _froms_and_tos, count, width = action + + while (j >= 0) do + case + when @sorted_actions[j][3] < width + j -= 1 + when @sorted_actions[j][3] == width && @sorted_actions[j][2] < count + j -= 1 + else + break + end + end + + @sorted_actions.insert(j + 1, action) + end + end + + def debug_sorted_actions + ary = Array.new + @sorted_actions.each do |state_id, froms_and_tos, count, width| + ary[state_id] = [state_id, froms_and_tos, count, width] + end + + print sprintf("table_print:\n\n") + + print sprintf("order [\n") + vectors_count.times do |i| + print sprintf("%d, ", @sorted_actions[i] ? @sorted_actions[i][0] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + + print sprintf("width [\n") + vectors_count.times do |i| + print sprintf("%d, ", ary[i] ? ary[i][3] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + + print sprintf("tally [\n") + vectors_count.times do |i| + print sprintf("%d, ", ary[i] ? ary[i][2] : 0) + print "\n" if i % 10 == 9 + end + print sprintf("]\n\n") + end + + def compute_packed_table + # yypact and yypgoto + @base = Array.new(vectors_count, BaseMin) + # yytable + @table = [] + # yycheck + @check = [] + # Key is froms_and_tos, value is index position + pushed = {} + userd_res = {} + lowzero = 0 + high = 0 + + @sorted_actions.each do |state_id, froms_and_tos, _, _| + if (res = pushed[froms_and_tos]) + @base[state_id] = res + next + end + + res = lowzero - froms_and_tos.first[0] + + while true do + ok = true + + froms_and_tos.each do |from, to| + loc = res + from + + if @table[loc] + # If the cell of table is set, can not use the cell. + ok = false + break + end + end + + if ok && userd_res[res] + ok = false + end + + if ok + break + else + res += 1 + end + end + + loc = 0 + + froms_and_tos.each do |from, to| + loc = res + from + + @table[loc] = to + @check[loc] = from + end + + while (@table[lowzero]) do + lowzero += 1 + end + + high = loc if high < loc + + @base[state_id] = res + pushed[froms_and_tos] = res + userd_res[res] = true + end + + @yylast = high + + # replace_ninf + @yypact_ninf = (@base.reject {|i| i == BaseMin } + [0]).min - 1 + @base.map! do |i| + case i + when BaseMin + @yypact_ninf + else + i + end + end + + @yytable_ninf = (@table.compact.reject {|i| i == ErrorActionNumber } + [0]).min - 1 + @table.map! do |i| + case i + when nil + 0 + when ErrorActionNumber + @yytable_ninf + else + i + end + end + + @check.map! do |i| + case i + when nil + -1 + else + i + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples.rb b/tool/lrama/lib/lrama/counterexamples.rb new file mode 100644 index 0000000000..046265da59 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples.rb @@ -0,0 +1,286 @@ +require "set" + +require "lrama/counterexamples/derivation" +require "lrama/counterexamples/example" +require "lrama/counterexamples/path" +require "lrama/counterexamples/production_path" +require "lrama/counterexamples/start_path" +require "lrama/counterexamples/state_item" +require "lrama/counterexamples/transition_path" +require "lrama/counterexamples/triple" + +module Lrama + # See: https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf + # 4. Constructing Nonunifying Counterexamples + class Counterexamples + attr_reader :transitions, :productions + + def initialize(states) + @states = states + setup_transitions + setup_productions + end + + def to_s + "#<Counterexamples>" + end + alias :inspect :to_s + + def compute(conflict_state) + conflict_state.conflicts.flat_map do |conflict| + case conflict.type + when :shift_reduce + shift_reduce_example(conflict_state, conflict) + when :reduce_reduce + reduce_reduce_examples(conflict_state, conflict) + end + end.compact + end + + private + + def setup_transitions + # Hash [StateItem, Symbol] => StateItem + @transitions = {} + # Hash [StateItem, Symbol] => Set(StateItem) + @reverse_transitions = {} + + @states.states.each do |src_state| + trans = {} + + src_state.transitions.each do |shift, next_state| + trans[shift.next_sym] = next_state + end + + src_state.items.each do |src_item| + next if src_item.end_of_rule? + sym = src_item.next_sym + dest_state = trans[sym] + + dest_state.kernels.each do |dest_item| + next unless (src_item.rule == dest_item.rule) && (src_item.position + 1 == dest_item.position) + src_state_item = StateItem.new(src_state, src_item) + dest_state_item = StateItem.new(dest_state, dest_item) + + @transitions[[src_state_item, sym]] = dest_state_item + + key = [dest_state_item, sym] + @reverse_transitions[key] ||= Set.new + @reverse_transitions[key] << src_state_item + end + end + end + end + + def setup_productions + # Hash [StateItem] => Set(Item) + @productions = {} + # Hash [State, Symbol] => Set(Item). Symbol is nterm + @reverse_productions = {} + + @states.states.each do |state| + # LHS => Set(Item) + h = {} + + state.closure.each do |item| + sym = item.lhs + + h[sym] ||= Set.new + h[sym] << item + end + + state.items.each do |item| + next if item.end_of_rule? + next if item.next_sym.term? + + sym = item.next_sym + state_item = StateItem.new(state, item) + key = [state, sym] + + @productions[state_item] = h[sym] + + @reverse_productions[key] ||= Set.new + @reverse_productions[key] << item + end + end + end + + def shift_reduce_example(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + shift_conflict_item = conflict_state.items.find { |item| item.next_sym == conflict_symbol } + path2 = shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) + path1 = find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def reduce_reduce_examples(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + path1 = shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) + path2 = shortest_path(conflict_state, conflict.reduce2.item, conflict_symbol) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def find_shift_conflict_shortest_path(reduce_path, conflict_state, conflict_item) + state_items = find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + build_paths_from_state_items(state_items) + end + + def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + target_state_item = StateItem.new(conflict_state, conflict_item) + result = [target_state_item] + reversed_reduce_path = reduce_path.to_a.reverse + # Index for state_item + i = 0 + + while (path = reversed_reduce_path[i]) + # Index for prev_state_item + j = i + 1 + _j = j + + while (prev_path = reversed_reduce_path[j]) + if prev_path.production? + j += 1 + else + break + end + end + + state_item = path.to + prev_state_item = prev_path&.to + + if target_state_item == state_item || target_state_item.item.start_item? + result.concat(reversed_reduce_path[_j..-1].map(&:to)) + break + end + + if target_state_item.item.beginning_of_rule? + queue = [] + queue << [target_state_item] + + # Find reverse production + while (sis = queue.shift) + si = sis.last + + # Reach to start state + if si.item.start_item? + sis.shift + result.concat(sis) + target_state_item = si + break + end + + if !si.item.beginning_of_rule? + key = [si, si.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + sis.shift + result.concat(sis) + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + queue.clear + break + end + else + key = [si.state, si.item.lhs] + @reverse_productions[key].each do |item| + state_item = StateItem.new(si.state, item) + queue << (sis + [state_item]) + end + end + end + else + # Find reverse transition + key = [target_state_item, target_state_item.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + break + end + end + end + + result.reverse + end + + def build_paths_from_state_items(state_items) + state_items.zip([nil] + state_items).map do |si, prev_si| + case + when prev_si.nil? + StartPath.new(si) + when si.item.beginning_of_rule? + ProductionPath.new(prev_si, si) + else + TransitionPath.new(prev_si, si) + end + end + end + + def shortest_path(conflict_state, conflict_reduce_item, conflict_term) + # queue: is an array of [Triple, [Path]] + queue = [] + visited = {} + start_state = @states.states.first + raise "BUG: Start state should be just one kernel." if start_state.kernels.count != 1 + + start = Triple.new(start_state, start_state.kernels.first, Set.new([@states.eof_symbol])) + + queue << [start, [StartPath.new(start.state_item)]] + + while true + triple, paths = queue.shift + + next if visited[triple] + visited[triple] = true + + # Found + if triple.state == conflict_state && triple.item == conflict_reduce_item && triple.l.include?(conflict_term) + return paths + end + + # transition + triple.state.transitions.each do |shift, next_state| + next unless triple.item.next_sym && triple.item.next_sym == shift.next_sym + next_state.kernels.each do |kernel| + next if kernel.rule != triple.item.rule + t = Triple.new(next_state, kernel, triple.l) + queue << [t, paths + [TransitionPath.new(triple.state_item, t.state_item)]] + end + end + + # production step + triple.state.closure.each do |item| + next unless triple.item.next_sym && triple.item.next_sym == item.lhs + l = follow_l(triple.item, triple.l) + t = Triple.new(triple.state, item, l) + queue << [t, paths + [ProductionPath.new(triple.state_item, t.state_item)]] + end + + break if queue.empty? + end + + return nil + end + + def follow_l(item, current_l) + # 1. follow_L (A -> X1 ... Xn-1 • Xn) = L + # 2. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = {Xk+2} if Xk+2 is a terminal + # 3. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) if Xk+2 is a nonnullable nonterminal + # 4. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) + follow_L (A -> X1 ... Xk+1 • Xk+2 ... Xn) if Xk+2 is a nullable nonterminal + case + when item.number_of_rest_symbols == 1 + current_l + when item.next_next_sym.term? + Set.new([item.next_next_sym]) + when !item.next_next_sym.nullable + item.next_next_sym.first_set + else + item.next_next_sym.first_set + follow_l(item.new_by_next_position, current_l) + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/derivation.rb b/tool/lrama/lib/lrama/counterexamples/derivation.rb new file mode 100644 index 0000000000..691e935356 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/derivation.rb @@ -0,0 +1,63 @@ +module Lrama + class Counterexamples + class Derivation + attr_reader :item, :left, :right + attr_writer :right + + def initialize(item, left, right = nil) + @item = item + @left = left + @right = right + end + + def to_s + "#<Derivation(#{item.display_name})>" + end + alias :inspect :to_s + + def render_strings_for_report + result = [] + _render_for_report(self, 0, result, 0) + result.map(&:rstrip) + end + + def render_for_report + render_strings_for_report.join("\n") + end + + private + + def _render_for_report(derivation, offset, strings, index) + item = derivation.item + if strings[index] + strings[index] << " " * (offset - strings[index].length) + else + strings[index] = " " * offset + end + str = strings[index] + str << "#{item.rule_id}: #{item.symbols_before_dot.map(&:display_name).join(" ")} " + + if derivation.left + len = str.length + str << "#{item.next_sym.display_name}" + length = _render_for_report(derivation.left, len, strings, index + 1) + # I want String#ljust! + str << " " * (length - str.length) + else + str << " • #{item.symbols_after_dot.map(&:display_name).join(" ")} " + return str.length + end + + if derivation.right&.left + length = _render_for_report(derivation.right.left, str.length, strings, index + 1) + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + str << " " * (length - str.length) if length > str.length + elsif item.next_next_sym + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + end + + return str.length + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/example.rb b/tool/lrama/lib/lrama/counterexamples/example.rb new file mode 100644 index 0000000000..62244a77e0 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/example.rb @@ -0,0 +1,124 @@ +module Lrama + class Counterexamples + class Example + attr_reader :path1, :path2, :conflict, :conflict_symbol + + # path1 is shift conflict when S/R conflict + # path2 is always reduce conflict + def initialize(path1, path2, conflict, conflict_symbol, counterexamples) + @path1 = path1 + @path2 = path2 + @conflict = conflict + @conflict_symbol = conflict_symbol + @counterexamples = counterexamples + end + + def type + @conflict.type + end + + def path1_item + @path1.last.to.item + end + + def path2_item + @path2.last.to.item + end + + def derivations1 + @derivations1 ||= _derivations(path1) + end + + def derivations2 + @derivations2 ||= _derivations(path2) + end + + private + + def _derivations(paths) + derivation = nil + current = :production + lookahead_sym = paths.last.to.item.end_of_rule? ? @conflict_symbol : nil + + paths.reverse_each do |path| + item = path.to.item + + case current + when :production + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + derivation = Derivation.new(item, derivation) + current = :transition + when ProductionPath + derivation = Derivation.new(item, derivation) + current = :production + end + + if lookahead_sym && item.next_next_sym && item.next_next_sym.first_set.include?(lookahead_sym) + state_item = @counterexamples.transitions[[path.to, item.next_sym]] + derivation2 = find_derivation_for_symbol(state_item, lookahead_sym) + derivation.right = derivation2 + lookahead_sym = nil + end + + when :transition + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + # ignore + current = :transition + when ProductionPath + # ignore + current = :production + end + else + raise "BUG: Unknown #{current}" + end + + break if current == :start + end + + derivation + end + + def find_derivation_for_symbol(state_item, sym) + queue = [] + queue << [state_item] + + while (sis = queue.shift) + si = sis.last + next_sym = si.item.next_sym + + if next_sym == sym + derivation = nil + + sis.reverse_each do |si| + derivation = Derivation.new(si.item, derivation) + end + + return derivation + end + + if next_sym.nterm? && next_sym.first_set.include?(sym) + @counterexamples.productions[si].each do |next_item| + next if next_item.empty_rule? + next_si = StateItem.new(si.state, next_item) + next if sis.include?(next_si) + queue << (sis + [next_si]) + end + + if next_sym.nullable + next_si = @counterexamples.transitions[[si, next_sym]] + queue << (sis + [next_si]) + end + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/path.rb b/tool/lrama/lib/lrama/counterexamples/path.rb new file mode 100644 index 0000000000..edba67a3b6 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/path.rb @@ -0,0 +1,23 @@ +module Lrama + class Counterexamples + class Path + def initialize(from_state_item, to_state_item) + @from_state_item = from_state_item + @to_state_item = to_state_item + end + + def from + @from_state_item + end + + def to + @to_state_item + end + + def to_s + "#<Path(#{type})>" + end + alias :inspect :to_s + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/production_path.rb b/tool/lrama/lib/lrama/counterexamples/production_path.rb new file mode 100644 index 0000000000..d7db688518 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/production_path.rb @@ -0,0 +1,17 @@ +module Lrama + class Counterexamples + class ProductionPath < Path + def type + :production + end + + def transition? + false + end + + def production? + true + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/start_path.rb b/tool/lrama/lib/lrama/counterexamples/start_path.rb new file mode 100644 index 0000000000..4a6821cd0f --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/start_path.rb @@ -0,0 +1,21 @@ +module Lrama + class Counterexamples + class StartPath < Path + def initialize(to_state_item) + super nil, to_state_item + end + + def type + :start + end + + def transition? + false + end + + def production? + false + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/state_item.rb b/tool/lrama/lib/lrama/counterexamples/state_item.rb new file mode 100644 index 0000000000..930ff4a5f8 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/state_item.rb @@ -0,0 +1,6 @@ +module Lrama + class Counterexamples + class StateItem < Struct.new(:state, :item) + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/transition_path.rb b/tool/lrama/lib/lrama/counterexamples/transition_path.rb new file mode 100644 index 0000000000..96e611612a --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/transition_path.rb @@ -0,0 +1,17 @@ +module Lrama + class Counterexamples + class TransitionPath < Path + def type + :transition + end + + def transition? + true + end + + def production? + false + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/triple.rb b/tool/lrama/lib/lrama/counterexamples/triple.rb new file mode 100644 index 0000000000..e802beccf4 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/triple.rb @@ -0,0 +1,21 @@ +module Lrama + class Counterexamples + # s: state + # itm: item within s + # l: precise lookahead set + class Triple < Struct.new(:s, :itm, :l) + alias :state :s + alias :item :itm + alias :precise_lookahead_set :l + + def state_item + StateItem.new(state, item) + end + + def inspect + "#{state.inspect}. #{item.display_name}. #{l.map(&:id).map(&:s_value)}" + end + alias :to_s :inspect + end + end +end diff --git a/tool/lrama/lib/lrama/digraph.rb b/tool/lrama/lib/lrama/digraph.rb new file mode 100644 index 0000000000..bbaa86019f --- /dev/null +++ b/tool/lrama/lib/lrama/digraph.rb @@ -0,0 +1,51 @@ +module Lrama + # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625) + class Digraph + def initialize(sets, relation, base_function) + # X in the paper + @sets = sets + # R in the paper + @relation = relation + # F' in the paper + @base_function = base_function + # S in the paper + @stack = [] + # N in the paper + @h = Hash.new(0) + # F in the paper + @result = {} + end + + def compute + @sets.each do |x| + next if @h[x] != 0 + traverse(x) + end + + return @result + end + + private + + def traverse(x) + @stack.push(x) + d = @stack.count + @h[x] = d + @result[x] = @base_function[x] # F x = F' x + + @relation[x]&.each do |y| + traverse(y) if @h[y] == 0 + @h[x] = [@h[x], @h[y]].min + @result[x] |= @result[y] # F x = F x + F y + end + + if @h[x] == d + while (z = @stack.pop) do + @h[z] = Float::INFINITY + break if z == x + @result[z] = @result[x] # F (Top of S) = F x + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb new file mode 100644 index 0000000000..a816b8261b --- /dev/null +++ b/tool/lrama/lib/lrama/grammar.rb @@ -0,0 +1,381 @@ +require "forwardable" +require "lrama/grammar/auxiliary" +require "lrama/grammar/binding" +require "lrama/grammar/code" +require "lrama/grammar/counter" +require "lrama/grammar/destructor" +require "lrama/grammar/error_token" +require "lrama/grammar/parameterizing_rule" +require "lrama/grammar/percent_code" +require "lrama/grammar/precedence" +require "lrama/grammar/printer" +require "lrama/grammar/reference" +require "lrama/grammar/rule" +require "lrama/grammar/rule_builder" +require "lrama/grammar/symbol" +require "lrama/grammar/symbols" +require "lrama/grammar/type" +require "lrama/grammar/union" +require "lrama/lexer" + +module Lrama + # Grammar is the result of parsing an input grammar file + class Grammar + extend Forwardable + + attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux + attr_accessor :union, :expect, + :printers, :error_tokens, + :lex_param, :parse_param, :initial_action, + :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, + :symbols_resolver, :types, + :rules, :rule_builders, + :sym_to_rules, :no_stdlib + + def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, + :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, + :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type, + :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! + + + def initialize(rule_counter) + @rule_counter = rule_counter + + # Code defined by "%code" + @percent_codes = [] + @printers = [] + @destructors = [] + @error_tokens = [] + @symbols_resolver = Grammar::Symbols::Resolver.new + @types = [] + @rule_builders = [] + @rules = [] + @sym_to_rules = {} + @parameterizing_rule_resolver = ParameterizingRule::Resolver.new + @empty_symbol = nil + @eof_symbol = nil + @error_symbol = nil + @undef_symbol = nil + @accept_symbol = nil + @aux = Auxiliary.new + @no_stdlib = false + + append_special_symbols + end + + def add_percent_code(id:, code:) + @percent_codes << PercentCode.new(id.s_value, code.s_value) + end + + def add_destructor(ident_or_tags:, token_code:, lineno:) + @destructors << Destructor.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_printer(ident_or_tags:, token_code:, lineno:) + @printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_error_token(ident_or_tags:, token_code:, lineno:) + @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) + end + + def add_type(id:, tag:) + @types << Type.new(id: id, tag: tag) + end + + def add_nonassoc(sym, precedence) + set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence)) + end + + def add_left(sym, precedence) + set_precedence(sym, Precedence.new(type: :left, precedence: precedence)) + end + + def add_right(sym, precedence) + set_precedence(sym, Precedence.new(type: :right, precedence: precedence)) + end + + def add_precedence(sym, precedence) + set_precedence(sym, Precedence.new(type: :precedence, precedence: precedence)) + end + + def set_precedence(sym, precedence) + raise "" if sym.nterm? + sym.precedence = precedence + end + + def set_union(code, lineno) + @union = Union.new(code: code, lineno: lineno) + end + + def add_rule_builder(builder) + @rule_builders << builder + end + + def add_parameterizing_rule(rule) + @parameterizing_rule_resolver.add_parameterizing_rule(rule) + end + + def parameterizing_rules + @parameterizing_rule_resolver.rules + end + + def insert_before_parameterizing_rules(rules) + @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules + end + + def prologue_first_lineno=(prologue_first_lineno) + @aux.prologue_first_lineno = prologue_first_lineno + end + + def prologue=(prologue) + @aux.prologue = prologue + end + + def epilogue_first_lineno=(epilogue_first_lineno) + @aux.epilogue_first_lineno = epilogue_first_lineno + end + + def epilogue=(epilogue) + @aux.epilogue = epilogue + end + + def prepare + normalize_rules + collect_symbols + set_lhs_and_rhs + fill_default_precedence + fill_symbols + fill_sym_to_rules + compute_nullable + compute_first_set + end + + # TODO: More validation methods + # + # * Validation for no_declared_type_reference + def validate! + @symbols_resolver.validate! + validate_rule_lhs_is_nterm! + end + + def find_rules_by_symbol!(sym) + find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found") + end + + def find_rules_by_symbol(sym) + @sym_to_rules[sym.number] + end + + private + + def compute_nullable + @rules.each do |rule| + case + when rule.empty_rule? + rule.nullable = true + when rule.rhs.any?(&:term) + rule.nullable = false + else + # noop + end + end + + while true do + rs = @rules.select {|e| e.nullable.nil? } + nts = nterms.select {|e| e.nullable.nil? } + rule_count_1 = rs.count + nterm_count_1 = nts.count + + rs.each do |rule| + if rule.rhs.all?(&:nullable) + rule.nullable = true + end + end + + nts.each do |nterm| + find_rules_by_symbol!(nterm).each do |rule| + if rule.nullable + nterm.nullable = true + end + end + end + + rule_count_2 = @rules.count {|e| e.nullable.nil? } + nterm_count_2 = nterms.count {|e| e.nullable.nil? } + + if (rule_count_1 == rule_count_2) && (nterm_count_1 == nterm_count_2) + break + end + end + + rules.select {|r| r.nullable.nil? }.each do |rule| + rule.nullable = false + end + + nterms.select {|e| e.nullable.nil? }.each do |nterm| + nterm.nullable = false + end + end + + def compute_first_set + terms.each do |term| + term.first_set = Set.new([term]).freeze + term.first_set_bitmap = Lrama::Bitmap.from_array([term.number]) + end + + nterms.each do |nterm| + nterm.first_set = Set.new([]).freeze + nterm.first_set_bitmap = Lrama::Bitmap.from_array([]) + end + + while true do + changed = false + + @rules.each do |rule| + rule.rhs.each do |r| + if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap + changed = true + rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap + end + + break unless r.nullable + end + end + + break unless changed + end + + nterms.each do |nterm| + nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number| + find_symbol_by_number!(number) + end.to_set + end + end + + def setup_rules + @rule_builders.each do |builder| + builder.setup_rules(@parameterizing_rule_resolver) + end + end + + def append_special_symbols + # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated + # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2) + # term.number = -2 + # @empty_symbol = term + + # YYEOF + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0) + term.number = 0 + term.eof_symbol = true + @eof_symbol = term + + # YYerror + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error") + term.number = 1 + term.error_symbol = true + @error_symbol = term + + # YYUNDEF + term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"") + term.number = 2 + term.undef_symbol = true + @undef_symbol = term + + # $accept + term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept")) + term.accept_symbol = true + @accept_symbol = term + end + + def normalize_rules + # Add $accept rule to the top of rules + lineno = @rule_builders.first ? @rule_builders.first.line : 0 + @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno) + + setup_rules + + @rule_builders.each do |builder| + builder.rules.each do |rule| + add_nterm(id: rule._lhs, tag: rule.lhs_tag) + @rules << rule + end + end + + @rules.sort_by!(&:id) + end + + # Collect symbols from rules + def collect_symbols + @rules.flat_map(&:_rhs).each do |s| + case s + when Lrama::Lexer::Token::Char + add_term(id: s) + when Lrama::Lexer::Token + # skip + else + raise "Unknown class: #{s}" + end + end + end + + def set_lhs_and_rhs + @rules.each do |rule| + rule.lhs = token_to_symbol(rule._lhs) if rule._lhs + + rule.rhs = rule._rhs.map do |t| + token_to_symbol(t) + end + end + end + + # Rule inherits precedence from the last term in RHS. + # + # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html + def fill_default_precedence + @rules.each do |rule| + # Explicitly specified precedence has the highest priority + next if rule.precedence_sym + + precedence_sym = nil + rule.rhs.each do |sym| + precedence_sym = sym if sym.term? + end + + rule.precedence_sym = precedence_sym + end + end + + def fill_symbols + fill_symbol_number + fill_nterm_type(@types) + fill_printer(@printers) + fill_destructor(@destructors) + fill_error_token(@error_tokens) + sort_by_number! + end + + def fill_sym_to_rules + @rules.each do |rule| + key = rule.lhs.number + @sym_to_rules[key] ||= [] + @sym_to_rules[key] << rule + end + end + + def validate_rule_lhs_is_nterm! + errors = [] + + rules.each do |rule| + next if rule.lhs.nterm? + + errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm." + end + + return if errors.empty? + + raise errors.join("\n") + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/auxiliary.rb b/tool/lrama/lib/lrama/grammar/auxiliary.rb new file mode 100644 index 0000000000..933574b0f6 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/auxiliary.rb @@ -0,0 +1,7 @@ +module Lrama + class Grammar + # Grammar file information not used by States but by Output + class Auxiliary < Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true) + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/binding.rb b/tool/lrama/lib/lrama/grammar/binding.rb new file mode 100644 index 0000000000..e5ea3fb037 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/binding.rb @@ -0,0 +1,24 @@ +module Lrama + class Grammar + class Binding + attr_reader :actual_args, :count + + def initialize(parameterizing_rule, actual_args) + @parameters = parameterizing_rule.parameters + @actual_args = actual_args + @parameter_to_arg = @parameters.zip(actual_args).map do |param, arg| + [param.s_value, arg] + end.to_h + end + + def resolve_symbol(symbol) + if symbol.is_a?(Lexer::Token::InstantiateRule) + resolved_args = symbol.args.map { |arg| resolve_symbol(arg) } + Lrama::Lexer::Token::InstantiateRule.new(s_value: symbol.s_value, location: symbol.location, args: resolved_args, lhs_tag: symbol.lhs_tag) + else + @parameter_to_arg[symbol.s_value] || symbol + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code.rb b/tool/lrama/lib/lrama/grammar/code.rb new file mode 100644 index 0000000000..3bad599dae --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code.rb @@ -0,0 +1,51 @@ +require "forwardable" +require "lrama/grammar/code/destructor_code" +require "lrama/grammar/code/initial_action_code" +require "lrama/grammar/code/no_reference_code" +require "lrama/grammar/code/printer_code" +require "lrama/grammar/code/rule_action" + +module Lrama + class Grammar + class Code + extend Forwardable + + def_delegators "token_code", :s_value, :line, :column, :references + + attr_reader :type, :token_code + + def initialize(type:, token_code:) + @type = type + @token_code = token_code + end + + def ==(other) + self.class == other.class && + self.type == other.type && + self.token_code == other.token_code + end + + # $$, $n, @$, @n are translated to C code + def translated_code + t_code = s_value.dup + + references.reverse_each do |ref| + first_column = ref.first_column + last_column = ref.last_column + + str = reference_to_c(ref) + + t_code[first_column...last_column] = str + end + + return t_code + end + + private + + def reference_to_c(ref) + raise NotImplementedError.new("#reference_to_c is not implemented") + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code/destructor_code.rb b/tool/lrama/lib/lrama/grammar/code/destructor_code.rb new file mode 100644 index 0000000000..70360eb90f --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code/destructor_code.rb @@ -0,0 +1,40 @@ +module Lrama + class Grammar + class Code + class DestructorCode < Code + def initialize(type:, token_code:, tag:) + super(type: type, token_code: token_code) + @tag = tag + end + + private + + # * ($$) *yyvaluep + # * (@$) *yylocationp + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + member = @tag.member + "((*yyvaluep).#{member})" + when ref.type == :at && ref.name == "$" # @$ + "(*yylocationp)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in #{type}." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb b/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb new file mode 100644 index 0000000000..a694f193cb --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code/initial_action_code.rb @@ -0,0 +1,34 @@ +module Lrama + class Grammar + class Code + class InitialActionCode < Code + private + + # * ($$) yylval + # * (@$) yylloc + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + "yylval" + when ref.type == :at && ref.name == "$" # @$ + "yylloc" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in initial_action." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in initial_action." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in initial_action." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in initial_action." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb b/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb new file mode 100644 index 0000000000..6e614cc64a --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code/no_reference_code.rb @@ -0,0 +1,28 @@ +module Lrama + class Grammar + class Code + class NoReferenceCode < Code + private + + # * ($$) error + # * (@$) error + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar # $$, $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @$, @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:$, $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code/printer_code.rb b/tool/lrama/lib/lrama/grammar/code/printer_code.rb new file mode 100644 index 0000000000..ffccd89395 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code/printer_code.rb @@ -0,0 +1,40 @@ +module Lrama + class Grammar + class Code + class PrinterCode < Code + def initialize(type:, token_code:, tag:) + super(type: type, token_code: token_code) + @tag = tag + end + + private + + # * ($$) *yyvaluep + # * (@$) *yylocationp + # * ($:$) error + # * ($1) error + # * (@1) error + # * ($:1) error + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + member = @tag.member + "((*yyvaluep).#{member})" + when ref.type == :at && ref.name == "$" # @$ + "(*yylocationp)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:#{ref.value} can not be used in #{type}." + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in #{type}." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in #{type}." + when ref.type == :index # $:n + raise "$:#{ref.value} can not be used in #{type}." + else + raise "Unexpected. #{self}, #{ref}" + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/code/rule_action.rb b/tool/lrama/lib/lrama/grammar/code/rule_action.rb new file mode 100644 index 0000000000..d3c0eab64a --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code/rule_action.rb @@ -0,0 +1,88 @@ +module Lrama + class Grammar + class Code + class RuleAction < Code + def initialize(type:, token_code:, rule:) + super(type: type, token_code: token_code) + @rule = rule + end + + private + + # * ($$) yyval + # * (@$) yyloc + # * ($:$) error + # * ($1) yyvsp[i] + # * (@1) yylsp[i] + # * ($:1) i - 1 + # + # + # Consider a rule like + # + # class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # + # For the semantic action of original rule: + # + # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # "Position in grammar" $1 $2 $3 $4 $5 + # "Index for yyvsp" -4 -3 -2 -1 0 + # "$:n" $:1 $:2 $:3 $:4 $:5 + # "index of $:n" -5 -4 -3 -2 -1 + # + # + # For the first midrule action: + # + # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } + # "Position in grammar" $1 + # "Index for yyvsp" 0 + # "$:n" $:1 + def reference_to_c(ref) + case + when ref.type == :dollar && ref.name == "$" # $$ + tag = ref.ex_tag || lhs.tag + raise_tag_not_found_error(ref) unless tag + "(yyval.#{tag.member})" + when ref.type == :at && ref.name == "$" # @$ + "(yyloc)" + when ref.type == :index && ref.name == "$" # $:$ + raise "$:$ is not supported" + when ref.type == :dollar # $n + i = -position_in_rhs + ref.index + tag = ref.ex_tag || rhs[ref.index - 1].tag + raise_tag_not_found_error(ref) unless tag + "(yyvsp[#{i}].#{tag.member})" + when ref.type == :at # @n + i = -position_in_rhs + ref.index + "(yylsp[#{i}])" + when ref.type == :index # $:n + i = -position_in_rhs + ref.index + "(#{i} - 1)" + else + raise "Unexpected. #{self}, #{ref}" + end + end + + def position_in_rhs + # If rule is not derived rule, User Code is only action at + # the end of rule RHS. In such case, the action is located on + # `@rule.rhs.count`. + @rule.position_in_original_rule_rhs || @rule.rhs.count + end + + # If this is midrule action, RHS is a RHS of the original rule. + def rhs + (@rule.original_rule || @rule).rhs + end + + # Unlike `rhs`, LHS is always a LHS of the rule. + def lhs + @rule.lhs + end + + def raise_tag_not_found_error(ref) + raise "Tag is not specified for '$#{ref.value}' in '#{@rule}'" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/counter.rb b/tool/lrama/lib/lrama/grammar/counter.rb new file mode 100644 index 0000000000..c13f4ec3e3 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/counter.rb @@ -0,0 +1,15 @@ +module Lrama + class Grammar + class Counter + def initialize(number) + @number = number + end + + def increment + n = @number + @number += 1 + n + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/destructor.rb b/tool/lrama/lib/lrama/grammar/destructor.rb new file mode 100644 index 0000000000..4b7059e923 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/destructor.rb @@ -0,0 +1,9 @@ +module Lrama + class Grammar + class Destructor < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::DestructorCode.new(type: :destructor, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/error_token.rb b/tool/lrama/lib/lrama/grammar/error_token.rb new file mode 100644 index 0000000000..8efde7df33 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/error_token.rb @@ -0,0 +1,9 @@ +module Lrama + class Grammar + class ErrorToken < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::PrinterCode.new(type: :error_token, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb new file mode 100644 index 0000000000..d371805f4b --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterizing_rule.rb @@ -0,0 +1,3 @@ +require_relative 'parameterizing_rule/resolver' +require_relative 'parameterizing_rule/rhs' +require_relative 'parameterizing_rule/rule' diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb new file mode 100644 index 0000000000..d8f3ae7897 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterizing_rule/resolver.rb @@ -0,0 +1,56 @@ +module Lrama + class Grammar + class ParameterizingRule + class Resolver + attr_accessor :rules, :created_lhs_list + + def initialize + @rules = [] + @created_lhs_list = [] + end + + def add_parameterizing_rule(rule) + @rules << rule + end + + def find_rule(token) + select_rules(@rules, token).last + end + + def find_inline(token) + @rules.select { |rule| rule.name == token.s_value && rule.is_inline }.last + end + + def created_lhs(lhs_s_value) + @created_lhs_list.reverse.find { |created_lhs| created_lhs.s_value == lhs_s_value } + end + + private + + def select_rules(rules, token) + rules = select_not_inline_rules(rules) + rules = select_rules_by_name(rules, token.rule_name) + rules = rules.select { |rule| rule.required_parameters_count == token.args_count } + if rules.empty? + raise "Invalid number of arguments. `#{token.rule_name}`" + else + rules + end + end + + def select_not_inline_rules(rules) + rules.select { |rule| !rule.is_inline } + end + + def select_rules_by_name(rules, rule_name) + rules = rules.select { |rule| rule.name == rule_name } + if rules.empty? + raise "Parameterizing rule does not exist. `#{rule_name}`" + else + rules + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb new file mode 100644 index 0000000000..3eb92f8ef4 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterizing_rule/rhs.rb @@ -0,0 +1,37 @@ +module Lrama + class Grammar + class ParameterizingRule + class Rhs + attr_accessor :symbols, :user_code, :precedence_sym + + def initialize + @symbols = [] + @user_code = nil + @precedence_sym = nil + end + + def resolve_user_code(bindings) + return unless user_code + + var_to_arg = {} + symbols.each do |sym| + resolved_sym = bindings.resolve_symbol(sym) + if resolved_sym != sym + var_to_arg[sym.s_value] = resolved_sym.s_value + end + end + + var_to_arg.each do |var, arg| + user_code.references.each do |ref| + if ref.name == var + ref.name = arg + end + end + end + + return user_code + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb b/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb new file mode 100644 index 0000000000..38f0fca4ea --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/parameterizing_rule/rule.rb @@ -0,0 +1,18 @@ +module Lrama + class Grammar + class ParameterizingRule + class Rule + attr_reader :name, :parameters, :rhs_list, :required_parameters_count, :tag, :is_inline + + def initialize(name, parameters, rhs_list, tag: nil, is_inline: false) + @name = name + @parameters = parameters + @rhs_list = rhs_list + @tag = tag + @is_inline = is_inline + @required_parameters_count = parameters.count + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/percent_code.rb b/tool/lrama/lib/lrama/grammar/percent_code.rb new file mode 100644 index 0000000000..8cbc5aef2c --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/percent_code.rb @@ -0,0 +1,12 @@ +module Lrama + class Grammar + class PercentCode + attr_reader :name, :code + + def initialize(name, code) + @name = name + @code = code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/precedence.rb b/tool/lrama/lib/lrama/grammar/precedence.rb new file mode 100644 index 0000000000..fed739b3c0 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/precedence.rb @@ -0,0 +1,11 @@ +module Lrama + class Grammar + class Precedence < Struct.new(:type, :precedence, keyword_init: true) + include Comparable + + def <=>(other) + self.precedence <=> other.precedence + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/printer.rb b/tool/lrama/lib/lrama/grammar/printer.rb new file mode 100644 index 0000000000..8984a96e1a --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/printer.rb @@ -0,0 +1,9 @@ +module Lrama + class Grammar + class Printer < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true) + def translated_code(tag) + Code::PrinterCode.new(type: :printer, token_code: token_code, tag: tag).translated_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/reference.rb b/tool/lrama/lib/lrama/grammar/reference.rb new file mode 100644 index 0000000000..c56e7673a6 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/reference.rb @@ -0,0 +1,14 @@ +module Lrama + class Grammar + # type: :dollar or :at + # name: String (e.g. $$, $foo, $expr.right) + # number: Integer (e.g. $1) + # index: Integer + # ex_tag: "$<tag>1" (Optional) + class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true) + def value + name || number + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/rule.rb b/tool/lrama/lib/lrama/grammar/rule.rb new file mode 100644 index 0000000000..0e06edc80d --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/rule.rb @@ -0,0 +1,60 @@ +module Lrama + class Grammar + # _rhs holds original RHS element. Use rhs to refer to Symbol. + class Rule < Struct.new(:id, :_lhs, :lhs, :lhs_tag, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true) + attr_accessor :original_rule + + def ==(other) + self.class == other.class && + self.lhs == other.lhs && + self.lhs_tag == other.lhs_tag && + self.rhs == other.rhs && + self.token_code == other.token_code && + self.position_in_original_rule_rhs == other.position_in_original_rule_rhs && + self.nullable == other.nullable && + self.precedence_sym == other.precedence_sym && + self.lineno == other.lineno + end + + # TODO: Change this to display_name + def to_s + l = lhs.id.s_value + r = empty_rule? ? "ε" : rhs.map {|r| r.id.s_value }.join(" ") + + "#{l} -> #{r}" + end + + # Used by #user_actions + def as_comment + l = lhs.id.s_value + r = empty_rule? ? "%empty" : rhs.map(&:display_name).join(" ") + + "#{l}: #{r}" + end + + def with_actions + "#{to_s} {#{token_code&.s_value}}" + end + + # opt_nl: ε <-- empty_rule + # | '\n' <-- not empty_rule + def empty_rule? + rhs.empty? + end + + def precedence + precedence_sym&.precedence + end + + def initial_rule? + id == 0 + end + + def translated_code + return nil unless token_code + + Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/rule_builder.rb b/tool/lrama/lib/lrama/grammar/rule_builder.rb new file mode 100644 index 0000000000..ccb41e67f8 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/rule_builder.rb @@ -0,0 +1,268 @@ +module Lrama + class Grammar + class RuleBuilder + attr_accessor :lhs, :line + attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym + + def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false) + @rule_counter = rule_counter + @midrule_action_counter = midrule_action_counter + @position_in_original_rule_rhs = position_in_original_rule_rhs + @skip_preprocess_references = skip_preprocess_references + + @lhs = nil + @lhs_tag = lhs_tag + @rhs = [] + @user_code = nil + @precedence_sym = nil + @line = nil + @rules = [] + @rule_builders_for_parameterizing_rules = [] + @rule_builders_for_derived_rules = [] + @rule_builders_for_inline_rules = [] + @parameterizing_rules = [] + @inline_rules = [] + @midrule_action_rules = [] + end + + def add_rhs(rhs) + if !@line + @line = rhs.line + end + + flush_user_code + + @rhs << rhs + end + + def user_code=(user_code) + if !@line + @line = user_code&.line + end + + flush_user_code + + @user_code = user_code + end + + def precedence_sym=(precedence_sym) + flush_user_code + + @precedence_sym = precedence_sym + end + + def complete_input + freeze_rhs + end + + def setup_rules(parameterizing_rule_resolver) + preprocess_references unless @skip_preprocess_references + if rhs.any? { |token| parameterizing_rule_resolver.find_inline(token) } + resolve_inline(parameterizing_rule_resolver) + else + process_rhs(parameterizing_rule_resolver) + end + build_rules + end + + def rules + @parameterizing_rules + @inline_rules + @midrule_action_rules + @rules + end + + private + + def freeze_rhs + @rhs.freeze + end + + def preprocess_references + numberize_references + end + + def build_rules + tokens = @replaced_rhs + + if tokens + rule = Rule.new( + id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code, + position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line + ) + @rules = [rule] + @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder| + rule_builder.rules + end.flatten + @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder| + rule_builder.rules + end.flatten + @midrule_action_rules.each do |r| + r.original_rule = rule + end + else + @inline_rules = @rule_builders_for_inline_rules.map do |rule_builder| + rule_builder.rules + end.flatten + end + end + + # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on. + # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`. + def process_rhs(parameterizing_rule_resolver) + return if @replaced_rhs + + @replaced_rhs = [] + + rhs.each_with_index do |token, i| + case token + when Lrama::Lexer::Token::Char + @replaced_rhs << token + when Lrama::Lexer::Token::Ident + @replaced_rhs << token + when Lrama::Lexer::Token::InstantiateRule + parameterizing_rule = parameterizing_rule_resolver.find_rule(token) + raise "Unexpected token. #{token}" unless parameterizing_rule + + bindings = Binding.new(parameterizing_rule, token.args) + lhs_s_value = lhs_s_value(token, bindings) + if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value)) + @replaced_rhs << created_lhs + else + lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location) + @replaced_rhs << lhs_token + parameterizing_rule_resolver.created_lhs_list << lhs_token + parameterizing_rule.rhs_list.each do |r| + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: token.lhs_tag || parameterizing_rule.tag) + rule_builder.lhs = lhs_token + r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) } + rule_builder.line = line + rule_builder.precedence_sym = r.precedence_sym + rule_builder.user_code = r.resolve_user_code(bindings) + rule_builder.complete_input + rule_builder.setup_rules(parameterizing_rule_resolver) + @rule_builders_for_parameterizing_rules << rule_builder + end + end + when Lrama::Lexer::Token::UserCode + prefix = token.referred ? "@" : "$@" + tag = token.tag || lhs_tag + new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s) + @replaced_rhs << new_token + + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: tag, skip_preprocess_references: true) + rule_builder.lhs = new_token + rule_builder.user_code = token + rule_builder.complete_input + rule_builder.setup_rules(parameterizing_rule_resolver) + + @rule_builders_for_derived_rules << rule_builder + else + raise "Unexpected token. #{token}" + end + end + end + + def lhs_s_value(token, bindings) + s_values = token.args.map do |arg| + resolved = bindings.resolve_symbol(arg) + if resolved.is_a?(Lexer::Token::InstantiateRule) + [resolved.s_value, resolved.args.map(&:s_value)] + else + resolved.s_value + end + end + "#{token.rule_name}_#{s_values.join('_')}" + end + + def resolve_inline(parameterizing_rule_resolver) + rhs.each_with_index do |token, i| + if inline_rule = parameterizing_rule_resolver.find_inline(token) + inline_rule.rhs_list.each_with_index do |inline_rhs| + rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: lhs_tag, skip_preprocess_references: true) + resolve_inline_rhs(rule_builder, inline_rhs, i) + rule_builder.lhs = lhs + rule_builder.line = line + rule_builder.user_code = replace_inline_user_code(inline_rhs, i) + rule_builder.complete_input + rule_builder.setup_rules(parameterizing_rule_resolver) + @rule_builders_for_inline_rules << rule_builder + end + end + end + end + + def resolve_inline_rhs(rule_builder, inline_rhs, index) + rhs.each_with_index do |token, i| + if index == i + inline_rhs.symbols.each { |sym| rule_builder.add_rhs(sym) } + else + rule_builder.add_rhs(token) + end + end + end + + def replace_inline_user_code(inline_rhs, index) + return user_code if inline_rhs.user_code.nil? + return user_code if user_code.nil? + + code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value) + Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location) + end + + def numberize_references + # Bison n'th component is 1-origin + (rhs + [user_code]).compact.each.with_index(1) do |token, i| + next unless token.is_a?(Lrama::Lexer::Token::UserCode) + + token.references.each do |ref| + ref_name = ref.name + + if ref_name + if ref_name == '$' + ref.name = '$' + else + candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) } + + if candidates.size >= 2 + token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.") + end + + unless (referring_symbol = candidates.first) + token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.") + end + + if referring_symbol[1] == 0 # Refers to LHS + ref.name = '$' + else + ref.number = referring_symbol[1] + end + end + end + + if ref.number + # TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens, + # `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation, + # it needs resolves from number to index. + ref.index = ref.number + end + + # TODO: Need to check index of @ too? + next if ref.type == :at + + if ref.index + # TODO: Prohibit $0 even so Bison allows it? + # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html + token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i + rhs[ref.index - 1].referred = true + end + end + end + end + + def flush_user_code + if (c = @user_code) + @rhs << c + @user_code = nil + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/stdlib.y b/tool/lrama/lib/lrama/grammar/stdlib.y new file mode 100644 index 0000000000..d6e89c908c --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/stdlib.y @@ -0,0 +1,122 @@ +/********************************************************************** + + stdlib.y + + This is lrama's standard library. It provides a number of + parameterizing rule definitions, such as options and lists, + that should be useful in a number of situations. + +**********************************************************************/ + +// ------------------------------------------------------------------- +// Options + +/* + * program: option(number) + * + * => + * + * program: option_number + * option_number: %empty + * option_number: number + */ +%rule option(X): /* empty */ + | X + ; + +// ------------------------------------------------------------------- +// Sequences + +/* + * program: preceded(opening, X) + * + * => + * + * program: preceded_opening_X + * preceded_opening_X: opening X + */ +%rule preceded(opening, X): opening X { $$ = $2; } + ; + +/* + * program: terminated(X, closing) + * + * => + * + * program: terminated_X_closing + * terminated_X_closing: X closing + */ +%rule terminated(X, closing): X closing { $$ = $1; } + ; + +/* + * program: delimited(opening, X, closing) + * + * => + * + * program: delimited_opening_X_closing + * delimited_opening_X_closing: opening X closing + */ +%rule delimited(opening, X, closing): opening X closing { $$ = $2; } + ; + +// ------------------------------------------------------------------- +// Lists + +/* + * program: list(number) + * + * => + * + * program: list_number + * list_number: %empty + * list_number: list_number number + */ +%rule list(X): /* empty */ + | list(X) X + ; + +/* + * program: nonempty_list(number) + * + * => + * + * program: nonempty_list_number + * nonempty_list_number: number + * nonempty_list_number: nonempty_list_number number + */ +%rule nonempty_list(X): X + | nonempty_list(X) X + ; + +/* + * program: separated_nonempty_list(comma, number) + * + * => + * + * program: separated_nonempty_list_comma_number + * separated_nonempty_list_comma_number: number + * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number + */ +%rule separated_nonempty_list(separator, X): X + | separated_nonempty_list(separator, X) separator X + ; + +/* + * program: separated_list(comma, number) + * + * => + * + * program: separated_list_comma_number + * separated_list_comma_number: option_separated_nonempty_list_comma_number + * option_separated_nonempty_list_comma_number: %empty + * option_separated_nonempty_list_comma_number: separated_nonempty_list_comma_number + * separated_nonempty_list_comma_number: number + * separated_nonempty_list_comma_number: comma separated_nonempty_list_comma_number number + */ +%rule separated_list(separator, X): option(separated_nonempty_list(separator, X)) + ; + +%% + +%union{}; diff --git a/tool/lrama/lib/lrama/grammar/symbol.rb b/tool/lrama/lib/lrama/grammar/symbol.rb new file mode 100644 index 0000000000..deb67ad9a8 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/symbol.rb @@ -0,0 +1,103 @@ +# Symbol is both of nterm and term +# `number` is both for nterm and term +# `token_id` is tokentype for term, internal sequence number for nterm +# +# TODO: Add validation for ASCII code range for Token::Char + +module Lrama + class Grammar + class Symbol + attr_accessor :id, :alias_name, :tag, :number, :token_id, :nullable, :precedence, + :printer, :destructor, :error_token, :first_set, :first_set_bitmap + attr_reader :term + attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + + def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil, destructor: nil) + @id = id + @alias_name = alias_name + @number = number + @tag = tag + @term = term + @token_id = token_id + @nullable = nullable + @precedence = precedence + @printer = printer + @destructor = destructor + end + + def term? + term + end + + def nterm? + !term + end + + def eof_symbol? + !!@eof_symbol + end + + def error_symbol? + !!@error_symbol + end + + def undef_symbol? + !!@undef_symbol + end + + def accept_symbol? + !!@accept_symbol + end + + def display_name + alias_name || id.s_value + end + + # name for yysymbol_kind_t + # + # See: b4_symbol_kind_base + # @type var name: String + def enum_name + case + when accept_symbol? + name = "YYACCEPT" + when eof_symbol? + name = "YYEOF" + when term? && id.is_a?(Lrama::Lexer::Token::Char) + name = number.to_s + display_name + when term? && id.is_a?(Lrama::Lexer::Token::Ident) + name = id.s_value + when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) + name = number.to_s + id.s_value + when nterm? + name = id.s_value + else + raise "Unexpected #{self}" + end + + "YYSYMBOL_" + name.gsub(/\W+/, "_") + end + + # comment for yysymbol_kind_t + def comment + case + when accept_symbol? + # YYSYMBOL_YYACCEPT + id.s_value + when eof_symbol? + # YYEOF + alias_name + when (term? && 0 < token_id && token_id < 128) + # YYSYMBOL_3_backslash_, YYSYMBOL_14_ + alias_name || id.s_value + when id.s_value.include?("$") || id.s_value.include?("@") + # YYSYMBOL_21_1 + id.s_value + else + # YYSYMBOL_keyword_class, YYSYMBOL_strings_1 + alias_name || id.s_value + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/symbols.rb b/tool/lrama/lib/lrama/grammar/symbols.rb new file mode 100644 index 0000000000..cc9b4ec559 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/symbols.rb @@ -0,0 +1 @@ +require_relative "symbols/resolver" diff --git a/tool/lrama/lib/lrama/grammar/symbols/resolver.rb b/tool/lrama/lib/lrama/grammar/symbols/resolver.rb new file mode 100644 index 0000000000..1788ed63fa --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/symbols/resolver.rb @@ -0,0 +1,293 @@ +module Lrama + class Grammar + class Symbols + class Resolver + attr_reader :terms, :nterms + + def initialize + @terms = [] + @nterms = [] + end + + def symbols + @symbols ||= (@terms + @nterms) + end + + def sort_by_number! + symbols.sort_by!(&:number) + end + + def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) + if token_id && (sym = find_symbol_by_token_id(token_id)) + if replace + sym.id = id + sym.alias_name = alias_name + sym.tag = tag + end + + return sym + end + + if (sym = find_symbol_by_id(id)) + return sym + end + + @symbols = nil + term = Symbol.new( + id: id, alias_name: alias_name, number: nil, tag: tag, + term: true, token_id: token_id, nullable: false + ) + @terms << term + term + end + + def add_nterm(id:, alias_name: nil, tag: nil) + return if find_symbol_by_id(id) + + @symbols = nil + nterm = Symbol.new( + id: id, alias_name: alias_name, number: nil, tag: tag, + term: false, token_id: nil, nullable: nil, + ) + @nterms << nterm + nterm + end + + def find_symbol_by_s_value(s_value) + symbols.find { |s| s.id.s_value == s_value } + end + + def find_symbol_by_s_value!(s_value) + find_symbol_by_s_value(s_value) || (raise "Symbol not found. value: `#{s_value}`") + end + + def find_symbol_by_id(id) + symbols.find do |s| + s.id == id || s.alias_name == id.s_value + end + end + + def find_symbol_by_id!(id) + find_symbol_by_id(id) || (raise "Symbol not found. #{id}") + end + + def find_symbol_by_token_id(token_id) + symbols.find {|s| s.token_id == token_id } + end + + def find_symbol_by_number!(number) + sym = symbols[number] + + raise "Symbol not found. number: `#{number}`" unless sym + raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number + + sym + end + + def fill_symbol_number + # YYEMPTY = -2 + # YYEOF = 0 + # YYerror = 1 + # YYUNDEF = 2 + @number = 3 + fill_terms_number + fill_nterms_number + end + + def fill_nterm_type(types) + types.each do |type| + nterm = find_nterm_by_id!(type.id) + nterm.tag = type.tag + end + end + + def fill_printer(printers) + symbols.each do |sym| + printers.each do |printer| + printer.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.printer = printer if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.printer = printer if sym.tag == ident_or_tag + else + raise "Unknown token type. #{printer}" + end + end + end + end + end + + def fill_destructor(destructors) + symbols.each do |sym| + destructors.each do |destructor| + destructor.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.destructor = destructor if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.destructor = destructor if sym.tag == ident_or_tag + else + raise "Unknown token type. #{destructor}" + end + end + end + end + end + + def fill_error_token(error_tokens) + symbols.each do |sym| + error_tokens.each do |token| + token.ident_or_tags.each do |ident_or_tag| + case ident_or_tag + when Lrama::Lexer::Token::Ident + sym.error_token = token if sym.id == ident_or_tag + when Lrama::Lexer::Token::Tag + sym.error_token = token if sym.tag == ident_or_tag + else + raise "Unknown token type. #{token}" + end + end + end + end + end + + def token_to_symbol(token) + case token + when Lrama::Lexer::Token + find_symbol_by_id!(token) + else + raise "Unknown class: #{token}" + end + end + + def validate! + validate_number_uniqueness! + validate_alias_name_uniqueness! + end + + private + + def find_nterm_by_id!(id) + @nterms.find do |s| + s.id == id + end || (raise "Symbol not found. #{id}") + end + + def fill_terms_number + # Character literal in grammar file has + # token id corresponding to ASCII code by default, + # so start token_id from 256. + token_id = 256 + + @terms.each do |sym| + while used_numbers[@number] do + @number += 1 + end + + if sym.number.nil? + sym.number = @number + used_numbers[@number] = true + @number += 1 + end + + # If id is Token::Char, it uses ASCII code + if sym.token_id.nil? + if sym.id.is_a?(Lrama::Lexer::Token::Char) + # Ignore ' on the both sides + case sym.id.s_value[1..-2] + when "\\b" + sym.token_id = 8 + when "\\f" + sym.token_id = 12 + when "\\n" + sym.token_id = 10 + when "\\r" + sym.token_id = 13 + when "\\t" + sym.token_id = 9 + when "\\v" + sym.token_id = 11 + when "\"" + sym.token_id = 34 + when "'" + sym.token_id = 39 + when "\\\\" + sym.token_id = 92 + when /\A\\(\d+)\z/ + unless (id = Integer($1, 8)).nil? + sym.token_id = id + else + raise "Unknown Char s_value #{sym}" + end + when /\A(.)\z/ + unless (id = $1&.bytes&.first).nil? + sym.token_id = id + else + raise "Unknown Char s_value #{sym}" + end + else + raise "Unknown Char s_value #{sym}" + end + else + sym.token_id = token_id + token_id += 1 + end + end + end + end + + def fill_nterms_number + token_id = 0 + + @nterms.each do |sym| + while used_numbers[@number] do + @number += 1 + end + + if sym.number.nil? + sym.number = @number + used_numbers[@number] = true + @number += 1 + end + + if sym.token_id.nil? + sym.token_id = token_id + token_id += 1 + end + end + end + + def used_numbers + return @used_numbers if defined?(@used_numbers) + + @used_numbers = {} + symbols.map(&:number).each do |n| + @used_numbers[n] = true + end + @used_numbers + end + + def validate_number_uniqueness! + invalid = symbols.group_by(&:number).select do |number, syms| + syms.count > 1 + end + + return if invalid.empty? + + raise "Symbol number is duplicated. #{invalid}" + end + + def validate_alias_name_uniqueness! + invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms| + syms.count > 1 + end + + return if invalid.empty? + + raise "Symbol alias name is duplicated. #{invalid}" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/type.rb b/tool/lrama/lib/lrama/grammar/type.rb new file mode 100644 index 0000000000..6b4b0961a1 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/type.rb @@ -0,0 +1,18 @@ +module Lrama + class Grammar + class Type + attr_reader :id, :tag + + def initialize(id:, tag:) + @id = id + @tag = tag + end + + def ==(other) + self.class == other.class && + self.id == other.id && + self.tag == other.tag + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/union.rb b/tool/lrama/lib/lrama/grammar/union.rb new file mode 100644 index 0000000000..854bffb5c1 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/union.rb @@ -0,0 +1,10 @@ +module Lrama + class Grammar + class Union < Struct.new(:code, :lineno, keyword_init: true) + def braces_less_code + # Braces is already removed by lexer + code.s_value + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb new file mode 100644 index 0000000000..40622a51b4 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer.rb @@ -0,0 +1,188 @@ +require "strscan" + +require "lrama/lexer/grammar_file" +require "lrama/lexer/location" +require "lrama/lexer/token" + +module Lrama + class Lexer + attr_reader :head_line, :head_column, :line + attr_accessor :status, :end_symbol + + SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'] + PERCENT_TOKENS = %w( + %union + %token + %type + %left + %right + %nonassoc + %expect + %define + %require + %printer + %destructor + %lex-param + %parse-param + %initial-action + %precedence + %prec + %error-token + %before-reduce + %after-reduce + %after-shift-error-token + %after-shift + %after-pop-stack + %empty + %code + %rule + %no-stdlib + %inline + ) + + def initialize(grammar_file) + @grammar_file = grammar_file + @scanner = StringScanner.new(grammar_file.text) + @head_column = @head = @scanner.pos + @head_line = @line = 1 + @status = :initial + @end_symbol = nil + end + + def next_token + case @status + when :initial + lex_token + when :c_declaration + lex_c_code + end + end + + def column + @scanner.pos - @head + end + + def location + Location.new( + grammar_file: @grammar_file, + first_line: @head_line, first_column: @head_column, + last_line: line, last_column: column + ) + end + + def lex_token + while !@scanner.eos? do + case + when @scanner.scan(/\n/) + newline + when @scanner.scan(/\s+/) + # noop + when @scanner.scan(/\/\*/) + lex_comment + when @scanner.scan(/\/\/.*(?<newline>\n)?/) + newline if @scanner[:newline] + else + break + end + end + + reset_first_position + + case + when @scanner.eos? + return + when @scanner.scan(/#{SYMBOLS.join('|')}/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/[\?\+\*]/) + return [@scanner.matched, @scanner.matched] + when @scanner.scan(/<\w+>/) + return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/'.'/) + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/) + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/".*?"/) + return [:STRING, %Q(#{@scanner.matched})] + when @scanner.scan(/\d+/) + return [:INTEGER, Integer(@scanner.matched)] + when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/) + token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location) + type = + if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/) + :IDENT_COLON + else + :IDENTIFIER + end + return [type, token] + else + raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}." + end + end + + def lex_c_code + nested = 0 + code = '' + reset_first_position + + while !@scanner.eos? do + case + when @scanner.scan(/{/) + code += @scanner.matched + nested += 1 + when @scanner.scan(/}/) + if nested == 0 && @end_symbol == '}' + @scanner.unscan + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] + else + code += @scanner.matched + nested -= 1 + end + when @scanner.check(/#{@end_symbol}/) + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] + when @scanner.scan(/\n/) + code += @scanner.matched + newline + when @scanner.scan(/".*?"/) + code += %Q(#{@scanner.matched}) + @line += @scanner.matched.count("\n") + when @scanner.scan(/'.*?'/) + code += %Q(#{@scanner.matched}) + when @scanner.scan(/[^\"'\{\}\n]+/) + code += @scanner.matched + when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) + code += @scanner.matched + else + code += @scanner.getch + end + end + raise ParseError, "Unexpected code: #{code}." + end + + private + + def lex_comment + while !@scanner.eos? do + case + when @scanner.scan(/\n/) + newline + when @scanner.scan(/\*\//) + return + else + @scanner.getch + end + end + end + + def reset_first_position + @head_line = line + @head_column = column + end + + def newline + @line += 1 + @head = @scanner.pos + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/grammar_file.rb b/tool/lrama/lib/lrama/lexer/grammar_file.rb new file mode 100644 index 0000000000..3d3368625d --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/grammar_file.rb @@ -0,0 +1,31 @@ +module Lrama + class Lexer + class GrammarFile + class Text < String + def inspect + length <= 50 ? super : "#{self[0..47]}...".inspect + end + end + + attr_reader :path, :text + + def initialize(path, text) + @path = path + @text = Text.new(text).freeze + end + + def inspect + "<#{self.class}: @path=#{path}, @text=#{text.inspect}>" + end + + def ==(other) + self.class == other.class && + self.path == other.path + end + + def lines + @lines ||= text.split("\n") + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb new file mode 100644 index 0000000000..aefce3e16b --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/location.rb @@ -0,0 +1,97 @@ +module Lrama + class Lexer + class Location + attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column + + def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:) + @grammar_file = grammar_file + @first_line = first_line + @first_column = first_column + @last_line = last_line + @last_column = last_column + end + + def ==(other) + self.class == other.class && + self.grammar_file == other.grammar_file && + self.first_line == other.first_line && + self.first_column == other.first_column && + self.last_line == other.last_line && + self.last_column == other.last_column + end + + def partial_location(left, right) + offset = -first_column + new_first_line = -1 + new_first_column = -1 + new_last_line = -1 + new_last_column = -1 + + _text.each.with_index do |line, index| + new_offset = offset + line.length + 1 + + if offset <= left && left <= new_offset + new_first_line = first_line + index + new_first_column = left - offset + end + + if offset <= right && right <= new_offset + new_last_line = first_line + index + new_last_column = right - offset + end + + offset = new_offset + end + + Location.new( + grammar_file: grammar_file, + first_line: new_first_line, first_column: new_first_column, + last_line: new_last_line, last_column: new_last_column + ) + end + + def to_s + "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})" + end + + def generate_error_message(error_message) + <<~ERROR.chomp + #{path}:#{first_line}:#{first_column}: #{error_message} + #{line_with_carets} + ERROR + end + + def line_with_carets + <<~TEXT + #{text} + #{carets} + TEXT + end + + private + + def path + grammar_file.path + end + + def blanks + (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ') + end + + def carets + blanks + '^' * (last_column - first_column) + end + + def text + @text ||= _text.join("\n") + end + + def _text + @_text ||=begin + range = (first_line - 1)...last_line + grammar_file.lines[range] or raise "#{range} is invalid" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb new file mode 100644 index 0000000000..59b49d5fba --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token.rb @@ -0,0 +1,56 @@ +require 'lrama/lexer/token/char' +require 'lrama/lexer/token/ident' +require 'lrama/lexer/token/instantiate_rule' +require 'lrama/lexer/token/tag' +require 'lrama/lexer/token/user_code' + +module Lrama + class Lexer + class Token + attr_reader :s_value, :location + attr_accessor :alias_name, :referred + + def initialize(s_value:, alias_name: nil, location: nil) + s_value.freeze + @s_value = s_value + @alias_name = alias_name + @location = location + end + + def to_s + "value: `#{s_value}`, location: #{location}" + end + + def referred_by?(string) + [self.s_value, self.alias_name].compact.include?(string) + end + + def ==(other) + self.class == other.class && self.s_value == other.s_value + end + + def first_line + location.first_line + end + alias :line :first_line + + def first_column + location.first_column + end + alias :column :first_column + + def last_line + location.last_line + end + + def last_column + location.last_column + end + + def invalid_ref(ref, message) + location = self.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message(message) + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb new file mode 100644 index 0000000000..ec3560ca09 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/char.rb @@ -0,0 +1,8 @@ +module Lrama + class Lexer + class Token + class Char < Token + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb new file mode 100644 index 0000000000..e576eaeccd --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/ident.rb @@ -0,0 +1,8 @@ +module Lrama + class Lexer + class Token + class Ident < Token + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb new file mode 100644 index 0000000000..1c4d1095c8 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb @@ -0,0 +1,23 @@ +module Lrama + class Lexer + class Token + class InstantiateRule < Token + attr_reader :args, :lhs_tag + + def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil) + super s_value: s_value, alias_name: alias_name, location: location + @args = args + @lhs_tag = lhs_tag + end + + def rule_name + s_value + end + + def args_count + args.count + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb new file mode 100644 index 0000000000..e54d773915 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/tag.rb @@ -0,0 +1,12 @@ +module Lrama + class Lexer + class Token + class Tag < Token + # Omit "<>" + def member + s_value[1..-2] or raise "Unexpected Tag format (#{s_value})" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb new file mode 100644 index 0000000000..4d487bf01c --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb @@ -0,0 +1,77 @@ +require "strscan" + +module Lrama + class Lexer + class Token + class UserCode < Token + attr_accessor :tag + + def references + @references ||= _references + end + + private + + def _references + scanner = StringScanner.new(s_value) + references = [] + + while !scanner.eos? do + case + when reference = scan_reference(scanner) + references << reference + when scanner.scan(/\/\*/) + scanner.scan_until(/\*\//) + else + scanner.getch + end + end + + references + end + + def scan_reference(scanner) + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/@(\d+)/) # @1 + return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + + # $: references + when scanner.scan(/\$:\$/) # $:$ + return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:(\d+)/) # $:1 + return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/option_parser.rb b/tool/lrama/lib/lrama/option_parser.rb new file mode 100644 index 0000000000..1e4d448fd1 --- /dev/null +++ b/tool/lrama/lib/lrama/option_parser.rb @@ -0,0 +1,142 @@ +require 'optparse' + +module Lrama + # Handle option parsing for the command line interface. + class OptionParser + def initialize + @options = Options.new + @trace = [] + @report = [] + end + + def parse(argv) + parse_by_option_parser(argv) + + @options.trace_opts = validate_trace(@trace) + @options.report_opts = validate_report(@report) + @options.grammar_file = argv.shift + + if !@options.grammar_file + abort "File should be specified\n" + end + + if @options.grammar_file == '-' + @options.grammar_file = argv.shift or abort "File name for STDIN should be specified\n" + else + @options.y = File.open(@options.grammar_file, 'r') + end + + if !@report.empty? && @options.report_file.nil? && @options.grammar_file + @options.report_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".output" + end + + if !@options.header_file && @options.header + case + when @options.outfile + @options.header_file = File.dirname(@options.outfile) + "/" + File.basename(@options.outfile, ".*") + ".h" + when @options.grammar_file + @options.header_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".h" + end + end + + @options + end + + private + + def parse_by_option_parser(argv) + ::OptionParser.new do |o| + o.banner = <<~BANNER + Lrama is LALR (1) parser generator written by Ruby. + + Usage: lrama [options] FILE + BANNER + o.separator '' + o.separator 'STDIN mode:' + o.separator 'lrama [options] - FILE read grammar from STDIN' + o.separator '' + o.separator 'Tuning the Parser:' + o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } + o.on('-t', 'reserved, do nothing') { } + o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.separator '' + o.separator 'Output:' + o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } + o.on('-d', 'also produce a header file') { @options.header = true } + o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v } + o.on_tail '' + o.on_tail 'Valid Reports:' + o.on_tail " #{VALID_REPORTS.join(' ')}" + + o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v } + o.on('-o', '--output=FILE', 'leave output to FILE') {|v| @options.outfile = v } + + o.on('--trace=THINGS', Array, 'also output trace logs at runtime') {|v| @trace = v } + o.on_tail '' + o.on_tail 'Valid Traces:' + o.on_tail " #{VALID_TRACES.join(' ')}" + + o.on('-v', 'reserved, do nothing') { } + o.separator '' + o.separator 'Error Recovery:' + o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true } + o.separator '' + o.separator 'Other options:' + o.on('-V', '--version', "output version information and exit") {|v| puts "lrama #{Lrama::VERSION}"; exit 0 } + o.on('-h', '--help', "display this help and exit") {|v| puts o; exit 0 } + o.on_tail + o.parse!(argv) + end + end + + BISON_REPORTS = %w[states itemsets lookaheads solved counterexamples cex all none] + OTHER_REPORTS = %w[verbose] + NOT_SUPPORTED_REPORTS = %w[cex none] + VALID_REPORTS = BISON_REPORTS + OTHER_REPORTS - NOT_SUPPORTED_REPORTS + + def validate_report(report) + list = VALID_REPORTS + h = { grammar: true } + + report.each do |r| + if list.include?(r) + h[r.to_sym] = true + else + raise "Invalid report option \"#{r}\"." + end + end + + if h[:all] + (BISON_REPORTS - NOT_SUPPORTED_REPORTS).each do |r| + h[r.to_sym] = true + end + + h.delete(:all) + end + + return h + end + + VALID_TRACES = %w[ + none locations scan parse automaton bitsets + closure grammar rules actions resource + sets muscles tools m4-early m4 skeleton time + ielr cex all + ] + + def validate_trace(trace) + list = VALID_TRACES + h = {} + + trace.each do |t| + if list.include?(t) + h[t.to_sym] = true + else + raise "Invalid trace option \"#{t}\"." + end + end + + return h + end + end +end diff --git a/tool/lrama/lib/lrama/options.rb b/tool/lrama/lib/lrama/options.rb new file mode 100644 index 0000000000..739ca16f55 --- /dev/null +++ b/tool/lrama/lib/lrama/options.rb @@ -0,0 +1,24 @@ +module Lrama + # Command line options. + class Options + attr_accessor :skeleton, :header, :header_file, + :report_file, :outfile, + :error_recovery, :grammar_file, + :trace_opts, :report_opts, :y, + :debug + + def initialize + @skeleton = "bison/yacc.c" + @header = false + @header_file = nil + @report_file = nil + @outfile = "y.tab.c" + @error_recovery = false + @grammar_file = nil + @trace_opts = nil + @report_opts = nil + @y = STDIN + @debug = false + end + end +end diff --git a/tool/lrama/lib/lrama/output.rb b/tool/lrama/lib/lrama/output.rb new file mode 100644 index 0000000000..642c8b4708 --- /dev/null +++ b/tool/lrama/lib/lrama/output.rb @@ -0,0 +1,490 @@ +require "erb" +require "forwardable" +require "lrama/report/duration" + +module Lrama + class Output + extend Forwardable + include Report::Duration + + attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header + + def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates, + :yymaxutok, :yypact_ninf, :yytable_ninf + + def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + + def initialize( + out:, output_file_path:, template_name:, grammar_file_path:, + context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false + ) + @out = out + @output_file_path = output_file_path + @template_name = template_name + @grammar_file_path = grammar_file_path + @header_out = header_out + @header_file_path = header_file_path + @context = context + @grammar = grammar + @error_recovery = error_recovery + @include_header = header_file_path ? header_file_path.sub("./", "") : nil + end + + if ERB.instance_method(:initialize).parameters.last.first == :key + def self.erb(input) + ERB.new(input, trim_mode: '-') + end + else + def self.erb(input) + ERB.new(input, nil, '-') + end + end + + def render_partial(file) + render_template(partial_file(file)) + end + + def render + report_duration(:render) do + tmp = eval_template(template_file, @output_file_path) + @out << tmp + + if @header_file_path + tmp = eval_template(header_template_file, @header_file_path) + + if @header_out + @header_out << tmp + else + File.write(@header_file_path, tmp) + end + end + end + end + + # A part of b4_token_enums + def token_enums + str = "" + + @context.yytokentype.each do |s_value, token_id, display_name| + s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",") + + if display_name + str << sprintf(" %-30s /* %s */\n", s, display_name) + else + str << sprintf(" %s\n", s) + end + end + + str + end + + # b4_symbol_enum + def symbol_enum + str = "" + + last_sym_number = @context.yysymbol_kind_t.last[1] + @context.yysymbol_kind_t.each do |s_value, sym_number, display_name| + s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",") + + if display_name + str << sprintf(" %-40s /* %s */\n", s, display_name) + else + str << sprintf(" %s\n", s) + end + end + + str + end + + def yytranslate + int_array_to_string(@context.yytranslate) + end + + def yytranslate_inverted + int_array_to_string(@context.yytranslate_inverted) + end + + def yyrline + int_array_to_string(@context.yyrline) + end + + def yytname + string_array_to_string(@context.yytname) + " YY_NULLPTR" + end + + # b4_int_type_for + def int_type_for(ary) + min = ary.min + max = ary.max + + case + when (-127 <= min && min <= 127) && (-127 <= max && max <= 127) + "yytype_int8" + when (0 <= min && min <= 255) && (0 <= max && max <= 255) + "yytype_uint8" + when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767) + "yytype_int16" + when (0 <= min && min <= 65535) && (0 <= max && max <= 65535) + "yytype_uint16" + else + "int" + end + end + + def symbol_actions_for_printer + str = "" + + @grammar.symbols.each do |sym| + next unless sym.printer + + str << <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.printer.lineno} "#{@grammar_file_path}" + {#{sym.printer.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end + + str + end + + def symbol_actions_for_destructor + str = "" + + @grammar.symbols.each do |sym| + next unless sym.destructor + + str << <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.destructor.lineno} "#{@grammar_file_path}" + {#{sym.destructor.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end + + str + end + + # b4_user_initial_action + def user_initial_action(comment = "") + return "" unless @grammar.initial_action + + <<-STR + #{comment} +#line #{@grammar.initial_action.line} "#{@grammar_file_path}" + {#{@grammar.initial_action.translated_code}} + STR + end + + def after_shift_function(comment = "") + return "" unless @grammar.after_shift + + <<-STR + #{comment} +#line #{@grammar.after_shift.line} "#{@grammar_file_path}" + {#{@grammar.after_shift.s_value}(#{parse_param_name});} +#line [@oline@] [@ofile@] + STR + end + + def before_reduce_function(comment = "") + return "" unless @grammar.before_reduce + + <<-STR + #{comment} +#line #{@grammar.before_reduce.line} "#{@grammar_file_path}" + {#{@grammar.before_reduce.s_value}(yylen#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def after_reduce_function(comment = "") + return "" unless @grammar.after_reduce + + <<-STR + #{comment} +#line #{@grammar.after_reduce.line} "#{@grammar_file_path}" + {#{@grammar.after_reduce.s_value}(yylen#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def after_shift_error_token_function(comment = "") + return "" unless @grammar.after_shift_error_token + + <<-STR + #{comment} +#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}" + {#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});} +#line [@oline@] [@ofile@] + STR + end + + def after_pop_stack_function(len, comment = "") + return "" unless @grammar.after_pop_stack + + <<-STR + #{comment} +#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}" + {#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});} +#line [@oline@] [@ofile@] + STR + end + + def symbol_actions_for_error_token + str = "" + + @grammar.symbols.each do |sym| + next unless sym.error_token + + str << <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.error_token.lineno} "#{@grammar_file_path}" + {#{sym.error_token.translated_code(sym.tag)}} +#line [@oline@] [@ofile@] + break; + + STR + end + + str + end + + # b4_user_actions + def user_actions + str = "" + + @context.states.rules.each do |rule| + next unless rule.token_code + + code = rule.token_code + spaces = " " * (code.column - 1) + + str << <<-STR + case #{rule.id + 1}: /* #{rule.as_comment} */ +#line #{code.line} "#{@grammar_file_path}" +#{spaces}{#{rule.translated_code}} +#line [@oline@] [@ofile@] + break; + + STR + end + + str << <<-STR + +#line [@oline@] [@ofile@] + STR + + str + end + + def omit_blanks(param) + param.strip + end + + # b4_parse_param + def parse_param + if @grammar.parse_param + omit_blanks(@grammar.parse_param) + else + "" + end + end + + def lex_param + if @grammar.lex_param + omit_blanks(@grammar.lex_param) + else + "" + end + end + + # b4_user_formals + def user_formals + if @grammar.parse_param + ", #{parse_param}" + else + "" + end + end + + # b4_user_args + def user_args + if @grammar.parse_param + ", #{parse_param_name}" + else + "" + end + end + + def extract_param_name(param) + param[/\b([a-zA-Z0-9_]+)(?=\s*\z)/] + end + + def parse_param_name + if @grammar.parse_param + extract_param_name(parse_param) + else + "" + end + end + + def lex_param_name + if @grammar.lex_param + extract_param_name(lex_param) + else + "" + end + end + + # b4_parse_param_use + def parse_param_use(val, loc) + str = <<-STR + YY_USE (#{val}); + YY_USE (#{loc}); + STR + + if @grammar.parse_param + str << " YY_USE (#{parse_param_name});" + end + + str + end + + # b4_yylex_formals + def yylex_formals + ary = ["&yylval", "&yylloc"] + + if @grammar.lex_param + ary << lex_param_name + end + + "(#{ary.join(', ')})" + end + + # b4_table_value_equals + def table_value_equals(table, value, literal, symbol) + if literal < table.min || table.max < literal + "0" + else + "((#{value}) == #{symbol})" + end + end + + # b4_yyerror_args + def yyerror_args + ary = ["&yylloc"] + + if @grammar.parse_param + ary << parse_param_name + end + + "#{ary.join(', ')}" + end + + def template_basename + File.basename(template_file) + end + + def aux + @grammar.aux + end + + def int_array_to_string(ary) + last = ary.count - 1 + + s = ary.each_with_index.each_slice(10).map do |slice| + str = " " + + slice.each do |e, i| + str << sprintf("%6d%s", e, (i == last) ? "" : ",") + end + + str + end + + s.join("\n") + end + + def spec_mapped_header_file + @header_file_path + end + + def b4_cpp_guard__b4_spec_mapped_header_file + if @header_file_path + "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED" + else + "" + end + end + + # b4_percent_code_get + def percent_code(name) + @grammar.percent_codes.select do |percent_code| + percent_code.name == name + end.map do |percent_code| + percent_code.code + end.join + end + + private + + def eval_template(file, path) + tmp = render_template(file) + replace_special_variables(tmp, path) + end + + def render_template(file) + erb = self.class.erb(File.read(file)) + erb.filename = file + erb.result_with_hash(context: @context, output: self) + end + + def template_file + File.join(template_dir, @template_name) + end + + def header_template_file + File.join(template_dir, "bison/yacc.h") + end + + def partial_file(file) + File.join(template_dir, file) + end + + def template_dir + File.expand_path("../../../template", __FILE__) + end + + def string_array_to_string(ary) + str = "" + tmp = " " + + ary.each do |s| + s = s.gsub('\\', '\\\\\\\\') + s = s.gsub('"', '\\"') + + if (tmp + s + " \"\",").length > 75 + str << tmp << "\n" + tmp = " \"#{s}\"," + else + tmp << " \"#{s}\"," + end + end + + str << tmp + end + + def replace_special_variables(str, ofile) + str.each_line.with_index(1).map do |line, i| + line.gsub!("[@oline@]", (i + 1).to_s) + line.gsub!("[@ofile@]", "\"#{ofile}\"") + line + end.join + end + end +end diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb new file mode 100644 index 0000000000..04603105b4 --- /dev/null +++ b/tool/lrama/lib/lrama/parser.rb @@ -0,0 +1,2234 @@ +# +# DO NOT MODIFY!!!! +# This file is automatically generated by Racc 1.7.3 +# from Racc grammar file "parser.y". +# + +###### racc/parser.rb begin +unless $".find {|p| p.end_with?('/racc/parser.rb')} +$".push "#{__dir__}/racc/parser.rb" +self.class.module_eval(<<'...end racc/parser.rb/module_eval...', 'racc/parser.rb', 1) +#-- +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# +# As a special exception, when this code is copied by Racc +# into a Racc output file, you may use that output file +# without restriction. +#++ + +unless $".find {|p| p.end_with?('/racc/info.rb')} +$".push "#{__dir__}/racc/info.rb" + +module Racc + VERSION = '1.7.3' + Version = VERSION + Copyright = 'Copyright (c) 1999-2006 Minero Aoki' +end + +end + + +unless defined?(NotImplementedError) + NotImplementedError = NotImplementError # :nodoc: +end + +module Racc + class ParseError < StandardError; end +end +unless defined?(::ParseError) + ParseError = Racc::ParseError # :nodoc: +end + +# Racc is a LALR(1) parser generator. +# It is written in Ruby itself, and generates Ruby programs. +# +# == Command-line Reference +# +# racc [-o<var>filename</var>] [--output-file=<var>filename</var>] +# [-e<var>rubypath</var>] [--executable=<var>rubypath</var>] +# [-v] [--verbose] +# [-O<var>filename</var>] [--log-file=<var>filename</var>] +# [-g] [--debug] +# [-E] [--embedded] +# [-l] [--no-line-convert] +# [-c] [--line-convert-all] +# [-a] [--no-omit-actions] +# [-C] [--check-only] +# [-S] [--output-status] +# [--version] [--copyright] [--help] <var>grammarfile</var> +# +# [+grammarfile+] +# Racc grammar file. Any extension is permitted. +# [-o+outfile+, --output-file=+outfile+] +# A filename for output. default is <+filename+>.tab.rb +# [-O+filename+, --log-file=+filename+] +# Place logging output in file +filename+. +# Default log file name is <+filename+>.output. +# [-e+rubypath+, --executable=+rubypath+] +# output executable file(mode 755). where +path+ is the Ruby interpreter. +# [-v, --verbose] +# verbose mode. create +filename+.output file, like yacc's y.output file. +# [-g, --debug] +# add debug code to parser class. To display debugging information, +# use this '-g' option and set @yydebug true in parser class. +# [-E, --embedded] +# Output parser which doesn't need runtime files (racc/parser.rb). +# [-F, --frozen] +# Output parser which declares frozen_string_literals: true +# [-C, --check-only] +# Check syntax of racc grammar file and quit. +# [-S, --output-status] +# Print messages time to time while compiling. +# [-l, --no-line-convert] +# turns off line number converting. +# [-c, --line-convert-all] +# Convert line number of actions, inner, header and footer. +# [-a, --no-omit-actions] +# Call all actions, even if an action is empty. +# [--version] +# print Racc version and quit. +# [--copyright] +# Print copyright and quit. +# [--help] +# Print usage and quit. +# +# == Generating Parser Using Racc +# +# To compile Racc grammar file, simply type: +# +# $ racc parse.y +# +# This creates Ruby script file "parse.tab.y". The -o option can change the output filename. +# +# == Writing A Racc Grammar File +# +# If you want your own parser, you have to write a grammar file. +# A grammar file contains the name of your parser class, grammar for the parser, +# user code, and anything else. +# When writing a grammar file, yacc's knowledge is helpful. +# If you have not used yacc before, Racc is not too difficult. +# +# Here's an example Racc grammar file. +# +# class Calcparser +# rule +# target: exp { print val[0] } +# +# exp: exp '+' exp +# | exp '*' exp +# | '(' exp ')' +# | NUMBER +# end +# +# Racc grammar files resemble yacc files. +# But (of course), this is Ruby code. +# yacc's $$ is the 'result', $0, $1... is +# an array called 'val', and $-1, $-2... is an array called '_values'. +# +# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for +# more information on grammar files. +# +# == Parser +# +# Then you must prepare the parse entry method. There are two types of +# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse +# +# Racc::Parser#do_parse is simple. +# +# It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). +# This method must returns an array like [TOKENSYMBOL, ITS_VALUE]. +# EOF is [false, false]. +# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. +# If you want to change this, see the grammar reference. +# +# Racc::Parser#yyparse is little complicated, but useful. +# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator. +# +# For example, <code>yyparse(obj, :scan)</code> causes +# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+. +# +# == Debugging +# +# When debugging, "-v" or/and the "-g" option is helpful. +# +# "-v" creates verbose log file (.output). +# "-g" creates a "Verbose Parser". +# Verbose Parser prints the internal status when parsing. +# But it's _not_ automatic. +# You must use -g option and set +@yydebug+ to +true+ in order to get output. +# -g option only creates the verbose parser. +# +# === Racc reported syntax error. +# +# Isn't there too many "end"? +# grammar of racc file is changed in v0.10. +# +# Racc does not use '%' mark, while yacc uses huge number of '%' marks.. +# +# === Racc reported "XXXX conflicts". +# +# Try "racc -v xxxx.y". +# It causes producing racc's internal log file, xxxx.output. +# +# === Generated parsers does not work correctly +# +# Try "racc -g xxxx.y". +# This command let racc generate "debugging parser". +# Then set @yydebug=true in your parser. +# It produces a working log of your parser. +# +# == Re-distributing Racc runtime +# +# A parser, which is created by Racc, requires the Racc runtime module; +# racc/parser.rb. +# +# Ruby 1.8.x comes with Racc runtime module, +# you need NOT distribute Racc runtime files. +# +# If you want to include the Racc runtime module with your parser. +# This can be done by using '-E' option: +# +# $ racc -E -omyparser.rb myparser.y +# +# This command creates myparser.rb which `includes' Racc runtime. +# Only you must do is to distribute your parser file (myparser.rb). +# +# Note: parser.rb is ruby license, but your parser is not. +# Your own parser is completely yours. +module Racc + + unless defined?(Racc_No_Extensions) + Racc_No_Extensions = false # :nodoc: + end + + class Parser + + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Core_Version_R = ::Racc::VERSION + + begin + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + + unless new.respond_to?(:_racc_do_parse_c, true) + raise LoadError, 'old cparse.so' + end + if Racc_No_Extensions + raise LoadError, 'selecting ruby version of racc runtime core' + end + + Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc: + Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc: + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc: + Racc_Runtime_Type = 'c' # :nodoc: + rescue LoadError + Racc_Main_Parsing_Routine = :_racc_do_parse_rb + Racc_YY_Parse_Method = :_racc_yyparse_rb + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R + Racc_Runtime_Type = 'ruby' + end + + def Parser.racc_runtime_type # :nodoc: + Racc_Runtime_Type + end + + def _racc_setup + @yydebug = false unless self.class::Racc_debug_parser + @yydebug = false unless defined?(@yydebug) + if @yydebug + @racc_debug_out = $stderr unless defined?(@racc_debug_out) + @racc_debug_out ||= $stderr + end + arg = self.class::Racc_arg + arg[13] = true if arg.size < 14 + arg + end + + def _racc_init_sysvars + @racc_state = [0] + @racc_tstack = [] + @racc_vstack = [] + + @racc_t = nil + @racc_val = nil + + @racc_read_next = true + + @racc_user_yyerror = false + @racc_error_status = 0 + end + + # The entry point of the parser. This method is used with #next_token. + # If Racc wants to get token (and its value), calls next_token. + # + # Example: + # def parse + # @q = [[1,1], + # [2,2], + # [3,3], + # [false, '$']] + # do_parse + # end + # + # def next_token + # @q.shift + # end + class_eval <<~RUBY, __FILE__, __LINE__ + 1 + def do_parse + #{Racc_Main_Parsing_Routine}(_racc_setup(), false) + end + RUBY + + # The method to fetch next token. + # If you use #do_parse method, you must implement #next_token. + # + # The format of return value is [TOKEN_SYMBOL, VALUE]. + # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT + # for 'IDENT'. ";" (String) for ';'. + # + # The final symbol (End of file) must be false. + def next_token + raise NotImplementedError, "#{self.class}\#next_token is not defined" + end + + def _racc_do_parse_rb(arg, in_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + tok = act = i = nil + + catch(:racc_end_parse) { + while true + if i = action_pointer[@racc_state[-1]] + if @racc_read_next + if @racc_t != 0 # not EOF + tok, @racc_val = next_token() + unless tok # EOF + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + racc_read_token(@racc_t, tok, @racc_val) if @yydebug + @racc_read_next = false + end + end + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + else + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + } + end + + # Another entry point for the parser. + # If you use this method, you must implement RECEIVER#METHOD_ID method. + # + # RECEIVER#METHOD_ID is a method to get next token. + # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + class_eval <<~RUBY, __FILE__, __LINE__ + 1 + def yyparse(recv, mid) + #{Racc_YY_Parse_Method}(recv, mid, _racc_setup(), false) + end + RUBY + + def _racc_yyparse_rb(recv, mid, arg, c_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + + catch(:racc_end_parse) { + until i = action_pointer[@racc_state[-1]] + while act = _racc_evalact(action_default[@racc_state[-1]], arg) + ; + end + end + recv.__send__(mid) do |tok, val| + unless tok + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + @racc_val = val + @racc_read_next = false + + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + + while !(i = action_pointer[@racc_state[-1]]) || + ! @racc_read_next || + @racc_t == 0 # $ + unless i and i += @racc_t and + i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + end + } + end + + ### + ### common + ### + + def _racc_evalact(act, arg) + action_table, action_check, _, action_pointer, + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp + + if act > 0 and act < shift_n + # + # shift + # + if @racc_error_status > 0 + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF + end + @racc_vstack.push @racc_val + @racc_state.push act + @racc_read_next = true + if @yydebug + @racc_tstack.push @racc_t + racc_shift @racc_t, @racc_tstack, @racc_vstack + end + + elsif act < 0 and act > -reduce_n + # + # reduce + # + code = catch(:racc_jump) { + @racc_state.push _racc_do_reduce(arg, act) + false + } + if code + case code + when 1 # yyerror + @racc_user_yyerror = true # user_yyerror + return -reduce_n + when 2 # yyaccept + return shift_n + else + raise '[Racc Bug] unknown jump code' + end + end + + elsif act == shift_n + # + # accept + # + racc_accept if @yydebug + throw :racc_end_parse, @racc_vstack[0] + + elsif act == -reduce_n + # + # error + # + case @racc_error_status + when 0 + unless arg[21] # user_yyerror + nerr += 1 + on_error @racc_t, @racc_val, @racc_vstack + end + when 3 + if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery + throw :racc_end_parse, nil + end + @racc_read_next = true + end + @racc_user_yyerror = false + @racc_error_status = 3 + while true + if i = action_pointer[@racc_state[-1]] + i += 1 # error token + if i >= 0 and + (act = action_table[i]) and + action_check[i] == @racc_state[-1] + break + end + end + throw :racc_end_parse, nil if @racc_state.size <= 1 + @racc_state.pop + @racc_vstack.pop + if @yydebug + @racc_tstack.pop + racc_e_pop @racc_state, @racc_tstack, @racc_vstack + end + end + return act + + else + raise "[Racc Bug] unknown action #{act.inspect}" + end + + racc_next_state(@racc_state[-1], @racc_state) if @yydebug + + nil + end + + def _racc_do_reduce(arg, act) + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + + state = @racc_state + vstack = @racc_vstack + tstack = @racc_tstack + + i = act * -3 + len = reduce_table[i] + reduce_to = reduce_table[i+1] + method_id = reduce_table[i+2] + void_array = [] + + tmp_t = tstack[-len, len] if @yydebug + tmp_v = vstack[-len, len] + tstack[-len, len] = void_array if @yydebug + vstack[-len, len] = void_array + state[-len, len] = void_array + + # tstack must be updated AFTER method call + if use_result + vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) + else + vstack.push __send__(method_id, tmp_v, vstack) + end + tstack.push reduce_to + + racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug + + k1 = reduce_to - nt_base + if i = goto_pointer[k1] + i += state[-1] + if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 + return curstate + end + end + goto_default[k1] + end + + # This method is called when a parse error is found. + # + # ERROR_TOKEN_ID is an internal ID of token which caused error. + # You can get string representation of this ID by calling + # #token_to_str. + # + # ERROR_VALUE is a value of error token. + # + # value_stack is a stack of symbol values. + # DO NOT MODIFY this object. + # + # This method raises ParseError by default. + # + # If this method returns, parsers enter "error recovering mode". + def on_error(t, val, vstack) + raise ParseError, sprintf("parse error on value %s (%s)", + val.inspect, token_to_str(t) || '?') + end + + # Enter error recovering mode. + # This method does not call #on_error. + def yyerror + throw :racc_jump, 1 + end + + # Exit parser. + # Return value is +Symbol_Value_Stack[0]+. + def yyaccept + throw :racc_jump, 2 + end + + # Leave error recovering mode. + def yyerrok + @racc_error_status = 0 + end + + # For debugging output + def racc_read_token(t, tok, val) + @racc_debug_out.print 'read ' + @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' + @racc_debug_out.puts val.inspect + @racc_debug_out.puts + end + + def racc_shift(tok, tstack, vstack) + @racc_debug_out.puts "shift #{racc_token2str tok}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_reduce(toks, sim, tstack, vstack) + out = @racc_debug_out + out.print 'reduce ' + if toks.empty? + out.print ' <none>' + else + toks.each {|t| out.print ' ', racc_token2str(t) } + end + out.puts " --> #{racc_token2str(sim)}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_accept + @racc_debug_out.puts 'accept' + @racc_debug_out.puts + end + + def racc_e_pop(state, tstack, vstack) + @racc_debug_out.puts 'error recovering mode: pop token' + racc_print_states state + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_next_state(curstate, state) + @racc_debug_out.puts "goto #{curstate}" + racc_print_states state + @racc_debug_out.puts + end + + def racc_print_stacks(t, v) + out = @racc_debug_out + out.print ' [' + t.each_index do |i| + out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' + end + out.puts ' ]' + end + + def racc_print_states(s) + out = @racc_debug_out + out.print ' [' + s.each {|st| out.print ' ', st } + out.puts ' ]' + end + + def racc_token2str(tok) + self.class::Racc_token_to_s_table[tok] or + raise "[Racc Bug] can't convert token #{tok} to string" + end + + # Convert internal ID of token symbol to the string. + def token_to_str(t) + self.class::Racc_token_to_s_table[t] + end + + end + +end + +...end racc/parser.rb/module_eval... +end +###### racc/parser.rb end +module Lrama + class Parser < Racc::Parser + +module_eval(<<'...end parser.y/module_eval...', 'parser.y', 536) + +include Lrama::Report::Duration + +def initialize(text, path, debug = false) + @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) + @yydebug = debug + @rule_counter = Lrama::Grammar::Counter.new(0) + @midrule_action_counter = Lrama::Grammar::Counter.new(1) +end + +def parse + report_duration(:parse) do + @lexer = Lrama::Lexer.new(@grammar_file) + @grammar = Lrama::Grammar.new(@rule_counter) + @precedence_number = 0 + reset_precs + do_parse + @grammar + end +end + +def next_token + @lexer.next_token +end + +def on_error(error_token_id, error_value, value_stack) + if error_value.is_a?(Lrama::Lexer::Token) + location = error_value.location + value = "'#{error_value.s_value}'" + else + location = @lexer.location + value = error_value.inspect + end + + error_message = "parse error on value #{value} (#{token_to_str(error_token_id) || '?'})" + + raise_parse_error(error_message, location) +end + +def on_action_error(error_message, error_value) + if error_value.is_a?(Lrama::Lexer::Token) + location = error_value.location + else + location = @lexer.location + end + + raise_parse_error(error_message, location) +end + +private + +def reset_precs + @prec_seen = false + @code_after_prec = false +end + +def begin_c_declaration(end_symbol) + @lexer.status = :c_declaration + @lexer.end_symbol = end_symbol +end + +def end_c_declaration + @lexer.status = :initial + @lexer.end_symbol = nil +end + +def raise_parse_error(error_message, location) + raise ParseError, location.generate_error_message(error_message) +end +...end parser.y/module_eval... +##### State transition tables begin ### + +racc_action_table = [ + 98, 51, 99, 163, 88, 79, 51, 51, 180, 163, + 79, 79, 51, 162, 180, 156, 79, 165, 157, 51, + 3, 50, 181, 165, 70, 51, 8, 50, 181, 79, + 75, 51, 6, 50, 7, 161, 82, 47, 51, 51, + 50, 50, 89, 82, 82, 166, 41, 51, 100, 50, + 182, 166, 82, 51, 48, 50, 182, 23, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 47, 51, 51, 50, 50, 93, 79, 197, + 51, 51, 50, 50, 79, 197, 51, 51, 50, 50, + 79, 197, 23, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 9, 51, 54, + 50, 14, 15, 16, 17, 18, 19, 54, 54, 20, + 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 51, 51, + 50, 50, 79, 197, 51, 51, 50, 50, 79, 197, + 51, 51, 50, 50, 79, 197, 51, 51, 50, 50, + 79, 79, 51, 51, 50, 50, 79, 79, 51, 51, + 50, 50, 79, 79, 51, 51, 50, 207, 79, 79, + 51, 51, 207, 207, 79, 79, 51, 51, 50, 50, + 79, 187, 188, 189, 96, 187, 188, 189, 96, 217, + 221, 229, 218, 218, 218, 51, 51, 50, 50, 187, + 188, 189, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 90, 94, 96, 101, 101, 101, 103, + 109, 113, 114, 117, 117, 117, 117, 120, 47, 124, + 125, 127, 129, 130, 131, 132, 133, 136, 140, 141, + 142, 143, 146, 147, 148, 150, 160, 168, 170, 171, + 172, 173, 174, 176, 177, 178, 146, 184, 192, 193, + 200, 160, 204, 176, 211, 160, 215, 216, 178, 176, + 226, 176, 228, 96, 96, 176 ] + +racc_action_check = [ + 49, 145, 49, 145, 39, 145, 159, 183, 159, 183, + 159, 183, 201, 144, 201, 139, 201, 145, 139, 33, + 1, 33, 159, 183, 33, 34, 3, 34, 201, 34, + 34, 35, 2, 35, 2, 144, 35, 9, 36, 37, + 36, 37, 39, 36, 37, 145, 7, 38, 49, 38, + 159, 183, 38, 15, 14, 15, 201, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 42, 69, 172, 69, 172, 42, 172, 172, + 173, 70, 173, 70, 173, 173, 174, 81, 174, 81, + 174, 174, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 4, 82, 16, + 82, 4, 4, 4, 4, 4, 4, 17, 18, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 194, 109, + 194, 109, 194, 194, 198, 111, 198, 111, 198, 198, + 199, 117, 199, 117, 199, 199, 74, 75, 74, 75, + 74, 75, 114, 116, 114, 116, 114, 116, 137, 166, + 137, 166, 137, 166, 182, 184, 182, 184, 182, 184, + 204, 216, 204, 216, 204, 216, 218, 119, 218, 119, + 218, 164, 164, 164, 164, 179, 179, 179, 179, 208, + 214, 223, 208, 214, 223, 134, 138, 134, 138, 209, + 209, 209, 19, 20, 23, 25, 26, 27, 28, 29, + 30, 31, 32, 40, 45, 46, 53, 55, 56, 57, + 68, 72, 73, 80, 85, 86, 87, 88, 89, 95, + 96, 102, 104, 105, 106, 107, 108, 112, 120, 121, + 122, 123, 124, 125, 126, 128, 141, 149, 151, 152, + 153, 154, 155, 156, 157, 158, 161, 163, 167, 169, + 175, 178, 180, 186, 190, 200, 205, 207, 213, 217, + 220, 221, 222, 226, 228, 230 ] + +racc_action_pointer = [ + nil, 20, 22, 26, 98, nil, nil, 39, nil, 33, + nil, nil, nil, nil, 48, 50, 90, 98, 99, 207, + 194, nil, nil, 195, nil, 196, 197, 198, 213, 214, + 215, 216, 217, 16, 22, 28, 35, 36, 44, -1, + 221, nil, 68, nil, nil, 201, 174, nil, nil, -5, + nil, nil, nil, 207, nil, 208, 209, 210, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, 222, 70, + 78, nil, 225, 224, 153, 154, nil, nil, nil, nil, + 225, 84, 105, nil, nil, 226, 227, 228, 197, 234, + nil, nil, nil, nil, nil, 197, 235, nil, nil, nil, + nil, nil, 239, nil, 240, 241, 242, 243, 244, 136, + nil, 142, 240, nil, 159, nil, 160, 148, nil, 184, + 243, 207, 239, 249, 206, 201, 252, nil, 253, nil, + nil, nil, nil, nil, 202, nil, nil, 165, 203, -26, + nil, 210, nil, nil, -10, -2, nil, nil, nil, 237, + nil, 238, 239, 240, 241, 242, 255, 259, 220, 3, + nil, 220, nil, 227, 143, nil, 166, 248, nil, 249, + nil, nil, 71, 77, 83, 228, nil, nil, 225, 147, + 232, nil, 171, 4, 172, nil, 265, nil, nil, nil, + 272, nil, nil, nil, 135, nil, nil, nil, 141, 147, + 229, 9, nil, nil, 177, 274, nil, 237, 158, 161, + nil, nil, nil, 233, 159, nil, 178, 271, 183, nil, + 260, 273, 262, 160, nil, nil, 232, nil, 233, nil, + 277, nil, nil ] + +racc_action_default = [ + -2, -138, -8, -138, -138, -3, -4, -138, 233, -138, + -9, -10, -11, -12, -138, -138, -138, -138, -138, -138, + -138, -24, -25, -138, -29, -138, -138, -138, -138, -138, + -138, -138, -138, -138, -138, -138, -138, -138, -138, -138, + -138, -7, -123, -96, -98, -138, -120, -122, -13, -127, + -94, -95, -126, -15, -85, -16, -17, -138, -21, -26, + -30, -33, -36, -39, -40, -41, -42, -43, -44, -50, + -138, -53, -71, -45, -75, -138, -78, -80, -81, -135, + -46, -88, -138, -91, -93, -47, -48, -49, -138, -138, + -5, -1, -97, -124, -99, -138, -138, -14, -128, -129, + -130, -82, -138, -18, -138, -138, -138, -138, -138, -138, + -54, -51, -73, -72, -138, -79, -76, -138, -92, -89, + -138, -138, -138, -138, -104, -138, -138, -86, -138, -22, + -27, -31, -34, -37, -52, -55, -74, -77, -90, -138, + -58, -62, -6, -125, -100, -101, -105, -121, -83, -138, + -19, -138, -138, -138, -138, -138, -136, -138, -57, -60, + -63, -104, -103, -94, -120, -109, -138, -138, -87, -138, + -23, -28, -138, -138, -138, -138, -137, -59, -62, -120, + -94, -67, -138, -102, -138, -106, -136, -113, -114, -115, + -138, -112, -84, -20, -32, -131, -133, -134, -35, -38, + -62, -61, -64, -65, -138, -138, -70, -94, -138, -116, + -107, -110, -132, -56, -138, -68, -138, -136, -138, -118, + -138, -136, -138, -138, -108, -117, -120, -66, -120, -119, + -136, -69, -111 ] + +racc_goto_table = [ + 76, 95, 69, 52, 74, 158, 110, 175, 118, 119, + 145, 208, 1, 212, 186, 2, 43, 212, 212, 4, + 42, 72, 91, 84, 84, 84, 84, 5, 40, 203, + 122, 214, 80, 85, 86, 87, 10, 210, 11, 111, + 115, 76, 12, 223, 138, 116, 118, 183, 110, 92, + 53, 55, 56, 194, 198, 199, 13, 72, 72, 219, + 49, 97, 128, 169, 213, 118, 104, 151, 224, 84, + 84, 110, 227, 105, 152, 106, 153, 107, 134, 154, + 76, 232, 115, 108, 137, 155, 68, 73, 112, 135, + 139, 121, 201, 205, 222, 126, 167, 72, 102, 72, + 149, 144, 190, 115, 220, 84, 123, 84, nil, nil, + nil, 164, nil, nil, nil, nil, nil, nil, nil, 185, + nil, nil, 72, nil, nil, 179, 84, nil, nil, nil, + nil, nil, 191, nil, 202, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, 206, 164, + 209, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 179, nil, nil, + 209, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 230, 209, 231, 225 ] + +racc_goto_check = [ + 43, 44, 33, 35, 49, 40, 34, 39, 56, 55, + 60, 46, 1, 64, 45, 2, 57, 64, 64, 3, + 4, 35, 5, 35, 35, 35, 35, 6, 7, 45, + 8, 46, 32, 32, 32, 32, 9, 39, 10, 33, + 43, 43, 11, 46, 55, 49, 56, 60, 34, 57, + 15, 15, 15, 21, 21, 21, 12, 35, 35, 45, + 13, 14, 16, 17, 40, 56, 18, 19, 39, 35, + 35, 34, 39, 22, 23, 24, 25, 26, 33, 27, + 43, 39, 43, 28, 49, 29, 30, 31, 36, 37, + 38, 41, 42, 47, 48, 51, 52, 35, 53, 35, + 54, 59, 61, 43, 62, 35, 63, 35, nil, nil, + nil, 43, nil, nil, nil, nil, nil, nil, nil, 44, + nil, nil, 35, nil, nil, 43, 35, nil, nil, nil, + nil, nil, 43, nil, 44, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, 43, 43, + 43, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 43, nil, nil, + 43, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 44, 43, 44, 43 ] + +racc_goto_pointer = [ + nil, 12, 15, 17, 11, -20, 25, 22, -60, 32, + 34, 38, 52, 45, 12, 34, -41, -87, 8, -62, + nil, -119, 14, -56, 15, -55, 16, -53, 21, -48, + 53, 53, -3, -31, -63, -12, 16, -23, -30, -149, + -136, 2, -86, -34, -45, -150, -173, -88, -121, -30, + nil, -6, -52, 44, -27, -73, -73, 7, nil, -23, + -114, -63, -107, 13, -181 ] + +racc_goto_default = [ + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 45, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 24, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, 71, 77, nil, nil, nil, nil, + nil, 46, 159, 196, nil, nil, nil, nil, nil, nil, + 78, nil, nil, nil, nil, 81, 83, nil, 44, nil, + nil, nil, nil, nil, 195 ] + +racc_reduce_table = [ + 0, 0, :racc_error, + 5, 55, :_reduce_none, + 0, 56, :_reduce_none, + 2, 56, :_reduce_none, + 0, 61, :_reduce_4, + 0, 62, :_reduce_5, + 5, 60, :_reduce_6, + 2, 60, :_reduce_none, + 0, 57, :_reduce_8, + 2, 57, :_reduce_none, + 1, 63, :_reduce_none, + 1, 63, :_reduce_none, + 1, 63, :_reduce_none, + 2, 63, :_reduce_13, + 3, 63, :_reduce_none, + 2, 63, :_reduce_none, + 2, 63, :_reduce_16, + 2, 63, :_reduce_17, + 0, 70, :_reduce_18, + 0, 71, :_reduce_19, + 7, 63, :_reduce_20, + 0, 72, :_reduce_21, + 0, 73, :_reduce_22, + 6, 63, :_reduce_23, + 1, 63, :_reduce_24, + 1, 63, :_reduce_none, + 0, 76, :_reduce_26, + 0, 77, :_reduce_27, + 6, 64, :_reduce_28, + 1, 64, :_reduce_none, + 0, 78, :_reduce_30, + 0, 79, :_reduce_31, + 7, 64, :_reduce_32, + 0, 80, :_reduce_33, + 0, 81, :_reduce_34, + 7, 64, :_reduce_35, + 0, 82, :_reduce_36, + 0, 83, :_reduce_37, + 7, 64, :_reduce_38, + 2, 64, :_reduce_39, + 2, 64, :_reduce_40, + 2, 64, :_reduce_41, + 2, 64, :_reduce_42, + 2, 64, :_reduce_43, + 2, 74, :_reduce_none, + 2, 74, :_reduce_45, + 2, 74, :_reduce_46, + 2, 74, :_reduce_47, + 2, 74, :_reduce_48, + 2, 74, :_reduce_49, + 1, 84, :_reduce_50, + 2, 84, :_reduce_51, + 3, 84, :_reduce_52, + 1, 87, :_reduce_53, + 2, 87, :_reduce_54, + 3, 88, :_reduce_55, + 8, 65, :_reduce_56, + 5, 66, :_reduce_57, + 1, 92, :_reduce_58, + 3, 92, :_reduce_59, + 1, 94, :_reduce_60, + 3, 94, :_reduce_61, + 0, 96, :_reduce_62, + 1, 96, :_reduce_63, + 3, 96, :_reduce_64, + 3, 96, :_reduce_65, + 6, 96, :_reduce_66, + 0, 101, :_reduce_67, + 0, 102, :_reduce_68, + 7, 96, :_reduce_69, + 3, 96, :_reduce_70, + 0, 90, :_reduce_none, + 1, 90, :_reduce_none, + 0, 91, :_reduce_none, + 1, 91, :_reduce_none, + 1, 85, :_reduce_75, + 2, 85, :_reduce_76, + 3, 85, :_reduce_77, + 1, 103, :_reduce_78, + 2, 103, :_reduce_79, + 1, 97, :_reduce_none, + 1, 97, :_reduce_none, + 0, 105, :_reduce_82, + 0, 106, :_reduce_83, + 6, 69, :_reduce_84, + 0, 107, :_reduce_85, + 0, 108, :_reduce_86, + 5, 69, :_reduce_87, + 1, 86, :_reduce_88, + 2, 86, :_reduce_89, + 3, 86, :_reduce_90, + 1, 109, :_reduce_91, + 2, 109, :_reduce_92, + 1, 110, :_reduce_none, + 1, 89, :_reduce_94, + 1, 89, :_reduce_95, + 1, 58, :_reduce_none, + 2, 58, :_reduce_none, + 1, 111, :_reduce_none, + 2, 111, :_reduce_none, + 4, 112, :_reduce_100, + 1, 113, :_reduce_101, + 3, 113, :_reduce_102, + 2, 113, :_reduce_none, + 0, 114, :_reduce_104, + 1, 114, :_reduce_105, + 3, 114, :_reduce_106, + 4, 114, :_reduce_107, + 6, 114, :_reduce_108, + 0, 115, :_reduce_109, + 0, 116, :_reduce_110, + 8, 114, :_reduce_111, + 3, 114, :_reduce_112, + 1, 99, :_reduce_113, + 1, 99, :_reduce_114, + 1, 99, :_reduce_115, + 1, 100, :_reduce_116, + 3, 100, :_reduce_117, + 2, 100, :_reduce_118, + 4, 100, :_reduce_119, + 0, 98, :_reduce_none, + 3, 98, :_reduce_121, + 1, 95, :_reduce_none, + 0, 59, :_reduce_none, + 0, 117, :_reduce_124, + 3, 59, :_reduce_125, + 1, 67, :_reduce_none, + 0, 68, :_reduce_none, + 1, 68, :_reduce_none, + 1, 68, :_reduce_none, + 1, 68, :_reduce_none, + 1, 75, :_reduce_131, + 2, 75, :_reduce_132, + 1, 118, :_reduce_none, + 1, 118, :_reduce_none, + 1, 104, :_reduce_135, + 0, 93, :_reduce_none, + 1, 93, :_reduce_none ] + +racc_reduce_n = 138 + +racc_shift_n = 233 + +racc_token_table = { + false => 0, + :error => 1, + :C_DECLARATION => 2, + :CHARACTER => 3, + :IDENT_COLON => 4, + :IDENTIFIER => 5, + :INTEGER => 6, + :STRING => 7, + :TAG => 8, + "%%" => 9, + "%{" => 10, + "%}" => 11, + "%require" => 12, + "%expect" => 13, + "%define" => 14, + "%param" => 15, + "%lex-param" => 16, + "%parse-param" => 17, + "%code" => 18, + "{" => 19, + "}" => 20, + "%initial-action" => 21, + "%no-stdlib" => 22, + ";" => 23, + "%union" => 24, + "%destructor" => 25, + "%printer" => 26, + "%error-token" => 27, + "%after-shift" => 28, + "%before-reduce" => 29, + "%after-reduce" => 30, + "%after-shift-error-token" => 31, + "%after-pop-stack" => 32, + "%token" => 33, + "%type" => 34, + "%left" => 35, + "%right" => 36, + "%precedence" => 37, + "%nonassoc" => 38, + "%rule" => 39, + "(" => 40, + ")" => 41, + ":" => 42, + "%inline" => 43, + "," => 44, + "|" => 45, + "%empty" => 46, + "%prec" => 47, + "?" => 48, + "+" => 49, + "*" => 50, + "[" => 51, + "]" => 52, + "{...}" => 53 } + +racc_nt_base = 54 + +racc_use_result_var = true + +Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] +Ractor.make_shareable(Racc_arg) if defined?(Ractor) + +Racc_token_to_s_table = [ + "$end", + "error", + "C_DECLARATION", + "CHARACTER", + "IDENT_COLON", + "IDENTIFIER", + "INTEGER", + "STRING", + "TAG", + "\"%%\"", + "\"%{\"", + "\"%}\"", + "\"%require\"", + "\"%expect\"", + "\"%define\"", + "\"%param\"", + "\"%lex-param\"", + "\"%parse-param\"", + "\"%code\"", + "\"{\"", + "\"}\"", + "\"%initial-action\"", + "\"%no-stdlib\"", + "\";\"", + "\"%union\"", + "\"%destructor\"", + "\"%printer\"", + "\"%error-token\"", + "\"%after-shift\"", + "\"%before-reduce\"", + "\"%after-reduce\"", + "\"%after-shift-error-token\"", + "\"%after-pop-stack\"", + "\"%token\"", + "\"%type\"", + "\"%left\"", + "\"%right\"", + "\"%precedence\"", + "\"%nonassoc\"", + "\"%rule\"", + "\"(\"", + "\")\"", + "\":\"", + "\"%inline\"", + "\",\"", + "\"|\"", + "\"%empty\"", + "\"%prec\"", + "\"?\"", + "\"+\"", + "\"*\"", + "\"[\"", + "\"]\"", + "\"{...}\"", + "$start", + "input", + "prologue_declarations", + "bison_declarations", + "grammar", + "epilogue_opt", + "prologue_declaration", + "@1", + "@2", + "bison_declaration", + "grammar_declaration", + "rule_declaration", + "inline_declaration", + "variable", + "value", + "params", + "@3", + "@4", + "@5", + "@6", + "symbol_declaration", + "generic_symlist", + "@7", + "@8", + "@9", + "@10", + "@11", + "@12", + "@13", + "@14", + "token_declarations", + "symbol_declarations", + "token_declarations_for_precedence", + "token_declaration_list", + "token_declaration", + "id", + "int_opt", + "alias", + "rule_args", + "tag_opt", + "rule_rhs_list", + "id_colon", + "rule_rhs", + "symbol", + "named_ref_opt", + "parameterizing_suffix", + "parameterizing_args", + "@15", + "@16", + "symbol_declaration_list", + "string_as_id", + "@17", + "@18", + "@19", + "@20", + "token_declaration_list_for_precedence", + "token_declaration_for_precedence", + "rules_or_grammar_declaration", + "rules", + "rhs_list", + "rhs", + "@21", + "@22", + "@23", + "generic_symlist_item" ] +Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor) + +Racc_debug_parser = true + +##### State transition tables end ##### + +# reduce 0 omitted + +# reduce 1 omitted + +# reduce 2 omitted + +# reduce 3 omitted + +module_eval(<<'.,.,', 'parser.y', 14) + def _reduce_4(val, _values, result) + begin_c_declaration("%}") + @grammar.prologue_first_lineno = @lexer.line + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 19) + def _reduce_5(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 23) + def _reduce_6(val, _values, result) + @grammar.prologue = val[2].s_value + + result + end +.,., + +# reduce 7 omitted + +module_eval(<<'.,.,', 'parser.y', 27) + def _reduce_8(val, _values, result) + result = "" + result + end +.,., + +# reduce 9 omitted + +# reduce 10 omitted + +# reduce 11 omitted + +# reduce 12 omitted + +module_eval(<<'.,.,', 'parser.y', 33) + def _reduce_13(val, _values, result) + @grammar.expect = val[1] + result + end +.,., + +# reduce 14 omitted + +# reduce 15 omitted + +module_eval(<<'.,.,', 'parser.y', 38) + def _reduce_16(val, _values, result) + val[1].each {|token| + @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 44) + def _reduce_17(val, _values, result) + val[1].each {|token| + @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 50) + def _reduce_18(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 54) + def _reduce_19(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 58) + def _reduce_20(val, _values, result) + @grammar.add_percent_code(id: val[1], code: val[4]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 62) + def _reduce_21(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 66) + def _reduce_22(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 70) + def _reduce_23(val, _values, result) + @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[3]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 72) + def _reduce_24(val, _values, result) + @grammar.no_stdlib = true + result + end +.,., + +# reduce 25 omitted + +module_eval(<<'.,.,', 'parser.y', 77) + def _reduce_26(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 81) + def _reduce_27(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 85) + def _reduce_28(val, _values, result) + @grammar.set_union( + Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[3]), + val[3].line + ) + + result + end +.,., + +# reduce 29 omitted + +module_eval(<<'.,.,', 'parser.y', 93) + def _reduce_30(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 97) + def _reduce_31(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 101) + def _reduce_32(val, _values, result) + @grammar.add_destructor( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 109) + def _reduce_33(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 113) + def _reduce_34(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 117) + def _reduce_35(val, _values, result) + @grammar.add_printer( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 125) + def _reduce_36(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 129) + def _reduce_37(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 133) + def _reduce_38(val, _values, result) + @grammar.add_error_token( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 141) + def _reduce_39(val, _values, result) + @grammar.after_shift = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 145) + def _reduce_40(val, _values, result) + @grammar.before_reduce = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 149) + def _reduce_41(val, _values, result) + @grammar.after_reduce = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 153) + def _reduce_42(val, _values, result) + @grammar.after_shift_error_token = val[1] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 157) + def _reduce_43(val, _values, result) + @grammar.after_pop_stack = val[1] + + result + end +.,., + +# reduce 44 omitted + +module_eval(<<'.,.,', 'parser.y', 163) + def _reduce_45(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + @grammar.add_type(id: id, tag: hash[:tag]) + } + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 171) + def _reduce_46(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_left(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 181) + def _reduce_47(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_right(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 191) + def _reduce_48(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_precedence(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 201) + def _reduce_49(val, _values, result) + val[1].each {|hash| + hash[:tokens].each {|id| + sym = @grammar.add_term(id: id) + @grammar.add_nonassoc(sym, @precedence_number) + } + } + @precedence_number += 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 212) + def _reduce_50(val, _values, result) + val[0].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: nil, replace: true) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 218) + def _reduce_51(val, _values, result) + val[1].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[0], replace: true) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 224) + def _reduce_52(val, _values, result) + val[2].each {|token_declaration| + @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1], tag: val[1], replace: true) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 229) + def _reduce_53(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 230) + def _reduce_54(val, _values, result) + result = val[0].append(val[1]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 232) + def _reduce_55(val, _values, result) + result = val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 236) + def _reduce_56(val, _values, result) + rule = Grammar::ParameterizingRule::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) + @grammar.add_parameterizing_rule(rule) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 242) + def _reduce_57(val, _values, result) + rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: true) + @grammar.add_parameterizing_rule(rule) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 246) + def _reduce_58(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 247) + def _reduce_59(val, _values, result) + result = val[0].append(val[2]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 251) + def _reduce_60(val, _values, result) + builder = val[0] + result = [builder] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 256) + def _reduce_61(val, _values, result) + builder = val[2] + result = val[0].append(builder) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 262) + def _reduce_62(val, _values, result) + reset_precs + result = Grammar::ParameterizingRule::Rhs.new + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 267) + def _reduce_63(val, _values, result) + reset_precs + result = Grammar::ParameterizingRule::Rhs.new + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 272) + def _reduce_64(val, _values, result) + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.symbols << token + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 280) + def _reduce_65(val, _values, result) + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 286) + def _reduce_66(val, _values, result) + builder = val[0] + builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 292) + def _reduce_67(val, _values, result) + if @prec_seen + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec + @code_after_prec = true + end + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 300) + def _reduce_68(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 304) + def _reduce_69(val, _values, result) + user_code = val[3] + user_code.alias_name = val[6] + builder = val[0] + builder.user_code = user_code + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 312) + def _reduce_70(val, _values, result) + sym = @grammar.find_symbol_by_id!(val[2]) + @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder + + result + end +.,., + +# reduce 71 omitted + +# reduce 72 omitted + +# reduce 73 omitted + +# reduce 74 omitted + +module_eval(<<'.,.,', 'parser.y', 327) + def _reduce_75(val, _values, result) + result = [{tag: nil, tokens: val[0]}] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 331) + def _reduce_76(val, _values, result) + result = [{tag: val[0], tokens: val[1]}] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 335) + def _reduce_77(val, _values, result) + result = val[0].append({tag: val[1], tokens: val[2]}) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 338) + def _reduce_78(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 339) + def _reduce_79(val, _values, result) + result = val[0].append(val[1]) + result + end +.,., + +# reduce 80 omitted + +# reduce 81 omitted + +module_eval(<<'.,.,', 'parser.y', 346) + def _reduce_82(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 350) + def _reduce_83(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 354) + def _reduce_84(val, _values, result) + result = val[0].append(val[3]) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 358) + def _reduce_85(val, _values, result) + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 362) + def _reduce_86(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 366) + def _reduce_87(val, _values, result) + result = [val[2]] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 371) + def _reduce_88(val, _values, result) + result = [{tag: nil, tokens: val[0]}] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 375) + def _reduce_89(val, _values, result) + result = [{tag: val[0], tokens: val[1]}] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 379) + def _reduce_90(val, _values, result) + result = val[0].append({tag: val[1], tokens: val[2]}) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 382) + def _reduce_91(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 383) + def _reduce_92(val, _values, result) + result = val[0].append(val[1]) + result + end +.,., + +# reduce 93 omitted + +module_eval(<<'.,.,', 'parser.y', 387) + def _reduce_94(val, _values, result) + on_action_error("ident after %prec", val[0]) if @prec_seen + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 388) + def _reduce_95(val, _values, result) + on_action_error("char after %prec", val[0]) if @prec_seen + result + end +.,., + +# reduce 96 omitted + +# reduce 97 omitted + +# reduce 98 omitted + +# reduce 99 omitted + +module_eval(<<'.,.,', 'parser.y', 398) + def _reduce_100(val, _values, result) + lhs = val[0] + lhs.alias_name = val[1] + val[3].each do |builder| + builder.lhs = lhs + builder.complete_input + @grammar.add_rule_builder(builder) + end + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 409) + def _reduce_101(val, _values, result) + builder = val[0] + if !builder.line + builder.line = @lexer.line - 1 + end + result = [builder] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 417) + def _reduce_102(val, _values, result) + builder = val[2] + if !builder.line + builder.line = @lexer.line - 1 + end + result = val[0].append(builder) + + result + end +.,., + +# reduce 103 omitted + +module_eval(<<'.,.,', 'parser.y', 427) + def _reduce_104(val, _values, result) + reset_precs + result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 432) + def _reduce_105(val, _values, result) + reset_precs + result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 437) + def _reduce_106(val, _values, result) + token = val[1] + token.alias_name = val[2] + builder = val[0] + builder.add_rhs(token) + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 445) + def _reduce_107(val, _values, result) + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]], lhs_tag: val[3]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 453) + def _reduce_108(val, _values, result) + token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) + builder = val[0] + builder.add_rhs(token) + builder.line = val[1].first_line + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 461) + def _reduce_109(val, _values, result) + if @prec_seen + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec + @code_after_prec = true + end + begin_c_declaration("}") + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 469) + def _reduce_110(val, _values, result) + end_c_declaration + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 473) + def _reduce_111(val, _values, result) + user_code = val[3] + user_code.alias_name = val[6] + user_code.tag = val[7] + builder = val[0] + builder.user_code = user_code + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 482) + def _reduce_112(val, _values, result) + sym = @grammar.find_symbol_by_id!(val[2]) + @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 489) + def _reduce_113(val, _values, result) + result = "option" + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 490) + def _reduce_114(val, _values, result) + result = "nonempty_list" + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 491) + def _reduce_115(val, _values, result) + result = "list" + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 493) + def _reduce_116(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 494) + def _reduce_117(val, _values, result) + result = val[0].append(val[2]) + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 495) + def _reduce_118(val, _values, result) + result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 496) + def _reduce_119(val, _values, result) + result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] + result + end +.,., + +# reduce 120 omitted + +module_eval(<<'.,.,', 'parser.y', 499) + def _reduce_121(val, _values, result) + result = val[1].s_value + result + end +.,., + +# reduce 122 omitted + +# reduce 123 omitted + +module_eval(<<'.,.,', 'parser.y', 506) + def _reduce_124(val, _values, result) + begin_c_declaration('\Z') + @grammar.epilogue_first_lineno = @lexer.line + 1 + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 511) + def _reduce_125(val, _values, result) + end_c_declaration + @grammar.epilogue = val[2].s_value + + result + end +.,., + +# reduce 126 omitted + +# reduce 127 omitted + +# reduce 128 omitted + +# reduce 129 omitted + +# reduce 130 omitted + +module_eval(<<'.,.,', 'parser.y', 522) + def _reduce_131(val, _values, result) + result = [val[0]] + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 523) + def _reduce_132(val, _values, result) + result = val[0].append(val[1]) + result + end +.,., + +# reduce 133 omitted + +# reduce 134 omitted + +module_eval(<<'.,.,', 'parser.y', 528) + def _reduce_135(val, _values, result) + result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) + result + end +.,., + +# reduce 136 omitted + +# reduce 137 omitted + +def _reduce_none(val, _values, result) + val[0] +end + + end # class Parser +end # module Lrama diff --git a/tool/lrama/lib/lrama/report.rb b/tool/lrama/lib/lrama/report.rb new file mode 100644 index 0000000000..650ac09d52 --- /dev/null +++ b/tool/lrama/lib/lrama/report.rb @@ -0,0 +1,2 @@ +require 'lrama/report/duration' +require 'lrama/report/profile' diff --git a/tool/lrama/lib/lrama/report/duration.rb b/tool/lrama/lib/lrama/report/duration.rb new file mode 100644 index 0000000000..7afe284f1a --- /dev/null +++ b/tool/lrama/lib/lrama/report/duration.rb @@ -0,0 +1,25 @@ +module Lrama + class Report + module Duration + def self.enable + @_report_duration_enabled = true + end + + def self.enabled? + !!@_report_duration_enabled + end + + def report_duration(method_name) + time1 = Time.now.to_f + result = yield + time2 = Time.now.to_f + + if Duration.enabled? + puts sprintf("%s %10.5f s", method_name, time2 - time1) + end + + return result + end + end + end +end diff --git a/tool/lrama/lib/lrama/report/profile.rb b/tool/lrama/lib/lrama/report/profile.rb new file mode 100644 index 0000000000..36156800a4 --- /dev/null +++ b/tool/lrama/lib/lrama/report/profile.rb @@ -0,0 +1,14 @@ +module Lrama + class Report + module Profile + # See "Profiling Lrama" in README.md for how to use. + def self.report_profile + require "stackprof" + + StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do + yield + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb new file mode 100644 index 0000000000..ceb74d856a --- /dev/null +++ b/tool/lrama/lib/lrama/state.rb @@ -0,0 +1,144 @@ +require "lrama/state/reduce" +require "lrama/state/reduce_reduce_conflict" +require "lrama/state/resolved_conflict" +require "lrama/state/shift" +require "lrama/state/shift_reduce_conflict" + +module Lrama + class State + attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, + :default_reduction_rule, :closure, :items + attr_accessor :shifts, :reduces + + def initialize(id, accessing_symbol, kernels) + @id = id + @accessing_symbol = accessing_symbol + @kernels = kernels.freeze + @items = @kernels + # Manage relationships between items to state + # to resolve next state + @items_to_state = {} + @conflicts = [] + @resolved_conflicts = [] + @default_reduction_rule = nil + end + + def closure=(closure) + @closure = closure + @items = @kernels + @closure + end + + def non_default_reduces + reduces.reject do |reduce| + reduce.rule == @default_reduction_rule + end + end + + def compute_shifts_reduces + _shifts = {} + reduces = [] + items.each do |item| + # TODO: Consider what should be pushed + if item.end_of_rule? + reduces << Reduce.new(item) + else + key = item.next_sym + _shifts[key] ||= [] + _shifts[key] << item.new_by_next_position + end + end + + # It seems Bison 3.8.2 iterates transitions order by symbol number + shifts = _shifts.sort_by do |next_sym, new_items| + next_sym.number + end.map do |next_sym, new_items| + Shift.new(next_sym, new_items.flatten) + end + self.shifts = shifts.freeze + self.reduces = reduces.freeze + end + + def set_items_to_state(items, next_state) + @items_to_state[items] = next_state + end + + def set_look_ahead(rule, look_ahead) + reduce = reduces.find do |r| + r.rule == rule + end + + reduce.look_ahead = look_ahead + end + + def nterm_transitions + @nterm_transitions ||= transitions.select {|shift, _| shift.next_sym.nterm? } + end + + def term_transitions + @term_transitions ||= transitions.select {|shift, _| shift.next_sym.term? } + end + + def transitions + @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] } + end + + def selected_term_transitions + term_transitions.reject do |shift, next_state| + shift.not_selected + end + end + + # Move to next state by sym + def transition(sym) + result = nil + + if sym.term? + term_transitions.each do |shift, next_state| + term = shift.next_sym + result = next_state if term == sym + end + else + nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + result = next_state if nterm == sym + end + end + + raise "Can not transit by #{sym} #{self}" if result.nil? + + result + end + + def find_reduce_by_item!(item) + reduces.find do |r| + r.item == item + end || (raise "reduce is not found. #{item}") + end + + def default_reduction_rule=(default_reduction_rule) + @default_reduction_rule = default_reduction_rule + + reduces.each do |r| + if r.rule == default_reduction_rule + r.default_reduction = true + end + end + end + + def has_conflicts? + !@conflicts.empty? + end + + def sr_conflicts + @conflicts.select do |conflict| + conflict.type == :shift_reduce + end + end + + def rr_conflicts + @conflicts.select do |conflict| + conflict.type == :reduce_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/reduce.rb b/tool/lrama/lib/lrama/state/reduce.rb new file mode 100644 index 0000000000..8ba51f45f2 --- /dev/null +++ b/tool/lrama/lib/lrama/state/reduce.rb @@ -0,0 +1,35 @@ +module Lrama + class State + class Reduce + # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html + attr_reader :item, :look_ahead, :not_selected_symbols + attr_accessor :default_reduction + + def initialize(item) + @item = item + @look_ahead = nil + @not_selected_symbols = [] + end + + def rule + @item.rule + end + + def look_ahead=(look_ahead) + @look_ahead = look_ahead.freeze + end + + def add_not_selected_symbol(sym) + @not_selected_symbols << sym + end + + def selected_look_ahead + if @look_ahead + @look_ahead - @not_selected_symbols + else + [] + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb new file mode 100644 index 0000000000..0a0e4dc20a --- /dev/null +++ b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ReduceReduceConflict < Struct.new(:symbols, :reduce1, :reduce2, keyword_init: true) + def type + :reduce_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/resolved_conflict.rb b/tool/lrama/lib/lrama/state/resolved_conflict.rb new file mode 100644 index 0000000000..02ea892147 --- /dev/null +++ b/tool/lrama/lib/lrama/state/resolved_conflict.rb @@ -0,0 +1,29 @@ +module Lrama + class State + # * symbol: A symbol under discussion + # * reduce: A reduce under discussion + # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) + class ResolvedConflict < Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) + def report_message + s = symbol.display_name + r = reduce.rule.precedence_sym.display_name + case + when which == :shift && same_prec + msg = "resolved as #{which} (%right #{s})" + when which == :shift + msg = "resolved as #{which} (#{r} < #{s})" + when which == :reduce && same_prec + msg = "resolved as #{which} (%left #{s})" + when which == :reduce + msg = "resolved as #{which} (#{s} < #{r})" + when which == :error + msg = "resolved as an #{which} (%nonassoc #{s})" + else + raise "Unknown direction. #{self}" + end + + "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/shift.rb b/tool/lrama/lib/lrama/state/shift.rb new file mode 100644 index 0000000000..2021eb61f6 --- /dev/null +++ b/tool/lrama/lib/lrama/state/shift.rb @@ -0,0 +1,13 @@ +module Lrama + class State + class Shift + attr_reader :next_sym, :next_items + attr_accessor :not_selected + + def initialize(next_sym, next_items) + @next_sym = next_sym + @next_items = next_items + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb new file mode 100644 index 0000000000..f80bd5f352 --- /dev/null +++ b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ShiftReduceConflict < Struct.new(:symbols, :shift, :reduce, keyword_init: true) + def type + :shift_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb new file mode 100644 index 0000000000..290e996b82 --- /dev/null +++ b/tool/lrama/lib/lrama/states.rb @@ -0,0 +1,556 @@ +require "forwardable" +require "lrama/report/duration" +require "lrama/states/item" + +module Lrama + # States is passed to a template file + # + # "Efficient Computation of LALR(1) Look-Ahead Sets" + # https://dl.acm.org/doi/pdf/10.1145/69622.357187 + class States + extend Forwardable + include Lrama::Report::Duration + + def_delegators "@grammar", :symbols, :terms, :nterms, :rules, + :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value! + + attr_reader :states, :reads_relation, :includes_relation, :lookback_relation + + def initialize(grammar, warning, trace_state: false) + @grammar = grammar + @warning = warning + @trace_state = trace_state + + @states = [] + + # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }` + # where p is state, A is nterm, t is term. + # + # `@direct_read_sets` is a hash whose + # key is [state.id, nterm.token_id], + # value is bitmap of term. + @direct_read_sets = {} + + # Reads relation on nonterminal transitions (pair of state and nterm) + # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε` + # where p, r are state, A, C are nterm. + # + # `@reads_relation` is a hash whose + # key is [state.id, nterm.token_id], + # value is array of [state.id, nterm.token_id]. + @reads_relation = {} + + # `@read_sets` is a hash whose + # key is [state.id, nterm.token_id], + # value is bitmap of term. + @read_sets = {} + + # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p` + # where p, p' are state, A, B are nterm, β, γ is sequence of symbol. + # + # `@includes_relation` is a hash whose + # key is [state.id, nterm.token_id], + # value is array of [state.id, nterm.token_id]. + @includes_relation = {} + + # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q` + # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol. + # + # `@lookback_relation` is a hash whose + # key is [state.id, rule.id], + # value is array of [state.id, nterm.token_id]. + @lookback_relation = {} + + # `@follow_sets` is a hash whose + # key is [state.id, rule.id], + # value is bitmap of term. + @follow_sets = {} + + # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)` + # + # `@la` is a hash whose + # key is [state.id, rule.id], + # value is bitmap of term. + @la = {} + end + + def compute + # Look Ahead Sets + report_duration(:compute_lr0_states) { compute_lr0_states } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + + # Conflicts + report_duration(:compute_conflicts) { compute_conflicts } + + report_duration(:compute_default_reduction) { compute_default_reduction } + + check_conflicts + end + + def reporter + StatesReporter.new(self) + end + + def states_count + @states.count + end + + def direct_read_sets + @direct_read_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def read_sets + @read_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def follow_sets + @follow_sets.transform_values do |v| + bitmap_to_terms(v) + end + end + + def la + @la.transform_values do |v| + bitmap_to_terms(v) + end + end + + private + + def sr_conflicts + @states.flat_map(&:sr_conflicts) + end + + def rr_conflicts + @states.flat_map(&:rr_conflicts) + end + + def trace_state + if @trace_state + yield STDERR + end + end + + def create_state(accessing_symbol, kernels, states_created) + # A item can appear in some states, + # so need to use `kernels` (not `kernels.first`) as a key. + # + # For example... + # + # %% + # program: '+' strings_1 + # | '-' strings_2 + # ; + # + # strings_1: string_1 + # ; + # + # strings_2: string_1 + # | string_2 + # ; + # + # string_1: string + # ; + # + # string_2: string '+' + # ; + # + # string: tSTRING + # ; + # %% + # + # For these grammar, there are 2 states + # + # State A + # string_1: string • + # + # State B + # string_1: string • + # string_2: string • '+' + # + return [states_created[kernels], false] if states_created[kernels] + + state = State.new(@states.count, accessing_symbol, kernels) + @states << state + states_created[kernels] = state + + return [state, true] + end + + def setup_state(state) + # closure + closure = [] + visited = {} + queued = {} + items = state.kernels.dup + + items.each do |item| + queued[item] = true + end + + while (item = items.shift) do + visited[item] = true + + if (sym = item.next_sym) && sym.nterm? + @grammar.find_rules_by_symbol!(sym).each do |rule| + i = Item.new(rule: rule, position: 0) + next if queued[i] + closure << i + items << i + queued[i] = true + end + end + end + + state.closure = closure.sort_by {|i| i.rule.id } + + # Trace + trace_state do |out| + out << "Closure: input\n" + state.kernels.each do |item| + out << " #{item.display_rest}\n" + end + out << "\n\n" + out << "Closure: output\n" + state.items.each do |item| + out << " #{item.display_rest}\n" + end + out << "\n\n" + end + + # shift & reduce + state.compute_shifts_reduces + end + + def enqueue_state(states, state) + # Trace + previous = state.kernels.first.previous_sym + trace_state do |out| + out << sprintf("state_list_append (state = %d, symbol = %d (%s))", + @states.count, previous.number, previous.display_name) + end + + states << state + end + + def compute_lr0_states + # State queue + states = [] + states_created = {} + + state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created) + enqueue_state(states, state) + + while (state = states.shift) do + # Trace + # + # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but + # I think it is not correct... + previous = state.kernels.first.previous_sym + trace_state do |out| + out << "Processing state #{state.id} (reached by #{previous.display_name})\n" + end + + setup_state(state) + + state.shifts.each do |shift| + new_state, created = create_state(shift.next_sym, shift.next_items, states_created) + state.set_items_to_state(shift.next_items, new_state) + enqueue_state(states, new_state) if created + end + end + end + + def nterm_transitions + a = [] + + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + a << [state, nterm, next_state] + end + end + + a + end + + def compute_direct_read_sets + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + + ary = next_state.term_transitions.map do |shift, _| + shift.next_sym.number + end + + key = [state.id, nterm.token_id] + @direct_read_sets[key] = Bitmap.from_array(ary) + end + end + end + + def compute_reads_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + next_state.nterm_transitions.each do |shift2, _next_state2| + nterm2 = shift2.next_sym + if nterm2.nullable + key = [state.id, nterm.token_id] + @reads_relation[key] ||= [] + @reads_relation[key] << [next_state.id, nterm2.token_id] + end + end + end + end + end + + def compute_read_sets + sets = nterm_transitions.map do |state, nterm, next_state| + [state.id, nterm.token_id] + end + + @read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute + end + + # Execute transition of state by symbols + # then return final state. + def transition(state, symbols) + symbols.each do |sym| + state = state.transition(sym) + end + + state + end + + def compute_includes_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + @grammar.find_rules_by_symbol!(nterm).each do |rule| + i = rule.rhs.count - 1 + + while (i > -1) do + sym = rule.rhs[i] + + break if sym.term? + state2 = transition(state, rule.rhs[0...i]) + # p' = state, B = nterm, p = state2, A = sym + key = [state2.id, sym.token_id] + # TODO: need to omit if state == state2 ? + @includes_relation[key] ||= [] + @includes_relation[key] << [state.id, nterm.token_id] + break if !sym.nullable + i -= 1 + end + end + end + end + end + + def compute_lookback_relation + @states.each do |state| + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + @grammar.find_rules_by_symbol!(nterm).each do |rule| + state2 = transition(state, rule.rhs) + # p = state, A = nterm, q = state2, A -> ω = rule + key = [state2.id, rule.id] + @lookback_relation[key] ||= [] + @lookback_relation[key] << [state.id, nterm.token_id] + end + end + end + end + + def compute_follow_sets + sets = nterm_transitions.map do |state, nterm, next_state| + [state.id, nterm.token_id] + end + + @follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute + end + + def compute_look_ahead_sets + @states.each do |state| + rules.each do |rule| + ary = @lookback_relation[[state.id, rule.id]] + next if !ary + + ary.each do |state2_id, nterm_token_id| + # q = state, A -> ω = rule, p = state2, A = nterm + follows = @follow_sets[[state2_id, nterm_token_id]] + + next if follows == 0 + + key = [state.id, rule.id] + @la[key] ||= 0 + look_ahead = @la[key] | follows + @la[key] |= look_ahead + + # No risk of conflict when + # * the state only has single reduce + # * the state only has nterm_transitions (GOTO) + next if state.reduces.count == 1 && state.term_transitions.count == 0 + + state.set_look_ahead(rule, bitmap_to_terms(look_ahead)) + end + end + end + end + + def bitmap_to_terms(bit) + ary = Bitmap.to_array(bit) + ary.map do |i| + @grammar.find_symbol_by_number!(i) + end + end + + def compute_conflicts + compute_shift_reduce_conflicts + compute_reduce_reduce_conflicts + end + + def compute_shift_reduce_conflicts + states.each do |state| + state.shifts.each do |shift| + state.reduces.each do |reduce| + sym = shift.next_sym + + next unless reduce.look_ahead + next if !reduce.look_ahead.include?(sym) + + # Shift/Reduce conflict + shift_prec = sym.precedence + reduce_prec = reduce.item.rule.precedence + + # Can resolve only when both have prec + unless shift_prec && reduce_prec + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) + next + end + + case + when shift_prec < reduce_prec + # Reduce is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce) + shift.not_selected = true + next + when shift_prec > reduce_prec + # Shift is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift) + reduce.add_not_selected_symbol(sym) + next + end + + # shift_prec == reduce_prec, then check associativity + case sym.precedence.type + when :precedence + # %precedence only specifies precedence and not specify associativity + # then a conflict is unresolved if precedence is same. + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) + next + when :right + # Shift is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true) + reduce.add_not_selected_symbol(sym) + next + when :left + # Reduce is selected + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true) + shift.not_selected = true + next + when :nonassoc + # Can not resolve + # + # nonassoc creates "run-time" error, precedence creates "compile-time" error. + # Then omit both the shift and reduce. + # + # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html + state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error) + shift.not_selected = true + reduce.add_not_selected_symbol(sym) + else + raise "Unknown precedence type. #{sym}" + end + end + end + end + end + + def compute_reduce_reduce_conflicts + states.each do |state| + count = state.reduces.count + + for i in 0...count do + reduce1 = state.reduces[i] + next if reduce1.look_ahead.nil? + + for j in (i+1)...count do + reduce2 = state.reduces[j] + next if reduce2.look_ahead.nil? + + intersection = reduce1.look_ahead & reduce2.look_ahead + + if !intersection.empty? + state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) + end + end + end + end + end + + def compute_default_reduction + states.each do |state| + next if state.reduces.empty? + # Do not set, if conflict exist + next if !state.conflicts.empty? + # Do not set, if shift with `error` exists. + next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol) + + state.default_reduction_rule = state.reduces.map do |r| + [r.rule, r.rule.id, (r.look_ahead || []).count] + end.min_by do |rule, rule_id, count| + [-count, rule_id] + end.first + end + end + + def check_conflicts + sr_count = sr_conflicts.count + rr_count = rr_conflicts.count + + if @grammar.expect + + expected_sr_conflicts = @grammar.expect + expected_rr_conflicts = 0 + + if expected_sr_conflicts != sr_count + @warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected") + end + + if expected_rr_conflicts != rr_count + @warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected") + end + else + if sr_count != 0 + @warning.warn("shift/reduce conflicts: #{sr_count} found") + end + + if rr_count != 0 + @warning.warn("reduce/reduce conflicts: #{rr_count} found") + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/states/item.rb b/tool/lrama/lib/lrama/states/item.rb new file mode 100644 index 0000000000..31b74b9d34 --- /dev/null +++ b/tool/lrama/lib/lrama/states/item.rb @@ -0,0 +1,81 @@ +# TODO: Validate position is not over rule rhs + +require "forwardable" + +module Lrama + class States + class Item < Struct.new(:rule, :position, keyword_init: true) + extend Forwardable + + def_delegators "rule", :lhs, :rhs + + # Optimization for States#setup_state + def hash + [rule_id, position].hash + end + + def rule_id + rule.id + end + + def empty_rule? + rule.empty_rule? + end + + def number_of_rest_symbols + rhs.count - position + end + + def next_sym + rhs[position] + end + + def next_next_sym + rhs[position + 1] + end + + def previous_sym + rhs[position - 1] + end + + def end_of_rule? + rhs.count == position + end + + def beginning_of_rule? + position == 0 + end + + def start_item? + rule.initial_rule? && beginning_of_rule? + end + + def new_by_next_position + Item.new(rule: rule, position: position + 1) + end + + def symbols_before_dot + rhs[0...position] + end + + def symbols_after_dot + rhs[position..-1] + end + + def to_s + "#{lhs.id.s_value}: #{display_name}" + end + + def display_name + r = rhs.map(&:display_name).insert(position, "•").join(" ") + "#{r} (rule #{rule_id})" + end + + # Right after position + def display_rest + r = rhs[position..-1].map(&:display_name).join(" ") + ". #{r} (rule #{rule_id})" + end + end + end +end diff --git a/tool/lrama/lib/lrama/states_reporter.rb b/tool/lrama/lib/lrama/states_reporter.rb new file mode 100644 index 0000000000..6f96cc6f65 --- /dev/null +++ b/tool/lrama/lib/lrama/states_reporter.rb @@ -0,0 +1,321 @@ +module Lrama + class StatesReporter + include Lrama::Report::Duration + + def initialize(states) + @states = states + end + + def report(io, **options) + report_duration(:report) do + _report(io, **options) + end + end + + private + + def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false) + # TODO: Unused terms + # TODO: Unused rules + + report_conflicts(io) + report_grammar(io) if grammar + report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + end + + def report_conflicts(io) + has_conflict = false + + @states.states.each do |state| + messages = [] + cs = state.conflicts.group_by(&:type) + if cs[:shift_reduce] + messages << "#{cs[:shift_reduce].count} shift/reduce" + end + + if cs[:reduce_reduce] + messages << "#{cs[:reduce_reduce].count} reduce/reduce" + end + + if !messages.empty? + has_conflict = true + io << "State #{state.id} conflicts: #{messages.join(', ')}\n" + end + end + + if has_conflict + io << "\n\n" + end + end + + def report_grammar(io) + io << "Grammar\n" + last_lhs = nil + + @states.rules.each do |rule| + if rule.empty_rule? + r = "ε" + else + r = rule.rhs.map(&:display_name).join(" ") + end + + if rule.lhs == last_lhs + io << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r) + else + io << "\n" + io << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r) + end + + last_lhs = rule.lhs + end + io << "\n\n" + end + + def report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + if counterexamples + cex = Counterexamples.new(@states) + end + + @states.states.each do |state| + # Report State + io << "State #{state.id}\n\n" + + # Report item + last_lhs = nil + list = itemsets ? state.items : state.kernels + list.sort_by {|i| [i.rule_id, i.position] }.each do |item| + if item.empty_rule? + r = "ε •" + else + r = item.rhs.map(&:display_name).insert(item.position, "•").join(" ") + end + if item.lhs == last_lhs + l = " " * item.lhs.id.s_value.length + "|" + else + l = item.lhs.id.s_value + ":" + end + la = "" + if lookaheads && item.end_of_rule? + reduce = state.find_reduce_by_item!(item) + look_ahead = reduce.selected_look_ahead + if !look_ahead.empty? + la = " [#{look_ahead.map(&:display_name).join(", ")}]" + end + end + last_lhs = item.lhs + + io << sprintf("%5i %s %s%s\n", item.rule_id, l, r, la) + end + io << "\n" + + # Report shifts + tmp = state.term_transitions.reject do |shift, _| + shift.not_selected + end.map do |shift, next_state| + [shift.next_sym, next_state.id] + end + max_len = tmp.map(&:first).map(&:display_name).map(&:length).max + tmp.each do |term, state_id| + io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n" + end + io << "\n" if !tmp.empty? + + # Report error caused by %nonassoc + nl = false + tmp = state.resolved_conflicts.select do |resolved| + resolved.which == :error + end.map do |error| + error.symbol.display_name + end + max_len = tmp.map(&:length).max + tmp.each do |name| + nl = true + io << " #{name.ljust(max_len)} error (nonassociative)\n" + end + io << "\n" if !tmp.empty? + + # Report reduces + nl = false + max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0 + max_len = [max_len, "$default".length].max if state.default_reduction_rule + ary = [] + + state.non_default_reduces.each do |reduce| + reduce.look_ahead.each do |term| + ary << [term, reduce] + end + end + + ary.sort_by do |term, reduce| + term.number + end.each do |term, reduce| + rule = reduce.item.rule + io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n" + nl = true + end + + if (r = state.default_reduction_rule) + nl = true + s = "$default".ljust(max_len) + + if r.initial_rule? + io << " #{s} accept\n" + else + io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n" + end + end + io << "\n" if nl + + # Report nonterminal transitions + tmp = [] + max_len = 0 + state.nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + tmp << [nterm, next_state.id] + max_len = [max_len, nterm.id.s_value.length].max + end + tmp.uniq! + tmp.sort_by! do |nterm, state_id| + nterm.number + end + tmp.each do |nterm, state_id| + io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n" + end + io << "\n" if !tmp.empty? + + if solved + # Report conflict resolutions + state.resolved_conflicts.each do |resolved| + io << " #{resolved.report_message}\n" + end + io << "\n" if !state.resolved_conflicts.empty? + end + + if counterexamples && state.has_conflicts? + # Report counterexamples + examples = cex.compute(state) + examples.each do |example| + label0 = example.type == :shift_reduce ? "shift/reduce" : "reduce/reduce" + label1 = example.type == :shift_reduce ? "Shift derivation" : "First Reduce derivation" + label2 = example.type == :shift_reduce ? "Reduce derivation" : "Second Reduce derivation" + + io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" + io << " #{example.path1_item}\n" + io << " #{example.path2_item}\n" + io << " #{label1}\n" + example.derivations1.render_strings_for_report.each do |str| + io << " #{str}\n" + end + io << " #{label2}\n" + example.derivations2.render_strings_for_report.each do |str| + io << " #{str}\n" + end + end + end + + if verbose + # Report direct_read_sets + io << " [Direct Read sets]\n" + direct_read_sets = @states.direct_read_sets + @states.nterms.each do |nterm| + terms = direct_read_sets[[state.id, nterm.token_id]] + next if !terms + next if terms.empty? + + str = terms.map {|sym| sym.id.s_value }.join(", ") + io << " read #{nterm.id.s_value} shift #{str}\n" + end + io << "\n" + + # Report reads_relation + io << " [Reads Relation]\n" + @states.nterms.each do |nterm| + a = @states.reads_relation[[state.id, nterm.token_id]] + next if !a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report read_sets + io << " [Read sets]\n" + read_sets = @states.read_sets + @states.nterms.each do |nterm| + terms = read_sets[[state.id, nterm.token_id]] + next if !terms + next if terms.empty? + + terms.each do |sym| + io << " #{sym.id.s_value}\n" + end + end + io << "\n" + + # Report includes_relation + io << " [Includes Relation]\n" + @states.nterms.each do |nterm| + a = @states.includes_relation[[state.id, nterm.token_id]] + next if !a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report lookback_relation + io << " [Lookback Relation]\n" + @states.rules.each do |rule| + a = @states.lookback_relation[[state.id, rule.id]] + next if !a + + a.each do |state_id2, nterm_id2| + n = @states.nterms.find {|n| n.token_id == nterm_id2 } + io << " (Rule: #{rule}) -> (State #{state_id2}, #{n.id.s_value})\n" + end + end + io << "\n" + + # Report follow_sets + io << " [Follow sets]\n" + follow_sets = @states.follow_sets + @states.nterms.each do |nterm| + terms = follow_sets[[state.id, nterm.token_id]] + + next if !terms + + terms.each do |sym| + io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n" + end + end + io << "\n" + + # Report LA + io << " [Look-Ahead Sets]\n" + tmp = [] + max_len = 0 + @states.rules.each do |rule| + syms = @states.la[[state.id, rule.id]] + next if !syms + + tmp << [rule, syms] + max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max + end + tmp.each do |rule, syms| + syms.each do |sym| + io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n" + end + end + io << "\n" if !tmp.empty? + end + + # End of Report State + io << "\n" + end + end + end +end diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb new file mode 100644 index 0000000000..ef840ce435 --- /dev/null +++ b/tool/lrama/lib/lrama/version.rb @@ -0,0 +1,3 @@ +module Lrama + VERSION = "0.6.9".freeze +end diff --git a/tool/lrama/lib/lrama/warning.rb b/tool/lrama/lib/lrama/warning.rb new file mode 100644 index 0000000000..3c99791ebf --- /dev/null +++ b/tool/lrama/lib/lrama/warning.rb @@ -0,0 +1,25 @@ +module Lrama + class Warning + attr_reader :errors, :warns + + def initialize(out = STDERR) + @out = out + @errors = [] + @warns = [] + end + + def error(message) + @out << message << "\n" + @errors << message + end + + def warn(message) + @out << message << "\n" + @warns << message + end + + def has_error? + !@errors.empty? + end + end +end |