diff options
| author | yui-knk <spiketeika@gmail.com> | 2023-05-20 12:27:45 +0900 |
|---|---|---|
| committer | Yuichiro Kaneko <spiketeika@gmail.com> | 2023-05-20 16:42:39 +0900 |
| commit | 41512cd1bff9ad06823e7dfd4bd024b9e82b3edb (patch) | |
| tree | 97efac11ce13ee4fc9f4a242729cec3bc187e696 | |
| parent | 9ce6c08cafc96f59a6cdf7436c1e708a8c6e4ce8 (diff) | |
Lrama v0.5.1
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/7831
| -rw-r--r-- | tool/lrama/lib/lrama.rb | 1 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/grammar.rb | 10 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/lexer.rb | 26 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/parser.rb | 53 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/parser/token_scanner.rb | 55 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/state.rb | 184 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/state/reduce.rb | 35 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/state/shift.rb | 13 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/states.rb | 244 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/states_reporter.rb | 8 | ||||
| -rw-r--r-- | tool/lrama/lib/lrama/version.rb | 2 |
11 files changed, 328 insertions, 303 deletions
diff --git a/tool/lrama/lib/lrama.rb b/tool/lrama/lib/lrama.rb index 1973cc6646..19f579c330 100644 --- a/tool/lrama/lib/lrama.rb +++ b/tool/lrama/lib/lrama.rb @@ -7,6 +7,7 @@ require "lrama/lexer" require "lrama/output" require "lrama/parser" require "lrama/report" +require "lrama/state" require "lrama/states" require "lrama/states_reporter" require "lrama/version" diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index 8ca75cb60b..1daec4446b 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -166,7 +166,7 @@ module Lrama when ref.type == :at # @n raise "@#{ref.number} can not be used in %printer." else - raise "Unexpected. #{code}, #{ref}" + raise "Unexpected. #{self}, #{ref}" end t_code[first_column..last_column] = str @@ -205,7 +205,7 @@ module Lrama i = -ref.position_in_rhs + ref.number str = "(yylsp[#{i}])" else - raise "Unexpected. #{code}, #{ref}" + raise "Unexpected. #{self}, #{ref}" end t_code[first_column..last_column] = str @@ -235,7 +235,7 @@ module Lrama when ref.type == :at # @n raise "@#{ref.number} can not be used in initial_action." else - raise "Unexpected. #{code}, #{ref}" + raise "Unexpected. #{self}, #{ref}" end t_code[first_column..last_column] = str @@ -716,7 +716,7 @@ module Lrama # If id is Token::Char, it uses ASCII code if sym.term? && sym.token_id.nil? if sym.id.type == Token::Char - # Igonre ' on the both sides + # Ignore ' on the both sides case sym.id.s_value[1..-2] when "\\b" sym.token_id = 8 @@ -844,7 +844,7 @@ module Lrama return if invalid.empty? - raise "Symbol number is dupulicated. #{invalid}" + raise "Symbol number is duplicated. #{invalid}" end end end diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index 5c9583327b..6c1139b416 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -206,6 +206,8 @@ module Lrama when ss.scan(/\/\*/) # TODO: Need to keep comment? line = lex_comment(ss, line, lines, "") + when ss.scan(/\/\//) + line = lex_line_comment(ss, line, "") when ss.scan(/'(.)'/) tokens << create_token(Token::Char, ss[0], line, ss.pos - column) when ss.scan(/'\\(.)'/) # '\\', '\t' @@ -218,7 +220,7 @@ module Lrama l = line - lines.first[1] split = ss.string.split("\n") col = ss.pos - split[0...l].join("\n").length - raise "Parse error (unknow token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})" + raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})" end end end @@ -276,6 +278,9 @@ module Lrama when ss.scan(/\/\*/) str << ss[0] line = lex_comment(ss, line, lines, str) + when ss.scan(/\/\//) + str << ss[0] + line = lex_line_comment(ss, line, str) else # noop, just consume char str << ss.getch @@ -314,8 +319,6 @@ module Lrama raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})" end - # TODO: Need to handle // style comment - # # /* */ style comment def lex_comment(ss, line, lines, str) while !ss.eos? do @@ -337,6 +340,23 @@ module Lrama raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})" end + # // style comment + def lex_line_comment(ss, line, str) + while !ss.eos? do + case + when ss.scan(/\n/) + return line + 1 + else + str << ss.getch + next + end + + str << ss[0] + end + + line # Reach to end of input + end + def lex_grammar_rules_tokens lex_common(@grammar_rules, @grammar_rules_tokens) end diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index e90a94c637..a2d3b9e0d4 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -1,4 +1,5 @@ require "lrama/report" +require "lrama/parser/token_scanner" module Lrama # Parser for parse.y, generates a grammar @@ -7,58 +8,6 @@ module Lrama T = Lrama::Lexer::Token - class TokenScanner - def initialize(tokens) - @tokens = tokens - @index = 0 - end - - def current_token - @tokens[@index] - end - - def current_type - current_token && current_token.type - end - - def next - token = current_token - @index += 1 - return token - end - - def consume(*token_types) - if token_types.include?(current_type) - token = current_token - self.next - return token - end - - return nil - end - - def consume!(*token_types) - consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}") - end - - def consume_multi(*token_types) - a = [] - - while token_types.include?(current_type) - a << current_token - self.next - end - - raise "No token is consumed. #{token_types}" if a.empty? - - return a - end - - def eots? - current_token.nil? - end - end - def initialize(text) @text = text end diff --git a/tool/lrama/lib/lrama/parser/token_scanner.rb b/tool/lrama/lib/lrama/parser/token_scanner.rb new file mode 100644 index 0000000000..b9c1522aff --- /dev/null +++ b/tool/lrama/lib/lrama/parser/token_scanner.rb @@ -0,0 +1,55 @@ +module Lrama + class Parser + class TokenScanner + def initialize(tokens) + @tokens = tokens + @index = 0 + end + + def current_token + @tokens[@index] + end + + def current_type + current_token && current_token.type + end + + def next + token = current_token + @index += 1 + return token + end + + def consume(*token_types) + if token_types.include?(current_type) + token = current_token + self.next + return token + end + + return nil + end + + def consume!(*token_types) + consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}") + end + + def consume_multi(*token_types) + a = [] + + while token_types.include?(current_type) + a << current_token + self.next + end + + raise "No token is consumed. #{token_types}" if a.empty? + + return a + end + + def eots? + current_token.nil? + end + end + end +end diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb new file mode 100644 index 0000000000..65ca3bcb46 --- /dev/null +++ b/tool/lrama/lib/lrama/state.rb @@ -0,0 +1,184 @@ +require "lrama/state/reduce" +require "lrama/state/shift" + +module Lrama + class State + # * symbol: A symbol under discussion + # * reduce: A reduce under discussion + # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) + ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do + def report_message + s = symbol.display_name + r = reduce.rule.precedence_sym.display_name + case + when which == :shift && same_prec + msg = "resolved as #{which} (%right #{s})" + when which == :shift + msg = "resolved as #{which} (#{r} < #{s})" + when which == :reduce && same_prec + msg = "resolved as #{which} (%left #{s})" + when which == :reduce + msg = "resolved as #{which} (#{s} < #{r})" + when which == :error + msg = "resolved as an #{which} (%nonassoc #{s})" + else + raise "Unknown direction. #{self}" + end + + "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." + end + end + + Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true) + + attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, + :default_reduction_rule, :closure, :items + attr_accessor :shifts, :reduces + + def initialize(id, accessing_symbol, kernels) + @id = id + @accessing_symbol = accessing_symbol + @kernels = kernels.freeze + @items = @kernels + # Manage relationships between items to state + # to resolve next state + @items_to_state = {} + @conflicts = [] + @resolved_conflicts = [] + @default_reduction_rule = nil + end + + def closure=(closure) + @closure = closure + @items = @kernels + @closure + end + + def non_default_reduces + reduces.select do |reduce| + reduce.rule != @default_reduction_rule + end + end + + def compute_shifts_reduces + _shifts = {} + reduces = [] + items.each do |item| + # TODO: Consider what should be pushed + if item.end_of_rule? + reduces << Reduce.new(item) + else + key = item.next_sym + _shifts[key] ||= [] + _shifts[key] << item.new_by_next_position + end + end + + # It seems Bison 3.8.2 iterates transitions order by symbol number + shifts = _shifts.sort_by do |next_sym, new_items| + next_sym.number + end.map do |next_sym, new_items| + Shift.new(next_sym, new_items.flatten) + end + self.shifts = shifts.freeze + self.reduces = reduces.freeze + end + + def set_items_to_state(items, next_state) + @items_to_state[items] = next_state + end + + # + def set_look_ahead(rule, look_ahead) + reduce = reduces.find do |r| + r.rule == rule + end + + reduce.look_ahead = look_ahead + end + + # Returns array of [nterm, next_state] + def nterm_transitions + return @nterm_transitions if @nterm_transitions + + @nterm_transitions = [] + + shifts.each do |shift| + next if shift.next_sym.term? + + @nterm_transitions << [shift, @items_to_state[shift.next_items]] + end + + @nterm_transitions + end + + # Returns array of [term, next_state] + def term_transitions + return @term_transitions if @term_transitions + + @term_transitions = [] + + shifts.each do |shift| + next if shift.next_sym.nterm? + + @term_transitions << [shift, @items_to_state[shift.next_items]] + end + + @term_transitions + end + + def selected_term_transitions + term_transitions.select do |shift, next_state| + !shift.not_selected + end + end + + # Move to next state by sym + def transition(sym) + result = nil + + if sym.term? + term_transitions.each do |shift, next_state| + term = shift.next_sym + result = next_state if term == sym + end + else + nterm_transitions.each do |shift, next_state| + nterm = shift.next_sym + result = next_state if nterm == sym + end + end + + raise "Can not transit by #{sym} #{self}" if result.nil? + + result + end + + def find_reduce_by_item!(item) + reduces.find do |r| + r.item == item + end || (raise "reduce is not found. #{item}") + end + + def default_reduction_rule=(default_reduction_rule) + @default_reduction_rule = default_reduction_rule + + reduces.each do |r| + if r.rule == default_reduction_rule + r.default_reduction = true + end + end + end + + def sr_conflicts + @conflicts.select do |conflict| + conflict.type == :shift_reduce + end + end + + def rr_conflicts + @conflicts.select do |conflict| + conflict.type == :reduce_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/reduce.rb b/tool/lrama/lib/lrama/state/reduce.rb new file mode 100644 index 0000000000..8ba51f45f2 --- /dev/null +++ b/tool/lrama/lib/lrama/state/reduce.rb @@ -0,0 +1,35 @@ +module Lrama + class State + class Reduce + # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html + attr_reader :item, :look_ahead, :not_selected_symbols + attr_accessor :default_reduction + + def initialize(item) + @item = item + @look_ahead = nil + @not_selected_symbols = [] + end + + def rule + @item.rule + end + + def look_ahead=(look_ahead) + @look_ahead = look_ahead.freeze + end + + def add_not_selected_symbol(sym) + @not_selected_symbols << sym + end + + def selected_look_ahead + if @look_ahead + @look_ahead - @not_selected_symbols + else + [] + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/shift.rb b/tool/lrama/lib/lrama/state/shift.rb new file mode 100644 index 0000000000..2021eb61f6 --- /dev/null +++ b/tool/lrama/lib/lrama/state/shift.rb @@ -0,0 +1,13 @@ +module Lrama + class State + class Shift + attr_reader :next_sym, :next_items + attr_accessor :not_selected + + def initialize(next_sym, next_items) + @next_sym = next_sym + @next_items = next_items + end + end + end +end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb index f907db30cc..64be781df6 100644 --- a/tool/lrama/lib/lrama/states.rb +++ b/tool/lrama/lib/lrama/states.rb @@ -2,228 +2,6 @@ require "forwardable" require "lrama/report" module Lrama - class State - class Reduce - # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html - attr_reader :item, :look_ahead, :not_selected_symbols - attr_accessor :default_reduction - - def initialize(item) - @item = item - @look_ahead = nil - @not_selected_symbols = [] - end - - def rule - @item.rule - end - - def look_ahead=(look_ahead) - @look_ahead = look_ahead.freeze - end - - def add_not_selected_symbol(sym) - @not_selected_symbols << sym - end - - def selected_look_ahead - if @look_ahead - @look_ahead - @not_selected_symbols - else - [] - end - end - end - - class Shift - attr_reader :next_sym, :next_items - attr_accessor :not_selected - - def initialize(next_sym, next_items) - @next_sym = next_sym - @next_items = next_items - end - end - - # * symbol: A symbol under discussion - # * reduce: A reduce under discussion - # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) - ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do - def report_message - s = symbol.display_name - r = reduce.rule.precedence_sym.display_name - case - when which == :shift && same_prec - msg = "resolved as #{which} (%right #{s})" - when which == :shift - msg = "resolved as #{which} (#{r} < #{s})" - when which == :reduce && same_prec - msg = "resolved as #{which} (%left #{s})" - when which == :reduce - msg = "resolved as #{which} (#{s} < #{r})" - when which == :error - msg = "resolved as an #{which} (%nonassoc #{s})" - else - raise "Unknown direction. #{self}" - end - - "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." - end - end - - Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true) - - attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, - :default_reduction_rule, :closure, :items - attr_accessor :shifts, :reduces - - def initialize(id, accessing_symbol, kernels) - @id = id - @accessing_symbol = accessing_symbol - @kernels = kernels.freeze - @items = @kernels - # Manage relationships between items to state - # to resolve next state - @items_to_state = {} - @conflicts = [] - @resolved_conflicts = [] - @default_reduction_rule = nil - end - - def closure=(closure) - @closure = closure - @items = @kernels + @closure - end - - def non_default_reduces - reduces.select do |reduce| - reduce.rule != @default_reduction_rule - end - end - - def compute_shifts_reduces - _shifts = {} - reduces = [] - items.each do |item| - # TODO: Consider what should be pushed - if item.end_of_rule? - reduces << Reduce.new(item) - else - key = item.next_sym - _shifts[key] ||= [] - _shifts[key] << item.new_by_next_position - end - end - - # It seems Bison 3.8.2 iterates transitions order by symbol number - shifts = _shifts.sort_by do |next_sym, new_items| - next_sym.number - end.map do |next_sym, new_items| - Shift.new(next_sym, new_items.flatten) - end - self.shifts = shifts.freeze - self.reduces = reduces.freeze - end - - def set_items_to_state(items, next_state) - @items_to_state[items] = next_state - end - - # - def set_look_ahead(rule, look_ahead) - reduce = reduces.find do |r| - r.rule == rule - end - - reduce.look_ahead = look_ahead - end - - # Returns array of [nterm, next_state] - def nterm_transitions - return @nterm_transitions if @nterm_transitions - - @nterm_transitions = [] - - shifts.each do |shift| - next if shift.next_sym.term? - - @nterm_transitions << [shift, @items_to_state[shift.next_items]] - end - - @nterm_transitions - end - - # Returns array of [term, next_state] - def term_transitions - return @term_transitions if @term_transitions - - @term_transitions = [] - - shifts.each do |shift| - next if shift.next_sym.nterm? - - @term_transitions << [shift, @items_to_state[shift.next_items]] - end - - @term_transitions - end - - def selected_term_transitions - term_transitions.select do |shift, next_state| - !shift.not_selected - end - end - - # Move to next state by sym - def transition(sym) - result = nil - - if sym.term? - term_transitions.each do |shift, next_state| - term = shift.next_sym - result = next_state if term == sym - end - else - nterm_transitions.each do |shift, next_state| - nterm = shift.next_sym - result = next_state if nterm == sym - end - end - - raise "Can not transit by #{sym} #{self}" if result.nil? - - result - end - - def find_reduce_by_item!(item) - reduces.find do |r| - r.item == item - end || (raise "reduce is not found. #{item}, #{state}") - end - - def default_reduction_rule=(default_reduction_rule) - @default_reduction_rule = default_reduction_rule - - reduces.each do |r| - if r.rule == default_reduction_rule - r.default_reduction = true - end - end - end - - def sr_conflicts - @conflicts.select do |conflict| - conflict.type == :shift_reduce - end - end - - def rr_conflicts - @conflicts.select do |conflict| - conflict.type == :reduce_reduce - end - end - end - # States is passed to a template file # # "Efficient Computation of LALR(1) Look-Ahead Sets" @@ -411,23 +189,13 @@ module Lrama @states.flat_map(&:rr_conflicts) end - def initial_attrs - h = {} - - attrs.each do |attr| - h[attr.id] = false - end - - h - end - def trace_state if @trace_state yield STDERR end end - def create_state(accessing_symbol, kernels, states_creted) + def create_state(accessing_symbol, kernels, states_created) # A item can appear in some states, # so need to use `kernels` (not `kernels.first`) as a key. # @@ -464,11 +232,11 @@ module Lrama # string_1: string • # string_2: string • '+' # - return [states_creted[kernels], false] if states_creted[kernels] + return [states_created[kernels], false] if states_created[kernels] state = State.new(@states.count, accessing_symbol, kernels) @states << state - states_creted[kernels] = state + states_created[kernels] = state return [state, true] end @@ -532,9 +300,9 @@ module Lrama def compute_lr0_states # State queue states = [] - states_creted = {} + states_created = {} - state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_creted) + state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created) enqueue_state(states, state) while (state = states.shift) do @@ -550,7 +318,7 @@ module Lrama setup_state(state) state.shifts.each do |shift| - new_state, created = create_state(shift.next_sym, shift.next_items, states_creted) + new_state, created = create_state(shift.next_sym, shift.next_items, states_created) state.set_items_to_state(shift.next_items, new_state) enqueue_state(states, new_state) if created end diff --git a/tool/lrama/lib/lrama/states_reporter.rb b/tool/lrama/lib/lrama/states_reporter.rb index 25893e61be..d3dbe6b1b4 100644 --- a/tool/lrama/lib/lrama/states_reporter.rb +++ b/tool/lrama/lib/lrama/states_reporter.rb @@ -210,7 +210,7 @@ module Lrama io << "\n" - # Reprot reads_relation + # Report reads_relation io << " [Reads Relation]\n" @states.nterms.each do |nterm| a = @states.reads_relation[[state.id, nterm.token_id]] @@ -224,7 +224,7 @@ module Lrama io << "\n" - # Reprot read_sets + # Report read_sets io << " [Read sets]\n" read_sets = @states.read_sets @states.nterms.each do |nterm| @@ -239,7 +239,7 @@ module Lrama io << "\n" - # Reprot includes_relation + # Report includes_relation io << " [Includes Relation]\n" @states.nterms.each do |nterm| a = @states.includes_relation[[state.id, nterm.token_id]] @@ -267,7 +267,7 @@ module Lrama io << "\n" - # Reprot follow_sets + # Report follow_sets io << " [Follow sets]\n" follow_sets = @states.follow_sets @states.nterms.each do |nterm| diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index d928cdad45..0054f3d0bd 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,3 +1,3 @@ module Lrama - VERSION = "0.5.0".freeze + VERSION = "0.5.1".freeze end |
