summaryrefslogtreecommitdiff
path: root/tool/lrama/lib/lrama/grammar/rule_builder.rb
diff options
context:
space:
mode:
Diffstat (limited to 'tool/lrama/lib/lrama/grammar/rule_builder.rb')
-rw-r--r--tool/lrama/lib/lrama/grammar/rule_builder.rb268
1 files changed, 268 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/grammar/rule_builder.rb b/tool/lrama/lib/lrama/grammar/rule_builder.rb
new file mode 100644
index 0000000000..ccb41e67f8
--- /dev/null
+++ b/tool/lrama/lib/lrama/grammar/rule_builder.rb
@@ -0,0 +1,268 @@
+module Lrama
+ class Grammar
+ class RuleBuilder
+ attr_accessor :lhs, :line
+ attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
+
+ def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
+ @rule_counter = rule_counter
+ @midrule_action_counter = midrule_action_counter
+ @position_in_original_rule_rhs = position_in_original_rule_rhs
+ @skip_preprocess_references = skip_preprocess_references
+
+ @lhs = nil
+ @lhs_tag = lhs_tag
+ @rhs = []
+ @user_code = nil
+ @precedence_sym = nil
+ @line = nil
+ @rules = []
+ @rule_builders_for_parameterizing_rules = []
+ @rule_builders_for_derived_rules = []
+ @rule_builders_for_inline_rules = []
+ @parameterizing_rules = []
+ @inline_rules = []
+ @midrule_action_rules = []
+ end
+
+ def add_rhs(rhs)
+ if !@line
+ @line = rhs.line
+ end
+
+ flush_user_code
+
+ @rhs << rhs
+ end
+
+ def user_code=(user_code)
+ if !@line
+ @line = user_code&.line
+ end
+
+ flush_user_code
+
+ @user_code = user_code
+ end
+
+ def precedence_sym=(precedence_sym)
+ flush_user_code
+
+ @precedence_sym = precedence_sym
+ end
+
+ def complete_input
+ freeze_rhs
+ end
+
+ def setup_rules(parameterizing_rule_resolver)
+ preprocess_references unless @skip_preprocess_references
+ if rhs.any? { |token| parameterizing_rule_resolver.find_inline(token) }
+ resolve_inline(parameterizing_rule_resolver)
+ else
+ process_rhs(parameterizing_rule_resolver)
+ end
+ build_rules
+ end
+
+ def rules
+ @parameterizing_rules + @inline_rules + @midrule_action_rules + @rules
+ end
+
+ private
+
+ def freeze_rhs
+ @rhs.freeze
+ end
+
+ def preprocess_references
+ numberize_references
+ end
+
+ def build_rules
+ tokens = @replaced_rhs
+
+ if tokens
+ rule = Rule.new(
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
+ position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
+ )
+ @rules = [rule]
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
+ rule_builder.rules
+ end.flatten
+ @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
+ rule_builder.rules
+ end.flatten
+ @midrule_action_rules.each do |r|
+ r.original_rule = rule
+ end
+ else
+ @inline_rules = @rule_builders_for_inline_rules.map do |rule_builder|
+ rule_builder.rules
+ end.flatten
+ end
+ end
+
+ # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on.
+ # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
+ def process_rhs(parameterizing_rule_resolver)
+ return if @replaced_rhs
+
+ @replaced_rhs = []
+
+ rhs.each_with_index do |token, i|
+ case token
+ when Lrama::Lexer::Token::Char
+ @replaced_rhs << token
+ when Lrama::Lexer::Token::Ident
+ @replaced_rhs << token
+ when Lrama::Lexer::Token::InstantiateRule
+ parameterizing_rule = parameterizing_rule_resolver.find_rule(token)
+ raise "Unexpected token. #{token}" unless parameterizing_rule
+
+ bindings = Binding.new(parameterizing_rule, token.args)
+ lhs_s_value = lhs_s_value(token, bindings)
+ if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
+ @replaced_rhs << created_lhs
+ else
+ lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
+ @replaced_rhs << lhs_token
+ parameterizing_rule_resolver.created_lhs_list << lhs_token
+ parameterizing_rule.rhs_list.each do |r|
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
+ rule_builder.lhs = lhs_token
+ r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
+ rule_builder.line = line
+ rule_builder.precedence_sym = r.precedence_sym
+ rule_builder.user_code = r.resolve_user_code(bindings)
+ rule_builder.complete_input
+ rule_builder.setup_rules(parameterizing_rule_resolver)
+ @rule_builders_for_parameterizing_rules << rule_builder
+ end
+ end
+ when Lrama::Lexer::Token::UserCode
+ prefix = token.referred ? "@" : "$@"
+ tag = token.tag || lhs_tag
+ new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
+ @replaced_rhs << new_token
+
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: tag, skip_preprocess_references: true)
+ rule_builder.lhs = new_token
+ rule_builder.user_code = token
+ rule_builder.complete_input
+ rule_builder.setup_rules(parameterizing_rule_resolver)
+
+ @rule_builders_for_derived_rules << rule_builder
+ else
+ raise "Unexpected token. #{token}"
+ end
+ end
+ end
+
+ def lhs_s_value(token, bindings)
+ s_values = token.args.map do |arg|
+ resolved = bindings.resolve_symbol(arg)
+ if resolved.is_a?(Lexer::Token::InstantiateRule)
+ [resolved.s_value, resolved.args.map(&:s_value)]
+ else
+ resolved.s_value
+ end
+ end
+ "#{token.rule_name}_#{s_values.join('_')}"
+ end
+
+ def resolve_inline(parameterizing_rule_resolver)
+ rhs.each_with_index do |token, i|
+ if inline_rule = parameterizing_rule_resolver.find_inline(token)
+ inline_rule.rhs_list.each_with_index do |inline_rhs|
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: lhs_tag, skip_preprocess_references: true)
+ resolve_inline_rhs(rule_builder, inline_rhs, i)
+ rule_builder.lhs = lhs
+ rule_builder.line = line
+ rule_builder.user_code = replace_inline_user_code(inline_rhs, i)
+ rule_builder.complete_input
+ rule_builder.setup_rules(parameterizing_rule_resolver)
+ @rule_builders_for_inline_rules << rule_builder
+ end
+ end
+ end
+ end
+
+ def resolve_inline_rhs(rule_builder, inline_rhs, index)
+ rhs.each_with_index do |token, i|
+ if index == i
+ inline_rhs.symbols.each { |sym| rule_builder.add_rhs(sym) }
+ else
+ rule_builder.add_rhs(token)
+ end
+ end
+ end
+
+ def replace_inline_user_code(inline_rhs, index)
+ return user_code if inline_rhs.user_code.nil?
+ return user_code if user_code.nil?
+
+ code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value)
+ Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location)
+ end
+
+ def numberize_references
+ # Bison n'th component is 1-origin
+ (rhs + [user_code]).compact.each.with_index(1) do |token, i|
+ next unless token.is_a?(Lrama::Lexer::Token::UserCode)
+
+ token.references.each do |ref|
+ ref_name = ref.name
+
+ if ref_name
+ if ref_name == '$'
+ ref.name = '$'
+ else
+ candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) }
+
+ if candidates.size >= 2
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
+ end
+
+ unless (referring_symbol = candidates.first)
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
+ end
+
+ if referring_symbol[1] == 0 # Refers to LHS
+ ref.name = '$'
+ else
+ ref.number = referring_symbol[1]
+ end
+ end
+ end
+
+ if ref.number
+ # TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens,
+ # `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation,
+ # it needs resolves from number to index.
+ ref.index = ref.number
+ end
+
+ # TODO: Need to check index of @ too?
+ next if ref.type == :at
+
+ if ref.index
+ # TODO: Prohibit $0 even so Bison allows it?
+ # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
+ token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
+ rhs[ref.index - 1].referred = true
+ end
+ end
+ end
+ end
+
+ def flush_user_code
+ if (c = @user_code)
+ @rhs << c
+ @user_code = nil
+ end
+ end
+ end
+ end
+end