summaryrefslogtreecommitdiff
path: root/lib/racc/grammarfileparser.rb
diff options
context:
space:
mode:
authorHiroshi SHIBATA <hsbt@ruby-lang.org>2019-05-13 21:25:22 +0900
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2019-06-19 18:17:25 +0900
commit1a2546c2be839baa7d0a50dc056d4d6987d26852 (patch)
tree19fef5d8b8d96452a51ab68e8093ea895192ca27 /lib/racc/grammarfileparser.rb
parentcbe06cd3501fdadd0e6e63094da2973484d70b0b (diff)
Backport racc-1.4.15 from upstream.
Diffstat (limited to 'lib/racc/grammarfileparser.rb')
-rw-r--r--lib/racc/grammarfileparser.rb559
1 files changed, 559 insertions, 0 deletions
diff --git a/lib/racc/grammarfileparser.rb b/lib/racc/grammarfileparser.rb
new file mode 100644
index 0000000000..7548a9ea37
--- /dev/null
+++ b/lib/racc/grammarfileparser.rb
@@ -0,0 +1,559 @@
+#
+# $Id: 5e1871defa15d288d2252e6a76bb2c4cf2119ed3 $
+#
+# Copyright (c) 1999-2006 Minero Aoki
+#
+# This program is free software.
+# You can distribute/modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+# For details of the GNU LGPL, see the file "COPYING".
+#
+
+require 'racc'
+require 'racc/compat'
+require 'racc/grammar'
+require 'racc/parserfilegenerator'
+require 'racc/sourcetext'
+require 'stringio'
+
+module Racc
+
+ grammar = Grammar.define {
+ g = self
+
+ g.class = seq(:CLASS, :cname, many(:param), :RULE, :rules, option(:END))
+
+ g.cname = seq(:rubyconst) {|name|
+ @result.params.classname = name
+ }\
+ | seq(:rubyconst, "<", :rubyconst) {|c, _, s|
+ @result.params.classname = c
+ @result.params.superclass = s
+ }
+
+ g.rubyconst = separated_by1(:colon2, :SYMBOL) {|syms|
+ syms.map {|s| s.to_s }.join('::')
+ }
+
+ g.colon2 = seq(':', ':')
+
+ g.param = seq(:CONV, many1(:convdef), :END) {|*|
+ #@grammar.end_convert_block # FIXME
+ }\
+ | seq(:PRECHIGH, many1(:precdef), :PRECLOW) {|*|
+ @grammar.end_precedence_declaration true
+ }\
+ | seq(:PRECLOW, many1(:precdef), :PRECHIGH) {|*|
+ @grammar.end_precedence_declaration false
+ }\
+ | seq(:START, :symbol) {|_, sym|
+ @grammar.start_symbol = sym
+ }\
+ | seq(:TOKEN, :symbols) {|_, syms|
+ syms.each do |s|
+ s.should_terminal
+ end
+ }\
+ | seq(:OPTION, :options) {|_, syms|
+ syms.each do |opt|
+ case opt
+ when 'result_var'
+ @result.params.result_var = true
+ when 'no_result_var'
+ @result.params.result_var = false
+ when 'omit_action_call'
+ @result.params.omit_action_call = true
+ when 'no_omit_action_call'
+ @result.params.omit_action_call = false
+ else
+ raise CompileError, "unknown option: #{opt}"
+ end
+ end
+ }\
+ | seq(:EXPECT, :DIGIT) {|_, num|
+ if @grammar.n_expected_srconflicts
+ raise CompileError, "`expect' seen twice"
+ end
+ @grammar.n_expected_srconflicts = num
+ }
+
+ g.convdef = seq(:symbol, :STRING) {|sym, code|
+ sym.serialized = code
+ }
+
+ g.precdef = seq(:LEFT, :symbols) {|_, syms|
+ @grammar.declare_precedence :Left, syms
+ }\
+ | seq(:RIGHT, :symbols) {|_, syms|
+ @grammar.declare_precedence :Right, syms
+ }\
+ | seq(:NONASSOC, :symbols) {|_, syms|
+ @grammar.declare_precedence :Nonassoc, syms
+ }
+
+ g.symbols = seq(:symbol) {|sym|
+ [sym]
+ }\
+ | seq(:symbols, :symbol) {|list, sym|
+ list.push sym
+ list
+ }\
+ | seq(:symbols, "|")
+
+ g.symbol = seq(:SYMBOL) {|sym| @grammar.intern(sym) }\
+ | seq(:STRING) {|str| @grammar.intern(str) }
+
+ g.options = many(:SYMBOL) {|syms| syms.map {|s| s.to_s } }
+
+ g.rules = option(:rules_core) {|list|
+ add_rule_block list unless list.empty?
+ nil
+ }
+
+ g.rules_core = seq(:symbol) {|sym|
+ [sym]
+ }\
+ | seq(:rules_core, :rule_item) {|list, i|
+ list.push i
+ list
+ }\
+ | seq(:rules_core, ';') {|list, *|
+ add_rule_block list unless list.empty?
+ list.clear
+ list
+ }\
+ | seq(:rules_core, ':') {|list, *|
+ next_target = list.pop
+ add_rule_block list unless list.empty?
+ [next_target]
+ }
+
+ g.rule_item = seq(:symbol)\
+ | seq("|") {|*|
+ OrMark.new(@scanner.lineno)
+ }\
+ | seq("=", :symbol) {|_, sym|
+ Prec.new(sym, @scanner.lineno)
+ }\
+ | seq(:ACTION) {|src|
+ UserAction.source_text(src)
+ }
+ }
+
+ GrammarFileParser = grammar.parser_class
+
+ if grammar.states.srconflict_exist?
+ raise 'Racc boot script fatal: S/R conflict in build'
+ end
+ if grammar.states.rrconflict_exist?
+ raise 'Racc boot script fatal: R/R conflict in build'
+ end
+
+ class GrammarFileParser # reopen
+
+ class Result
+ def initialize(grammar)
+ @grammar = grammar
+ @params = ParserFileGenerator::Params.new
+ end
+
+ attr_reader :grammar
+ attr_reader :params
+ end
+
+ def GrammarFileParser.parse_file(filename)
+ parse(File.read(filename), filename, 1)
+ end
+
+ def GrammarFileParser.parse(src, filename = '-', lineno = 1)
+ new().parse(src, filename, lineno)
+ end
+
+ def initialize(debug_flags = DebugFlags.new)
+ @yydebug = debug_flags.parse
+ end
+
+ def parse(src, filename = '-', lineno = 1)
+ @filename = filename
+ @lineno = lineno
+ @scanner = GrammarFileScanner.new(src, @filename)
+ @scanner.debug = @yydebug
+ @grammar = Grammar.new
+ @result = Result.new(@grammar)
+ @embedded_action_seq = 0
+ yyparse @scanner, :yylex
+ parse_user_code
+ @result.grammar.init
+ @result
+ end
+
+ private
+
+ def next_token
+ @scanner.scan
+ end
+
+ def on_error(tok, val, _values)
+ if val.respond_to?(:id2name)
+ v = val.id2name
+ elsif val.kind_of?(String)
+ v = val
+ else
+ v = val.inspect
+ end
+ raise CompileError, "#{location()}: unexpected token '#{v}'"
+ end
+
+ def location
+ "#{@filename}:#{@lineno - 1 + @scanner.lineno}"
+ end
+
+ def add_rule_block(list)
+ sprec = nil
+ target = list.shift
+ case target
+ when OrMark, UserAction, Prec
+ raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}"
+ end
+ curr = []
+ list.each do |i|
+ case i
+ when OrMark
+ add_rule target, curr, sprec
+ curr = []
+ sprec = nil
+ when Prec
+ raise CompileError, "'=<prec>' used twice in one rule" if sprec
+ sprec = i.symbol
+ else
+ curr.push i
+ end
+ end
+ add_rule target, curr, sprec
+ end
+
+ def add_rule(target, list, sprec)
+ if list.last.kind_of?(UserAction)
+ act = list.pop
+ else
+ act = UserAction.empty
+ end
+ list.map! {|s| s.kind_of?(UserAction) ? embedded_action(s) : s }
+ rule = Rule.new(target, list, act)
+ rule.specified_prec = sprec
+ @grammar.add rule
+ end
+
+ def embedded_action(act)
+ sym = @grammar.intern("@#{@embedded_action_seq += 1}".intern, true)
+ @grammar.add Rule.new(sym, [], act)
+ sym
+ end
+
+ #
+ # User Code Block
+ #
+
+ def parse_user_code
+ line = @scanner.lineno
+ _, *blocks = *@scanner.epilogue.split(/^----/)
+ blocks.each do |block|
+ header, *body = block.lines.to_a
+ label0, pathes = *header.sub(/\A-+/, '').split('=', 2)
+ label = canonical_label(label0)
+ (pathes ? pathes.strip.split(' ') : []).each do |path|
+ add_user_code label, SourceText.new(File.read(path), path, 1)
+ end
+ add_user_code label, SourceText.new(body.join(''), @filename, line + 1)
+ line += (1 + body.size)
+ end
+ end
+
+ USER_CODE_LABELS = {
+ 'header' => :header,
+ 'prepare' => :header, # obsolete
+ 'inner' => :inner,
+ 'footer' => :footer,
+ 'driver' => :footer # obsolete
+ }
+
+ def canonical_label(src)
+ label = src.to_s.strip.downcase.slice(/\w+/)
+ unless USER_CODE_LABELS.key?(label)
+ raise CompileError, "unknown user code type: #{label.inspect}"
+ end
+ label
+ end
+
+ def add_user_code(label, src)
+ @result.params.send(USER_CODE_LABELS[label]).push src
+ end
+
+ end
+
+
+ class GrammarFileScanner
+
+ def initialize(str, filename = '-')
+ @lines = str.split(/\n|\r\n|\r/)
+ @filename = filename
+ @lineno = -1
+ @line_head = true
+ @in_rule_blk = false
+ @in_conv_blk = false
+ @in_block = nil
+ @epilogue = ''
+ @debug = false
+ next_line
+ end
+
+ attr_reader :epilogue
+
+ def lineno
+ @lineno + 1
+ end
+
+ attr_accessor :debug
+
+ def yylex(&block)
+ unless @debug
+ yylex0(&block)
+ else
+ yylex0 do |sym, tok|
+ $stderr.printf "%7d %-10s %s\n", lineno(), sym.inspect, tok.inspect
+ yield [sym, tok]
+ end
+ end
+ end
+
+ private
+
+ def yylex0
+ begin
+ until @line.empty?
+ @line.sub!(/\A\s+/, '')
+ if /\A\#/ =~ @line
+ break
+ elsif /\A\/\*/ =~ @line
+ skip_comment
+ elsif s = reads(/\A[a-zA-Z_]\w*/)
+ yield [atom_symbol(s), s.intern]
+ elsif s = reads(/\A\d+/)
+ yield [:DIGIT, s.to_i]
+ elsif ch = reads(/\A./)
+ case ch
+ when '"', "'"
+ yield [:STRING, eval(scan_quoted(ch))]
+ when '{'
+ lineno = lineno()
+ yield [:ACTION, SourceText.new(scan_action(), @filename, lineno)]
+ else
+ if ch == '|'
+ @line_head = false
+ end
+ yield [ch, ch]
+ end
+ else
+ end
+ end
+ end while next_line()
+ yield nil
+ end
+
+ def next_line
+ @lineno += 1
+ @line = @lines[@lineno]
+ if not @line or /\A----/ =~ @line
+ @epilogue = @lines.join("\n")
+ @lines.clear
+ @line = nil
+ if @in_block
+ @lineno -= 1
+ scan_error! sprintf('unterminated %s', @in_block)
+ end
+ false
+ else
+ @line.sub!(/(?:\n|\r\n|\r)\z/, '')
+ @line_head = true
+ true
+ end
+ end
+
+ ReservedWord = {
+ 'right' => :RIGHT,
+ 'left' => :LEFT,
+ 'nonassoc' => :NONASSOC,
+ 'preclow' => :PRECLOW,
+ 'prechigh' => :PRECHIGH,
+ 'token' => :TOKEN,
+ 'convert' => :CONV,
+ 'options' => :OPTION,
+ 'start' => :START,
+ 'expect' => :EXPECT,
+ 'class' => :CLASS,
+ 'rule' => :RULE,
+ 'end' => :END
+ }
+
+ def atom_symbol(token)
+ if token == 'end'
+ symbol = :END
+ @in_conv_blk = false
+ @in_rule_blk = false
+ else
+ if @line_head and not @in_conv_blk and not @in_rule_blk
+ symbol = ReservedWord[token] || :SYMBOL
+ else
+ symbol = :SYMBOL
+ end
+ case symbol
+ when :RULE then @in_rule_blk = true
+ when :CONV then @in_conv_blk = true
+ end
+ end
+ @line_head = false
+ symbol
+ end
+
+ def skip_comment
+ @in_block = 'comment'
+ until m = /\*\//.match(@line)
+ next_line
+ end
+ @line = m.post_match
+ @in_block = nil
+ end
+
+ $raccs_print_type = false
+
+ def scan_action
+ buf = ''
+ nest = 1
+ pre = nil
+ @in_block = 'action'
+ begin
+ pre = nil
+ if s = reads(/\A\s+/)
+ # does not set 'pre'
+ buf << s
+ end
+ until @line.empty?
+ if s = reads(/\A[^'"`{}%#\/\$]+/)
+ buf << (pre = s)
+ next
+ end
+ case ch = read(1)
+ when '{'
+ nest += 1
+ buf << (pre = ch)
+ when '}'
+ nest -= 1
+ if nest == 0
+ @in_block = nil
+ return buf
+ end
+ buf << (pre = ch)
+ when '#' # comment
+ buf << ch << @line
+ break
+ when "'", '"', '`'
+ buf << (pre = scan_quoted(ch))
+ when '%'
+ if literal_head? pre, @line
+ # % string, regexp, array
+ buf << ch
+ case ch = read(1)
+ when /[qQx]/n
+ buf << ch << (pre = scan_quoted(read(1), '%string'))
+ when /wW/n
+ buf << ch << (pre = scan_quoted(read(1), '%array'))
+ when /s/n
+ buf << ch << (pre = scan_quoted(read(1), '%symbol'))
+ when /r/n
+ buf << ch << (pre = scan_quoted(read(1), '%regexp'))
+ when /[a-zA-Z0-9= ]/n # does not include "_"
+ scan_error! "unknown type of % literal '%#{ch}'"
+ else
+ buf << (pre = scan_quoted(ch, '%string'))
+ end
+ else
+ # operator
+ buf << '||op->' if $raccs_print_type
+ buf << (pre = ch)
+ end
+ when '/'
+ if literal_head? pre, @line
+ # regexp
+ buf << (pre = scan_quoted(ch, 'regexp'))
+ else
+ # operator
+ buf << '||op->' if $raccs_print_type
+ buf << (pre = ch)
+ end
+ when '$' # gvar
+ buf << ch << (pre = read(1))
+ else
+ raise 'racc: fatal: must not happen'
+ end
+ end
+ buf << "\n"
+ end while next_line()
+ raise 'racc: fatal: scan finished before parser finished'
+ end
+
+ def literal_head?(pre, post)
+ (!pre || /[a-zA-Z_0-9]/n !~ pre[-1,1]) &&
+ !post.empty? && /\A[\s\=]/n !~ post
+ end
+
+ def read(len)
+ s = @line[0, len]
+ @line = @line[len .. -1]
+ s
+ end
+
+ def reads(re)
+ m = re.match(@line) or return nil
+ @line = m.post_match
+ m[0]
+ end
+
+ def scan_quoted(left, tag = 'string')
+ buf = left.dup
+ buf = "||#{tag}->" + buf if $raccs_print_type
+ re = get_quoted_re(left)
+ sv, @in_block = @in_block, tag
+ begin
+ if s = reads(re)
+ buf << s
+ break
+ else
+ buf << @line
+ end
+ end while next_line()
+ @in_block = sv
+ buf << "<-#{tag}||" if $raccs_print_type
+ buf
+ end
+
+ LEFT_TO_RIGHT = {
+ '(' => ')',
+ '{' => '}',
+ '[' => ']',
+ '<' => '>'
+ }
+
+ CACHE = {}
+
+ def get_quoted_re(left)
+ term = Regexp.quote(LEFT_TO_RIGHT[left] || left)
+ CACHE[left] ||= /\A[^#{term}\\]*(?:\\.[^\\#{term}]*)*#{term}/
+ end
+
+ def scan_error!(msg)
+ raise CompileError, "#{lineno()}: #{msg}"
+ end
+
+ end
+
+end # module Racc