summaryrefslogtreecommitdiff
path: root/tool/mk_builtin_loader.rb
diff options
context:
space:
mode:
Diffstat (limited to 'tool/mk_builtin_loader.rb')
-rw-r--r--tool/mk_builtin_loader.rb399
1 files changed, 190 insertions, 209 deletions
diff --git a/tool/mk_builtin_loader.rb b/tool/mk_builtin_loader.rb
index 6e1f5c666a..a84f322e84 100644
--- a/tool/mk_builtin_loader.rb
+++ b/tool/mk_builtin_loader.rb
@@ -1,12 +1,13 @@
# Parse built-in script and make rbinc file
-require 'ripper'
+require 'json'
+require 'open3'
require 'stringio'
require_relative 'ruby_vm/helpers/c_escape'
SUBLIBS = {}
REQUIRED = {}
-BUILTIN_ATTRS = %w[leaf inline_block use_block c_trace]
+BUILTIN_ATTRS = %w[leaf inline_block use_block c_trace without_interrupts]
module CompileWarning
@@warnings = 0
@@ -24,231 +25,204 @@ end
Warning.extend CompileWarning
-def string_literal(lit, str = [])
- while lit
- case lit.first
- when :string_concat, :string_embexpr, :string_content
- _, *lit = lit
- lit.each {|s| string_literal(s, str)}
- return str
- when :string_literal
- _, lit = lit
- when :@tstring_content
- str << lit[1]
- return str
- else
- raise "unexpected #{lit.first}"
- end
- end
-end
+# ruby mk_builtin_loader.rb path/to/dump_ast TARGET_FILE.rb
+# #=> generate TARGET_FILE.rbinc
+#
+# dump_ast is a standalone C program (tool/dump_ast.c) that parses Ruby files
+# with prism and dumps the AST as JSON. It must be compiled with CC before this
+# script can run, which means rbinc generation is skipped during `make up`
+# (where CC=false). The rbinc files are gitignored build artifacts, so they do
+# not need to be present in srcdir after `make up` — they will be generated in
+# the build directory during `make all` once dump_ast has been compiled.
+
+LOCALS_DB = {} # [method_name, first_line] = locals
-# e.g. [:symbol_literal, [:symbol, [:@ident, "inline", [19, 21]]]]
-def symbol_literal(lit)
- symbol_literal, symbol_lit = lit
- raise "#{lit.inspect} was not :symbol_literal" if symbol_literal != :symbol_literal
- symbol, ident_lit = symbol_lit
- raise "#{symbol_lit.inspect} was not :symbol" if symbol != :symbol
- ident, symbol_name, = ident_lit
- raise "#{ident.inspect} was not :@ident" if ident != :@ident
- symbol_name
+# Extract the contents of the given string node.
+#
+# node is an AST node Hash with a "type" key. A "StringNode" yields its
+# "unescaped" value; an "InterpolatedStringNode" yields the concatenation of
+# this method applied to each entry of its "parts".
+#
+# Raises RuntimeError ("unexpected <type>") for any other node type, including
+# a non-string part nested inside an interpolated string.
+def extract_string_literal(node)
+ case node["type"]
+ when "StringNode"
+ node["unescaped"]
+ when "InterpolatedStringNode"
+ node["parts"].map { |part| extract_string_literal(part) }.join
+ else
+ raise "unexpected #{node["type"]}"
+ end
end
-def inline_text argc, arg1
- raise "argc (#{argc}) of inline! should be 1" unless argc == 1
- arg1 = string_literal(arg1)
- raise "1st argument should be string literal" unless arg1
- arg1.join("").rstrip
+# Retrieve the line number of the given node in the source.
+#
+# node["location"]["start"] is used as a byte length: the newlines in that
+# many leading bytes of source (viewed as binary via String#b) are counted and
+# one is added, so the first line is line 1.
+def line_number(source, node)
+ source.b.byteslice(0, node["location"]["start"]).count("\n") + 1
end
-def inline_attrs(args)
- raise "args was empty" if args.empty?
- args.each do |arg|
- attr = symbol_literal(arg)
- unless BUILTIN_ATTRS.include?(attr)
- raise "attr (#{attr}) was not in: #{BUILTIN_ATTRS.join(', ')}"
- end
+# Process a single CallNode found while walking the AST.
+#
+# source   - full text of the file being processed (used for line numbers).
+# node     - the CallNode Hash.
+# name     - label embedded in generated inline function names.
+# locals   - locals of the enclosing method, or nil outside a method.
+# requires - Array that receives the string argument of require calls.
+# bs       - Array that receives [primitive_name, argc, cfunction_name].
+# inlines  - Array that receives one entry per cstmt!/cexpr!/cconst!/cinit!.
+#
+# Always returns true so that the caller keeps descending into the node's
+# children. Raises RuntimeError when a Primitive macro is used with invalid
+# arguments.
+def visit_call_node(source, node, name, locals, requires, bs, inlines)
+ # If this is a call to require or require_relative whose first argument is a
+ # plain string node, then we record the required name so that the caller can
+ # look for the file and add it to the files that should be processed.
+ if %w[require require_relative].include?(node["name"]) && !node["arguments"].nil? && (argument = node["arguments"]["arguments"][0])["type"] == "StringNode"
+ requires << argument["unescaped"]
+ return true
end
-end
-def make_cfunc_name inlines, name, lineno
- case name
- when /\[\]/
- name = '_GETTER'
- when /\[\]=/
- name = '_SETTER'
+ primitive_name = nil
+
+ receiver = node["receiver"]
+
+ if (!receiver.nil? && receiver["type"] == "ConstantReadNode" && receiver["name"] == "Primitive") ||
+ (!receiver.nil? && receiver["type"] == "CallNode" && receiver["flags"].include?("VARIABLE_CALL") && receiver["name"] == "__builtin")
+ primitive_name = node["name"]
+ elsif node["name"].start_with?("__builtin_")
+ primitive_name = node["name"][10..-1]
else
- name = name.tr('!?', 'EP')
+ # If we get here, then this isn't a primitive function call and we can
+ # continue the visit.
+ return true
end
- base = "builtin_inline_#{name}_#{lineno}"
- if inlines[base]
- 1000.times{|i|
- name = "#{base}_#{i}"
- return name unless inlines[name]
- }
- raise "too many functions in same line..."
- else
- base
+ # The name of the C function that we will be calling for this call node. It
+ # may change later in this method depending on the type of primitive.
+ cfunction_name = primitive_name
+
+ args = node["arguments"].nil? ? [] : node["arguments"]["arguments"]
+ argc = args.size
+
+ if primitive_name.match?(/[\!\?]$/)
+ case (primitive_macro = primitive_name[0...-1])
+ when "arg"
+ # This is a call to Primitive.arg!, which expects a single symbol argument
+ # detailing the name of the argument.
+ raise "unexpected argument number #{argc}" if argc != 1
+ raise "symbol literal expected, got #{args[0]["type"]}" if args[0]["type"] != "SymbolNode"
+ return true
+ when "attr"
+ # This is a call to Primitive.attr!, which expects a list of known
+ # symbols. We will check that each of the arguments is a symbol and that
+ # the symbol is one of the known symbols (BUILTIN_ATTRS).
+ raise "args was empty" if argc == 0
+
+ args.each do |arg|
+ raise "#{arg["type"]} was not a SymbolNode" if arg["type"] != "SymbolNode"
+ raise "attr (#{arg["unescaped"]}) was not in: #{BUILTIN_ATTRS.join(', ')}" unless BUILTIN_ATTRS.include?(arg["unescaped"])
+ end
+
+ return true
+ when "mandatory_only"
+ # This is a call to Primitive.mandatory_only?. This method does not
+ # require any further processing.
+ return true
+ when "cstmt", "cexpr", "cconst", "cinit"
+ # This is a call to Primitive.cstmt!, Primitive.cexpr!, Primitive.cconst!,
+ # or Primitive.cinit!. These methods expect a single string argument that
+ # is the C code that should be executed. We will extract the string, emit
+ # an inline function, and then continue the visit.
+ raise "argc (#{argc}) of inline! should be 1" if argc != 1
+
+ text = extract_string_literal(args[0]).rstrip
+ lineno = line_number(source, node)
+
+ case primitive_macro
+ when "cstmt", "cexpr", "cconst"
+ cfunction_name = "builtin_inline_#{name}_#{lineno}"
+ primitive_name = "_bi#{lineno}"
+
+ if primitive_macro == "cstmt"
+ inlines << [cfunction_name, lineno, text, locals, primitive_name]
+ else
+ inlines << [cfunction_name, lineno, "return #{text};", primitive_macro == "cexpr" ? locals : nil, primitive_name]
+ end
+ when "cinit"
+ inlines << [inlines.size, lineno, text, nil, nil]
+ return true
+ end
+
+ argc -= 1
+ else
+ # This is a call to Primitive that is not a known method, so it must be a
+ # regular C function. In this case we do not need any special processing.
+ end
end
+
+ bs << [primitive_name, argc, cfunction_name]
+ return true
end
-def collect_locals tree
- _type, name, (line, _cols) = tree
- if locals = LOCALS_DB[[name, line]]
- locals
- else
- if false # for debugging
- pp LOCALS_DB
- raise "not found: [#{name}, #{line}]"
+# Pre-order walk over a tree of node Hashes.
+#
+# Yields root first; when the block returns a falsy value the walk does not
+# descend into root. Otherwise every value of root except those stored under
+# "type" and "location" is examined: a Hash is walked when it has a "type"
+# key, and an Array is walked element by element when its first element is a
+# Hash.
+def each_node(root, &blk)
+ return unless yield root
+
+ root.each do |key, value|
+ next if key == "type" || key == "location"
+
+ if value.is_a?(Hash)
+ each_node(value, &blk) if value.key?("type")
+ elsif value.is_a?(Array) && value[0].is_a?(Hash)
+ value.each { |node| each_node(node, &blk) }
end
end
end
-def collect_builtin base, tree, name, bs, inlines, locals = nil
- while tree
- recv = sep = mid = args = nil
- case tree.first
- when :def
- locals = collect_locals(tree[1])
- tree = tree[3]
- next
- when :defs
- locals = collect_locals(tree[3])
- tree = tree[5]
- next
- when :class
- name = 'class'
- tree = tree[3]
- next
- when :sclass, :module
- name = 'class'
- tree = tree[2]
- next
- when :method_add_arg
- _method_add_arg, mid, (_arg_paren, args) = tree
- case mid.first
- when :call
- _, recv, sep, mid = mid
- when :fcall
- _, mid = mid
- else
- mid = nil
- end
- # w/ trailing comma: [[:method_add_arg, ...]]
- # w/o trailing comma: [:args_add_block, [[:method_add_arg, ...]], false]
- if args && args.first == :args_add_block
- args = args[1]
- end
- when :vcall
- _, mid = tree
- when :command # FCALL
- _, mid, (_, args) = tree
- when :call, :command_call # CALL
- _, recv, sep, mid, (_, args) = tree
+# Walk the tree under root and collect requires, builtin calls and inline C
+# snippets into the requires, bs and inlines arrays.
+#
+# name and locals describe the current scope and are passed on to
+# visit_call_node for every CallNode. The block's return value tells
+# each_node whether to descend into the node it just yielded.
+def visit_node(source, root, name, locals, requires, bs, inlines)
+ each_node(root) do |node|
+ case node["type"]
+ when "CallNode"
+ visit_call_node(source, node, name, locals, requires, bs, inlines)
+ when "DefNode"
+ # The method body is visited by a nested call using the locals stored in
+ # LOCALS_DB under [method name, line of the def]; returning false stops
+ # each_node from walking the same body a second time.
+ lineno = line_number(source, node)
+ visit_node(source, node["body"], name, LOCALS_DB[[node["name"], lineno]], requires, bs, inlines) if node["body"]
+ false
+ when "ClassNode", "ModuleNode", "SingletonClassNode"
+ # Inside a class-like body the scope name becomes "class" and there are
+ # no method locals.
+ visit_node(source, node["body"], "class", nil, requires, bs, inlines) if node["body"]
+ false
+ else
+ true
end
+ end
+end
- if mid
- raise "unknown sexp: #{mid.inspect}" unless %i[@ident @const].include?(mid.first)
- _, mid, (lineno,) = mid
- if recv
- func_name = nil
- case recv.first
- when :var_ref
- _, recv = recv
- if recv.first == :@const and recv[1] == "Primitive"
- func_name = mid.to_s
- end
- when :vcall
- _, recv = recv
- if recv.first == :@ident and recv[1] == "__builtin"
- func_name = mid.to_s
- end
- end
- collect_builtin(base, recv, name, bs, inlines) unless func_name
- else
- func_name = mid[/\A__builtin_(.+)/, 1]
- end
- if func_name
- cfunc_name = func_name
- args.pop unless (args ||= []).last
- argc = args.size
-
- if /(.+)[\!\?]\z/ =~ func_name
- case $1
- when 'attr'
- # Compile-time validation only. compile.c will parse them.
- inline_attrs(args)
- break
- when 'cstmt'
- text = inline_text argc, args.first
-
- func_name = "_bi#{lineno}"
- cfunc_name = make_cfunc_name(inlines, name, lineno)
- inlines[cfunc_name] = [lineno, text, locals, func_name]
- argc -= 1
- when 'cexpr', 'cconst'
- text = inline_text argc, args.first
- code = "return #{text};"
-
- func_name = "_bi#{lineno}"
- cfunc_name = make_cfunc_name(inlines, name, lineno)
-
- locals = [] if $1 == 'cconst'
- inlines[cfunc_name] = [lineno, code, locals, func_name]
- argc -= 1
- when 'cinit'
- text = inline_text argc, args.first
- func_name = nil # required
- inlines[inlines.size] = [lineno, text, nil, nil]
- argc -= 1
- when 'mandatory_only'
- func_name = nil
- when 'arg'
- argc == 1 or raise "unexpected argument number #{argc}"
- (arg = args.first)[0] == :symbol_literal or raise "symbol literal expected #{args}"
- (arg = arg[1])[0] == :symbol or raise "symbol expected #{arg}"
- (var = arg[1] and var = var[1]) or raise "argument name expected #{arg}"
- func_name = nil
- end
- end
+# Run the dump_ast program on file, parse its standard output as JSON and
+# collect the builtin calls and inline C snippets found in the resulting tree.
+#
+# dump_ast - command passed to Open3.capture3 together with file.
+# file     - path of the Ruby source to process.
+#
+# Reads @dir and @base, and updates REQUIRED, SUBLIBS and ARGV for every
+# required name that resolves to an existing "<@dir>/<name>.rb".
+#
+# Returns [processed_builtins, processed_inlines]:
+#   processed_builtins - { primitive_name => [argc, cfunction_name] }
+#   processed_inlines  - { cfunction_name => [lineno, text, locals, primitive_name] }
+#
+# Prints stderr of dump_ast and exits with status 1 when dump_ast fails.
+# Raises RuntimeError when the same primitive name is recorded with a
+# different [argc, cfunction_name] pair.
+def collect_builtins(dump_ast, file)
+ stdout, stderr, status = Open3.capture3(dump_ast, file)
+ unless status.success?
+ warn(stderr)
+ exit(1)
+ end
- if bs[func_name] &&
- bs[func_name] != [argc, cfunc_name]
- raise "same builtin function \"#{func_name}\", but different arity (was #{bs[func_name]} but #{argc})"
- end
+ source = File.read(file)
+ root = JSON.parse(stdout)
+ visit_node(source, root, "top", nil, requires = [], builtins = [], inlines = [])
- bs[func_name] = [argc, cfunc_name] if func_name
- elsif /\Arequire(?:_relative)\z/ =~ mid and args.size == 1 and
- (arg1 = args[0])[0] == :string_literal and
- (arg1 = arg1[1])[0] == :string_content and
- (arg1 = arg1[1])[0] == :@tstring_content and
- sublib = arg1[1]
- if File.exist?(f = File.join(@dir, sublib)+".rb")
- puts "- #{@base}.rb requires #{sublib}"
- if REQUIRED[sublib]
- warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored"
- else
- REQUIRED[sublib] = @base
- (SUBLIBS[@base] ||= []) << sublib
- end
- ARGV.push(f)
- end
+ # Required names that do not resolve to a file next to the current one are
+ # silently skipped. A resolved file is pushed onto ARGV even when it was
+ # already registered in REQUIRED.
+ requires.each do |sublib|
+ if File.exist?(f = File.join(@dir, sublib)+".rb")
+ puts "- #{@base}.rb requires #{sublib}"
+ if REQUIRED[sublib]
+ warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored"
+ else
+ REQUIRED[sublib] = @base
+ (SUBLIBS[@base] ||= []) << sublib
end
- break unless tree = args
+ ARGV.push(f)
end
+ end
- tree.each do |t|
- collect_builtin base, t, name, bs, inlines, locals if Array === t
+ processed_builtins = {}
+ builtins.each do |(primitive_name, argc, cfunction_name)|
+ if processed_builtins.key?(primitive_name) && processed_builtins[primitive_name] != [argc, cfunction_name]
+ raise "same builtin function \"#{primitive_name}\", but different arity (was #{processed_builtins[primitive_name]} but #{argc})"
end
- break
+
+ processed_builtins[primitive_name] = [argc, cfunction_name]
end
-end
-# ruby mk_builtin_loader.rb TARGET_FILE.rb
-# #=> generate TARGET_FILE.rbinc
-#
+ # A cfunction_name that is already taken gets the first free "_<i>" suffix
+ # with i in 0...1000.
+ # NOTE(review): the renamed key is stored only in processed_inlines; the
+ # matching builtins entry recorded by visit_call_node keeps the unsuffixed
+ # cfunction_name. Confirm that this is intended.
+ processed_inlines = {}
+ inlines.each do |(cfunction_name, lineno, text, locals, primitive_name)|
+ if processed_inlines.key?(cfunction_name)
+ found = 1000.times.find { |i| !processed_inlines.key?("#{cfunction_name}_#{i}") }
+ raise "too many functions in same line..." unless found
+ cfunction_name = "#{cfunction_name}_#{found}"
+ end
-LOCALS_DB = {} # [method_name, first_line] = locals
+ processed_inlines[cfunction_name] = [lineno, text, locals, primitive_name]
+ end
+
+ [processed_builtins, processed_inlines]
+end
def collect_iseq iseq_ary
# iseq_ary.each_with_index{|e, i| p [i, e]}
@@ -282,17 +256,22 @@ def generate_cexpr(ofile, lineno, line_file, body_lineno, text, locals, func_nam
# Avoid generating fetches of lvars we don't need. This is imperfect as it
# will match text inside strings or other false positives.
- local_candidates = text.scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+ local_ptrs = []
+ local_candidates = text.gsub(/\bLOCAL_PTR\(\K[a-zA-Z_][a-zA-Z0-9_]*(?=\))/) {
+ local_ptrs << $&; ''
+ }.scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
f.puts '{'
lineno += 1
# locals is nil outside methods
locals&.reverse_each&.with_index{|param, i|
next unless Symbol === param
- next unless local_candidates.include?(param.to_s)
+ param = param.to_s
+ lvar = local_candidates.include?(param)
+ next unless lvar or local_ptrs.include?(param)
f.puts "VALUE *const #{param}__ptr = (VALUE *)&ec->cfp->ep[#{-3 - i}];"
- f.puts "MAYBE_UNUSED(const VALUE) #{param} = *#{param}__ptr;"
- lineno += 1
+ f.puts "MAYBE_UNUSED(const VALUE) #{param} = *#{param}__ptr;" if lvar
+ lineno += lvar ? 2 : 1
}
f.puts "#line #{body_lineno} \"#{line_file}\""
lineno += 1
@@ -308,24 +287,24 @@ def generate_cexpr(ofile, lineno, line_file, body_lineno, text, locals, func_nam
return lineno, f.string
end
-def mk_builtin_header file
+def mk_builtin_header dump_ast, file
@dir = File.dirname(file)
base = File.basename(file, '.rb')
@base = base
ofile = "#{file}inc"
- # bs = { func_name => argc }
- code = File.read(file)
begin
verbose, $VERBOSE = $VERBOSE, true
- collect_iseq RubyVM::InstructionSequence.compile(code, base).to_a
+ collect_iseq RubyVM::InstructionSequence.compile_file(file).to_a
ensure
$VERBOSE = verbose
end
if warnings = CompileWarning.reset
raise "#{warnings} warnings in #{file}"
end
- collect_builtin(base, Ripper.sexp(code), 'top', bs = {}, inlines = {})
+
+ # bs = { func_name => [argc, cfunc_name] }
+ bs, inlines = collect_builtins(dump_ast, file)
StringIO.open do |f|
if File::ALT_SEPARATOR
@@ -418,7 +397,9 @@ def mk_builtin_header file
end
end
+# The first command-line argument is the dump_ast program; it is removed from
+# ARGV so that the loop below only sees the target files. Files pushed onto
+# ARGV while an earlier file is being processed are handled by the same loop.
+dump_ast = ARGV.shift
+
ARGV.each{|file|
# feature.rb => load_feature.inc
- mk_builtin_header file
+ mk_builtin_header dump_ast, file
}