diff options
Diffstat (limited to 'tool/mk_builtin_loader.rb')
| -rw-r--r-- | tool/mk_builtin_loader.rb | 399 |
1 files changed, 190 insertions, 209 deletions
diff --git a/tool/mk_builtin_loader.rb b/tool/mk_builtin_loader.rb index 6e1f5c666a..a84f322e84 100644 --- a/tool/mk_builtin_loader.rb +++ b/tool/mk_builtin_loader.rb @@ -1,12 +1,13 @@ # Parse built-in script and make rbinc file -require 'ripper' +require 'json' +require 'open3' require 'stringio' require_relative 'ruby_vm/helpers/c_escape' SUBLIBS = {} REQUIRED = {} -BUILTIN_ATTRS = %w[leaf inline_block use_block c_trace] +BUILTIN_ATTRS = %w[leaf inline_block use_block c_trace without_interrupts] module CompileWarning @@warnings = 0 @@ -24,231 +25,204 @@ end Warning.extend CompileWarning -def string_literal(lit, str = []) - while lit - case lit.first - when :string_concat, :string_embexpr, :string_content - _, *lit = lit - lit.each {|s| string_literal(s, str)} - return str - when :string_literal - _, lit = lit - when :@tstring_content - str << lit[1] - return str - else - raise "unexpected #{lit.first}" - end - end -end +# ruby mk_builtin_loader.rb path/to/dump_ast TARGET_FILE.rb +# #=> generate TARGET_FILE.rbinc +# +# dump_ast is a standalone C program (tool/dump_ast.c) that parses Ruby files +# with prism and dumps the AST as JSON. It must be compiled with CC before this +# script can run, which means rbinc generation is skipped during `make up` +# (where CC=false). The rbinc files are gitignored build artifacts, so they do +# not need to be present in srcdir after `make up` — they will be generated in +# the build directory during `make all` once dump_ast has been compiled. + +LOCALS_DB = {} # [method_name, first_line] = locals -# e.g. [:symbol_literal, [:symbol, [:@ident, "inline", [19, 21]]]] -def symbol_literal(lit) - symbol_literal, symbol_lit = lit - raise "#{lit.inspect} was not :symbol_literal" if symbol_literal != :symbol_literal - symbol, ident_lit = symbol_lit - raise "#{symbol_lit.inspect} was not :symbol" if symbol != :symbol - ident, symbol_name, = ident_lit - raise "#{ident.inspect} was not :@ident" if ident != :@ident - symbol_name +# Extract the contents of the given string node. +def extract_string_literal(node) + case node["type"] + when "StringNode" + node["unescaped"] + when "InterpolatedStringNode" + node["parts"].map { |part| extract_string_literal(part) }.join + else + raise "unexpected #{node["type"]}" + end end -def inline_text argc, arg1 - raise "argc (#{argc}) of inline! should be 1" unless argc == 1 - arg1 = string_literal(arg1) - raise "1st argument should be string literal" unless arg1 - arg1.join("").rstrip +# Retrieve the line number of the given node in the source. +def line_number(source, node) + source.b.byteslice(0, node["location"]["start"]).count("\n") + 1 end -def inline_attrs(args) - raise "args was empty" if args.empty? - args.each do |arg| - attr = symbol_literal(arg) - unless BUILTIN_ATTRS.include?(attr) - raise "attr (#{attr}) was not in: #{BUILTIN_ATTRS.join(', ')}" - end +def visit_call_node(source, node, name, locals, requires, bs, inlines) + # If this is a call to require or require relative with a single string node + # argument, then we will attempt to find the file that is being required and + # add it to the files that should be processed. + if %w[require require_relative].include?(node["name"]) && !node["arguments"].nil? && (argument = node["arguments"]["arguments"][0])["type"] == "StringNode" + requires << argument["unescaped"] + return true end -end -def make_cfunc_name inlines, name, lineno - case name - when /\[\]/ - name = '_GETTER' - when /\[\]=/ - name = '_SETTER' + primitive_name = nil + + receiver = node["receiver"] + + if (!receiver.nil? && receiver["type"] == "ConstantReadNode" && receiver["name"] == "Primitive") || + (!receiver.nil? && receiver["type"] == "CallNode" && receiver["flags"].include?("VARIABLE_CALL") && receiver["name"] == "__builtin") + primitive_name = node["name"] + elsif node["name"].start_with?("__builtin_") + primitive_name = node["name"][10..-1] else - name = name.tr('!?', 'EP') + # If we get here, then this isn't a primitive function call and we can + # continue the visit. + return true end - base = "builtin_inline_#{name}_#{lineno}" - if inlines[base] - 1000.times{|i| - name = "#{base}_#{i}" - return name unless inlines[name] - } - raise "too many functions in same line..." - else - base + # The name of the C function that we will be calling for this call node. It + # may change later in this method depending on the type of primitive. + cfunction_name = primitive_name + + args = node["arguments"].nil? ? [] : node["arguments"]["arguments"] + argc = args.size + + if primitive_name.match?(/[\!\?]$/) + case (primitive_macro = primitive_name[0...-1]) + when "arg" + # This is a call to Primitive.arg!, which expects a single symbol argument + # detailing the name of the argument. + raise "unexpected argument number #{argc}" if argc != 1 + raise "symbol literal expected, got #{args[0]["type"]}" if args[0]["type"] != "SymbolNode" + return true + when "attr" + # This is a call to Primitive.attr!, which expects a list of known + # symbols. We will check that each of the arguments is a symbol and that + # the symbol is one of the known symbols. + raise "args was empty" if argc == 0 + + args.each do |arg| + raise "#{arg["type"]} was not a SymbolNode" if arg["type"] != "SymbolNode" + raise "attr (#{arg["unescaped"]}) was not in: leaf, inline_block, use_block" unless BUILTIN_ATTRS.include?(arg["unescaped"]) + end + + return true + when "mandatory_only" + # This is a call to Primitive.mandatory_only?. This method does not + # require any further processing. + return true + when "cstmt", "cexpr", "cconst", "cinit" + # This is a call to Primitive.cstmt!, Primitive.cexpr!, Primitive.cconst!, + # or Primitive.cinit!. These methods expect a single string argument that + # is the C code that should be executed. We will extract the string, emit + # an inline function, and then continue the visit. + raise "argc (#{argc}) of inline! should be 1" if argc != 1 + + text = extract_string_literal(args[0]).rstrip + lineno = line_number(source, node) + + case primitive_macro + when "cstmt", "cexpr", "cconst" + cfunction_name = "builtin_inline_#{name}_#{lineno}" + primitive_name = "_bi#{lineno}" + + if primitive_macro == "cstmt" + inlines << [cfunction_name, lineno, text, locals, primitive_name] + else + inlines << [cfunction_name, lineno, "return #{text};", primitive_macro == "cexpr" ? locals : nil, primitive_name] + end + when "cinit" + inlines << [inlines.size, lineno, text, nil, nil] + return true + end + + argc -= 1 + else + # This is a call to Primitive that is not a known method, so it must be a + # regular C function. In this case we do not need any special processing. + end end + + bs << [primitive_name, argc, cfunction_name] + return true end -def collect_locals tree - _type, name, (line, _cols) = tree - if locals = LOCALS_DB[[name, line]] - locals - else - if false # for debugging - pp LOCALS_DB - raise "not found: [#{name}, #{line}]" +def each_node(root, &blk) + return unless yield root + + root.each do |key, value| + next if key == "type" || key == "location" + + if value.is_a?(Hash) + each_node(value, &blk) if value.key?("type") + elsif value.is_a?(Array) && value[0].is_a?(Hash) + value.each { |node| each_node(node, &blk) } end end end -def collect_builtin base, tree, name, bs, inlines, locals = nil - while tree - recv = sep = mid = args = nil - case tree.first - when :def - locals = collect_locals(tree[1]) - tree = tree[3] - next - when :defs - locals = collect_locals(tree[3]) - tree = tree[5] - next - when :class - name = 'class' - tree = tree[3] - next - when :sclass, :module - name = 'class' - tree = tree[2] - next - when :method_add_arg - _method_add_arg, mid, (_arg_paren, args) = tree - case mid.first - when :call - _, recv, sep, mid = mid - when :fcall - _, mid = mid - else - mid = nil - end - # w/ trailing comma: [[:method_add_arg, ...]] - # w/o trailing comma: [:args_add_block, [[:method_add_arg, ...]], false] - if args && args.first == :args_add_block - args = args[1] - end - when :vcall - _, mid = tree - when :command # FCALL - _, mid, (_, args) = tree - when :call, :command_call # CALL - _, recv, sep, mid, (_, args) = tree +def visit_node(source, root, name, locals, requires, bs, inlines) + each_node(root) do |node| + case node["type"] + when "CallNode" + visit_call_node(source, node, name, locals, requires, bs, inlines) + when "DefNode" + lineno = line_number(source, node) + visit_node(source, node["body"], name, LOCALS_DB[[node["name"], lineno]], requires, bs, inlines) if node["body"] + false + when "ClassNode", "ModuleNode", "SingletonClassNode" + visit_node(source, node["body"], "class", nil, requires, bs, inlines) if node["body"] + false + else + true end + end +end - if mid - raise "unknown sexp: #{mid.inspect}" unless %i[@ident @const].include?(mid.first) - _, mid, (lineno,) = mid - if recv - func_name = nil - case recv.first - when :var_ref - _, recv = recv - if recv.first == :@const and recv[1] == "Primitive" - func_name = mid.to_s - end - when :vcall - _, recv = recv - if recv.first == :@ident and recv[1] == "__builtin" - func_name = mid.to_s - end - end - collect_builtin(base, recv, name, bs, inlines) unless func_name - else - func_name = mid[/\A__builtin_(.+)/, 1] - end - if func_name - cfunc_name = func_name - args.pop unless (args ||= []).last - argc = args.size - - if /(.+)[\!\?]\z/ =~ func_name - case $1 - when 'attr' - # Compile-time validation only. compile.c will parse them. - inline_attrs(args) - break - when 'cstmt' - text = inline_text argc, args.first - - func_name = "_bi#{lineno}" - cfunc_name = make_cfunc_name(inlines, name, lineno) - inlines[cfunc_name] = [lineno, text, locals, func_name] - argc -= 1 - when 'cexpr', 'cconst' - text = inline_text argc, args.first - code = "return #{text};" - - func_name = "_bi#{lineno}" - cfunc_name = make_cfunc_name(inlines, name, lineno) - - locals = [] if $1 == 'cconst' - inlines[cfunc_name] = [lineno, code, locals, func_name] - argc -= 1 - when 'cinit' - text = inline_text argc, args.first - func_name = nil # required - inlines[inlines.size] = [lineno, text, nil, nil] - argc -= 1 - when 'mandatory_only' - func_name = nil - when 'arg' - argc == 1 or raise "unexpected argument number #{argc}" - (arg = args.first)[0] == :symbol_literal or raise "symbol literal expected #{args}" - (arg = arg[1])[0] == :symbol or raise "symbol expected #{arg}" - (var = arg[1] and var = var[1]) or raise "argument name expected #{arg}" - func_name = nil - end - end +def collect_builtins(dump_ast, file) + stdout, stderr, status = Open3.capture3(dump_ast, file) + unless status.success? + warn(stderr) + exit(1) + end - if bs[func_name] && - bs[func_name] != [argc, cfunc_name] - raise "same builtin function \"#{func_name}\", but different arity (was #{bs[func_name]} but #{argc})" - end + source = File.read(file) + root = JSON.parse(stdout) + visit_node(source, root, "top", nil, requires = [], builtins = [], inlines = []) - bs[func_name] = [argc, cfunc_name] if func_name - elsif /\Arequire(?:_relative)\z/ =~ mid and args.size == 1 and - (arg1 = args[0])[0] == :string_literal and - (arg1 = arg1[1])[0] == :string_content and - (arg1 = arg1[1])[0] == :@tstring_content and - sublib = arg1[1] - if File.exist?(f = File.join(@dir, sublib)+".rb") - puts "- #{@base}.rb requires #{sublib}" - if REQUIRED[sublib] - warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored" - else - REQUIRED[sublib] = @base - (SUBLIBS[@base] ||= []) << sublib - end - ARGV.push(f) - end + requires.each do |sublib| + if File.exist?(f = File.join(@dir, sublib)+".rb") + puts "- #{@base}.rb requires #{sublib}" + if REQUIRED[sublib] + warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored" + else + REQUIRED[sublib] = @base + (SUBLIBS[@base] ||= []) << sublib end - break unless tree = args + ARGV.push(f) end + end - tree.each do |t| - collect_builtin base, t, name, bs, inlines, locals if Array === t + processed_builtins = {} + builtins.each do |(primitive_name, argc, cfunction_name)| + if processed_builtins.key?(primitive_name) && processed_builtins[primitive_name] != [argc, cfunction_name] + raise "same builtin function \"#{primitive_name}\", but different arity (was #{processed_builtins[primitive_name]} but #{argc})" end - break + + processed_builtins[primitive_name] = [argc, cfunction_name] end -end -# ruby mk_builtin_loader.rb TARGET_FILE.rb -# #=> generate TARGET_FILE.rbinc -# + processed_inlines = {} + inlines.each do |(cfunction_name, lineno, text, locals, primitive_name)| + if processed_inlines.key?(cfunction_name) + found = 1000.times.find { |i| !processed_inlines.key?("#{cfunction_name}_#{i}") } + raise "too many functions in same line..." unless found + cfunction_name = "#{cfunction_name}_#{found}" + end -LOCALS_DB = {} # [method_name, first_line] = locals + processed_inlines[cfunction_name] = [lineno, text, locals, primitive_name] + end + + [processed_builtins, processed_inlines] +end def collect_iseq iseq_ary # iseq_ary.each_with_index{|e, i| p [i, e]} @@ -282,17 +256,22 @@ def generate_cexpr(ofile, lineno, line_file, body_lineno, text, locals, func_nam # Avoid generating fetches of lvars we don't need. This is imperfect as it # will match text inside strings or other false positives. - local_candidates = text.scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + local_ptrs = [] + local_candidates = text.gsub(/\bLOCAL_PTR\(\K[a-zA-Z_][a-zA-Z0-9_]*(?=\))/) { + local_ptrs << $&; '' + }.scan(/[a-zA-Z_][a-zA-Z0-9_]*/) f.puts '{' lineno += 1 # locals is nil outside methods locals&.reverse_each&.with_index{|param, i| next unless Symbol === param - next unless local_candidates.include?(param.to_s) + param = param.to_s + lvar = local_candidates.include?(param) + next unless lvar or local_ptrs.include?(param) f.puts "VALUE *const #{param}__ptr = (VALUE *)&ec->cfp->ep[#{-3 - i}];" - f.puts "MAYBE_UNUSED(const VALUE) #{param} = *#{param}__ptr;" - lineno += 1 + f.puts "MAYBE_UNUSED(const VALUE) #{param} = *#{param}__ptr;" if lvar + lineno += lvar ? 2 : 1 } f.puts "#line #{body_lineno} \"#{line_file}\"" lineno += 1 @@ -308,24 +287,24 @@ def generate_cexpr(ofile, lineno, line_file, body_lineno, text, locals, func_nam return lineno, f.string end -def mk_builtin_header file +def mk_builtin_header dump_ast, file @dir = File.dirname(file) base = File.basename(file, '.rb') @base = base ofile = "#{file}inc" - # bs = { func_name => argc } - code = File.read(file) begin verbose, $VERBOSE = $VERBOSE, true - collect_iseq RubyVM::InstructionSequence.compile(code, base).to_a + collect_iseq RubyVM::InstructionSequence.compile_file(file).to_a ensure $VERBOSE = verbose end if warnings = CompileWarning.reset raise "#{warnings} warnings in #{file}" end - collect_builtin(base, Ripper.sexp(code), 'top', bs = {}, inlines = {}) + + # bs = { func_name => argc } + bs, inlines = collect_builtins(dump_ast, file) StringIO.open do |f| if File::ALT_SEPARATOR @@ -418,7 +397,9 @@ def mk_builtin_header file end end +dump_ast = ARGV.shift + ARGV.each{|file| # feature.rb => load_feature.inc - mk_builtin_header file + mk_builtin_header dump_ast, file } |
