diff options
Diffstat (limited to 'prism_compile.c')
-rw-r--r-- | prism_compile.c | 2020 |
1 files changed, 1578 insertions, 442 deletions
diff --git a/prism_compile.c b/prism_compile.c index d207c6bf07..3aeb9d0e8e 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -152,12 +152,11 @@ pm_location_line_number(const pm_parser_t *parser, const pm_location_t *location } /** - * Convert the value of an integer node into a Ruby Integer. + * Parse the value of a pm_integer_t into a Ruby Integer. */ static VALUE -parse_integer(const pm_integer_node_t *node) +parse_integer_value(const pm_integer_t *integer) { - const pm_integer_t *integer = &node->value; VALUE result; if (integer->values == NULL) { @@ -188,6 +187,15 @@ parse_integer(const pm_integer_node_t *node) } /** + * Convert the value of an integer node into a Ruby Integer. + */ +static inline VALUE +parse_integer(const pm_integer_node_t *node) +{ + return parse_integer_value(&node->value); +} + +/** * Convert the value of a float node into a Ruby Float. */ static VALUE @@ -205,36 +213,9 @@ parse_float(const pm_float_node_t *node) static VALUE parse_rational(const pm_rational_node_t *node) { - VALUE result; - - if (PM_NODE_TYPE_P(node->numeric, PM_FLOAT_NODE)) { - const uint8_t *start = node->base.location.start; - const uint8_t *end = node->base.location.end - 1; - size_t length = end - start; - - char *buffer = malloc(length + 1); - memcpy(buffer, start, length); - - buffer[length] = '\0'; - - char *decimal = memchr(buffer, '.', length); - RUBY_ASSERT(decimal); - size_t seen_decimal = decimal - buffer; - size_t fraclen = length - seen_decimal - 1; - memmove(decimal, decimal + 1, fraclen + 1); - - VALUE numerator = rb_cstr_to_inum(buffer, 10, false); - result = rb_rational_new(numerator, rb_int_positive_pow(10, fraclen)); - - free(buffer); - } - else { - RUBY_ASSERT(PM_NODE_TYPE_P(node->numeric, PM_INTEGER_NODE)); - VALUE numerator = parse_integer((const pm_integer_node_t *) node->numeric); - result = rb_rational_raw(numerator, INT2FIX(1)); - } - - return result; + VALUE numerator = parse_integer_value(&node->numerator); + VALUE denominator = parse_integer_value(&node->denominator); + return rb_rational_new(numerator, denominator); } /** @@ -279,7 +260,7 @@ parse_string(const pm_scope_node_t *scope_node, const pm_string_t *string) * creating those strings based on the flags set on the owning node. */ static inline VALUE -parse_string_encoded(const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *string) +parse_string_encoded(const pm_node_t *node, const pm_string_t *string, rb_encoding *default_encoding) { rb_encoding *encoding; @@ -290,7 +271,7 @@ parse_string_encoded(const pm_scope_node_t *scope_node, const pm_node_t *node, c encoding = rb_utf8_encoding(); } else { - encoding = scope_node->encoding; + encoding = default_encoding; } return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding); @@ -301,17 +282,17 @@ parse_static_literal_string(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, { rb_encoding *encoding; - if (node->flags & PM_ENCODING_FLAGS_FORCED_BINARY_ENCODING) { + if (node->flags & PM_STRING_FLAGS_FORCED_BINARY_ENCODING) { encoding = rb_ascii8bit_encoding(); } - else if (node->flags & PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING) { + else if (node->flags & PM_STRING_FLAGS_FORCED_UTF8_ENCODING) { encoding = rb_utf8_encoding(); } else { encoding = scope_node->encoding; } - VALUE value = rb_enc_interned_str((const char *) pm_string_source(string), pm_string_length(string), encoding); + VALUE value = rb_enc_literal_str((const char *) pm_string_source(string), pm_string_length(string), encoding); rb_enc_str_coderange(value); if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) { @@ -351,91 +332,46 @@ pm_optimizable_range_item_p(const pm_node_t *node) return (!node || PM_NODE_TYPE_P(node, PM_INTEGER_NODE) || PM_NODE_TYPE_P(node, PM_NIL_NODE)); } -static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); - -static int -pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +/** Raise an error corresponding to the invalid regular expression. */ +static VALUE +parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) { - int stack_size = 0; - size_t parts_size = parts->size; - bool interpolated = false; - - if (parts_size > 0) { - VALUE current_string = Qnil; - - for (size_t index = 0; index < parts_size; index++) { - const pm_node_t *part = parts->nodes[index]; - - if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { - const pm_string_node_t *string_node = (const pm_string_node_t *) part; - VALUE string_value = parse_string_encoded(scope_node, (const pm_node_t *) string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else { - interpolated = true; - - if ( - PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && - ((const pm_embedded_statements_node_t *) part)->statements != NULL && - ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && - PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE) - ) { - const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; - VALUE string_value = parse_string_encoded(scope_node, (const pm_node_t *) string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else { - if (!RTEST(current_string)) { - current_string = rb_enc_str_new(NULL, 0, scope_node->encoding); - } - - PUSH_INSN1(ret, *node_location, putobject, rb_fstring(current_string)); - PM_COMPILE_NOT_POPPED(part); - PUSH_INSN(ret, *node_location, dup); - PUSH_INSN1(ret, *node_location, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE)); - PUSH_INSN(ret, *node_location, anytostring); - - current_string = Qnil; - stack_size += 2; - } - } - } - - if (RTEST(current_string)) { - current_string = rb_fstring(current_string); + va_list args; + va_start(args, fmt); + VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, "%" PRIsVALUE, args); + va_end(args); + rb_exc_raise(error); +} - if (stack_size == 0 && interpolated) { - PUSH_INSN1(ret, *node_location, putstring, current_string); - } - else { - PUSH_INSN1(ret, *node_location, putobject, current_string); - } +static VALUE +parse_regexp_string_part(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +{ + // If we were passed an explicit regexp encoding, then we need to double + // check that it's okay here for this fragment of the string. + rb_encoding *encoding; - current_string = Qnil; - stack_size++; - } + if (explicit_regexp_encoding != NULL) { + encoding = explicit_regexp_encoding; + } + else if (node->flags & PM_STRING_FLAGS_FORCED_BINARY_ENCODING) { + encoding = rb_ascii8bit_encoding(); + } + else if (node->flags & PM_STRING_FLAGS_FORCED_UTF8_ENCODING) { + encoding = rb_utf8_encoding(); } else { - PUSH_INSN(ret, *node_location, putnil); + encoding = implicit_regexp_encoding; } - return stack_size; + VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), encoding); + VALUE error = rb_reg_check_preprocess(string); + + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, node), "%" PRIsVALUE, rb_obj_as_string(error)); + return string; } static VALUE -pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top) +pm_static_literal_concat(rb_iseq_t *iseq, const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding, bool top) { VALUE current = Qnil; @@ -445,11 +381,28 @@ pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *sco switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: - string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped); + if (implicit_regexp_encoding != NULL) { + if (top) { + string = parse_regexp_string_part(iseq, scope_node, part, &((const pm_string_node_t *) part)->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + else { + string = parse_string_encoded(part, &((const pm_string_node_t *) part)->unescaped, scope_node->encoding); + VALUE error = rb_reg_check_preprocess(string); + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, part), "%" PRIsVALUE, rb_obj_as_string(error)); + } + } + else { + string = parse_string_encoded(part, &((const pm_string_node_t *) part)->unescaped, scope_node->encoding); + } break; case PM_INTERPOLATED_STRING_NODE: - string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false); + string = pm_static_literal_concat(iseq, &((const pm_interpolated_string_node_t *) part)->parts, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); + break; + case PM_EMBEDDED_STATEMENTS_NODE: { + const pm_embedded_statements_node_t *cast = (const pm_embedded_statements_node_t *) part; + string = pm_static_literal_concat(iseq, &cast->statements->body, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); break; + } default: RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat"); return Qnil; @@ -543,21 +496,10 @@ parse_regexp_encoding(const pm_scope_node_t *scope_node, const pm_node_t *node) return rb_enc_get_from_index(ENCINDEX_Windows_31J); } else { - return scope_node->encoding; + return NULL; } } -/** Raise an error corresponding to the invalid regular expression. */ -static VALUE -parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, "%" PRIsVALUE, args); - va_end(args); - rb_exc_raise(error); -} - static VALUE parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string) { @@ -581,22 +523,149 @@ parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t static inline VALUE parse_regexp_literal(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) { - VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), parse_regexp_encoding(scope_node, node)); + rb_encoding *regexp_encoding = parse_regexp_encoding(scope_node, node); + if (regexp_encoding == NULL) regexp_encoding = scope_node->encoding; + + VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), regexp_encoding); return parse_regexp(iseq, scope_node, node, string); } static inline VALUE parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts) { - VALUE string = pm_static_literal_concat(parts, scope_node, false); - rb_enc_associate(string, parse_regexp_encoding(scope_node, node)); + rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); + rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? explicit_regexp_encoding : scope_node->encoding; + + VALUE string = pm_static_literal_concat(iseq, parts, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); return parse_regexp(iseq, scope_node, node, string); } +static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); + +static int +pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +{ + int stack_size = 0; + size_t parts_size = parts->size; + bool interpolated = false; + + if (parts_size > 0) { + VALUE current_string = Qnil; + pm_line_column_t current_location = *node_location; + + for (size_t index = 0; index < parts_size; index++) { + const pm_node_t *part = parts->nodes[index]; + + if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { + const pm_string_node_t *string_node = (const pm_string_node_t *) part; + VALUE string_value; + + if (implicit_regexp_encoding == NULL) { + string_value = parse_string_encoded(part, &string_node->unescaped, scope_node->encoding); + } + else { + string_value = parse_regexp_string_part(iseq, scope_node, (const pm_node_t *) string_node, &string_node->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + if (index != 0) current_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, part); + } + } + else { + interpolated = true; + + if ( + PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && + ((const pm_embedded_statements_node_t *) part)->statements != NULL && + ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && + PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE) + ) { + const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; + VALUE string_value; + + if (implicit_regexp_encoding == NULL) { + string_value = parse_string_encoded(part, &string_node->unescaped, scope_node->encoding); + } + else { + string_value = parse_regexp_string_part(iseq, scope_node, (const pm_node_t *) string_node, &string_node->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + current_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, part); + } + } + else { + if (!RTEST(current_string)) { + rb_encoding *encoding; + + if (implicit_regexp_encoding != NULL) { + if (explicit_regexp_encoding != NULL) { + encoding = explicit_regexp_encoding; + } + else if (scope_node->parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { + encoding = rb_ascii8bit_encoding(); + } + else { + encoding = implicit_regexp_encoding; + } + } + else { + encoding = scope_node->encoding; + } + + current_string = rb_enc_str_new(NULL, 0, encoding); + } + + PUSH_INSN1(ret, current_location, putobject, rb_fstring(current_string)); + PM_COMPILE_NOT_POPPED(part); + + const pm_line_column_t current_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, part); + PUSH_INSN(ret, current_location, dup); + PUSH_INSN1(ret, current_location, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE, NULL, FALSE)); + PUSH_INSN(ret, current_location, anytostring); + + current_string = Qnil; + stack_size += 2; + } + } + } + + if (RTEST(current_string)) { + current_string = rb_fstring(current_string); + + if (stack_size == 0 && interpolated) { + PUSH_INSN1(ret, current_location, putstring, current_string); + } + else { + PUSH_INSN1(ret, current_location, putobject, current_string); + } + + current_string = Qnil; + stack_size++; + } + } + else { + PUSH_INSN(ret, *node_location, putnil); + } + + return stack_size; +} + static void pm_compile_regexp_dynamic(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node); + rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); + rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? explicit_regexp_encoding : scope_node->encoding; + + int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node, implicit_regexp_encoding, explicit_regexp_encoding); PUSH_INSN2(ret, *node_location, toregexp, INT2FIX(parse_regexp_flags(node) & 0xFF), INT2FIX(length)); } @@ -693,13 +762,13 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts); } case PM_INTERPOLATED_STRING_NODE: { - VALUE string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) node)->parts, scope_node, false); + VALUE string = pm_static_literal_concat(iseq, &((const pm_interpolated_string_node_t *) node)->parts, scope_node, NULL, NULL, false); int line_number = pm_node_line_number(scope_node->parser, node); return pm_static_literal_string(iseq, string, line_number); } case PM_INTERPOLATED_SYMBOL_NODE: { const pm_interpolated_symbol_node_t *cast = (const pm_interpolated_symbol_node_t *) node; - VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true); + VALUE string = pm_static_literal_concat(iseq, &cast->parts, scope_node, NULL, NULL, true); return ID2SYM(rb_intern_str(string)); } @@ -918,9 +987,12 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_ LABEL *else_label = NEW_LABEL(location.line); LABEL *end_label = NULL; - pm_compile_branch_condition(iseq, ret, predicate, then_label, else_label, false, scope_node); + DECL_ANCHOR(cond_seq); + INIT_ANCHOR(cond_seq); + pm_compile_branch_condition(iseq, cond_seq, predicate, then_label, else_label, false, scope_node); + PUSH_SEQ(ret, cond_seq); - rb_code_location_t conditional_location; + rb_code_location_t conditional_location = { 0 }; VALUE branches = Qfalse; if (then_label->refcnt && else_label->refcnt && PM_BRANCH_COVERAGE_P(iseq)) { @@ -1489,7 +1561,17 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b break; } case PM_FORWARDING_ARGUMENTS_NODE: { + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + *flags |= VM_CALL_FORWARDING; + + pm_local_index_t mult_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_DOT3, 0); + PUSH_GETLOCAL(ret, location, mult_local.index, mult_local.level); + + break; + } + orig_argc += 2; + *flags |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_SPLAT_MUT | VM_CALL_ARGS_BLOCKARG | VM_CALL_KW_SPLAT; // Forwarding arguments nodes are treated as foo(*, **, &) @@ -1654,7 +1736,7 @@ pm_compile_index_operator_write_node(rb_iseq_t *iseq, const pm_index_operator_wr PUSH_SEND_R(ret, location, idAREF, INT2FIX(argc), NULL, INT2FIX(flag & ~(VM_CALL_ARGS_SPLAT_MUT | VM_CALL_KW_SPLAT_MUT)), keywords); PM_COMPILE_NOT_POPPED(node->value); - ID id_operator = pm_constant_id_lookup(scope_node, node->operator); + ID id_operator = pm_constant_id_lookup(scope_node, node->binary_operator); PUSH_SEND(ret, location, id_operator, INT2FIX(1)); if (!popped) { @@ -2573,7 +2655,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t const char *name = rb_id2name(id); if (name && strlen(name) > 0 && name[0] != '_') { - COMPILE_ERROR(ERROR_ARGS "illegal variable in alternative pattern (%"PRIsVALUE")", rb_id2str(id)); + COMPILE_ERROR(iseq, location.line, "illegal variable in alternative pattern (%"PRIsVALUE")", rb_id2str(id)); return COMPILE_NG; } } @@ -2780,6 +2862,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ scope->encoding = previous->encoding; scope->filepath_encoding = previous->filepath_encoding; scope->constants = previous->constants; + scope->coverage_enabled = previous->coverage_enabled; } switch (PM_NODE_TYPE(node)) { @@ -2907,7 +2990,7 @@ pm_compile_retry_end_label(rb_iseq_t *iseq, LINK_ANCHOR *const ret, LABEL *retry INSN *iobj; LINK_ELEMENT *last_elem = LAST_ELEMENT(ret); iobj = IS_INSN(last_elem) ? (INSN*) last_elem : (INSN*) get_prev_insn((INSN*) last_elem); - while (INSN_OF(iobj) != BIN(send) && INSN_OF(iobj) != BIN(invokesuper)) { + while (!IS_INSN_ID(iobj, send) && !IS_INSN_ID(iobj, invokesuper) && !IS_INSN_ID(iobj, sendforward) && !IS_INSN_ID(iobj, invokesuperforward)) { iobj = (INSN*) get_prev_insn(iobj); } ELEM_INSERT_NEXT(&iobj->link, (LINK_ELEMENT*) retry_end_l); @@ -2920,6 +3003,264 @@ pm_compile_retry_end_label(rb_iseq_t *iseq, LINK_ANCHOR *const ret, LABEL *retry } } +static const char * +pm_iseq_builtin_function_name(const pm_scope_node_t *scope_node, const pm_node_t *receiver, ID method_id) +{ + const char *name = rb_id2name(method_id); + static const char prefix[] = "__builtin_"; + const size_t prefix_len = sizeof(prefix) - 1; + + if (receiver == NULL) { + if (UNLIKELY(strncmp(prefix, name, prefix_len) == 0)) { + // __builtin_foo + return &name[prefix_len]; + } + } + else if (PM_NODE_TYPE_P(receiver, PM_CALL_NODE)) { + if (PM_NODE_FLAG_P(receiver, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + const pm_call_node_t *cast = (const pm_call_node_t *) receiver; + if (pm_constant_id_lookup(scope_node, cast->name) == rb_intern_const("__builtin")) { + // __builtin.foo + return name; + } + } + } + else if (PM_NODE_TYPE_P(receiver, PM_CONSTANT_READ_NODE)) { + const pm_constant_read_node_t *cast = (const pm_constant_read_node_t *) receiver; + if (pm_constant_id_lookup(scope_node, cast->name) == rb_intern_const("Primitive")) { + // Primitive.foo + return name; + } + } + + return NULL; +} + +// Compile Primitive.attr! :leaf, ... +static int +pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_line_column_t *node_location) +{ + if (arguments == NULL) { + COMPILE_ERROR(iseq, node_location->line, "attr!: no argument"); + return COMPILE_NG; + } + + const pm_node_t *argument; + PM_NODE_LIST_FOREACH(&arguments->arguments, index, argument) { + if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to attr!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + return COMPILE_NG; + } + + VALUE symbol = pm_static_literal_value(iseq, argument, scope_node); + VALUE string = rb_sym_to_s(symbol); + + if (strcmp(RSTRING_PTR(string), "leaf") == 0) { + ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF; + } + else if (strcmp(RSTRING_PTR(string), "inline_block") == 0) { + ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_INLINE_BLOCK; + } + else if (strcmp(RSTRING_PTR(string), "use_block") == 0) { + iseq_set_use_block(iseq); + } + else { + COMPILE_ERROR(iseq, node_location->line, "unknown argument to attr!: %s", RSTRING_PTR(string)); + return COMPILE_NG; + } + } + + return COMPILE_OK; +} + +static int +pm_compile_builtin_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_line_column_t *node_location, int popped) +{ + if (arguments == NULL) { + COMPILE_ERROR(iseq, node_location->line, "arg!: no argument"); + return COMPILE_NG; + } + + if (arguments->arguments.size != 1) { + COMPILE_ERROR(iseq, node_location->line, "arg!: too many argument"); + return COMPILE_NG; + } + + const pm_node_t *argument = arguments->arguments.nodes[0]; + if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to arg!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + return COMPILE_NG; + } + + if (!popped) { + ID name = parse_string_symbol(scope_node, ((const pm_symbol_node_t *) argument)); + int index = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->local_table_size - get_local_var_idx(iseq, name); + + debugs("id: %s idx: %d\n", rb_id2name(name), index); + PUSH_GETLOCAL(ret, *node_location, index, get_lvar_level(iseq)); + } + + return COMPILE_OK; +} + +static int +pm_compile_builtin_mandatory_only_method(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_call_node_t *call_node, const pm_line_column_t *node_location) +{ + const pm_node_t *ast_node = scope_node->ast_node; + if (!PM_NODE_TYPE_P(ast_node, PM_DEF_NODE)) { + rb_bug("mandatory_only?: not in method definition"); + return COMPILE_NG; + } + + const pm_def_node_t *def_node = (const pm_def_node_t *) ast_node; + const pm_parameters_node_t *parameters_node = def_node->parameters; + if (parameters_node == NULL) { + rb_bug("mandatory_only?: in method definition with no parameters"); + return COMPILE_NG; + } + + const pm_node_t *body_node = def_node->body; + if (body_node == NULL || !PM_NODE_TYPE_P(body_node, PM_STATEMENTS_NODE) || (((const pm_statements_node_t *) body_node)->body.size != 1) || !PM_NODE_TYPE_P(((const pm_statements_node_t *) body_node)->body.nodes[0], PM_IF_NODE)) { + rb_bug("mandatory_only?: not in method definition with plain statements"); + return COMPILE_NG; + } + + const pm_if_node_t *if_node = (const pm_if_node_t *) ((const pm_statements_node_t *) body_node)->body.nodes[0]; + if (if_node->predicate != ((const pm_node_t *) call_node)) { + rb_bug("mandatory_only?: can't find mandatory node"); + return COMPILE_NG; + } + + pm_parameters_node_t parameters = { + .base = parameters_node->base, + .requireds = parameters_node->requireds + }; + + const pm_def_node_t def = { + .base = def_node->base, + .name = def_node->name, + .receiver = def_node->receiver, + .parameters = ¶meters, + .body = (pm_node_t *) if_node->statements, + .locals = { + .ids = def_node->locals.ids, + .size = parameters_node->requireds.size, + .capacity = def_node->locals.capacity + } + }; + + pm_scope_node_t next_scope_node; + pm_scope_node_init(&def.base, &next_scope_node, scope_node); + + ISEQ_BODY(iseq)->mandatory_only_iseq = pm_iseq_new_with_opt( + &next_scope_node, + rb_iseq_base_label(iseq), + rb_iseq_path(iseq), + rb_iseq_realpath(iseq), + node_location->line, + NULL, + 0, + ISEQ_TYPE_METHOD, + ISEQ_COMPILE_DATA(iseq)->option + ); + + pm_scope_node_destroy(&next_scope_node); + return COMPILE_OK; +} + +static int +pm_compile_builtin_function_call(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node, const pm_call_node_t *call_node, const pm_line_column_t *node_location, int popped, const rb_iseq_t *parent_block, const char *builtin_func) +{ + const pm_arguments_node_t *arguments = call_node->arguments; + + if (parent_block != NULL) { + COMPILE_ERROR(iseq, node_location->line, "should not call builtins here."); + return COMPILE_NG; + } + +#define BUILTIN_INLINE_PREFIX "_bi" + char inline_func[sizeof(BUILTIN_INLINE_PREFIX) + DECIMAL_SIZE_OF(int)]; + bool cconst = false; +retry:; + const struct rb_builtin_function *bf = iseq_builtin_function_lookup(iseq, builtin_func); + + if (bf == NULL) { + if (strcmp("cstmt!", builtin_func) == 0 || strcmp("cexpr!", builtin_func) == 0) { + // ok + } + else if (strcmp("cconst!", builtin_func) == 0) { + cconst = true; + } + else if (strcmp("cinit!", builtin_func) == 0) { + // ignore + return COMPILE_OK; + } + else if (strcmp("attr!", builtin_func) == 0) { + return pm_compile_builtin_attr(iseq, scope_node, arguments, node_location); + } + else if (strcmp("arg!", builtin_func) == 0) { + return pm_compile_builtin_arg(iseq, ret, scope_node, arguments, node_location, popped); + } + else if (strcmp("mandatory_only?", builtin_func) == 0) { + if (popped) { + rb_bug("mandatory_only? should be in if condition"); + } + else if (!LIST_INSN_SIZE_ZERO(ret)) { + rb_bug("mandatory_only? should be put on top"); + } + + PUSH_INSN1(ret, *node_location, putobject, Qfalse); + return pm_compile_builtin_mandatory_only_method(iseq, scope_node, call_node, node_location); + } + else if (1) { + rb_bug("can't find builtin function:%s", builtin_func); + } + else { + COMPILE_ERROR(iseq, node_location->line, "can't find builtin function:%s", builtin_func); + return COMPILE_NG; + } + + int inline_index = node_location->line; + snprintf(inline_func, sizeof(inline_func), BUILTIN_INLINE_PREFIX "%d", inline_index); + builtin_func = inline_func; + arguments = NULL; + goto retry; + } + + if (cconst) { + typedef VALUE(*builtin_func0)(void *, VALUE); + VALUE const_val = (*(builtin_func0)bf->func_ptr)(NULL, Qnil); + PUSH_INSN1(ret, *node_location, putobject, const_val); + return COMPILE_OK; + } + + // fprintf(stderr, "func_name:%s -> %p\n", builtin_func, bf->func_ptr); + + DECL_ANCHOR(args_seq); + INIT_ANCHOR(args_seq); + + int flags = 0; + struct rb_callinfo_kwarg *keywords = NULL; + int argc = pm_setup_args(arguments, call_node->block, &flags, &keywords, iseq, args_seq, scope_node, node_location); + + if (argc != bf->argc) { + COMPILE_ERROR(iseq, node_location->line, "argc is not match for builtin function:%s (expect %d but %d)", builtin_func, bf->argc, argc); + return COMPILE_NG; + } + + unsigned int start_index; + if (delegate_call_p(iseq, argc, args_seq, &start_index)) { + PUSH_INSN2(ret, *node_location, opt_invokebuiltin_delegate, bf, INT2FIX(start_index)); + } + else { + PUSH_SEQ(ret, args_seq); + PUSH_INSN1(ret, *node_location, invokebuiltin, bf); + } + + if (popped) PUSH_INSN(ret, *node_location, pop); + return COMPILE_OK; +} + /** * Compile a call node into the given iseq. */ @@ -2971,6 +3312,7 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c struct rb_callinfo_kwarg *kw_arg = NULL; int orig_argc = pm_setup_args(call_node->arguments, call_node->block, &flags, &kw_arg, iseq, ret, scope_node, &location); + const rb_iseq_t *previous_block = ISEQ_COMPILE_DATA(iseq)->current_block; const rb_iseq_t *block_iseq = NULL; if (call_node->block != NULL && PM_NODE_TYPE_P(call_node->block, PM_BLOCK_NODE)) { @@ -3041,6 +3383,7 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c } if (popped) PUSH_INSN(ret, location, pop); + ISEQ_COMPILE_DATA(iseq)->current_block = previous_block; } static void @@ -3218,7 +3561,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_line_c } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); if (cast->parent != NULL) { if (!lfinish[1]) lfinish[1] = NEW_LABEL(location.line); @@ -3514,8 +3857,11 @@ pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_tabl if (rest->expression != NULL) { RUBY_ASSERT(PM_NODE_TYPE_P(rest->expression, PM_REQUIRED_PARAMETER_NODE)); - pm_insert_local_index(((const pm_required_parameter_node_t *) rest->expression)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); - local_index++; + + if (!PM_NODE_FLAG_P(rest->expression, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + pm_insert_local_index(((const pm_required_parameter_node_t *) rest->expression)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + local_index++; + } } } @@ -3523,8 +3869,10 @@ pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_tabl const pm_node_t *right = node->rights.nodes[index]; if (PM_NODE_TYPE_P(right, PM_REQUIRED_PARAMETER_NODE)) { - pm_insert_local_index(((const pm_required_parameter_node_t *) right)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); - local_index++; + if (!PM_NODE_FLAG_P(right, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + pm_insert_local_index(((const pm_required_parameter_node_t *) right)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + local_index++; + } } else { RUBY_ASSERT(PM_NODE_TYPE_P(right, PM_MULTI_TARGET_NODE)); @@ -3717,11 +4065,11 @@ pm_multi_target_state_update(pm_multi_target_state_t *state) previous = current; current = current->next; - free(previous); + xfree(previous); } } -static size_t +static void pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state); /** @@ -3829,7 +4177,7 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons // for I::J in []; end // const pm_constant_path_target_node_t *cast = (const pm_constant_path_target_node_t *) node; - ID name = pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name); + ID name = pm_constant_id_lookup(scope_node, cast->name); if (cast->parent != NULL) { pm_compile_node(iseq, cast->parent, parents, false, scope_node); @@ -3868,6 +4216,13 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons pm_compile_node(iseq, cast->receiver, parents, false, scope_node); + LABEL *safe_label = NULL; + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + safe_label = NEW_LABEL(location.line); + PUSH_INSN(parents, location, dup); + PUSH_INSNL(parents, location, branchnil, safe_label); + } + if (state != NULL) { PUSH_INSN1(writes, location, topn, INT2FIX(1)); pm_multi_target_state_push(state, (INSN *) LAST_ELEMENT(writes), 1); @@ -3878,7 +4233,9 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) flags |= VM_CALL_FCALL; PUSH_SEND_WITH_FLAG(writes, location, method_id, INT2FIX(1), INT2FIX(flags)); + if (safe_label != NULL && state == NULL) PUSH_LABEL(writes, safe_label); PUSH_INSN(writes, location, pop); + if (safe_label != NULL && state != NULL) PUSH_LABEL(writes, safe_label); if (state != NULL) { PUSH_INSN(cleanup, location, pop); @@ -3952,9 +4309,15 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons // // for i, j in []; end // - if (state != NULL) state->position--; + size_t before_position; + if (state != NULL) { + before_position = state->position; + state->position--; + } + pm_compile_multi_target_node(iseq, node, parents, writes, cleanup, scope_node, state); - if (state != NULL) state->position++; + if (state != NULL) state->position = before_position; + break; } default: @@ -3968,7 +4331,7 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons * on the stack that correspond to the parent expressions of the various * targets. */ -static size_t +static void pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state) { const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); @@ -4008,26 +4371,28 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR // going through the targets because we will need to revisit them once // we know how many values are being pushed onto the stack. pm_multi_target_state_t target_state = { 0 }; - size_t base_position = state == NULL ? 0 : state->position; - size_t splat_position = has_rest ? 1 : 0; + if (state == NULL) state = &target_state; + + size_t base_position = state->position; + size_t splat_position = (has_rest || has_posts) ? 1 : 0; // Next, we'll iterate through all of the leading targets. for (size_t index = 0; index < lefts->size; index++) { const pm_node_t *target = lefts->nodes[index]; - target_state.position = lefts->size - index + splat_position + base_position; - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + state->position = lefts->size - index + splat_position + base_position; + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } // Next, we'll compile the rest target if there is one. if (has_rest) { const pm_node_t *target = ((const pm_splat_node_t *) rest)->expression; - target_state.position = 1 + rights->size + base_position; + state->position = 1 + rights->size + base_position; if (has_posts) { PUSH_INSN2(writes, location, expandarray, INT2FIX(rights->size), INT2FIX(3)); } - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } // Finally, we'll compile the trailing targets. @@ -4038,18 +4403,10 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR for (size_t index = 0; index < rights->size; index++) { const pm_node_t *target = rights->nodes[index]; - target_state.position = rights->size - index + base_position; - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + state->position = rights->size - index + base_position; + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } } - - // Now, we need to go back and modify the topn instructions in order to - // ensure they can correctly retrieve the parent expressions. - pm_multi_target_state_update(&target_state); - - if (state != NULL) state->stack_size += target_state.stack_size; - - return target_state.stack_size; } /** @@ -4179,7 +4536,7 @@ pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_line_co PM_COMPILE_NOT_POPPED((const pm_node_t *) cast->statements); } else { - PUSH_INSN(ret, *node_location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } ISEQ_COMPILE_DATA(iseq)->in_rescue = prev_in_rescue; @@ -4245,7 +4602,6 @@ pm_compile_ensure(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_line_co ); pm_scope_node_destroy(&next_scope_node); - ISEQ_COMPILE_DATA(iseq)->current_block = child_iseq; erange = ISEQ_COMPILE_DATA(iseq)->ensure_node_stack->erange; if (estart->link.next != &eend->link) { @@ -4359,7 +4715,7 @@ pm_constant_path_parts(const pm_node_t *node, const pm_scope_node_t *scope_node) } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); rb_ary_unshift(parts, name); if (cast->parent == NULL) { @@ -4397,7 +4753,7 @@ pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *co } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); if (cast->parent == NULL) { PUSH_INSN(body, location, pop); @@ -4475,7 +4831,496 @@ pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t * return dispatch; } -/* +/** + * Return the object that will be pushed onto the stack for the given node. + */ +static VALUE +pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node) +{ + switch (PM_NODE_TYPE(node)) { + case PM_TRUE_NODE: + case PM_FALSE_NODE: + case PM_NIL_NODE: + case PM_SYMBOL_NODE: + case PM_REGULAR_EXPRESSION_NODE: + case PM_SOURCE_LINE_NODE: + case PM_INTEGER_NODE: + case PM_FLOAT_NODE: + case PM_RATIONAL_NODE: + case PM_IMAGINARY_NODE: + case PM_SOURCE_ENCODING_NODE: + return pm_static_literal_value(iseq, node, scope_node); + case PM_STRING_NODE: + return parse_static_literal_string(iseq, scope_node, node, &((const pm_string_node_t *) node)->unescaped); + case PM_SOURCE_FILE_NODE: + return pm_source_file_value((const pm_source_file_node_t *) node, scope_node); + case PM_ARRAY_NODE: { + const pm_array_node_t *cast = (const pm_array_node_t *) node; + VALUE result = rb_ary_new_capa(cast->elements.size); + + for (size_t index = 0; index < cast->elements.size; index++) { + VALUE element = pm_compile_shareable_constant_literal(iseq, cast->elements.nodes[index], scope_node); + if (element == Qundef) return Qundef; + + rb_ary_push(result, element); + } + + return rb_ractor_make_shareable(result); + } + case PM_HASH_NODE: { + const pm_hash_node_t *cast = (const pm_hash_node_t *) node; + VALUE result = rb_hash_new_capa(cast->elements.size); + + for (size_t index = 0; index < cast->elements.size; index++) { + const pm_node_t *element = cast->elements.nodes[index]; + if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return Qundef; + + const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element; + + VALUE key = pm_compile_shareable_constant_literal(iseq, assoc->key, scope_node); + if (key == Qundef) return Qundef; + + VALUE value = pm_compile_shareable_constant_literal(iseq, assoc->value, scope_node); + if (value == Qundef) return Qundef; + + rb_hash_aset(result, key, value); + } + + return rb_ractor_make_shareable(result); + } + default: + return Qundef; + } +} + +/** + * Compile the instructions for pushing the value that will be written to a + * shared constant. + */ +static void +pm_compile_shareable_constant_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_flags_t shareability, VALUE path, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node, bool top) +{ + VALUE literal = pm_compile_shareable_constant_literal(iseq, node, scope_node); + if (literal != Qundef) { + const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); + PUSH_INSN1(ret, location, putobject, literal); + return; + } + + const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); + switch (PM_NODE_TYPE(node)) { + case PM_ARRAY_NODE: { + const pm_array_node_t *cast = (const pm_array_node_t *) node; + + if (top) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + } + + for (size_t index = 0; index < cast->elements.size; index++) { + pm_compile_shareable_constant_value(iseq, cast->elements.nodes[index], shareability, path, ret, scope_node, false); + } + + PUSH_INSN1(ret, location, newarray, INT2FIX(cast->elements.size)); + + if (top) { + ID method_id = (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) ? rb_intern("make_shareable_copy") : rb_intern("make_shareable"); + PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + return; + } + case PM_HASH_NODE: { + const pm_hash_node_t *cast = (const pm_hash_node_t *) node; + + if (top) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + } + + for (size_t index = 0; index < cast->elements.size; index++) { + const pm_node_t *element = cast->elements.nodes[index]; + + if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) { + COMPILE_ERROR(iseq, location.line, "Ractor constant writes do not support **"); + } + + const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element; + pm_compile_shareable_constant_value(iseq, assoc->key, shareability, path, ret, scope_node, false); + pm_compile_shareable_constant_value(iseq, assoc->value, shareability, path, ret, scope_node, false); + } + + PUSH_INSN1(ret, location, newhash, INT2FIX(cast->elements.size * 2)); + + if (top) { + ID method_id = (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) ? rb_intern("make_shareable_copy") : rb_intern("make_shareable"); + PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + return; + } + default: { + DECL_ANCHOR(value_seq); + INIT_ANCHOR(value_seq); + + pm_compile_node(iseq, node, value_seq, false, scope_node); + if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) { + PUSH_SEND_WITH_FLAG(value_seq, location, idUMinus, INT2FIX(0), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + PUSH_INSN1(ret, location, putobject, path); + PUSH_SEND_WITH_FLAG(ret, location, rb_intern("ensure_shareable"), INT2FIX(2), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + else if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) { + if (top) PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + if (top) PUSH_SEND_WITH_FLAG(ret, location, rb_intern("make_shareable_copy"), INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + else if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING) { + if (top) PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + if (top) PUSH_SEND_WITH_FLAG(ret, location, rb_intern("make_shareable"), INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + break; + } + } +} + +/** + * Compile a constant write node, either in the context of a ractor pragma or + * not. + */ +static void +pm_compile_constant_write_node(rb_iseq_t *iseq, const pm_constant_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + ID name_id = pm_constant_id_lookup(scope_node, node->name); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, rb_id2str(name_id), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, ID2SYM(name_id)); +} + +/** + * Compile a constant and write node, either in the context of a ractor pragma + * or not. + */ +static void +pm_compile_constant_and_write_node(rb_iseq_t *iseq, const pm_constant_and_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, node->name)); + LABEL *end_label = NEW_LABEL(location.line); + + pm_compile_constant_read(iseq, name, &node->name_loc, ret, scope_node); + if (!popped) PUSH_INSN(ret, location, dup); + + PUSH_INSNL(ret, location, branchunless, end_label); + if (!popped) PUSH_INSN(ret, location, pop); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, name, ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, name); + PUSH_LABEL(ret, end_label); +} + +/** + * Compile a constant or write node, either in the context of a ractor pragma or + * not. + */ +static void +pm_compile_constant_or_write_node(rb_iseq_t *iseq, const pm_constant_or_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, node->name)); + + LABEL *set_label = NEW_LABEL(location.line); + LABEL *end_label = NEW_LABEL(location.line); + + PUSH_INSN(ret, location, putnil); + PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST), name, Qtrue); + PUSH_INSNL(ret, location, branchunless, set_label); + + pm_compile_constant_read(iseq, name, &node->name_loc, ret, scope_node); + if (!popped) PUSH_INSN(ret, location, dup); + + PUSH_INSNL(ret, location, branchif, end_label); + if (!popped) PUSH_INSN(ret, location, pop); + PUSH_LABEL(ret, set_label); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, name, ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, name); + PUSH_LABEL(ret, end_label); +} + +/** + * Compile a constant operator write node, either in the context of a ractor + * pragma or not. + */ +static void +pm_compile_constant_operator_write_node(rb_iseq_t *iseq, const pm_constant_operator_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, node->name)); + ID method_id = pm_constant_id_lookup(scope_node, node->binary_operator); + + pm_compile_constant_read(iseq, name, &node->name_loc, ret, scope_node); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, name, ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + if (!popped) PUSH_INSN(ret, location, dup); + + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, name); +} + +/** + * Creates a string that is used in ractor error messages to describe the + * constant path being written. + */ +static VALUE +pm_constant_path_path(const pm_constant_path_node_t *node, const pm_scope_node_t *scope_node) +{ + VALUE parts = rb_ary_new(); + rb_ary_push(parts, rb_id2str(pm_constant_id_lookup(scope_node, node->name))); + + const pm_node_t *current = node->parent; + while (current != NULL && PM_NODE_TYPE_P(current, PM_CONSTANT_PATH_NODE)) { + const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) current; + rb_ary_unshift(parts, rb_id2str(pm_constant_id_lookup(scope_node, cast->name))); + current = cast->parent; + } + + if (current == NULL) { + rb_ary_unshift(parts, rb_id2str(idNULL)); + } + else if (PM_NODE_TYPE_P(current, PM_CONSTANT_READ_NODE)) { + rb_ary_unshift(parts, rb_id2str(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) current)->name))); + } + else { + rb_ary_unshift(parts, rb_str_new_cstr("...")); + } + + return rb_ary_join(parts, rb_str_new_cstr("::")); +} + +/** + * Compile a constant path write node, either in the context of a ractor pragma + * or not. + */ +static void +pm_compile_constant_path_write_node(rb_iseq_t *iseq, const pm_constant_path_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + const pm_constant_path_node_t *target = node->target; + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, target->name)); + + if (target->parent) { + PM_COMPILE_NOT_POPPED((const pm_node_t *) target->parent); + } + else { + PUSH_INSN1(ret, location, putobject, rb_cObject); + } + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) { + PUSH_INSN(ret, location, swap); + PUSH_INSN1(ret, location, topn, INT2FIX(1)); + } + + PUSH_INSN(ret, location, swap); + PUSH_INSN1(ret, location, setconstant, name); +} + +/** + * Compile a constant path and write node, either in the context of a ractor + * pragma or not. + */ +static void +pm_compile_constant_path_and_write_node(rb_iseq_t *iseq, const pm_constant_path_and_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + const pm_constant_path_node_t *target = node->target; + + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, target->name)); + LABEL *lfin = NEW_LABEL(location.line); + + if (target->parent) { + PM_COMPILE_NOT_POPPED(target->parent); + } + else { + PUSH_INSN1(ret, location, putobject, rb_cObject); + } + + PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putobject, Qtrue); + PUSH_INSN1(ret, location, getconstant, name); + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSNL(ret, location, branchunless, lfin); + + if (!popped) PUSH_INSN(ret, location, pop); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (popped) { + PUSH_INSN1(ret, location, topn, INT2FIX(1)); + } + else { + PUSH_INSN1(ret, location, dupn, INT2FIX(2)); + PUSH_INSN(ret, location, swap); + } + + PUSH_INSN1(ret, location, setconstant, name); + PUSH_LABEL(ret, lfin); + + if (!popped) PUSH_INSN(ret, location, swap); + PUSH_INSN(ret, location, pop); +} + +/** + * Compile a constant path or write node, either in the context of a ractor + * pragma or not. + */ +static void +pm_compile_constant_path_or_write_node(rb_iseq_t *iseq, const pm_constant_path_or_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + const pm_constant_path_node_t *target = node->target; + + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, target->name)); + LABEL *lassign = NEW_LABEL(location.line); + LABEL *lfin = NEW_LABEL(location.line); + + if (target->parent) { + PM_COMPILE_NOT_POPPED(target->parent); + } + else { + PUSH_INSN1(ret, location, putobject, rb_cObject); + } + + PUSH_INSN(ret, location, dup); + PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST_FROM), name, Qtrue); + PUSH_INSNL(ret, location, branchunless, lassign); + + PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putobject, Qtrue); + PUSH_INSN1(ret, location, getconstant, name); + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSNL(ret, location, branchif, lfin); + + if (!popped) PUSH_INSN(ret, location, pop); + PUSH_LABEL(ret, lassign); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (popped) { + PUSH_INSN1(ret, location, topn, INT2FIX(1)); + } + else { + PUSH_INSN1(ret, location, dupn, INT2FIX(2)); + PUSH_INSN(ret, location, swap); + } + + PUSH_INSN1(ret, location, setconstant, name); + PUSH_LABEL(ret, lfin); + + if (!popped) PUSH_INSN(ret, location, swap); + PUSH_INSN(ret, location, pop); +} + +/** + * Compile a constant path operator write node, either in the context of a + * ractor pragma or not. + */ +static void +pm_compile_constant_path_operator_write_node(rb_iseq_t *iseq, const pm_constant_path_operator_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + const pm_constant_path_node_t *target = node->target; + + ID method_id = pm_constant_id_lookup(scope_node, node->binary_operator); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, target->name)); + + if (target->parent) { + PM_COMPILE_NOT_POPPED(target->parent); + } + else { + PUSH_INSN1(ret, location, putobject, rb_cObject); + } + + PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putobject, Qtrue); + PUSH_INSN1(ret, location, getconstant, name); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + PUSH_CALL(ret, location, method_id, INT2FIX(1)); + PUSH_INSN(ret, location, swap); + + if (!popped) { + PUSH_INSN1(ret, location, topn, INT2FIX(1)); + PUSH_INSN(ret, location, swap); + } + + PUSH_INSN1(ret, location, setconstant, name); +} + +/** * Compiles a prism node into instruction sequences. * * iseq - The current instruction sequence object (used for locals) @@ -4492,14 +5337,32 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(parser, node); int lineno = (int) location.line; - if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_NEWLINE) && ISEQ_COMPILE_DATA(iseq)->last_line != lineno) { - int event = RUBY_EVENT_LINE; + if (PM_NODE_TYPE_P(node, PM_RETURN_NODE) && PM_NODE_FLAG_P(node, PM_RETURN_NODE_FLAGS_REDUNDANT) && ((const pm_return_node_t *) node)->arguments == NULL) { + // If the node that we're compiling is a return node that is redundant, + // then it cannot be considered a line node because the other parser + // eliminates it from the parse tree. In this case we must replicate + // this behavior. + } else { + if (PM_NODE_TYPE_P(node, PM_BEGIN_NODE) && (((const pm_begin_node_t *) node)->statements == NULL) && (((const pm_begin_node_t *) node)->rescue_clause != NULL)) { + // If this node is a begin node and it has empty statements and also + // has a rescue clause, then the other parser considers it as + // starting on the same line as the rescue, as opposed to the + // location of the begin keyword. We replicate that behavior here. + lineno = (int) PM_NODE_START_LINE_COLUMN(parser, ((const pm_begin_node_t *) node)->rescue_clause).line; + } + + if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_NEWLINE) && ISEQ_COMPILE_DATA(iseq)->last_line != lineno) { + // If this node has the newline flag set and it is on a new line + // from the previous nodes that have been compiled for this ISEQ, + // then we need to emit a newline event. + int event = RUBY_EVENT_LINE; - ISEQ_COMPILE_DATA(iseq)->last_line = lineno; - if (ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { - event |= RUBY_EVENT_COVERAGE_LINE; + ISEQ_COMPILE_DATA(iseq)->last_line = lineno; + if (ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { + event |= RUBY_EVENT_COVERAGE_LINE; + } + PUSH_TRACE(ret, event); } - PUSH_TRACE(ret, event); } switch (PM_NODE_TYPE(node)) { @@ -4741,7 +5604,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE((const pm_node_t *) cast->statements); } else if (!popped) { - PUSH_INSN(ret, location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } } return; @@ -4804,7 +5667,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, throw_flag = 0; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with break"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with break"); return; } else { @@ -4826,8 +5689,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } - COMPILE_ERROR(ERROR_ARGS "Invalid break"); - rb_bug("Invalid break"); + COMPILE_ERROR(iseq, location.line, "Invalid break"); } return; } @@ -4841,13 +5703,21 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // foo.bar() {} // ^^^^^^^^^^^^ const pm_call_node_t *cast = (const pm_call_node_t *) node; - LABEL *start = NEW_LABEL(location.line); + ID method_id = pm_constant_id_lookup(scope_node, cast->name); - if (cast->block) { - PUSH_LABEL(ret, start); + const pm_location_t *message_loc = &cast->message_loc; + if (message_loc->start == NULL) message_loc = &cast->base.location; + + const pm_line_column_t location = PM_LOCATION_START_LINE_COLUMN(scope_node->parser, message_loc); + const char *builtin_func; + + if (UNLIKELY(iseq_has_builtin_function_table(iseq)) && (builtin_func = pm_iseq_builtin_function_name(scope_node, cast->receiver, method_id)) != NULL) { + pm_compile_builtin_function_call(iseq, ret, scope_node, cast, &location, popped, ISEQ_COMPILE_DATA(iseq)->current_block, builtin_func); + return; } - ID method_id = pm_constant_id_lookup(scope_node, cast->name); + LABEL *start = NEW_LABEL(location.line); + if (cast->block) PUSH_LABEL(ret, start); switch (method_id) { case idUMinus: { @@ -4963,7 +5833,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_SEND_WITH_FLAG(ret, location, id_read_name, INT2FIX(0), INT2FIX(flag)); PM_COMPILE_NOT_POPPED(cast->value); - ID id_operator = pm_constant_id_lookup(scope_node, cast->operator); + ID id_operator = pm_constant_id_lookup(scope_node, cast->binary_operator); PUSH_SEND(ret, location, id_operator, INT2FIX(1)); if (!popped) { @@ -5008,7 +5878,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (cast->predicate == NULL) { // Establish branch coverage for the case node. VALUE branches = Qfalse; - rb_code_location_t case_location; + rb_code_location_t case_location = { 0 }; int branch_id = 0; if (PM_BRANCH_COVERAGE_P(iseq)) { @@ -5037,7 +5907,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_compile_node(iseq, (const pm_node_t *) clause->statements, body_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(body_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } PUSH_INSNL(body_seq, location, jump, end_label); @@ -5095,7 +5965,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, else { // Establish branch coverage for the case node. VALUE branches = Qfalse; - rb_code_location_t case_location; + rb_code_location_t case_location = { 0 }; int branch_id = 0; if (PM_BRANCH_COVERAGE_P(iseq)) { @@ -5186,7 +6056,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_compile_node(iseq, (const pm_node_t *) clause->statements, body_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(body_seq, clause_location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } PUSH_INSNL(body_seq, clause_location, jump, end_label); @@ -5229,7 +6099,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, pop); // Establish branch coverage for the implicit else clause. - add_trace_branch_coverage(iseq, ret, &case_location, case_location.beg_pos.column, branch_id, "else", branches); + if (PM_BRANCH_COVERAGE_P(iseq)) { + add_trace_branch_coverage(iseq, ret, &case_location, case_location.beg_pos.column, branch_id, "else", branches); + } if (!popped) PUSH_INSN(ret, location, putnil); PUSH_INSNL(ret, location, jump, end_label); @@ -5275,7 +6147,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // We're going to use this to uniquely identify each branch so that we // can track coverage information. - rb_code_location_t case_location; + rb_code_location_t case_location = { 0 }; VALUE branches = Qfalse; int branch_id = 0; @@ -5332,7 +6204,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE_INTO_ANCHOR(body_seq, (const pm_node_t *) in_node->statements); } else if (!popped) { - PUSH_INSN(body_seq, in_location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } PUSH_INSNL(body_seq, in_location, jump, end_label); @@ -5465,7 +6337,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, getclassvariable, name, get_cvar_ic_value(iseq, name_id)); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -5559,154 +6431,28 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Foo::Bar &&= baz // ^^^^^^^^^^^^^^^^ const pm_constant_path_and_write_node_t *cast = (const pm_constant_path_and_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - LABEL *lfin = NEW_LABEL(location.line); - - if (target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - if (!popped) PUSH_INSN(ret, location, dup); - PUSH_INSNL(ret, location, branchunless, lfin); - - if (!popped) PUSH_INSN(ret, location, pop); - PM_COMPILE_NOT_POPPED(cast->value); - - if (popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - else { - PUSH_INSN1(ret, location, dupn, INT2FIX(2)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, lfin); - - if (!popped) PUSH_INSN(ret, location, swap); - PUSH_INSN(ret, location, pop); - + pm_compile_constant_path_and_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_OR_WRITE_NODE: { // Foo::Bar ||= baz // ^^^^^^^^^^^^^^^^ const pm_constant_path_or_write_node_t *cast = (const pm_constant_path_or_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - LABEL *lassign = NEW_LABEL(location.line); - LABEL *lfin = NEW_LABEL(location.line); - - if (target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST_FROM), name, Qtrue); - PUSH_INSNL(ret, location, branchunless, lassign); - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - if (!popped) PUSH_INSN(ret, location, dup); - PUSH_INSNL(ret, location, branchif, lfin); - - if (!popped) PUSH_INSN(ret, location, pop); - PUSH_LABEL(ret, lassign); - PM_COMPILE_NOT_POPPED(cast->value); - - if (popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - else { - PUSH_INSN1(ret, location, dupn, INT2FIX(2)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, lfin); - - if (!popped) PUSH_INSN(ret, location, swap); - PUSH_INSN(ret, location, pop); - + pm_compile_constant_path_or_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE: { // Foo::Bar += baz // ^^^^^^^^^^^^^^^ const pm_constant_path_operator_write_node_t *cast = (const pm_constant_path_operator_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - if (target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - PM_COMPILE_NOT_POPPED(cast->value); - PUSH_CALL(ret, location, method_id, INT2FIX(1)); - PUSH_INSN(ret, location, swap); - - if (!popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); + pm_compile_constant_path_operator_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_WRITE_NODE: { // Foo::Bar = 1 // ^^^^^^^^^^^^ const pm_constant_path_write_node_t *cast = (const pm_constant_path_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - if (target->parent) { - PM_COMPILE_NOT_POPPED((const pm_node_t *) target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PM_COMPILE_NOT_POPPED(cast->value); - - if (!popped) { - PUSH_INSN(ret, location, swap); - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - - PUSH_INSN(ret, location, swap); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_path_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_READ_NODE: { @@ -5724,82 +6470,28 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Foo &&= bar // ^^^^^^^^^^^ const pm_constant_and_write_node_t *cast = (const pm_constant_and_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - LABEL *end_label = NEW_LABEL(location.line); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSNL(ret, location, branchunless, end_label); - if (!popped) PUSH_INSN(ret, location, pop); - - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, end_label); - + pm_compile_constant_and_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_OR_WRITE_NODE: { // Foo ||= bar // ^^^^^^^^^^^ const pm_constant_or_write_node_t *cast = (const pm_constant_or_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - LABEL *set_label = NEW_LABEL(location.line); - LABEL *end_label = NEW_LABEL(location.line); - - PUSH_INSN(ret, location, putnil); - PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST), name, Qtrue); - PUSH_INSNL(ret, location, branchunless, set_label); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSNL(ret, location, branchif, end_label); - if (!popped) PUSH_INSN(ret, location, pop); - - PUSH_LABEL(ret, set_label); - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, end_label); - + pm_compile_constant_or_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_OPERATOR_WRITE_NODE: { // Foo += bar // ^^^^^^^^^^ const pm_constant_operator_write_node_t *cast = (const pm_constant_operator_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - PM_COMPILE_NOT_POPPED(cast->value); - - PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_operator_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_WRITE_NODE: { // Foo = 1 // ^^^^^^^ const pm_constant_write_node_t *cast = (const pm_constant_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_DEF_NODE: { @@ -5848,7 +6540,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE((const pm_node_t *) (cast->statements)); } else { - PUSH_INSN(ret, location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } if (popped) PUSH_INSN(ret, location, pop); @@ -6011,6 +6703,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, int argc = 0; int depth = get_lvar_level(iseq); + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + flag |= VM_CALL_FORWARDING; + pm_local_index_t mult_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_DOT3, 0); + PUSH_GETLOCAL(ret, location, mult_local.index, mult_local.level); + PUSH_INSN2(ret, location, invokesuperforward, new_callinfo(iseq, 0, 0, flag, NULL, block != NULL), block); + if (popped) PUSH_INSN(ret, location, pop); + return; + } + if (local_body->param.flags.has_lead) { /* required arguments */ for (int i = 0; i < local_body->param.lead_num; i++) { @@ -6136,7 +6837,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN1(ret, location, getglobal, name); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -6334,7 +7035,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, getinstancevariable, name, get_ivar_ic_value(iseq, name_id)); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -6470,7 +7171,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } else { const pm_interpolated_string_node_t *cast = (const pm_interpolated_string_node_t *) node; - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); if (popped) PUSH_INSN(ret, location, pop); } @@ -6489,7 +7190,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } } else { - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); if (length > 1) { PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); } @@ -6511,7 +7212,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, putself); - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node, NULL, NULL); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); PUSH_SEND_WITH_FLAG(ret, location, idBackquote, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)); @@ -6519,6 +7220,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } + case PM_IT_LOCAL_VARIABLE_READ_NODE: { + // -> { it } + // ^^ + if (!popped) { + PUSH_GETLOCAL(ret, location, scope_node->local_table_for_iseq_size, 0); + } + + return; + } case PM_KEYWORD_HASH_NODE: { // foo(bar: baz) // ^^^^^^^^ @@ -6584,7 +7294,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); if (!popped) PUSH_INSN(ret, location, dup); @@ -6622,9 +7332,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_LOCAL_VARIABLE_READ_NODE: { // foo // ^^^ - const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) node; - if (!popped) { + const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) node; pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, cast->name, cast->depth); PUSH_GETLOCAL(ret, location, index.index, index.level); } @@ -6885,18 +7594,22 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_multi_target_state_t state = { 0 }; state.position = popped ? 0 : 1; - size_t stack_size = pm_compile_multi_target_node(iseq, node, ret, writes, cleanup, scope_node, &state); + pm_compile_multi_target_node(iseq, node, ret, writes, cleanup, scope_node, &state); PM_COMPILE_NOT_POPPED(cast->value); if (!popped) PUSH_INSN(ret, location, dup); PUSH_SEQ(ret, writes); - if (!popped && stack_size >= 1) { + if (!popped && state.stack_size >= 1) { // Make sure the value on the right-hand side of the = operator is // being returned before we pop the parent expressions. - PUSH_INSN1(ret, location, setn, INT2FIX(stack_size)); + PUSH_INSN1(ret, location, setn, INT2FIX(state.stack_size)); } + // Now, we need to go back and modify the topn instructions in order to + // ensure they can correctly retrieve the parent expressions. + pm_multi_target_state_update(&state); + PUSH_SEQ(ret, cleanup); return; } @@ -6965,7 +7678,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with next"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with next"); return; } @@ -6983,7 +7696,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid next"); + COMPILE_ERROR(iseq, location.line, "Invalid next"); return; } } @@ -7218,7 +7931,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with redo"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with redo"); return; } @@ -7231,7 +7944,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid redo"); + COMPILE_ERROR(iseq, location.line, "Invalid redo"); return; } } @@ -7459,7 +8172,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid retry"); + COMPILE_ERROR(iseq, location.line, "Invalid retry"); return; } return; @@ -7566,6 +8279,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } } + // If we have the `it` implicit local variable, we need to account for + // it in the local table size. + if (scope_node->parameters != NULL && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + table_size++; + } + // Ensure there is enough room in the local table for any // parameters that have been repeated // ex: def underscore_parameters(_, _ = 1, _ = 2); _; end @@ -7600,7 +8319,14 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // When we have a `...` as the keyword_rest, it's a forwarding_parameter_node and // we need to leave space for 4 locals: *, **, &, ... if (PM_NODE_TYPE_P(parameters_node->keyword_rest, PM_FORWARDING_PARAMETER_NODE)) { - table_size += 4; + // Only optimize specifically methods like this: `foo(...)` + if (requireds_list->size == 0 && optionals_list->size == 0 && keywords_list->size == 0) { + ISEQ_BODY(iseq)->param.flags.forwardable = TRUE; + table_size += 1; + } + else { + table_size += 4; + } } else { const pm_keyword_rest_parameter_node_t *kw_rest = (const pm_keyword_rest_parameter_node_t *) parameters_node->keyword_rest; @@ -7714,6 +8440,11 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, body->param.flags.has_lead = true; } + if (scope_node->parameters != NULL && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + ID local = rb_make_temporary_id(local_index); + local_table_for_iseq->ids[local_index++] = local; + } + // def foo(a, (b, *c, d), e = 1, *f, g, (h, *i, j), k:, l: 1, **m, &n) // ^^^^^ if (optionals_list && optionals_list->size) { @@ -7949,29 +8680,31 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // def foo(...) // ^^^ case PM_FORWARDING_PARAMETER_NODE: { - body->param.rest_start = local_index; - body->param.flags.has_rest = true; + if (!ISEQ_BODY(iseq)->param.flags.forwardable) { + body->param.rest_start = local_index; + body->param.flags.has_rest = true; - // Add the leading * - pm_insert_local_special(idMULT, local_index++, index_lookup_table, local_table_for_iseq); + // Add the leading * + pm_insert_local_special(idMULT, local_index++, index_lookup_table, local_table_for_iseq); - // Add the kwrest ** - RUBY_ASSERT(!body->param.flags.has_kw); + // Add the kwrest ** + RUBY_ASSERT(!body->param.flags.has_kw); - // There are no keywords declared (in the text of the program) - // but the forwarding node implies we support kwrest (**) - body->param.flags.has_kw = false; - body->param.flags.has_kwrest = true; - body->param.keyword = keyword = ZALLOC_N(struct rb_iseq_param_keyword, 1); + // There are no keywords declared (in the text of the program) + // but the forwarding node implies we support kwrest (**) + body->param.flags.has_kw = false; + body->param.flags.has_kwrest = true; + body->param.keyword = keyword = ZALLOC_N(struct rb_iseq_param_keyword, 1); - keyword->rest_start = local_index; + keyword->rest_start = local_index; - pm_insert_local_special(idPow, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(idPow, local_index++, index_lookup_table, local_table_for_iseq); - body->param.block_start = local_index; - body->param.flags.has_block = true; + body->param.block_start = local_index; + body->param.flags.has_block = true; - pm_insert_local_special(idAnd, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(idAnd, local_index++, index_lookup_table, local_table_for_iseq); + } pm_insert_local_special(idDot3, local_index++, index_lookup_table, local_table_for_iseq); break; } @@ -8074,18 +8807,6 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, body->param.flags.has_lead = true; } - // Fill in the it variable, if it exists - if (scope_node->parameters && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { - const uint8_t param_name[] = { '0', 'i', 't' }; - pm_constant_id_t constant_id = pm_constant_pool_find(&parser->constant_pool, param_name, 3); - RUBY_ASSERT(constant_id && "parser should have inserted 0it for 'it' local"); - - ID local = rb_make_temporary_id(local_index); - local_table_for_iseq->ids[local_index] = local; - st_insert(index_lookup_table, (st_data_t) constant_id, (st_data_t) local_index); - local_index++; - } - //********END OF STEP 3********** //********STEP 4********** @@ -8127,7 +8848,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } scope_node->index_lookup_table = index_lookup_table; iseq_calc_param_size(iseq); - iseq_set_local_table(iseq, local_table_for_iseq); + + if (ISEQ_BODY(iseq)->param.flags.forwardable) { + // We're treating `...` as a parameter so that frame + // pushing won't clobber it. + ISEQ_BODY(iseq)->param.size += 1; + } + + // FIXME: args? + iseq_set_local_table(iseq, local_table_for_iseq, 0); scope_node->local_table_for_iseq_size = local_table_for_iseq->size; //********STEP 5************ @@ -8298,7 +9027,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case ISEQ_TYPE_METHOD: { + ISEQ_COMPILE_DATA(iseq)->root_node = (const void *) scope_node->body; PUSH_TRACE(ret, RUBY_EVENT_CALL); + if (scope_node->body) { PM_COMPILE((const pm_node_t *) scope_node->body); } @@ -8306,9 +9037,10 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, putnil); } + ISEQ_COMPILE_DATA(iseq)->root_node = (const void *) scope_node->body; PUSH_TRACE(ret, RUBY_EVENT_RETURN); - ISEQ_COMPILE_DATA(iseq)->last_line = body->location.code_location.end_pos.lineno; + ISEQ_COMPILE_DATA(iseq)->last_line = body->location.code_location.end_pos.lineno; break; } case ISEQ_TYPE_RESCUE: { @@ -8344,7 +9076,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } - if (PM_NODE_TYPE_P(scope_node->ast_node, PM_CLASS_NODE)) { + if (PM_NODE_TYPE_P(scope_node->ast_node, PM_CLASS_NODE) || PM_NODE_TYPE_P(scope_node->ast_node, PM_MODULE_NODE)) { const pm_line_column_t end_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, scope_node->ast_node); ADD_TRACE(ret, RUBY_EVENT_END); ISEQ_COMPILE_DATA(iseq)->last_line = end_location.line; @@ -8368,7 +9100,38 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_SHAREABLE_CONSTANT_NODE: { // A value that is being written to a constant that is being marked as // shared depending on the current lexical context. - PM_COMPILE(((const pm_shareable_constant_node_t *) node)->write); + const pm_shareable_constant_node_t *cast = (const pm_shareable_constant_node_t *) node; + + switch (PM_NODE_TYPE(cast->write)) { + case PM_CONSTANT_WRITE_NODE: + pm_compile_constant_write_node(iseq, (const pm_constant_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_AND_WRITE_NODE: + pm_compile_constant_and_write_node(iseq, (const pm_constant_and_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_OR_WRITE_NODE: + pm_compile_constant_or_write_node(iseq, (const pm_constant_or_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_OPERATOR_WRITE_NODE: + pm_compile_constant_operator_write_node(iseq, (const pm_constant_operator_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_WRITE_NODE: + pm_compile_constant_path_write_node(iseq, (const pm_constant_path_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_AND_WRITE_NODE: + pm_compile_constant_path_and_write_node(iseq, (const pm_constant_path_and_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_OR_WRITE_NODE: + pm_compile_constant_path_or_write_node(iseq, (const pm_constant_path_or_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE: + pm_compile_constant_path_operator_write_node(iseq, (const pm_constant_path_operator_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + default: + rb_bug("Unexpected node type for shareable constant write: %s", pm_node_type_to_str(PM_NODE_TYPE(cast->write))); + break; + } + return; } case PM_SINGLETON_CLASS_NODE: { @@ -8514,8 +9277,14 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } PUSH_SEQ(ret, args); - PUSH_INSN2(ret, location, invokesuper, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); - pm_compile_retry_end_label(iseq, ret, retry_end_l); + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + flags |= VM_CALL_FORWARDING; + PUSH_INSN2(ret, location, invokesuperforward, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); + } + else { + PUSH_INSN2(ret, location, invokesuper, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); + pm_compile_retry_end_label(iseq, ret, retry_end_l); + } if (popped) PUSH_INSN(ret, location, pop); ISEQ_COMPILE_DATA(iseq)->current_block = previous_block; @@ -8621,7 +9390,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case ISEQ_TYPE_TOP: case ISEQ_TYPE_MAIN: case ISEQ_TYPE_CLASS: - COMPILE_ERROR(ERROR_ARGS "Invalid yield"); + COMPILE_ERROR(iseq, location.line, "Invalid yield"); return; default: /* valid */; } @@ -8727,6 +9496,368 @@ pm_parse_result_free(pm_parse_result_t *result) pm_options_free(&result->options); } +/** An error that is going to be formatted into the output. */ +typedef struct { + /** A pointer to the diagnostic that was generated during parsing. */ + pm_diagnostic_t *error; + + /** The start line of the diagnostic message. */ + int32_t line; + + /** The column start of the diagnostic message. */ + uint32_t column_start; + + /** The column end of the diagnostic message. */ + uint32_t column_end; +} pm_parse_error_t; + +/** The format that will be used to format the errors into the output. */ +typedef struct { + /** The prefix that will be used for line numbers. */ + const char *number_prefix; + + /** The prefix that will be used for blank lines. */ + const char *blank_prefix; + + /** The divider that will be used between sections of source code. */ + const char *divider; + + /** The length of the blank prefix. */ + size_t blank_prefix_length; + + /** The length of the divider. */ + size_t divider_length; +} pm_parse_error_format_t; + +#define PM_COLOR_GRAY "\033[38;5;102m" +#define PM_COLOR_RED "\033[1;31m" +#define PM_COLOR_RESET "\033[m" +#define PM_ERROR_TRUNCATE 30 + +static inline pm_parse_error_t * +pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) { + pm_parse_error_t *errors = xcalloc(error_list->size, sizeof(pm_parse_error_t)); + if (errors == NULL) return NULL; + + int32_t start_line = parser->start_line; + for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line); + pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line); + + // We're going to insert this error into the array in sorted order. We + // do this by finding the first error that has a line number greater + // than the current error and then inserting the current error before + // that one. + size_t index = 0; + while ( + (index < error_list->size) && + (errors[index].error != NULL) && + ( + (errors[index].line < start.line) || + ((errors[index].line == start.line) && (errors[index].column_start < start.column)) + ) + ) index++; + + // Now we're going to shift all of the errors after this one down one + // index to make room for the new error. + if (index + 1 < error_list->size) { + memmove(&errors[index + 1], &errors[index], sizeof(pm_parse_error_t) * (error_list->size - index - 1)); + } + + // Finally, we'll insert the error into the array. + uint32_t column_end; + if (start.line == end.line) { + column_end = end.column; + } else { + column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1); + } + + // Ensure we have at least one column of error. + if (start.column == column_end) column_end++; + + errors[index] = (pm_parse_error_t) { + .error = error, + .line = start.line, + .column_start = start.column, + .column_end = column_end + }; + } + + return errors; +} + +static inline void +pm_parse_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, uint32_t column_start, uint32_t column_end, pm_buffer_t *buffer) { + int32_t line_delta = line - parser->start_line; + assert(line_delta >= 0); + + size_t index = (size_t) line_delta; + assert(index < newline_list->size); + + const uint8_t *start = &parser->start[newline_list->offsets[index]]; + const uint8_t *end; + + if (index >= newline_list->size - 1) { + end = parser->end; + } else { + end = &parser->start[newline_list->offsets[index + 1]]; + } + + pm_buffer_append_format(buffer, number_prefix, line); + + // Here we determine if we should truncate the end of the line. + bool truncate_end = false; + if ((column_end != 0) && ((end - (start + column_end)) >= PM_ERROR_TRUNCATE)) { + end = start + column_end + PM_ERROR_TRUNCATE; + truncate_end = true; + } + + // Here we determine if we should truncate the start of the line. + if (column_start >= PM_ERROR_TRUNCATE) { + pm_buffer_append_string(buffer, "... ", 4); + start += column_start; + } + + pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start)); + + if (truncate_end) { + pm_buffer_append_string(buffer, " ...\n", 5); + } else if (end == parser->end && end[-1] != '\n') { + pm_buffer_append_string(buffer, "\n", 1); + } +} + +/** + * Format the errors on the parser into the given buffer. + */ +static void +pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) { + assert(error_list->size != 0); + + // First, we're going to sort all of the errors by line number using an + // insertion sort into a newly allocated array. + const int32_t start_line = parser->start_line; + const pm_newline_list_t *newline_list = &parser->newline_list; + + pm_parse_error_t *errors = pm_parse_errors_format_sort(parser, error_list, newline_list); + if (errors == NULL) return; + + // Now we're going to determine how we're going to format line numbers and + // blank lines based on the maximum number of digits in the line numbers + // that are going to be displaid. + pm_parse_error_format_t error_format; + int32_t first_line_number = errors[0].line; + int32_t last_line_number = errors[error_list->size - 1].line; + + // If we have a maximum line number that is negative, then we're going to + // use the absolute value for comparison but multiple by 10 to additionally + // have a column for the negative sign. + if (first_line_number < 0) first_line_number = (-first_line_number) * 10; + if (last_line_number < 0) last_line_number = (-last_line_number) * 10; + int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number; + + if (max_line_number < 10) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%1" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~\n" + }; + } + } else if (max_line_number < 100) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%2" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~\n" + }; + } + } else if (max_line_number < 1000) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%3" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~\n" + }; + } + } else if (max_line_number < 10000) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%4" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~~\n" + }; + } + } else { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%5" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~~\n" + }; + } + } + + error_format.blank_prefix_length = strlen(error_format.blank_prefix); + error_format.divider_length = strlen(error_format.divider); + + // Now we're going to iterate through every error in our error list and + // display it. While we're iterating, we will display some padding lines of + // the source before the error to give some context. We'll be careful not to + // display the same line twice in case the errors are close enough in the + // source. + int32_t last_line = parser->start_line - 1; + uint32_t last_column_start = 0; + const pm_encoding_t *encoding = parser->encoding; + + for (size_t index = 0; index < error_list->size; index++) { + pm_parse_error_t *error = &errors[index]; + + // Here we determine how many lines of padding of the source to display, + // based on the difference from the last line that was displaid. + if (error->line - last_line > 1) { + if (error->line - last_line > 2) { + if ((index != 0) && (error->line - last_line > 3)) { + pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length); + } + + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, 0, 0, buffer); + } + + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, 0, 0, buffer); + } + + // If this is the first error or we're on a new line, then we'll display + // the line that has the error in it. + if ((index == 0) || (error->line != last_line)) { + if (colorize) { + pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12); + } else { + pm_buffer_append_string(buffer, "> ", 2); + } + + last_column_start = error->column_start; + + // Find the maximum column end of all the errors on this line. + uint32_t column_end = error->column_end; + for (size_t next_index = index + 1; next_index < error_list->size; next_index++) { + if (errors[next_index].line != error->line) break; + if (errors[next_index].column_end > column_end) column_end = errors[next_index].column_end; + } + + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, error->column_start, column_end, buffer); + } + + const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]]; + if (start == parser->end) pm_buffer_append_byte(buffer, '\n'); + + // Now we'll display the actual error message. We'll do this by first + // putting the prefix to the line, then a bunch of blank spaces + // depending on the column, then as many carets as we need to display + // the width of the error, then the error message itself. + // + // Note that this doesn't take into account the width of the actual + // character when displaid in the terminal. For some east-asian + // languages or emoji, this means it can be thrown off pretty badly. We + // will need to solve this eventually. + pm_buffer_append_string(buffer, " ", 2); + pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length); + + size_t column = 0; + if (last_column_start >= PM_ERROR_TRUNCATE) { + pm_buffer_append_string(buffer, " ", 4); + column = last_column_start; + } + + while (column < error->column_start) { + pm_buffer_append_byte(buffer, ' '); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + } + + if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7); + pm_buffer_append_byte(buffer, '^'); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + + while (column < error->column_end) { + pm_buffer_append_byte(buffer, '~'); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + } + + if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3); + + if (inline_messages) { + pm_buffer_append_byte(buffer, ' '); + assert(error->error != NULL); + + const char *message = error->error->message; + pm_buffer_append_string(buffer, message, strlen(message)); + } + + pm_buffer_append_byte(buffer, '\n'); + + // Here we determine how many lines of padding to display after the + // error, depending on where the next error is in source. + last_line = error->line; + int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line; + + if (next_line - last_line > 1) { + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + } + + if (next_line - last_line > 1) { + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + } + } + + // Finally, we'll free the array of errors that we allocated. + xfree(errors); +} + +#undef PM_ERROR_TRUNCATE +#undef PM_COLOR_GRAY +#undef PM_COLOR_RED +#undef PM_COLOR_RESET + /** * Check if the given source slice is valid UTF-8. The location represents the * location of the error, but the slice of the source will include the content @@ -8798,7 +9929,7 @@ pm_parse_process_error(const pm_parse_result_t *result) pm_list_node_t *list_node = (pm_list_node_t *) error; pm_list_t error_list = { .size = 1, .head = list_node, .tail = list_node }; - pm_parser_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false); + pm_parse_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false); } VALUE value = rb_exc_new(rb_eArgError, pm_buffer_value(&buffer), pm_buffer_length(&buffer)); @@ -8828,7 +9959,7 @@ pm_parse_process_error(const pm_parse_result_t *result) ); if (valid_utf8) { - pm_parser_errors_format(parser, &parser->error_list, &buffer, rb_stderr_tty_p(), true); + pm_parse_errors_format(parser, &parser->error_list, &buffer, rb_stderr_tty_p(), true); } else { for (const pm_diagnostic_t *error = head; error != NULL; error = (const pm_diagnostic_t *) error->node.next) { @@ -8837,7 +9968,8 @@ pm_parse_process_error(const pm_parse_result_t *result) } } - VALUE error = rb_exc_new(rb_eSyntaxError, pm_buffer_value(&buffer), pm_buffer_length(&buffer)); + VALUE message = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), result->node.encoding); + VALUE error = rb_exc_new_str(rb_eSyntaxError, message); rb_encoding *filepath_encoding = result->node.filepath_encoding != NULL ? result->node.filepath_encoding : rb_utf8_encoding(); VALUE path = rb_enc_str_new((const char *) pm_string_source(filepath), pm_string_length(filepath), filepath_encoding); @@ -8862,10 +9994,16 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) // freed regardless of whether or we return an error. pm_scope_node_t *scope_node = &result->node; rb_encoding *filepath_encoding = scope_node->filepath_encoding; + int coverage_enabled = scope_node->coverage_enabled; pm_scope_node_init(node, scope_node, NULL); scope_node->filepath_encoding = filepath_encoding; + scope_node->encoding = rb_enc_find(parser->encoding->name); + if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); + + scope_node->coverage_enabled = coverage_enabled; + // Emit all of the various warnings from the parse. const pm_diagnostic_t *warning; const char *warning_filepath = (const char *) pm_string_source(&parser->filepath); @@ -8874,10 +10012,10 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) int line = pm_location_line_number(parser, &warning->location); if (warning->level == PM_WARNING_LEVEL_VERBOSE) { - rb_compile_warning(warning_filepath, line, "%s", warning->message); + rb_enc_compile_warning(scope_node->encoding, warning_filepath, line, "%s", warning->message); } else { - rb_compile_warn(warning_filepath, line, "%s", warning->message); + rb_enc_compile_warn(scope_node->encoding, warning_filepath, line, "%s", warning->message); } } @@ -8892,9 +10030,6 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) // Now set up the constant pool and intern all of the various constants into // their corresponding IDs. - scope_node->encoding = rb_enc_find(parser->encoding->name); - if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); - scope_node->parser = parser; scope_node->constants = calloc(parser->constant_pool.size, sizeof(ID)); @@ -9018,6 +10153,7 @@ pm_load_file(pm_parse_result_t *result, VALUE filepath, bool load_error) VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath) { + result->node.filepath_encoding = rb_enc_get(filepath); pm_options_filepath_set(&result->options, RSTRING_PTR(filepath)); RB_GC_GUARD(filepath); |