diff options
Diffstat (limited to 'prism_compile.c')
-rw-r--r-- | prism_compile.c | 2484 |
1 files changed, 1913 insertions, 571 deletions
diff --git a/prism_compile.c b/prism_compile.c index 26d94e979d..3aeb9d0e8e 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -55,6 +55,13 @@ #define PUSH_SEQ(seq1, seq2) \ APPEND_LIST((seq1), (seq2)) +#define PUSH_SYNTHETIC_PUTNIL(seq, iseq) \ + do { \ + int lineno = ISEQ_COMPILE_DATA(iseq)->last_line; \ + if (lineno == 0) lineno = FIX2INT(rb_iseq_first_lineno(iseq)); \ + ADD_SYNTHETIC_INSN(seq, lineno, -1, putnil); \ + } while (0) + /******************************************************************************/ /* These functions compile getlocal/setlocal instructions but operate on */ /* prism locations instead of NODEs. */ @@ -130,7 +137,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line_no, int c #define PM_NODE_END_LINE_COLUMN(parser, node) \ pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line) -#define PM_LOCATION_LINE_COLUMN(parser, location) \ +#define PM_LOCATION_START_LINE_COLUMN(parser, location) \ pm_newline_list_line_column(&(parser)->newline_list, (location)->start, (parser)->start_line) static int @@ -141,16 +148,15 @@ pm_node_line_number(const pm_parser_t *parser, const pm_node_t *node) static int pm_location_line_number(const pm_parser_t *parser, const pm_location_t *location) { - return (int) PM_LOCATION_LINE_COLUMN(parser, location).line; + return (int) PM_LOCATION_START_LINE_COLUMN(parser, location).line; } /** - * Convert the value of an integer node into a Ruby Integer. + * Parse the value of a pm_integer_t into a Ruby Integer. */ static VALUE -parse_integer(const pm_integer_node_t *node) +parse_integer_value(const pm_integer_t *integer) { - const pm_integer_t *integer = &node->value; VALUE result; if (integer->values == NULL) { @@ -181,6 +187,15 @@ parse_integer(const pm_integer_node_t *node) } /** + * Convert the value of an integer node into a Ruby Integer. 
+ */ +static inline VALUE +parse_integer(const pm_integer_node_t *node) +{ + return parse_integer_value(&node->value); +} + +/** * Convert the value of a float node into a Ruby Float. */ static VALUE @@ -198,36 +213,9 @@ parse_float(const pm_float_node_t *node) static VALUE parse_rational(const pm_rational_node_t *node) { - VALUE result; - - if (PM_NODE_TYPE_P(node->numeric, PM_FLOAT_NODE)) { - const uint8_t *start = node->base.location.start; - const uint8_t *end = node->base.location.end - 1; - size_t length = end - start; - - char *buffer = malloc(length + 1); - memcpy(buffer, start, length); - - buffer[length] = '\0'; - - char *decimal = memchr(buffer, '.', length); - RUBY_ASSERT(decimal); - size_t seen_decimal = decimal - buffer; - size_t fraclen = length - seen_decimal - 1; - memmove(decimal, decimal + 1, fraclen + 1); - - VALUE numerator = rb_cstr_to_inum(buffer, 10, false); - result = rb_rational_new(numerator, rb_int_positive_pow(10, fraclen)); - - free(buffer); - } - else { - RUBY_ASSERT(PM_NODE_TYPE_P(node->numeric, PM_INTEGER_NODE)); - VALUE numerator = parse_integer((const pm_integer_node_t *) node->numeric); - result = rb_rational_raw(numerator, INT2FIX(1)); - } - - return result; + VALUE numerator = parse_integer_value(&node->numerator); + VALUE denominator = parse_integer_value(&node->denominator); + return rb_rational_new(numerator, denominator); } /** @@ -272,7 +260,7 @@ parse_string(const pm_scope_node_t *scope_node, const pm_string_t *string) * creating those strings based on the flags set on the owning node. 
*/ static inline VALUE -parse_string_encoded(const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *string) +parse_string_encoded(const pm_node_t *node, const pm_string_t *string, rb_encoding *default_encoding) { rb_encoding *encoding; @@ -283,7 +271,7 @@ parse_string_encoded(const pm_scope_node_t *scope_node, const pm_node_t *node, c encoding = rb_utf8_encoding(); } else { - encoding = scope_node->encoding; + encoding = default_encoding; } return rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding); @@ -294,17 +282,17 @@ parse_static_literal_string(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, { rb_encoding *encoding; - if (node->flags & PM_ENCODING_FLAGS_FORCED_BINARY_ENCODING) { + if (node->flags & PM_STRING_FLAGS_FORCED_BINARY_ENCODING) { encoding = rb_ascii8bit_encoding(); } - else if (node->flags & PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING) { + else if (node->flags & PM_STRING_FLAGS_FORCED_UTF8_ENCODING) { encoding = rb_utf8_encoding(); } else { encoding = scope_node->encoding; } - VALUE value = rb_enc_interned_str((const char *) pm_string_source(string), pm_string_length(string), encoding); + VALUE value = rb_enc_literal_str((const char *) pm_string_source(string), pm_string_length(string), encoding); rb_enc_str_coderange(value); if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) { @@ -344,91 +332,46 @@ pm_optimizable_range_item_p(const pm_node_t *node) return (!node || PM_NODE_TYPE_P(node, PM_INTEGER_NODE) || PM_NODE_TYPE_P(node, PM_NIL_NODE)); } -static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); - -static int -pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +/** Raise an error corresponding to the invalid regular expression. 
*/ +static VALUE +parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) { - int stack_size = 0; - size_t parts_size = parts->size; - bool interpolated = false; - - if (parts_size > 0) { - VALUE current_string = Qnil; - - for (size_t index = 0; index < parts_size; index++) { - const pm_node_t *part = parts->nodes[index]; - - if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { - const pm_string_node_t *string_node = (const pm_string_node_t *) part; - VALUE string_value = parse_string_encoded(scope_node, (const pm_node_t *) string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else { - interpolated = true; - - if ( - PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && - ((const pm_embedded_statements_node_t *) part)->statements != NULL && - ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && - PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], PM_STRING_NODE) - ) { - const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; - VALUE string_value = parse_string_encoded(scope_node, (const pm_node_t *) string_node, &string_node->unescaped); - - if (RTEST(current_string)) { - current_string = rb_str_concat(current_string, string_value); - } - else { - current_string = string_value; - } - } - else { - if (!RTEST(current_string)) { - current_string = rb_enc_str_new(NULL, 0, scope_node->encoding); - } - - PUSH_INSN1(ret, *node_location, putobject, rb_fstring(current_string)); - PM_COMPILE_NOT_POPPED(part); - PUSH_INSN(ret, *node_location, dup); - PUSH_INSN1(ret, *node_location, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE , NULL, FALSE)); - PUSH_INSN(ret, *node_location, anytostring); - - current_string = Qnil; - stack_size += 2; - } - } - } - - 
if (RTEST(current_string)) { - current_string = rb_fstring(current_string); + va_list args; + va_start(args, fmt); + VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, "%" PRIsVALUE, args); + va_end(args); + rb_exc_raise(error); +} - if (stack_size == 0 && interpolated) { - PUSH_INSN1(ret, *node_location, putstring, current_string); - } - else { - PUSH_INSN1(ret, *node_location, putobject, current_string); - } +static VALUE +parse_regexp_string_part(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +{ + // If we were passed an explicit regexp encoding, then we need to double + // check that it's okay here for this fragment of the string. + rb_encoding *encoding; - current_string = Qnil; - stack_size++; - } + if (explicit_regexp_encoding != NULL) { + encoding = explicit_regexp_encoding; + } + else if (node->flags & PM_STRING_FLAGS_FORCED_BINARY_ENCODING) { + encoding = rb_ascii8bit_encoding(); + } + else if (node->flags & PM_STRING_FLAGS_FORCED_UTF8_ENCODING) { + encoding = rb_utf8_encoding(); } else { - PUSH_INSN(ret, *node_location, putnil); + encoding = implicit_regexp_encoding; } - return stack_size; + VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), encoding); + VALUE error = rb_reg_check_preprocess(string); + + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, node), "%" PRIsVALUE, rb_obj_as_string(error)); + return string; } static VALUE -pm_static_literal_concat(const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, bool top) +pm_static_literal_concat(rb_iseq_t *iseq, const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding, bool top) { VALUE current = Qnil; @@ -438,11 +381,28 @@ pm_static_literal_concat(const 
pm_node_list_t *nodes, const pm_scope_node_t *sco switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: - string = parse_string_encoded(scope_node, part, &((const pm_string_node_t *) part)->unescaped); + if (implicit_regexp_encoding != NULL) { + if (top) { + string = parse_regexp_string_part(iseq, scope_node, part, &((const pm_string_node_t *) part)->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + else { + string = parse_string_encoded(part, &((const pm_string_node_t *) part)->unescaped, scope_node->encoding); + VALUE error = rb_reg_check_preprocess(string); + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, part), "%" PRIsVALUE, rb_obj_as_string(error)); + } + } + else { + string = parse_string_encoded(part, &((const pm_string_node_t *) part)->unescaped, scope_node->encoding); + } break; case PM_INTERPOLATED_STRING_NODE: - string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) part)->parts, scope_node, false); + string = pm_static_literal_concat(iseq, &((const pm_interpolated_string_node_t *) part)->parts, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); break; + case PM_EMBEDDED_STATEMENTS_NODE: { + const pm_embedded_statements_node_t *cast = (const pm_embedded_statements_node_t *) part; + string = pm_static_literal_concat(iseq, &cast->statements->body, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); + break; + } default: RUBY_ASSERT(false && "unexpected node type in pm_static_literal_concat"); return Qnil; @@ -536,21 +496,10 @@ parse_regexp_encoding(const pm_scope_node_t *scope_node, const pm_node_t *node) return rb_enc_get_from_index(ENCINDEX_Windows_31J); } else { - return scope_node->encoding; + return NULL; } } -/** Raise an error corresponding to the invalid regular expression. */ -static VALUE -parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) 
-{ - va_list args; - va_start(args, fmt); - VALUE error = rb_syntax_error_append(Qnil, rb_iseq_path(iseq), line_number, -1, NULL, "%" PRIsVALUE, args); - va_end(args); - rb_exc_raise(error); -} - static VALUE parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string) { @@ -574,22 +523,149 @@ parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t static inline VALUE parse_regexp_literal(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) { - VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), parse_regexp_encoding(scope_node, node)); + rb_encoding *regexp_encoding = parse_regexp_encoding(scope_node, node); + if (regexp_encoding == NULL) regexp_encoding = scope_node->encoding; + + VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), regexp_encoding); return parse_regexp(iseq, scope_node, node, string); } static inline VALUE parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts) { - VALUE string = pm_static_literal_concat(parts, scope_node, false); - rb_enc_associate(string, parse_regexp_encoding(scope_node, node)); + rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); + rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? 
explicit_regexp_encoding : scope_node->encoding; + + VALUE string = pm_static_literal_concat(iseq, parts, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false); return parse_regexp(iseq, scope_node, node, string); } +static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); + +static int +pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +{ + int stack_size = 0; + size_t parts_size = parts->size; + bool interpolated = false; + + if (parts_size > 0) { + VALUE current_string = Qnil; + pm_line_column_t current_location = *node_location; + + for (size_t index = 0; index < parts_size; index++) { + const pm_node_t *part = parts->nodes[index]; + + if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) { + const pm_string_node_t *string_node = (const pm_string_node_t *) part; + VALUE string_value; + + if (implicit_regexp_encoding == NULL) { + string_value = parse_string_encoded(part, &string_node->unescaped, scope_node->encoding); + } + else { + string_value = parse_regexp_string_part(iseq, scope_node, (const pm_node_t *) string_node, &string_node->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + if (index != 0) current_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, part); + } + } + else { + interpolated = true; + + if ( + PM_NODE_TYPE_P(part, PM_EMBEDDED_STATEMENTS_NODE) && + ((const pm_embedded_statements_node_t *) part)->statements != NULL && + ((const pm_embedded_statements_node_t *) part)->statements->body.size == 1 && + PM_NODE_TYPE_P(((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0], 
PM_STRING_NODE) + ) { + const pm_string_node_t *string_node = (const pm_string_node_t *) ((const pm_embedded_statements_node_t *) part)->statements->body.nodes[0]; + VALUE string_value; + + if (implicit_regexp_encoding == NULL) { + string_value = parse_string_encoded(part, &string_node->unescaped, scope_node->encoding); + } + else { + string_value = parse_regexp_string_part(iseq, scope_node, (const pm_node_t *) string_node, &string_node->unescaped, implicit_regexp_encoding, explicit_regexp_encoding); + } + + if (RTEST(current_string)) { + current_string = rb_str_concat(current_string, string_value); + } + else { + current_string = string_value; + current_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, part); + } + } + else { + if (!RTEST(current_string)) { + rb_encoding *encoding; + + if (implicit_regexp_encoding != NULL) { + if (explicit_regexp_encoding != NULL) { + encoding = explicit_regexp_encoding; + } + else if (scope_node->parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { + encoding = rb_ascii8bit_encoding(); + } + else { + encoding = implicit_regexp_encoding; + } + } + else { + encoding = scope_node->encoding; + } + + current_string = rb_enc_str_new(NULL, 0, encoding); + } + + PUSH_INSN1(ret, current_location, putobject, rb_fstring(current_string)); + PM_COMPILE_NOT_POPPED(part); + + const pm_line_column_t current_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, part); + PUSH_INSN(ret, current_location, dup); + PUSH_INSN1(ret, current_location, objtostring, new_callinfo(iseq, idTo_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE, NULL, FALSE)); + PUSH_INSN(ret, current_location, anytostring); + + current_string = Qnil; + stack_size += 2; + } + } + } + + if (RTEST(current_string)) { + current_string = rb_fstring(current_string); + + if (stack_size == 0 && interpolated) { + PUSH_INSN1(ret, current_location, putstring, current_string); + } + else { + PUSH_INSN1(ret, current_location, putobject, current_string); + } + + current_string = Qnil; + 
stack_size++; + } + } + else { + PUSH_INSN(ret, *node_location, putnil); + } + + return stack_size; +} + static void pm_compile_regexp_dynamic(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list_t *parts, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node); + rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); + rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? explicit_regexp_encoding : scope_node->encoding; + + int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node, implicit_regexp_encoding, explicit_regexp_encoding); PUSH_INSN2(ret, *node_location, toregexp, INT2FIX(parse_regexp_flags(node) & 0xFF), INT2FIX(length)); } @@ -686,13 +762,13 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n return parse_regexp_concat(iseq, scope_node, (const pm_node_t *) cast, &cast->parts); } case PM_INTERPOLATED_STRING_NODE: { - VALUE string = pm_static_literal_concat(&((const pm_interpolated_string_node_t *) node)->parts, scope_node, false); + VALUE string = pm_static_literal_concat(iseq, &((const pm_interpolated_string_node_t *) node)->parts, scope_node, NULL, NULL, false); int line_number = pm_node_line_number(scope_node->parser, node); return pm_static_literal_string(iseq, string, line_number); } case PM_INTERPOLATED_SYMBOL_NODE: { const pm_interpolated_symbol_node_t *cast = (const pm_interpolated_symbol_node_t *) node; - VALUE string = pm_static_literal_concat(&cast->parts, scope_node, true); + VALUE string = pm_static_literal_concat(iseq, &cast->parts, scope_node, NULL, NULL, true); return ID2SYM(rb_intern_str(string)); } @@ -730,6 +806,28 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n } } +/** + * A helper for converting a pm_location_t into a 
rb_code_location_t. + */ +static rb_code_location_t +pm_code_location(const pm_scope_node_t *scope_node, const pm_node_t *node) +{ + const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); + const pm_line_column_t end_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, node); + + return (rb_code_location_t) { + .beg_pos = { .lineno = start_location.line, .column = start_location.column }, + .end_pos = { .lineno = end_location.line, .column = end_location.column } + }; +} + +/** + * A macro for determining if we should go through the work of adding branch + * coverage to the current iseq. We check this manually each time because we + * want to avoid the overhead of creating rb_code_location_t objects. + */ +#define PM_BRANCH_COVERAGE_P(iseq) (ISEQ_COVERAGE(iseq) && ISEQ_BRANCH_COVERAGE(iseq)) + static void pm_compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_node_t *cond, LABEL *then_label, LABEL *else_label, bool popped, pm_scope_node_t *scope_node); @@ -882,14 +980,25 @@ again: * Compile an if or unless node. 
*/ static void -pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, const pm_statements_node_t *statements, const pm_node_t *consequent, const pm_node_t *predicate, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_type_t type, const pm_node_t *node, const pm_statements_node_t *statements, const pm_node_t *consequent, const pm_node_t *predicate, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { const pm_line_column_t location = *line_column; LABEL *then_label = NEW_LABEL(location.line); LABEL *else_label = NEW_LABEL(location.line); LABEL *end_label = NULL; - pm_compile_branch_condition(iseq, ret, predicate, then_label, else_label, false, scope_node); + DECL_ANCHOR(cond_seq); + INIT_ANCHOR(cond_seq); + pm_compile_branch_condition(iseq, cond_seq, predicate, then_label, else_label, false, scope_node); + PUSH_SEQ(ret, cond_seq); + + rb_code_location_t conditional_location = { 0 }; + VALUE branches = Qfalse; + + if (then_label->refcnt && else_label->refcnt && PM_BRANCH_COVERAGE_P(iseq)) { + conditional_location = pm_code_location(scope_node, node); + branches = decl_branch_base(iseq, PTR2NUM(node), &conditional_location, type == PM_IF_NODE ? "if" : "unless"); + } if (then_label->refcnt) { PUSH_LABEL(ret, then_label); @@ -897,14 +1006,33 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, con DECL_ANCHOR(then_seq); INIT_ANCHOR(then_seq); - if (statements) { + if (statements != NULL) { pm_compile_node(iseq, (const pm_node_t *) statements, then_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(then_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(then_seq, iseq); } if (else_label->refcnt) { + // Establish branch coverage for the then block. 
+ if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location; + + if (statements != NULL) { + branch_location = pm_code_location(scope_node, (const pm_node_t *) statements); + } else if (type == PM_IF_NODE) { + pm_line_column_t predicate_end = PM_NODE_END_LINE_COLUMN(scope_node->parser, predicate); + branch_location = (rb_code_location_t) { + .beg_pos = { .lineno = predicate_end.line, .column = predicate_end.column }, + .end_pos = { .lineno = predicate_end.line, .column = predicate_end.column } + }; + } else { + branch_location = conditional_location; + } + + add_trace_branch_coverage(iseq, ret, &branch_location, branch_location.beg_pos.column, 0, type == PM_IF_NODE ? "then" : "else", branches); + } + end_label = NEW_LABEL(location.line); PUSH_INSNL(then_seq, location, jump, end_label); if (!popped) PUSH_INSN(then_seq, location, pop); @@ -919,11 +1047,27 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, con DECL_ANCHOR(else_seq); INIT_ANCHOR(else_seq); - if (consequent) { - pm_compile_node(iseq, (const pm_node_t *) consequent, else_seq, popped, scope_node); + if (consequent != NULL) { + pm_compile_node(iseq, consequent, else_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(else_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(else_seq, iseq); + } + + // Establish branch coverage for the else block. + if (then_label->refcnt && PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location; + + if (consequent == NULL) { + branch_location = conditional_location; + } else if (PM_NODE_TYPE_P(consequent, PM_ELSE_NODE)) { + const pm_else_node_t *else_node = (const pm_else_node_t *) consequent; + branch_location = pm_code_location(scope_node, else_node->statements != NULL ? 
((const pm_node_t *) else_node->statements) : (const pm_node_t *) else_node); + } else { + branch_location = pm_code_location(scope_node, (const pm_node_t *) consequent); + } + + add_trace_branch_coverage(iseq, ret, &branch_location, branch_location.beg_pos.column, 1, type == PM_IF_NODE ? "else" : "then", branches); } PUSH_SEQ(ret, else_seq); @@ -940,7 +1084,7 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_line_column_t *line_column, con * Compile a while or until loop. */ static void -pm_compile_loop(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_flags_t flags, enum pm_node_type type, const pm_statements_node_t *statements, const pm_node_t *predicate, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +pm_compile_loop(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_flags_t flags, enum pm_node_type type, const pm_node_t *node, const pm_statements_node_t *statements, const pm_node_t *predicate, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { const pm_line_column_t location = *line_column; @@ -976,9 +1120,19 @@ pm_compile_loop(rb_iseq_t *iseq, const pm_line_column_t *line_column, pm_node_fl if (tmp_label) PUSH_LABEL(ret, tmp_label); PUSH_LABEL(ret, redo_label); - if (statements != NULL) PM_COMPILE_POPPED((const pm_node_t *) statements); + // Establish branch coverage for the loop. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t loop_location = pm_code_location(scope_node, node); + VALUE branches = decl_branch_base(iseq, PTR2NUM(node), &loop_location, type == PM_WHILE_NODE ? "while" : "until"); + + rb_code_location_t branch_location = statements != NULL ? 
pm_code_location(scope_node, (const pm_node_t *) statements) : loop_location; + add_trace_branch_coverage(iseq, ret, &branch_location, branch_location.beg_pos.column, 0, "body", branches); + } + + if (statements != NULL) PM_COMPILE_POPPED((const pm_node_t *) statements); PUSH_LABEL(ret, next_label); + if (type == PM_WHILE_NODE) { pm_compile_branch_condition(iseq, ret, predicate, redo_label, end_label, popped, scope_node); } @@ -1407,7 +1561,17 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b break; } case PM_FORWARDING_ARGUMENTS_NODE: { + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + *flags |= VM_CALL_FORWARDING; + + pm_local_index_t mult_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_DOT3, 0); + PUSH_GETLOCAL(ret, location, mult_local.index, mult_local.level); + + break; + } + orig_argc += 2; + *flags |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_SPLAT_MUT | VM_CALL_ARGS_BLOCKARG | VM_CALL_KW_SPLAT; // Forwarding arguments nodes are treated as foo(*, **, &) @@ -1572,7 +1736,7 @@ pm_compile_index_operator_write_node(rb_iseq_t *iseq, const pm_index_operator_wr PUSH_SEND_R(ret, location, idAREF, INT2FIX(argc), NULL, INT2FIX(flag & ~(VM_CALL_ARGS_SPLAT_MUT | VM_CALL_KW_SPLAT_MUT)), keywords); PM_COMPILE_NOT_POPPED(node->value); - ID id_operator = pm_constant_id_lookup(scope_node, node->operator); + ID id_operator = pm_constant_id_lookup(scope_node, node->binary_operator); PUSH_SEND(ret, location, id_operator, INT2FIX(1)); if (!popped) { @@ -2491,7 +2655,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t const char *name = rb_id2name(id); if (name && strlen(name) > 0 && name[0] != '_') { - COMPILE_ERROR(ERROR_ARGS "illegal variable in alternative pattern (%"PRIsVALUE")", rb_id2str(id)); + COMPILE_ERROR(iseq, location.line, "illegal variable in alternative pattern (%"PRIsVALUE")", rb_id2str(id)); return COMPILE_NG; } } @@ -2698,6 +2862,7 @@ pm_scope_node_init(const 
pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ scope->encoding = previous->encoding; scope->filepath_encoding = previous->filepath_encoding; scope->constants = previous->constants; + scope->coverage_enabled = previous->coverage_enabled; } switch (PM_NODE_TYPE(node)) { @@ -2808,25 +2973,346 @@ pm_scope_node_destroy(pm_scope_node_t *scope_node) } } +/** + * We need to put the label "retry_end_l" immediately after the last "send" + * instruction. This because vm_throw checks if the break cont is equal to the + * index of next insn of the "send". (Otherwise, it is considered + * "break from proc-closure". See "TAG_BREAK" handling in "vm_throw_start".) + * + * Normally, "send" instruction is at the last. However, qcall under branch + * coverage measurement adds some instructions after the "send". + * + * Note that "invokesuper" appears instead of "send". + */ +static void +pm_compile_retry_end_label(rb_iseq_t *iseq, LINK_ANCHOR *const ret, LABEL *retry_end_l) +{ + INSN *iobj; + LINK_ELEMENT *last_elem = LAST_ELEMENT(ret); + iobj = IS_INSN(last_elem) ? (INSN*) last_elem : (INSN*) get_prev_insn((INSN*) last_elem); + while (!IS_INSN_ID(iobj, send) && !IS_INSN_ID(iobj, invokesuper) && !IS_INSN_ID(iobj, sendforward) && !IS_INSN_ID(iobj, invokesuperforward)) { + iobj = (INSN*) get_prev_insn(iobj); + } + ELEM_INSERT_NEXT(&iobj->link, (LINK_ELEMENT*) retry_end_l); + + // LINK_ANCHOR has a pointer to the last element, but + // ELEM_INSERT_NEXT does not update it even if we add an insn to the + // last of LINK_ANCHOR. So this updates it manually. 
+ if (&iobj->link == LAST_ELEMENT(ret)) { + ret->last = (LINK_ELEMENT*) retry_end_l; + } +} + +static const char * +pm_iseq_builtin_function_name(const pm_scope_node_t *scope_node, const pm_node_t *receiver, ID method_id) +{ + const char *name = rb_id2name(method_id); + static const char prefix[] = "__builtin_"; + const size_t prefix_len = sizeof(prefix) - 1; + + if (receiver == NULL) { + if (UNLIKELY(strncmp(prefix, name, prefix_len) == 0)) { + // __builtin_foo + return &name[prefix_len]; + } + } + else if (PM_NODE_TYPE_P(receiver, PM_CALL_NODE)) { + if (PM_NODE_FLAG_P(receiver, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { + const pm_call_node_t *cast = (const pm_call_node_t *) receiver; + if (pm_constant_id_lookup(scope_node, cast->name) == rb_intern_const("__builtin")) { + // __builtin.foo + return name; + } + } + } + else if (PM_NODE_TYPE_P(receiver, PM_CONSTANT_READ_NODE)) { + const pm_constant_read_node_t *cast = (const pm_constant_read_node_t *) receiver; + if (pm_constant_id_lookup(scope_node, cast->name) == rb_intern_const("Primitive")) { + // Primitive.foo + return name; + } + } + + return NULL; +} + +// Compile Primitive.attr! :leaf, ... 
+static int +pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_line_column_t *node_location) +{ + if (arguments == NULL) { + COMPILE_ERROR(iseq, node_location->line, "attr!: no argument"); + return COMPILE_NG; + } + + const pm_node_t *argument; + PM_NODE_LIST_FOREACH(&arguments->arguments, index, argument) { + if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to attr!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + return COMPILE_NG; + } + + VALUE symbol = pm_static_literal_value(iseq, argument, scope_node); + VALUE string = rb_sym_to_s(symbol); + + if (strcmp(RSTRING_PTR(string), "leaf") == 0) { + ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF; + } + else if (strcmp(RSTRING_PTR(string), "inline_block") == 0) { + ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_INLINE_BLOCK; + } + else if (strcmp(RSTRING_PTR(string), "use_block") == 0) { + iseq_set_use_block(iseq); + } + else { + COMPILE_ERROR(iseq, node_location->line, "unknown argument to attr!: %s", RSTRING_PTR(string)); + return COMPILE_NG; + } + } + + return COMPILE_OK; +} + +static int +pm_compile_builtin_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_line_column_t *node_location, int popped) +{ + if (arguments == NULL) { + COMPILE_ERROR(iseq, node_location->line, "arg!: no argument"); + return COMPILE_NG; + } + + if (arguments->arguments.size != 1) { + COMPILE_ERROR(iseq, node_location->line, "arg!: too many argument"); + return COMPILE_NG; + } + + const pm_node_t *argument = arguments->arguments.nodes[0]; + if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to arg!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + return COMPILE_NG; + } + + if (!popped) { + ID name = parse_string_symbol(scope_node, ((const 
pm_symbol_node_t *) argument)); + int index = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->local_table_size - get_local_var_idx(iseq, name); + + debugs("id: %s idx: %d\n", rb_id2name(name), index); + PUSH_GETLOCAL(ret, *node_location, index, get_lvar_level(iseq)); + } + + return COMPILE_OK; +} + +static int +pm_compile_builtin_mandatory_only_method(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_call_node_t *call_node, const pm_line_column_t *node_location) +{ + const pm_node_t *ast_node = scope_node->ast_node; + if (!PM_NODE_TYPE_P(ast_node, PM_DEF_NODE)) { + rb_bug("mandatory_only?: not in method definition"); + return COMPILE_NG; + } + + const pm_def_node_t *def_node = (const pm_def_node_t *) ast_node; + const pm_parameters_node_t *parameters_node = def_node->parameters; + if (parameters_node == NULL) { + rb_bug("mandatory_only?: in method definition with no parameters"); + return COMPILE_NG; + } + + const pm_node_t *body_node = def_node->body; + if (body_node == NULL || !PM_NODE_TYPE_P(body_node, PM_STATEMENTS_NODE) || (((const pm_statements_node_t *) body_node)->body.size != 1) || !PM_NODE_TYPE_P(((const pm_statements_node_t *) body_node)->body.nodes[0], PM_IF_NODE)) { + rb_bug("mandatory_only?: not in method definition with plain statements"); + return COMPILE_NG; + } + + const pm_if_node_t *if_node = (const pm_if_node_t *) ((const pm_statements_node_t *) body_node)->body.nodes[0]; + if (if_node->predicate != ((const pm_node_t *) call_node)) { + rb_bug("mandatory_only?: can't find mandatory node"); + return COMPILE_NG; + } + + pm_parameters_node_t parameters = { + .base = parameters_node->base, + .requireds = parameters_node->requireds + }; + + const pm_def_node_t def = { + .base = def_node->base, + .name = def_node->name, + .receiver = def_node->receiver, + .parameters = ¶meters, + .body = (pm_node_t *) if_node->statements, + .locals = { + .ids = def_node->locals.ids, + .size = parameters_node->requireds.size, + .capacity = def_node->locals.capacity + } + 
}; + + pm_scope_node_t next_scope_node; + pm_scope_node_init(&def.base, &next_scope_node, scope_node); + + ISEQ_BODY(iseq)->mandatory_only_iseq = pm_iseq_new_with_opt( + &next_scope_node, + rb_iseq_base_label(iseq), + rb_iseq_path(iseq), + rb_iseq_realpath(iseq), + node_location->line, + NULL, + 0, + ISEQ_TYPE_METHOD, + ISEQ_COMPILE_DATA(iseq)->option + ); + + pm_scope_node_destroy(&next_scope_node); + return COMPILE_OK; +} + +static int +pm_compile_builtin_function_call(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node, const pm_call_node_t *call_node, const pm_line_column_t *node_location, int popped, const rb_iseq_t *parent_block, const char *builtin_func) +{ + const pm_arguments_node_t *arguments = call_node->arguments; + + if (parent_block != NULL) { + COMPILE_ERROR(iseq, node_location->line, "should not call builtins here."); + return COMPILE_NG; + } + +#define BUILTIN_INLINE_PREFIX "_bi" + char inline_func[sizeof(BUILTIN_INLINE_PREFIX) + DECIMAL_SIZE_OF(int)]; + bool cconst = false; +retry:; + const struct rb_builtin_function *bf = iseq_builtin_function_lookup(iseq, builtin_func); + + if (bf == NULL) { + if (strcmp("cstmt!", builtin_func) == 0 || strcmp("cexpr!", builtin_func) == 0) { + // ok + } + else if (strcmp("cconst!", builtin_func) == 0) { + cconst = true; + } + else if (strcmp("cinit!", builtin_func) == 0) { + // ignore + return COMPILE_OK; + } + else if (strcmp("attr!", builtin_func) == 0) { + return pm_compile_builtin_attr(iseq, scope_node, arguments, node_location); + } + else if (strcmp("arg!", builtin_func) == 0) { + return pm_compile_builtin_arg(iseq, ret, scope_node, arguments, node_location, popped); + } + else if (strcmp("mandatory_only?", builtin_func) == 0) { + if (popped) { + rb_bug("mandatory_only? should be in if condition"); + } + else if (!LIST_INSN_SIZE_ZERO(ret)) { + rb_bug("mandatory_only? 
should be put on top"); + } + + PUSH_INSN1(ret, *node_location, putobject, Qfalse); + return pm_compile_builtin_mandatory_only_method(iseq, scope_node, call_node, node_location); + } + else if (1) { + rb_bug("can't find builtin function:%s", builtin_func); + } + else { + COMPILE_ERROR(iseq, node_location->line, "can't find builtin function:%s", builtin_func); + return COMPILE_NG; + } + + int inline_index = node_location->line; + snprintf(inline_func, sizeof(inline_func), BUILTIN_INLINE_PREFIX "%d", inline_index); + builtin_func = inline_func; + arguments = NULL; + goto retry; + } + + if (cconst) { + typedef VALUE(*builtin_func0)(void *, VALUE); + VALUE const_val = (*(builtin_func0)bf->func_ptr)(NULL, Qnil); + PUSH_INSN1(ret, *node_location, putobject, const_val); + return COMPILE_OK; + } + + // fprintf(stderr, "func_name:%s -> %p\n", builtin_func, bf->func_ptr); + + DECL_ANCHOR(args_seq); + INIT_ANCHOR(args_seq); + + int flags = 0; + struct rb_callinfo_kwarg *keywords = NULL; + int argc = pm_setup_args(arguments, call_node->block, &flags, &keywords, iseq, args_seq, scope_node, node_location); + + if (argc != bf->argc) { + COMPILE_ERROR(iseq, node_location->line, "argc is not match for builtin function:%s (expect %d but %d)", builtin_func, bf->argc, argc); + return COMPILE_NG; + } + + unsigned int start_index; + if (delegate_call_p(iseq, argc, args_seq, &start_index)) { + PUSH_INSN2(ret, *node_location, opt_invokebuiltin_delegate, bf, INT2FIX(start_index)); + } + else { + PUSH_SEQ(ret, args_seq); + PUSH_INSN1(ret, *node_location, invokebuiltin, bf); + } + + if (popped) PUSH_INSN(ret, *node_location, pop); + return COMPILE_OK; +} + +/** + * Compile a call node into the given iseq. 
+ */ static void pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, ID method_id, LABEL *start) { const pm_location_t *message_loc = &call_node->message_loc; if (message_loc->start == NULL) message_loc = &call_node->base.location; - const pm_line_column_t location = PM_LOCATION_LINE_COLUMN(scope_node->parser, message_loc); + const pm_line_column_t location = PM_LOCATION_START_LINE_COLUMN(scope_node->parser, message_loc); LABEL *else_label = NEW_LABEL(location.line); LABEL *end_label = NEW_LABEL(location.line); + LABEL *retry_end_l = NEW_LABEL(location.line); + + VALUE branches = Qfalse; + rb_code_location_t code_location = { 0 }; + int node_id = location.column; if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + if (PM_BRANCH_COVERAGE_P(iseq)) { + const uint8_t *cursors[3] = { + call_node->closing_loc.end, + call_node->arguments == NULL ? NULL : call_node->arguments->base.location.end, + call_node->message_loc.end + }; + + const uint8_t *end_cursor = cursors[0]; + end_cursor = (end_cursor == NULL || cursors[1] == NULL) ? cursors[1] : (end_cursor > cursors[1] ? end_cursor : cursors[1]); + end_cursor = (end_cursor == NULL || cursors[2] == NULL) ? cursors[2] : (end_cursor > cursors[2] ? 
end_cursor : cursors[2]); + + const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, call_node); + const pm_line_column_t end_location = pm_newline_list_line_column(&scope_node->parser->newline_list, end_cursor, scope_node->parser->start_line); + + code_location = (rb_code_location_t) { + .beg_pos = { .lineno = start_location.line, .column = start_location.column }, + .end_pos = { .lineno = end_location.line, .column = end_location.column } + }; + + branches = decl_branch_base(iseq, PTR2NUM(call_node), &code_location, "&."); + } + PUSH_INSN(ret, location, dup); PUSH_INSNL(ret, location, branchnil, else_label); + + add_trace_branch_coverage(iseq, ret, &code_location, node_id, 0, "then", branches); } int flags = 0; struct rb_callinfo_kwarg *kw_arg = NULL; int orig_argc = pm_setup_args(call_node->arguments, call_node->block, &flags, &kw_arg, iseq, ret, scope_node, &location); + const rb_iseq_t *previous_block = ISEQ_COMPILE_DATA(iseq)->current_block; const rb_iseq_t *block_iseq = NULL; if (call_node->block != NULL && PM_NODE_TYPE_P(call_node->block, PM_BLOCK_NODE)) { @@ -2836,10 +3322,6 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c block_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, pm_node_line_number(scope_node->parser, call_node->block)); pm_scope_node_destroy(&next_scope_node); - - if (ISEQ_BODY(block_iseq)->catch_table) { - PUSH_CATCH_ENTRY(CATCH_TYPE_BREAK, start, end_label, block_iseq, end_label); - } ISEQ_COMPILE_DATA(iseq)->current_block = block_iseq; } else { @@ -2884,12 +3366,15 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg); + if (block_iseq && ISEQ_BODY(block_iseq)->catch_table) { + pm_compile_retry_end_label(iseq, ret, retry_end_l); + PUSH_CATCH_ENTRY(CATCH_TYPE_BREAK, start, retry_end_l, block_iseq, retry_end_l); + } + 
if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { PUSH_INSNL(ret, location, jump, end_label); PUSH_LABEL(ret, else_label); - } - - if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION) || (block_iseq && ISEQ_BODY(block_iseq)->catch_table)) { + add_trace_branch_coverage(iseq, ret, &code_location, node_id, 1, "else", branches); PUSH_LABEL(ret, end_label); } @@ -2898,6 +3383,7 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c } if (popped) PUSH_INSN(ret, location, pop); + ISEQ_COMPILE_DATA(iseq)->current_block = previous_block; } static void @@ -3075,7 +3561,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_line_c } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); if (cast->parent != NULL) { if (!lfinish[1]) lfinish[1] = NEW_LABEL(location.line); @@ -3371,8 +3857,11 @@ pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_tabl if (rest->expression != NULL) { RUBY_ASSERT(PM_NODE_TYPE_P(rest->expression, PM_REQUIRED_PARAMETER_NODE)); - pm_insert_local_index(((const pm_required_parameter_node_t *) rest->expression)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); - local_index++; + + if (!PM_NODE_FLAG_P(rest->expression, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + pm_insert_local_index(((const pm_required_parameter_node_t *) rest->expression)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + local_index++; + } } } @@ -3380,8 +3869,10 @@ pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_tabl const pm_node_t *right = node->rights.nodes[index]; if (PM_NODE_TYPE_P(right, PM_REQUIRED_PARAMETER_NODE)) { - pm_insert_local_index(((const 
pm_required_parameter_node_t *) right)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); - local_index++; + if (!PM_NODE_FLAG_P(right, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + pm_insert_local_index(((const pm_required_parameter_node_t *) right)->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + local_index++; + } } else { RUBY_ASSERT(PM_NODE_TYPE_P(right, PM_MULTI_TARGET_NODE)); @@ -3574,11 +4065,11 @@ pm_multi_target_state_update(pm_multi_target_state_t *state) previous = current; current = current->next; - free(previous); + xfree(previous); } } -static size_t +static void pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state); /** @@ -3686,7 +4177,7 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons // for I::J in []; end // const pm_constant_path_target_node_t *cast = (const pm_constant_path_target_node_t *) node; - ID name = pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name); + ID name = pm_constant_id_lookup(scope_node, cast->name); if (cast->parent != NULL) { pm_compile_node(iseq, cast->parent, parents, false, scope_node); @@ -3725,6 +4216,13 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons pm_compile_node(iseq, cast->receiver, parents, false, scope_node); + LABEL *safe_label = NULL; + if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { + safe_label = NEW_LABEL(location.line); + PUSH_INSN(parents, location, dup); + PUSH_INSNL(parents, location, branchnil, safe_label); + } + if (state != NULL) { PUSH_INSN1(writes, location, topn, INT2FIX(1)); pm_multi_target_state_push(state, (INSN *) LAST_ELEMENT(writes), 1); @@ -3735,7 +4233,9 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons if (PM_NODE_FLAG_P(cast, 
PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY)) flags |= VM_CALL_FCALL; PUSH_SEND_WITH_FLAG(writes, location, method_id, INT2FIX(1), INT2FIX(flags)); + if (safe_label != NULL && state == NULL) PUSH_LABEL(writes, safe_label); PUSH_INSN(writes, location, pop); + if (safe_label != NULL && state != NULL) PUSH_LABEL(writes, safe_label); if (state != NULL) { PUSH_INSN(cleanup, location, pop); @@ -3809,9 +4309,15 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons // // for i, j in []; end // - if (state != NULL) state->position--; + size_t before_position; + if (state != NULL) { + before_position = state->position; + state->position--; + } + pm_compile_multi_target_node(iseq, node, parents, writes, cleanup, scope_node, state); - if (state != NULL) state->position++; + if (state != NULL) state->position = before_position; + break; } default: @@ -3825,7 +4331,7 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons * on the stack that correspond to the parent expressions of the various * targets. */ -static size_t +static void pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state) { const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); @@ -3865,26 +4371,28 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR // going through the targets because we will need to revisit them once // we know how many values are being pushed onto the stack. pm_multi_target_state_t target_state = { 0 }; - size_t base_position = state == NULL ? 0 : state->position; - size_t splat_position = has_rest ? 1 : 0; + if (state == NULL) state = &target_state; + + size_t base_position = state->position; + size_t splat_position = (has_rest || has_posts) ? 1 : 0; // Next, we'll iterate through all of the leading targets. 
for (size_t index = 0; index < lefts->size; index++) { const pm_node_t *target = lefts->nodes[index]; - target_state.position = lefts->size - index + splat_position + base_position; - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + state->position = lefts->size - index + splat_position + base_position; + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } // Next, we'll compile the rest target if there is one. if (has_rest) { const pm_node_t *target = ((const pm_splat_node_t *) rest)->expression; - target_state.position = 1 + rights->size + base_position; + state->position = 1 + rights->size + base_position; if (has_posts) { PUSH_INSN2(writes, location, expandarray, INT2FIX(rights->size), INT2FIX(3)); } - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } // Finally, we'll compile the trailing targets. @@ -3895,18 +4403,10 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR for (size_t index = 0; index < rights->size; index++) { const pm_node_t *target = rights->nodes[index]; - target_state.position = rights->size - index + base_position; - pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, &target_state); + state->position = rights->size - index + base_position; + pm_compile_target_node(iseq, target, parents, writes, cleanup, scope_node, state); } } - - // Now, we need to go back and modify the topn instructions in order to - // ensure they can correctly retrieve the parent expressions. 
- pm_multi_target_state_update(&target_state); - - if (state != NULL) state->stack_size += target_state.stack_size; - - return target_state.stack_size; } /** @@ -4036,7 +4536,7 @@ pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_line_co PM_COMPILE_NOT_POPPED((const pm_node_t *) cast->statements); } else { - PUSH_INSN(ret, *node_location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } ISEQ_COMPILE_DATA(iseq)->in_rescue = prev_in_rescue; @@ -4102,7 +4602,6 @@ pm_compile_ensure(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_line_co ); pm_scope_node_destroy(&next_scope_node); - ISEQ_COMPILE_DATA(iseq)->current_block = child_iseq; erange = ISEQ_COMPILE_DATA(iseq)->ensure_node_stack->erange; if (estart->link.next != &eend->link) { @@ -4182,7 +4681,7 @@ pm_opt_aset_with_p(const rb_iseq_t *iseq, const pm_call_node_t *node) static void pm_compile_constant_read(rb_iseq_t *iseq, VALUE name, const pm_location_t *name_loc, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node) { - const pm_line_column_t location = PM_LOCATION_LINE_COLUMN(scope_node->parser, name_loc); + const pm_line_column_t location = PM_LOCATION_START_LINE_COLUMN(scope_node->parser, name_loc); if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { ISEQ_BODY(iseq)->ic_size++; @@ -4216,7 +4715,7 @@ pm_constant_path_parts(const pm_node_t *node, const pm_scope_node_t *scope_node) } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); rb_ary_unshift(parts, name); if (cast->parent == NULL) { @@ -4254,7 +4753,7 @@ pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *co } case PM_CONSTANT_PATH_NODE: { const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; - VALUE name = 
ID2SYM(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) cast->child)->name)); + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); if (cast->parent == NULL) { PUSH_INSN(body, location, pop); @@ -4332,7 +4831,496 @@ pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t * return dispatch; } -/* +/** + * Return the object that will be pushed onto the stack for the given node. + */ +static VALUE +pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node) +{ + switch (PM_NODE_TYPE(node)) { + case PM_TRUE_NODE: + case PM_FALSE_NODE: + case PM_NIL_NODE: + case PM_SYMBOL_NODE: + case PM_REGULAR_EXPRESSION_NODE: + case PM_SOURCE_LINE_NODE: + case PM_INTEGER_NODE: + case PM_FLOAT_NODE: + case PM_RATIONAL_NODE: + case PM_IMAGINARY_NODE: + case PM_SOURCE_ENCODING_NODE: + return pm_static_literal_value(iseq, node, scope_node); + case PM_STRING_NODE: + return parse_static_literal_string(iseq, scope_node, node, &((const pm_string_node_t *) node)->unescaped); + case PM_SOURCE_FILE_NODE: + return pm_source_file_value((const pm_source_file_node_t *) node, scope_node); + case PM_ARRAY_NODE: { + const pm_array_node_t *cast = (const pm_array_node_t *) node; + VALUE result = rb_ary_new_capa(cast->elements.size); + + for (size_t index = 0; index < cast->elements.size; index++) { + VALUE element = pm_compile_shareable_constant_literal(iseq, cast->elements.nodes[index], scope_node); + if (element == Qundef) return Qundef; + + rb_ary_push(result, element); + } + + return rb_ractor_make_shareable(result); + } + case PM_HASH_NODE: { + const pm_hash_node_t *cast = (const pm_hash_node_t *) node; + VALUE result = rb_hash_new_capa(cast->elements.size); + + for (size_t index = 0; index < cast->elements.size; index++) { + const pm_node_t *element = cast->elements.nodes[index]; + if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return Qundef; + + const pm_assoc_node_t *assoc = (const 
pm_assoc_node_t *) element; + + VALUE key = pm_compile_shareable_constant_literal(iseq, assoc->key, scope_node); + if (key == Qundef) return Qundef; + + VALUE value = pm_compile_shareable_constant_literal(iseq, assoc->value, scope_node); + if (value == Qundef) return Qundef; + + rb_hash_aset(result, key, value); + } + + return rb_ractor_make_shareable(result); + } + default: + return Qundef; + } +} + +/** + * Compile the instructions for pushing the value that will be written to a + * shared constant. + */ +static void +pm_compile_shareable_constant_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_flags_t shareability, VALUE path, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node, bool top) +{ + VALUE literal = pm_compile_shareable_constant_literal(iseq, node, scope_node); + if (literal != Qundef) { + const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); + PUSH_INSN1(ret, location, putobject, literal); + return; + } + + const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); + switch (PM_NODE_TYPE(node)) { + case PM_ARRAY_NODE: { + const pm_array_node_t *cast = (const pm_array_node_t *) node; + + if (top) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + } + + for (size_t index = 0; index < cast->elements.size; index++) { + pm_compile_shareable_constant_value(iseq, cast->elements.nodes[index], shareability, path, ret, scope_node, false); + } + + PUSH_INSN1(ret, location, newarray, INT2FIX(cast->elements.size)); + + if (top) { + ID method_id = (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) ? 
rb_intern("make_shareable_copy") : rb_intern("make_shareable"); + PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + return; + } + case PM_HASH_NODE: { + const pm_hash_node_t *cast = (const pm_hash_node_t *) node; + + if (top) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + } + + for (size_t index = 0; index < cast->elements.size; index++) { + const pm_node_t *element = cast->elements.nodes[index]; + + if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) { + COMPILE_ERROR(iseq, location.line, "Ractor constant writes do not support **"); + } + + const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element; + pm_compile_shareable_constant_value(iseq, assoc->key, shareability, path, ret, scope_node, false); + pm_compile_shareable_constant_value(iseq, assoc->value, shareability, path, ret, scope_node, false); + } + + PUSH_INSN1(ret, location, newhash, INT2FIX(cast->elements.size * 2)); + + if (top) { + ID method_id = (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) ? 
rb_intern("make_shareable_copy") : rb_intern("make_shareable"); + PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + return; + } + default: { + DECL_ANCHOR(value_seq); + INIT_ANCHOR(value_seq); + + pm_compile_node(iseq, node, value_seq, false, scope_node); + if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) { + PUSH_SEND_WITH_FLAG(value_seq, location, idUMinus, INT2FIX(0), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL) { + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + PUSH_INSN1(ret, location, putobject, path); + PUSH_SEND_WITH_FLAG(ret, location, rb_intern("ensure_shareable"), INT2FIX(2), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + else if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY) { + if (top) PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + if (top) PUSH_SEND_WITH_FLAG(ret, location, rb_intern("make_shareable_copy"), INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + else if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING) { + if (top) PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); + PUSH_SEQ(ret, value_seq); + if (top) PUSH_SEND_WITH_FLAG(ret, location, rb_intern("make_shareable"), INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); + } + + break; + } + } +} + +/** + * Compile a constant write node, either in the context of a ractor pragma or + * not. 
+ */ +static void +pm_compile_constant_write_node(rb_iseq_t *iseq, const pm_constant_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + ID name_id = pm_constant_id_lookup(scope_node, node->name); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, rb_id2str(name_id), ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, ID2SYM(name_id)); +} + +/** + * Compile a constant and write node, either in the context of a ractor pragma + * or not. + */ +static void +pm_compile_constant_and_write_node(rb_iseq_t *iseq, const pm_constant_and_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) +{ + const pm_line_column_t location = *node_location; + + VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, node->name)); + LABEL *end_label = NEW_LABEL(location.line); + + pm_compile_constant_read(iseq, name, &node->name_loc, ret, scope_node); + if (!popped) PUSH_INSN(ret, location, dup); + + PUSH_INSNL(ret, location, branchunless, end_label); + if (!popped) PUSH_INSN(ret, location, pop); + + if (shareability != 0) { + pm_compile_shareable_constant_value(iseq, node->value, shareability, name, ret, scope_node, true); + } + else { + PM_COMPILE_NOT_POPPED(node->value); + } + + if (!popped) PUSH_INSN(ret, location, dup); + PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); + PUSH_INSN1(ret, location, setconstant, name); + PUSH_LABEL(ret, end_label); +} + +/** + * Compile a constant or write node, either in the context of a ractor pragma or + 
* not. + */
static void
pm_compile_constant_or_write_node(rb_iseq_t *iseq, const pm_constant_or_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
{
    const pm_line_column_t location = *node_location;
    VALUE constant_name = ID2SYM(pm_constant_id_lookup(scope_node, node->name));

    LABEL *assign_label = NEW_LABEL(location.line);
    LABEL *done_label = NEW_LABEL(location.line);

    // Go straight to the assignment when the constant is not defined.
    PUSH_INSN(ret, location, putnil);
    PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST), constant_name, Qtrue);
    PUSH_INSNL(ret, location, branchunless, assign_label);

    // Otherwise read the current value and finish when it is truthy.
    pm_compile_constant_read(iseq, constant_name, &node->name_loc, ret, scope_node);
    if (!popped) {
        PUSH_INSN(ret, location, dup);
    }

    PUSH_INSNL(ret, location, branchif, done_label);
    if (!popped) {
        PUSH_INSN(ret, location, pop);
    }

    PUSH_LABEL(ret, assign_label);

    // Push the new value.
    if (shareability == 0) {
        PM_COMPILE_NOT_POPPED(node->value);
    }
    else {
        pm_compile_shareable_constant_value(iseq, node->value, shareability, constant_name, ret, scope_node, true);
    }

    if (!popped) {
        PUSH_INSN(ret, location, dup);
    }

    PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE));
    PUSH_INSN1(ret, location, setconstant, constant_name);
    PUSH_LABEL(ret, done_label);
}

/**
 * Compile a constant operator write node, either in the context of a ractor
 * pragma or not.
+ */
static void
pm_compile_constant_operator_write_node(rb_iseq_t *iseq, const pm_constant_operator_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
{
    const pm_line_column_t location = *node_location;

    VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, node->name));
    ID method_id = pm_constant_id_lookup(scope_node, node->binary_operator);

    // Push the current value of the constant; it is the receiver of the
    // operator call.
    pm_compile_constant_read(iseq, name, &node->name_loc, ret, scope_node);

    // Push the right-hand side, which is the single argument of the call.
    if (shareability != 0) {
        pm_compile_shareable_constant_value(iseq, node->value, shareability, name, ret, scope_node, true);
    }
    else {
        PM_COMPILE_NOT_POPPED(node->value);
    }

    // The argument count is written with INT2FIX, as at every other send in
    // this file.
    PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2FIX(1), INT2FIX(VM_CALL_ARGS_SIMPLE));
    if (!popped) PUSH_INSN(ret, location, dup);

    PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE));
    PUSH_INSN1(ret, location, setconstant, name);
}

/**
 * Creates a string that is used in ractor error messages to describe the
 * constant path being written.
+ */
static VALUE
pm_constant_path_path(const pm_constant_path_node_t *node, const pm_scope_node_t *scope_node)
{
    // Segments are collected from the innermost name outwards, each new
    // segment being placed in front of the ones already collected.
    VALUE segments = rb_ary_new();
    rb_ary_push(segments, rb_id2str(pm_constant_id_lookup(scope_node, node->name)));

    const pm_node_t *ancestor;
    for (ancestor = node->parent; ancestor != NULL && PM_NODE_TYPE_P(ancestor, PM_CONSTANT_PATH_NODE); ) {
        const pm_constant_path_node_t *path_node = (const pm_constant_path_node_t *) ancestor;
        rb_ary_unshift(segments, rb_id2str(pm_constant_id_lookup(scope_node, path_node->name)));
        ancestor = path_node->parent;
    }

    // Whatever stopped the walk decides the leading segment: nothing at all,
    // a plain constant read, or some other expression shown as "...".
    VALUE leading_segment;
    if (ancestor == NULL) {
        leading_segment = rb_id2str(idNULL);
    }
    else if (PM_NODE_TYPE_P(ancestor, PM_CONSTANT_READ_NODE)) {
        leading_segment = rb_id2str(pm_constant_id_lookup(scope_node, ((const pm_constant_read_node_t *) ancestor)->name));
    }
    else {
        leading_segment = rb_str_new_cstr("...");
    }
    rb_ary_unshift(segments, leading_segment);

    VALUE separator = rb_str_new_cstr("::");
    return rb_ary_join(segments, separator);
}

/**
 * Compile a constant path write node, either in the context of a ractor pragma
 * or not.
+ */
static void
pm_compile_constant_path_write_node(rb_iseq_t *iseq, const pm_constant_path_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
{
    const pm_line_column_t location = *node_location;
    const pm_constant_path_node_t *path_target = node->target;
    VALUE constant_name = ID2SYM(pm_constant_id_lookup(scope_node, path_target->name));

    // Push the scope the constant is written into: the parent expression of
    // the path, or Object when the path has no parent.
    if (path_target->parent == NULL) {
        PUSH_INSN1(ret, location, putobject, rb_cObject);
    }
    else {
        PM_COMPILE_NOT_POPPED((const pm_node_t *) path_target->parent);
    }

    // Push the value being assigned.
    if (shareability == 0) {
        PM_COMPILE_NOT_POPPED(node->value);
    }
    else {
        pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true);
    }

    // When the result is used, these two instructions are emitted ahead of
    // the final swap/setconstant pair.
    if (!popped) {
        PUSH_INSN(ret, location, swap);
        PUSH_INSN1(ret, location, topn, INT2FIX(1));
    }

    PUSH_INSN(ret, location, swap);
    PUSH_INSN1(ret, location, setconstant, constant_name);
}

/**
 * Compile a constant path and write node, either in the context of a ractor
 * pragma or not.
+ */
static void
pm_compile_constant_path_and_write_node(rb_iseq_t *iseq, const pm_constant_path_and_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
{
    const pm_line_column_t location = *node_location;
    const pm_constant_path_node_t *path_target = node->target;

    VALUE constant_name = ID2SYM(pm_constant_id_lookup(scope_node, path_target->name));
    LABEL *done_label = NEW_LABEL(location.line);

    // Push the scope the constant lives in: the parent expression of the
    // path, or Object when the path has no parent.
    if (path_target->parent == NULL) {
        PUSH_INSN1(ret, location, putobject, rb_cObject);
    }
    else {
        PM_COMPILE_NOT_POPPED(path_target->parent);
    }

    // Read the current value of the constant from a copy of that scope.
    PUSH_INSN(ret, location, dup);
    PUSH_INSN1(ret, location, putobject, Qtrue);
    PUSH_INSN1(ret, location, getconstant, constant_name);

    // Skip the assignment when the current value is falsy.
    if (!popped) {
        PUSH_INSN(ret, location, dup);
    }
    PUSH_INSNL(ret, location, branchunless, done_label);

    if (!popped) {
        PUSH_INSN(ret, location, pop);
    }

    // Push the new value.
    if (shareability == 0) {
        PM_COMPILE_NOT_POPPED(node->value);
    }
    else {
        pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(node->target, scope_node), ret, scope_node, true);
    }

    if (!popped) {
        PUSH_INSN1(ret, location, dupn, INT2FIX(2));
        PUSH_INSN(ret, location, swap);
    }
    else {
        PUSH_INSN1(ret, location, topn, INT2FIX(1));
    }

    PUSH_INSN1(ret, location, setconstant, constant_name);
    PUSH_LABEL(ret, done_label);

    if (!popped) {
        PUSH_INSN(ret, location, swap);
    }
    PUSH_INSN(ret, location, pop);
}

/**
 * Compile a constant path or write node, either in the context of a ractor
 * pragma or not.
+ *
+ * The constant path or write form is `Foo::Bar ||= baz`.
+ *
+ * iseq - The instruction sequence that is being compiled
+ * node - The constant path or write node to compile
+ * shareability - When non-zero, the value is compiled through
+ *     pm_compile_shareable_constant_value using these flags; when zero, the
+ *     value is compiled with PM_COMPILE_NOT_POPPED
+ * node_location - The line and column attached to every pushed instruction
+ *     and to the labels created here
+ * ret - The list that the instructions are appended to
+ * popped - Selects which of the dup/pop/swap bookkeeping instructions are
+ *     pushed around the read, the branches and the store
+ * scope_node - The scope node used to look up the name of the constant
+ */
+static void
+pm_compile_constant_path_or_write_node(rb_iseq_t *iseq, const pm_constant_path_or_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
+{
+    const pm_constant_path_node_t *target = node->target;
+    const pm_line_column_t loc = *node_location;
+
+    VALUE constant_name = ID2SYM(pm_constant_id_lookup(scope_node, target->name));
+    LABEL *assign_label = NEW_LABEL(loc.line);
+    LABEL *done_label = NEW_LABEL(loc.line);
+
+    // Push the parent of the constant path, or rb_cObject when the path has
+    // no parent.
+    if (target->parent == NULL) {
+        PUSH_INSN1(ret, loc, putobject, rb_cObject);
+    }
+    else {
+        PM_COMPILE_NOT_POPPED(target->parent);
+    }
+
+    // Push a `defined` check first; when it branches to assign_label the read
+    // of the constant below is skipped entirely.
+    PUSH_INSN(ret, loc, dup);
+    PUSH_INSN3(ret, loc, defined, INT2FIX(DEFINED_CONST_FROM), constant_name, Qtrue);
+    PUSH_INSNL(ret, loc, branchunless, assign_label);
+
+    // Push the read of the existing constant.
+    PUSH_INSN(ret, loc, dup);
+    PUSH_INSN1(ret, loc, putobject, Qtrue);
+    PUSH_INSN1(ret, loc, getconstant, constant_name);
+
+    // The branch to done_label skips both the value and the store below.
+    if (!popped) {
+        PUSH_INSN(ret, loc, dup);
+    }
+    PUSH_INSNL(ret, loc, branchif, done_label);
+    if (!popped) {
+        PUSH_INSN(ret, loc, pop);
+    }
+
+    PUSH_LABEL(ret, assign_label);
+
+    // Push the value that is being written, going through the shareable
+    // constant path only when shareability flags were given.
+    if (shareability == 0) {
+        PM_COMPILE_NOT_POPPED(node->value);
+    }
+    else {
+        pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(target, scope_node), ret, scope_node, true);
+    }
+
+    if (!popped) {
+        PUSH_INSN1(ret, loc, dupn, INT2FIX(2));
+        PUSH_INSN(ret, loc, swap);
+    }
+    else {
+        PUSH_INSN1(ret, loc, topn, INT2FIX(1));
+    }
+
+    PUSH_INSN1(ret, loc, setconstant, constant_name);
+    PUSH_LABEL(ret, done_label);
+
+    // Both the branchif and the fall-through path arrive here.
+    if (!popped) {
+        PUSH_INSN(ret, loc, swap);
+    }
+    PUSH_INSN(ret, loc, pop);
+}
+
+/**
+ * Compile a constant path operator write node, either in the context of a
+ * ractor pragma or not.
+ *
+ * The constant path operator write form is `Foo::Bar += baz`.
+ *
+ * iseq - The instruction sequence that is being compiled
+ * node - The constant path operator write node to compile; its
+ *     binary_operator is looked up to find the method that is called
+ * shareability - When non-zero, the value is compiled through
+ *     pm_compile_shareable_constant_value using these flags; when zero, the
+ *     value is compiled with PM_COMPILE_NOT_POPPED
+ * node_location - The line and column attached to every pushed instruction
+ * ret - The list that the instructions are appended to
+ * popped - When false, an extra topn and swap are pushed ahead of the store
+ *     (presumably so that the result of the call is left behind as the
+ *     result of the expression; confirm against the VM instruction
+ *     definitions)
+ * scope_node - The scope node used to look up the operator and the name of
+ *     the constant
+ */
+static void
+pm_compile_constant_path_operator_write_node(rb_iseq_t *iseq, const pm_constant_path_operator_write_node_t *node, const pm_node_flags_t shareability, const pm_line_column_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node)
+{
+    const pm_constant_path_node_t *target = node->target;
+    const pm_line_column_t loc = *node_location;
+
+    ID operator_id = pm_constant_id_lookup(scope_node, node->binary_operator);
+    VALUE constant_name = ID2SYM(pm_constant_id_lookup(scope_node, target->name));
+
+    // Push the parent of the constant path, or rb_cObject when the path has
+    // no parent.
+    if (target->parent == NULL) {
+        PUSH_INSN1(ret, loc, putobject, rb_cObject);
+    }
+    else {
+        PM_COMPILE_NOT_POPPED(target->parent);
+    }
+
+    // Push the read of the existing constant, which the operator is called
+    // on.
+    PUSH_INSN(ret, loc, dup);
+    PUSH_INSN1(ret, loc, putobject, Qtrue);
+    PUSH_INSN1(ret, loc, getconstant, constant_name);
+
+    // Push the single argument to the operator, going through the shareable
+    // constant path only when shareability flags were given.
+    if (shareability == 0) {
+        PM_COMPILE_NOT_POPPED(node->value);
+    }
+    else {
+        pm_compile_shareable_constant_value(iseq, node->value, shareability, pm_constant_path_path(target, scope_node), ret, scope_node, true);
+    }
+
+    PUSH_CALL(ret, loc, operator_id, INT2FIX(1));
+    PUSH_INSN(ret, loc, swap);
+
+    if (!popped) {
+        PUSH_INSN1(ret, loc, topn, INT2FIX(1));
+        PUSH_INSN(ret, loc, swap);
+    }
+
+    PUSH_INSN1(ret, loc, setconstant, constant_name);
+}
+
+/**
  * Compiles a prism node into instruction sequences.
* * iseq - The current instruction sequence object (used for locals) @@ -4349,14 +5337,32 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_line_column_t location = PM_NODE_START_LINE_COLUMN(parser, node); int lineno = (int) location.line; - if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_NEWLINE) && ISEQ_COMPILE_DATA(iseq)->last_line != lineno) { - int event = RUBY_EVENT_LINE; + if (PM_NODE_TYPE_P(node, PM_RETURN_NODE) && PM_NODE_FLAG_P(node, PM_RETURN_NODE_FLAGS_REDUNDANT) && ((const pm_return_node_t *) node)->arguments == NULL) { + // If the node that we're compiling is a return node that is redundant, + // then it cannot be considered a line node because the other parser + // eliminates it from the parse tree. In this case we must replicate + // this behavior. + } else { + if (PM_NODE_TYPE_P(node, PM_BEGIN_NODE) && (((const pm_begin_node_t *) node)->statements == NULL) && (((const pm_begin_node_t *) node)->rescue_clause != NULL)) { + // If this node is a begin node and it has empty statements and also + // has a rescue clause, then the other parser considers it as + // starting on the same line as the rescue, as opposed to the + // location of the begin keyword. We replicate that behavior here. + lineno = (int) PM_NODE_START_LINE_COLUMN(parser, ((const pm_begin_node_t *) node)->rescue_clause).line; + } + + if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_NEWLINE) && ISEQ_COMPILE_DATA(iseq)->last_line != lineno) { + // If this node has the newline flag set and it is on a new line + // from the previous nodes that have been compiled for this ISEQ, + // then we need to emit a newline event. 
+ int event = RUBY_EVENT_LINE; - ISEQ_COMPILE_DATA(iseq)->last_line = lineno; - if (ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { - event |= RUBY_EVENT_COVERAGE_LINE; + ISEQ_COMPILE_DATA(iseq)->last_line = lineno; + if (ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { + event |= RUBY_EVENT_COVERAGE_LINE; + } + PUSH_TRACE(ret, event); } - PUSH_TRACE(ret, event); } switch (PM_NODE_TYPE(node)) { @@ -4598,7 +5604,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE((const pm_node_t *) cast->statements); } else if (!popped) { - PUSH_INSN(ret, location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } } return; @@ -4661,7 +5667,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, throw_flag = 0; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with break"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with break"); return; } else { @@ -4683,8 +5689,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } - COMPILE_ERROR(ERROR_ARGS "Invalid break"); - rb_bug("Invalid break"); + COMPILE_ERROR(iseq, location.line, "Invalid break"); } return; } @@ -4698,13 +5703,21 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // foo.bar() {} // ^^^^^^^^^^^^ const pm_call_node_t *cast = (const pm_call_node_t *) node; - LABEL *start = NEW_LABEL(location.line); + ID method_id = pm_constant_id_lookup(scope_node, cast->name); - if (cast->block) { - PUSH_LABEL(ret, start); + const pm_location_t *message_loc = &cast->message_loc; + if (message_loc->start == NULL) message_loc = &cast->base.location; + + const pm_line_column_t location = PM_LOCATION_START_LINE_COLUMN(scope_node->parser, message_loc); + const char *builtin_func; + + if (UNLIKELY(iseq_has_builtin_function_table(iseq)) && (builtin_func = pm_iseq_builtin_function_name(scope_node, cast->receiver, method_id)) != NULL) 
{ + pm_compile_builtin_function_call(iseq, ret, scope_node, cast, &location, popped, ISEQ_COMPILE_DATA(iseq)->current_block, builtin_func); + return; } - ID method_id = pm_constant_id_lookup(scope_node, cast->name); + LABEL *start = NEW_LABEL(location.line); + if (cast->block) PUSH_LABEL(ret, start); switch (method_id) { case idUMinus: { @@ -4820,7 +5833,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_SEND_WITH_FLAG(ret, location, id_read_name, INT2FIX(0), INT2FIX(flag)); PM_COMPILE_NOT_POPPED(cast->value); - ID id_operator = pm_constant_id_lookup(scope_node, cast->operator); + ID id_operator = pm_constant_id_lookup(scope_node, cast->binary_operator); PUSH_SEND(ret, location, id_operator, INT2FIX(1)); if (!popped) { @@ -4863,6 +5876,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // compare all of the various when clauses to the predicate. If we // don't, then it's basically an if-elsif-else chain. if (cast->predicate == NULL) { + // Establish branch coverage for the case node. + VALUE branches = Qfalse; + rb_code_location_t case_location = { 0 }; + int branch_id = 0; + + if (PM_BRANCH_COVERAGE_P(iseq)) { + case_location = pm_code_location(scope_node, (const pm_node_t *) cast); + branches = decl_branch_base(iseq, PTR2NUM(cast), &case_location, "case"); + } + // Loop through each clauses in the case node and compile each of // the conditions within them into cond_seq. If they match, they // should jump into their respective bodies in body_seq. @@ -4872,13 +5895,19 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, int clause_lineno = pm_node_line_number(parser, (const pm_node_t *) clause); LABEL *label = NEW_LABEL(clause_lineno); - PUSH_LABEL(body_seq, label); + + // Establish branch coverage for the when clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location = pm_code_location(scope_node, clause->statements != NULL ? 
((const pm_node_t *) clause->statements) : ((const pm_node_t *) clause)); + add_trace_branch_coverage(iseq, body_seq, &branch_location, branch_location.beg_pos.column, branch_id++, "when", branches); + } + if (clause->statements != NULL) { pm_compile_node(iseq, (const pm_node_t *) clause->statements, body_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(body_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } PUSH_INSNL(body_seq, location, jump, end_label); @@ -4890,10 +5919,11 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_node_t *condition = conditions->nodes[condition_index]; if (PM_NODE_TYPE_P(condition, PM_SPLAT_NODE)) { - PUSH_INSN(cond_seq, location, putnil); + pm_line_column_t cond_location = PM_NODE_START_LINE_COLUMN(parser, condition); + PUSH_INSN(cond_seq, cond_location, putnil); pm_compile_node(iseq, condition, cond_seq, false, scope_node); - PUSH_INSN1(cond_seq, location, checkmatch, INT2FIX(VM_CHECKMATCH_TYPE_WHEN | VM_CHECKMATCH_ARRAY)); - PUSH_INSNL(cond_seq, location, branchif, label); + PUSH_INSN1(cond_seq, cond_location, checkmatch, INT2FIX(VM_CHECKMATCH_TYPE_WHEN | VM_CHECKMATCH_ARRAY)); + PUSH_INSNL(cond_seq, cond_location, branchif, label); } else { LABEL *next_label = NEW_LABEL(pm_node_line_number(parser, condition)); @@ -4903,12 +5933,28 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } } + // Establish branch coverage for the else clause (implicit or + // explicit). 
+ if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location; + + if (cast->consequent == NULL) { + branch_location = case_location; + } else if (cast->consequent->statements == NULL) { + branch_location = pm_code_location(scope_node, (const pm_node_t *) cast->consequent); + } else { + branch_location = pm_code_location(scope_node, (const pm_node_t *) cast->consequent->statements); + } + + add_trace_branch_coverage(iseq, cond_seq, &branch_location, branch_location.beg_pos.column, branch_id, "else", branches); + } + // Compile the consequent else clause if there is one. - if (cast->consequent) { + if (cast->consequent != NULL) { pm_compile_node(iseq, (const pm_node_t *) cast->consequent, cond_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(cond_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(cond_seq, iseq); } // Finally, jump to the end label if none of the other conditions @@ -4917,6 +5963,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_SEQ(ret, cond_seq); } else { + // Establish branch coverage for the case node. + VALUE branches = Qfalse; + rb_code_location_t case_location = { 0 }; + int branch_id = 0; + + if (PM_BRANCH_COVERAGE_P(iseq)) { + case_location = pm_code_location(scope_node, (const pm_node_t *) cast); + branches = decl_branch_base(iseq, PTR2NUM(cast), &case_location, "case"); + } + // This is the label where everything will fall into if none of the // conditions matched. LABEL *else_label = NEW_LABEL(location.line); @@ -4942,15 +5998,17 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // node instructions later. 
for (size_t clause_index = 0; clause_index < conditions->size; clause_index++) { const pm_when_node_t *clause = (const pm_when_node_t *) conditions->nodes[clause_index]; - const pm_node_list_t *conditions = &clause->conditions; + pm_line_column_t clause_location = PM_NODE_START_LINE_COLUMN(parser, (const pm_node_t *) clause); - LABEL *label = NEW_LABEL(location.line); + const pm_node_list_t *conditions = &clause->conditions; + LABEL *label = NEW_LABEL(clause_location.line); // Compile each of the conditions for the when clause into the // cond_seq. Each one should have a unique comparison that then // jumps into the body if it matches. for (size_t condition_index = 0; condition_index < conditions->size; condition_index++) { const pm_node_t *condition = conditions->nodes[condition_index]; + const pm_line_column_t condition_location = PM_NODE_START_LINE_COLUMN(parser, condition); // If we haven't already abandoned the optimization, then // we're going to try to compile the condition into the @@ -4960,25 +6018,25 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } if (PM_NODE_TYPE_P(condition, PM_SPLAT_NODE)) { - PUSH_INSN(cond_seq, location, dup); + PUSH_INSN(cond_seq, condition_location, dup); pm_compile_node(iseq, condition, cond_seq, false, scope_node); - PUSH_INSN1(cond_seq, location, checkmatch, INT2FIX(VM_CHECKMATCH_TYPE_CASE | VM_CHECKMATCH_ARRAY)); + PUSH_INSN1(cond_seq, condition_location, checkmatch, INT2FIX(VM_CHECKMATCH_TYPE_CASE | VM_CHECKMATCH_ARRAY)); } else { if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { const pm_string_node_t *string = (const pm_string_node_t *) condition; VALUE value = parse_static_literal_string(iseq, scope_node, condition, &string->unescaped); - PUSH_INSN1(cond_seq, location, putobject, value); + PUSH_INSN1(cond_seq, condition_location, putobject, value); } else { pm_compile_node(iseq, condition, cond_seq, false, scope_node); } - PUSH_INSN1(cond_seq, location, topn, INT2FIX(1)); - 
PUSH_SEND_WITH_FLAG(cond_seq, location, idEqq, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)); + PUSH_INSN1(cond_seq, condition_location, topn, INT2FIX(1)); + PUSH_SEND_WITH_FLAG(cond_seq, condition_location, idEqq, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)); } - PUSH_INSNL(cond_seq, location, branchif, label); + PUSH_INSNL(cond_seq, condition_location, branchif, label); } // Now, add the label to the body and compile the body of the @@ -4986,16 +6044,22 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // the statements to be executed, and then compiling a jump to // the end of the case node. PUSH_LABEL(body_seq, label); - PUSH_INSN(body_seq, location, pop); + PUSH_INSN(body_seq, clause_location, pop); + + // Establish branch coverage for the when clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location = pm_code_location(scope_node, clause->statements != NULL ? ((const pm_node_t *) clause->statements) : ((const pm_node_t *) clause)); + add_trace_branch_coverage(iseq, body_seq, &branch_location, branch_location.beg_pos.column, branch_id++, "when", branches); + } if (clause->statements != NULL) { pm_compile_node(iseq, (const pm_node_t *) clause->statements, body_seq, popped, scope_node); } else if (!popped) { - PUSH_INSN(body_seq, location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } - PUSH_INSNL(body_seq, location, jump, end_label); + PUSH_INSNL(body_seq, clause_location, jump, end_label); } // Now that we have compiled the conditions and the bodies of the @@ -5017,16 +6081,31 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Compile either the explicit else clause or an implicit else // clause. PUSH_LABEL(ret, else_label); - PUSH_INSN(ret, location, pop); if (cast->consequent != NULL) { + pm_line_column_t else_location = PM_NODE_START_LINE_COLUMN(parser, cast->consequent->statements != NULL ? 
((const pm_node_t *) cast->consequent->statements) : ((const pm_node_t *) cast->consequent)); + PUSH_INSN(ret, else_location, pop); + + // Establish branch coverage for the else clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location = pm_code_location(scope_node, cast->consequent->statements != NULL ? ((const pm_node_t *) cast->consequent->statements) : ((const pm_node_t *) cast->consequent)); + add_trace_branch_coverage(iseq, ret, &branch_location, branch_location.beg_pos.column, branch_id, "else", branches); + } + PM_COMPILE((const pm_node_t *) cast->consequent); + PUSH_INSNL(ret, else_location, jump, end_label); } - else if (!popped) { - PUSH_INSN(ret, location, putnil); - } + else { + PUSH_INSN(ret, location, pop); - PUSH_INSNL(ret, location, jump, end_label); + // Establish branch coverage for the implicit else clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + add_trace_branch_coverage(iseq, ret, &case_location, case_location.beg_pos.column, branch_id, "else", branches); + } + + if (!popped) PUSH_INSN(ret, location, putnil); + PUSH_INSNL(ret, location, jump, end_label); + } } PUSH_SEQ(ret, body_seq); @@ -5068,8 +6147,14 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // We're going to use this to uniquely identify each branch so that we // can track coverage information. + rb_code_location_t case_location = { 0 }; + VALUE branches = Qfalse; int branch_id = 0; - // VALUE branches = 0; + + if (PM_BRANCH_COVERAGE_P(iseq)) { + case_location = pm_code_location(scope_node, (const pm_node_t *) cast); + branches = decl_branch_base(iseq, PTR2NUM(cast), &case_location, "case"); + } // If there is only one pattern, then the behavior changes a bit. 
It // effectively gets treated as a match required node (this is how it is @@ -5109,17 +6194,17 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_LABEL(body_seq, body_label); PUSH_INSN1(body_seq, in_location, adjuststack, INT2FIX(in_single_pattern ? 6 : 2)); - // TODO: We need to come back to this and enable trace branch - // coverage. At the moment we can't call this function because it - // accepts a NODE* and not a pm_node_t*. - // add_trace_branch_coverage(iseq, body_seq, in_node->statements || in, branch_id++, "in", branches); + // Establish branch coverage for the in clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location = pm_code_location(scope_node, in_node->statements != NULL ? ((const pm_node_t *) in_node->statements) : ((const pm_node_t *) in_node)); + add_trace_branch_coverage(iseq, body_seq, &branch_location, branch_location.beg_pos.column, branch_id++, "in", branches); + } - branch_id++; if (in_node->statements != NULL) { PM_COMPILE_INTO_ANCHOR(body_seq, (const pm_node_t *) in_node->statements); } else if (!popped) { - PUSH_INSN(body_seq, in_location, putnil); + PUSH_SYNTHETIC_PUTNIL(body_seq, iseq); } PUSH_INSNL(body_seq, in_location, jump, end_label); @@ -5141,16 +6226,13 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(cond_seq, location, pop); PUSH_INSN(cond_seq, location, pop); - // TODO: trace branch coverage - // add_trace_branch_coverage(iseq, cond_seq, cast->consequent, branch_id, "else", branches); - - if (else_node->statements != NULL) { - PM_COMPILE_INTO_ANCHOR(cond_seq, (const pm_node_t *) else_node->statements); - } - else if (!popped) { - PUSH_INSN(cond_seq, location, putnil); + // Establish branch coverage for the else clause. + if (PM_BRANCH_COVERAGE_P(iseq)) { + rb_code_location_t branch_location = pm_code_location(scope_node, else_node->statements != NULL ? 
((const pm_node_t *) else_node->statements) : ((const pm_node_t *) else_node)); + add_trace_branch_coverage(iseq, cond_seq, &branch_location, branch_location.beg_pos.column, branch_id, "else", branches); } + PM_COMPILE_INTO_ANCHOR(cond_seq, (const pm_node_t *) else_node); PUSH_INSNL(cond_seq, location, jump, end_label); PUSH_INSN(cond_seq, location, putnil); if (popped) PUSH_INSN(cond_seq, location, putnil); @@ -5160,8 +6242,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // the code to handle raising an appropriate error. PUSH_LABEL(cond_seq, else_label); - // TODO: trace branch coverage - // add_trace_branch_coverage(iseq, cond_seq, orig_node, branch_id, "else", branches); + // Establish branch coverage for the implicit else clause. + add_trace_branch_coverage(iseq, cond_seq, &case_location, case_location.beg_pos.column, branch_id, "else", branches); if (in_single_pattern) { pm_compile_pattern_error_handler(iseq, scope_node, node, cond_seq, end_label, popped); @@ -5255,7 +6337,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, getclassvariable, name, get_cvar_ic_value(iseq, name_id)); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -5349,154 +6431,28 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Foo::Bar &&= baz // ^^^^^^^^^^^^^^^^ const pm_constant_path_and_write_node_t *cast = (const pm_constant_path_and_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - LABEL *lfin = NEW_LABEL(location.line); - - if 
(target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - if (!popped) PUSH_INSN(ret, location, dup); - PUSH_INSNL(ret, location, branchunless, lfin); - - if (!popped) PUSH_INSN(ret, location, pop); - PM_COMPILE_NOT_POPPED(cast->value); - - if (popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - else { - PUSH_INSN1(ret, location, dupn, INT2FIX(2)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, lfin); - - if (!popped) PUSH_INSN(ret, location, swap); - PUSH_INSN(ret, location, pop); - + pm_compile_constant_path_and_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_OR_WRITE_NODE: { // Foo::Bar ||= baz // ^^^^^^^^^^^^^^^^ const pm_constant_path_or_write_node_t *cast = (const pm_constant_path_or_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - LABEL *lassign = NEW_LABEL(location.line); - LABEL *lfin = NEW_LABEL(location.line); - - if (target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST_FROM), name, Qtrue); - PUSH_INSNL(ret, location, branchunless, lassign); - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - if (!popped) PUSH_INSN(ret, location, dup); - PUSH_INSNL(ret, location, branchif, lfin); - - if (!popped) PUSH_INSN(ret, location, pop); - PUSH_LABEL(ret, lassign); - PM_COMPILE_NOT_POPPED(cast->value); - - if 
(popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - else { - PUSH_INSN1(ret, location, dupn, INT2FIX(2)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, lfin); - - if (!popped) PUSH_INSN(ret, location, swap); - PUSH_INSN(ret, location, pop); - + pm_compile_constant_path_or_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE: { // Foo::Bar += baz // ^^^^^^^^^^^^^^^ const pm_constant_path_operator_write_node_t *cast = (const pm_constant_path_operator_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - if (target->parent) { - PM_COMPILE_NOT_POPPED(target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PUSH_INSN(ret, location, dup); - PUSH_INSN1(ret, location, putobject, Qtrue); - PUSH_INSN1(ret, location, getconstant, name); - - PM_COMPILE_NOT_POPPED(cast->value); - PUSH_CALL(ret, location, method_id, INT2FIX(1)); - PUSH_INSN(ret, location, swap); - - if (!popped) { - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - PUSH_INSN(ret, location, swap); - } - - PUSH_INSN1(ret, location, setconstant, name); + pm_compile_constant_path_operator_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_PATH_WRITE_NODE: { // Foo::Bar = 1 // ^^^^^^^^^^^^ const pm_constant_path_write_node_t *cast = (const pm_constant_path_write_node_t *) node; - const pm_constant_path_node_t *target = cast->target; - - const pm_constant_read_node_t *child = (const pm_constant_read_node_t *) target->child; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, child->name)); - - if (target->parent) { - PM_COMPILE_NOT_POPPED((const pm_node_t *) 
target->parent); - } - else { - PUSH_INSN1(ret, location, putobject, rb_cObject); - } - - PM_COMPILE_NOT_POPPED(cast->value); - - if (!popped) { - PUSH_INSN(ret, location, swap); - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - - PUSH_INSN(ret, location, swap); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_path_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_READ_NODE: { @@ -5514,82 +6470,28 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Foo &&= bar // ^^^^^^^^^^^ const pm_constant_and_write_node_t *cast = (const pm_constant_and_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - LABEL *end_label = NEW_LABEL(location.line); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSNL(ret, location, branchunless, end_label); - if (!popped) PUSH_INSN(ret, location, pop); - - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, end_label); - + pm_compile_constant_and_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_OR_WRITE_NODE: { // Foo ||= bar // ^^^^^^^^^^^ const pm_constant_or_write_node_t *cast = (const pm_constant_or_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - LABEL *set_label = NEW_LABEL(location.line); - LABEL *end_label = NEW_LABEL(location.line); - - PUSH_INSN(ret, location, putnil); - PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_CONST), name, Qtrue); - PUSH_INSNL(ret, location, branchunless, set_label); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSNL(ret, location, 
branchif, end_label); - if (!popped) PUSH_INSN(ret, location, pop); - - PUSH_LABEL(ret, set_label); - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - PUSH_LABEL(ret, end_label); - + pm_compile_constant_or_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_OPERATOR_WRITE_NODE: { // Foo += bar // ^^^^^^^^^^ const pm_constant_operator_write_node_t *cast = (const pm_constant_operator_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); - - pm_compile_constant_read(iseq, name, &cast->name_loc, ret, scope_node); - PM_COMPILE_NOT_POPPED(cast->value); - - PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_operator_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_CONSTANT_WRITE_NODE: { // Foo = 1 // ^^^^^^^ const pm_constant_write_node_t *cast = (const pm_constant_write_node_t *) node; - VALUE name = ID2SYM(pm_constant_id_lookup(scope_node, cast->name)); - - PM_COMPILE_NOT_POPPED(cast->value); - if (!popped) PUSH_INSN(ret, location, dup); - - PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_CONST_BASE)); - PUSH_INSN1(ret, location, setconstant, name); - + pm_compile_constant_write_node(iseq, cast, 0, &location, ret, popped, scope_node); return; } case PM_DEF_NODE: { @@ -5638,7 +6540,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE((const pm_node_t *) (cast->statements)); } else { - PUSH_INSN(ret, location, putnil); + 
PUSH_SYNTHETIC_PUTNIL(ret, iseq); } if (popped) PUSH_INSN(ret, location, pop); @@ -5686,7 +6588,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE((const pm_node_t *) cast->statements); } else if (!popped) { - PUSH_INSN(ret, location, putnil); + PUSH_SYNTHETIC_PUTNIL(ret, iseq); } return; @@ -5745,33 +6647,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // Now, create the method call to each that will be used to iterate over // the collection, and pass the newly created iseq as the block. PUSH_SEND_WITH_BLOCK(ret, location, idEach, INT2FIX(0), child_iseq); - - // We need to put the label "retry_end_l" immediately after the last - // "send" instruction. This because vm_throw checks if the break cont is - // equal to the index of next insn of the "send". (Otherwise, it is - // considered "break from proc-closure". See "TAG_BREAK" handling in - // "vm_throw_start".) - // - // Normally, "send" instruction is at the last. However, qcall under - // branch coverage measurement adds some instructions after the "send". - // - // Note that "invokesuper" appears instead of "send". - { - INSN *iobj; - LINK_ELEMENT *last_elem = LAST_ELEMENT(ret); - iobj = IS_INSN(last_elem) ? (INSN*) last_elem : (INSN*) get_prev_insn((INSN*) last_elem); - while (INSN_OF(iobj) != BIN(send) && INSN_OF(iobj) != BIN(invokesuper)) { - iobj = (INSN*) get_prev_insn(iobj); - } - ELEM_INSERT_NEXT(&iobj->link, (LINK_ELEMENT*) retry_end_l); - - // LINK_ANCHOR has a pointer to the last element, but - // ELEM_INSERT_NEXT does not update it even if we add an insn to the - // last of LINK_ANCHOR. So this updates it manually. 
- if (&iobj->link == LAST_ELEMENT(ret)) { - ret->last = (LINK_ELEMENT*) retry_end_l; - } - } + pm_compile_retry_end_label(iseq, ret, retry_end_l); if (popped) PUSH_INSN(ret, location, pop); ISEQ_COMPILE_DATA(iseq)->current_block = prev_block; @@ -5827,6 +6703,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, int argc = 0; int depth = get_lvar_level(iseq); + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + flag |= VM_CALL_FORWARDING; + pm_local_index_t mult_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_DOT3, 0); + PUSH_GETLOCAL(ret, location, mult_local.index, mult_local.level); + PUSH_INSN2(ret, location, invokesuperforward, new_callinfo(iseq, 0, 0, flag, NULL, block != NULL), block); + if (popped) PUSH_INSN(ret, location, pop); + return; + } + if (local_body->param.flags.has_lead) { /* required arguments */ for (int i = 0; i < local_body->param.lead_num; i++) { @@ -5914,7 +6799,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, invokesuper, new_callinfo(iseq, 0, argc, flag, NULL, block != NULL), block); if (cast->block != NULL) { - PUSH_LABEL(ret, retry_end_l); + pm_compile_retry_end_label(iseq, ret, retry_end_l); PUSH_CATCH_ENTRY(CATCH_TYPE_BREAK, retry_label, retry_end_l, block, retry_end_l); ISEQ_COMPILE_DATA(iseq)->current_block = previous_block; } @@ -5952,7 +6837,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN1(ret, location, getglobal, name); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -6065,7 +6950,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // foo ? 
bar : baz // ^^^^^^^^^^^^^^^ const pm_if_node_t *cast = (const pm_if_node_t *) node; - pm_compile_conditional(iseq, &location, cast->statements, cast->consequent, cast->predicate, ret, popped, scope_node); + pm_compile_conditional(iseq, &location, PM_IF_NODE, (const pm_node_t *) cast, cast->statements, cast->consequent, cast->predicate, ret, popped, scope_node); return; } case PM_IMAGINARY_NODE: { @@ -6150,7 +7035,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN2(ret, location, getinstancevariable, name, get_ivar_ic_value(iseq, name_id)); PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); int flags = VM_CALL_ARGS_SIMPLE; PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(flags)); @@ -6286,7 +7171,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } else { const pm_interpolated_string_node_t *cast = (const pm_interpolated_string_node_t *) node; - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); if (popped) PUSH_INSN(ret, location, pop); } @@ -6305,7 +7190,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } } else { - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); if (length > 1) { PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); } @@ -6327,7 +7212,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, putself); - int length = 
pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node, NULL, NULL); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); PUSH_SEND_WITH_FLAG(ret, location, idBackquote, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)); @@ -6335,6 +7220,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } + case PM_IT_LOCAL_VARIABLE_READ_NODE: { + // -> { it } + // ^^ + if (!popped) { + PUSH_GETLOCAL(ret, location, scope_node->local_table_for_iseq_size, 0); + } + + return; + } case PM_KEYWORD_HASH_NODE: { // foo(bar: baz) // ^^^^^^^^ @@ -6400,7 +7294,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PM_COMPILE_NOT_POPPED(cast->value); - ID method_id = pm_constant_id_lookup(scope_node, cast->operator); + ID method_id = pm_constant_id_lookup(scope_node, cast->binary_operator); PUSH_SEND_WITH_FLAG(ret, location, method_id, INT2NUM(1), INT2FIX(VM_CALL_ARGS_SIMPLE)); if (!popped) PUSH_INSN(ret, location, dup); @@ -6438,9 +7332,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_LOCAL_VARIABLE_READ_NODE: { // foo // ^^^ - const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) node; - if (!popped) { + const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) node; pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, cast->name, cast->depth); PUSH_GETLOCAL(ret, location, index.index, index.level); } @@ -6701,18 +7594,22 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_multi_target_state_t state = { 0 }; state.position = popped ? 
0 : 1; - size_t stack_size = pm_compile_multi_target_node(iseq, node, ret, writes, cleanup, scope_node, &state); + pm_compile_multi_target_node(iseq, node, ret, writes, cleanup, scope_node, &state); PM_COMPILE_NOT_POPPED(cast->value); if (!popped) PUSH_INSN(ret, location, dup); PUSH_SEQ(ret, writes); - if (!popped && stack_size >= 1) { + if (!popped && state.stack_size >= 1) { // Make sure the value on the right-hand side of the = operator is // being returned before we pop the parent expressions. - PUSH_INSN1(ret, location, setn, INT2FIX(stack_size)); + PUSH_INSN1(ret, location, setn, INT2FIX(state.stack_size)); } + // Now, we need to go back and modify the topn instructions in order to + // ensure they can correctly retrieve the parent expressions. + pm_multi_target_state_update(&state); + PUSH_SEQ(ret, cleanup); return; } @@ -6781,7 +7678,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with next"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with next"); return; } @@ -6799,7 +7696,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid next"); + COMPILE_ERROR(iseq, location.line, "Invalid next"); return; } } @@ -7034,7 +7931,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } else if (ISEQ_BODY(ip)->type == ISEQ_TYPE_EVAL) { - COMPILE_ERROR(ERROR_ARGS "Can't escape from eval with redo"); + COMPILE_ERROR(iseq, location.line, "Can't escape from eval with redo"); return; } @@ -7047,7 +7944,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid redo"); + COMPILE_ERROR(iseq, location.line, "Invalid redo"); return; } } @@ -7205,53 
+8102,63 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, const pm_return_node_t *cast = (const pm_return_node_t *) node; const pm_arguments_node_t *arguments = cast->arguments; - enum rb_iseq_type type = ISEQ_BODY(iseq)->type; - LABEL *splabel = 0; - - const rb_iseq_t *parent_iseq = iseq; - enum rb_iseq_type parent_type = ISEQ_BODY(parent_iseq)->type; - while (parent_type == ISEQ_TYPE_RESCUE || parent_type == ISEQ_TYPE_ENSURE) { - if (!(parent_iseq = ISEQ_BODY(parent_iseq)->parent_iseq)) break; - parent_type = ISEQ_BODY(parent_iseq)->type; - } - - switch (parent_type) { - case ISEQ_TYPE_TOP: - case ISEQ_TYPE_MAIN: + if (PM_NODE_FLAG_P(cast, PM_RETURN_NODE_FLAGS_REDUNDANT)) { if (arguments) { - rb_warn("argument of top-level return is ignored"); + PM_COMPILE_NOT_POPPED((const pm_node_t *) arguments); } - if (parent_iseq == iseq) { - type = ISEQ_TYPE_METHOD; + else { + PUSH_INSN(ret, location, putnil); } - break; - default: - break; } + else { + enum rb_iseq_type type = ISEQ_BODY(iseq)->type; + LABEL *splabel = 0; - if (type == ISEQ_TYPE_METHOD) { - splabel = NEW_LABEL(0); - PUSH_LABEL(ret, splabel); - PUSH_ADJUST(ret, location, 0); - } + const rb_iseq_t *parent_iseq = iseq; + enum rb_iseq_type parent_type = ISEQ_BODY(parent_iseq)->type; + while (parent_type == ISEQ_TYPE_RESCUE || parent_type == ISEQ_TYPE_ENSURE) { + if (!(parent_iseq = ISEQ_BODY(parent_iseq)->parent_iseq)) break; + parent_type = ISEQ_BODY(parent_iseq)->type; + } - if (arguments) { - PM_COMPILE_NOT_POPPED((const pm_node_t *) arguments); - } - else { - PUSH_INSN(ret, location, putnil); - } + switch (parent_type) { + case ISEQ_TYPE_TOP: + case ISEQ_TYPE_MAIN: + if (arguments) { + rb_warn("argument of top-level return is ignored"); + } + if (parent_iseq == iseq) { + type = ISEQ_TYPE_METHOD; + } + break; + default: + break; + } - if (type == ISEQ_TYPE_METHOD && can_add_ensure_iseq(iseq)) { - pm_add_ensure_iseq(ret, iseq, 1, scope_node); - PUSH_TRACE(ret, 
RUBY_EVENT_RETURN); - PUSH_INSN(ret, location, leave); - PUSH_ADJUST_RESTORE(ret, splabel); - if (!popped) PUSH_INSN(ret, location, putnil); - } - else { - PUSH_INSN1(ret, location, throw, INT2FIX(TAG_RETURN)); - if (popped) PUSH_INSN(ret, location, pop); + if (type == ISEQ_TYPE_METHOD) { + splabel = NEW_LABEL(0); + PUSH_LABEL(ret, splabel); + PUSH_ADJUST(ret, location, 0); + } + + if (arguments) { + PM_COMPILE_NOT_POPPED((const pm_node_t *) arguments); + } + else { + PUSH_INSN(ret, location, putnil); + } + + if (type == ISEQ_TYPE_METHOD && can_add_ensure_iseq(iseq)) { + pm_add_ensure_iseq(ret, iseq, 1, scope_node); + PUSH_TRACE(ret, RUBY_EVENT_RETURN); + PUSH_INSN(ret, location, leave); + PUSH_ADJUST_RESTORE(ret, splabel); + if (!popped) PUSH_INSN(ret, location, putnil); + } + else { + PUSH_INSN1(ret, location, throw, INT2FIX(TAG_RETURN)); + if (popped) PUSH_INSN(ret, location, pop); + } } return; @@ -7265,7 +8172,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (popped) PUSH_INSN(ret, location, pop); } else { - COMPILE_ERROR(ERROR_ARGS "Invalid retry"); + COMPILE_ERROR(iseq, location.line, "Invalid retry"); return; } return; @@ -7372,6 +8279,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } } + // If we have the `it` implicit local variable, we need to account for + // it in the local table size. + if (scope_node->parameters != NULL && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + table_size++; + } + // Ensure there is enough room in the local table for any // parameters that have been repeated // ex: def underscore_parameters(_, _ = 1, _ = 2); _; end @@ -7406,7 +8319,14 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // When we have a `...` as the keyword_rest, it's a forwarding_parameter_node and // we need to leave space for 4 locals: *, **, &, ... 
if (PM_NODE_TYPE_P(parameters_node->keyword_rest, PM_FORWARDING_PARAMETER_NODE)) { - table_size += 4; + // Only optimize specifically methods like this: `foo(...)` + if (requireds_list->size == 0 && optionals_list->size == 0 && keywords_list->size == 0) { + ISEQ_BODY(iseq)->param.flags.forwardable = TRUE; + table_size += 1; + } + else { + table_size += 4; + } } else { const pm_keyword_rest_parameter_node_t *kw_rest = (const pm_keyword_rest_parameter_node_t *) parameters_node->keyword_rest; @@ -7520,6 +8440,11 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, body->param.flags.has_lead = true; } + if (scope_node->parameters != NULL && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + ID local = rb_make_temporary_id(local_index); + local_table_for_iseq->ids[local_index++] = local; + } + // def foo(a, (b, *c, d), e = 1, *f, g, (h, *i, j), k:, l: 1, **m, &n) // ^^^^^ if (optionals_list && optionals_list->size) { @@ -7755,29 +8680,31 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // def foo(...) 
// ^^^ case PM_FORWARDING_PARAMETER_NODE: { - body->param.rest_start = local_index; - body->param.flags.has_rest = true; + if (!ISEQ_BODY(iseq)->param.flags.forwardable) { + body->param.rest_start = local_index; + body->param.flags.has_rest = true; - // Add the leading * - pm_insert_local_special(idMULT, local_index++, index_lookup_table, local_table_for_iseq); + // Add the leading * + pm_insert_local_special(idMULT, local_index++, index_lookup_table, local_table_for_iseq); - // Add the kwrest ** - RUBY_ASSERT(!body->param.flags.has_kw); + // Add the kwrest ** + RUBY_ASSERT(!body->param.flags.has_kw); - // There are no keywords declared (in the text of the program) - // but the forwarding node implies we support kwrest (**) - body->param.flags.has_kw = false; - body->param.flags.has_kwrest = true; - body->param.keyword = keyword = ZALLOC_N(struct rb_iseq_param_keyword, 1); + // There are no keywords declared (in the text of the program) + // but the forwarding node implies we support kwrest (**) + body->param.flags.has_kw = false; + body->param.flags.has_kwrest = true; + body->param.keyword = keyword = ZALLOC_N(struct rb_iseq_param_keyword, 1); - keyword->rest_start = local_index; + keyword->rest_start = local_index; - pm_insert_local_special(idPow, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(idPow, local_index++, index_lookup_table, local_table_for_iseq); - body->param.block_start = local_index; - body->param.flags.has_block = true; + body->param.block_start = local_index; + body->param.flags.has_block = true; - pm_insert_local_special(idAnd, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(idAnd, local_index++, index_lookup_table, local_table_for_iseq); + } pm_insert_local_special(idDot3, local_index++, index_lookup_table, local_table_for_iseq); break; } @@ -7880,18 +8807,6 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, body->param.flags.has_lead = true; 
} - // Fill in the it variable, if it exists - if (scope_node->parameters && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { - const uint8_t param_name[] = { '0', 'i', 't' }; - pm_constant_id_t constant_id = pm_constant_pool_find(&parser->constant_pool, param_name, 3); - RUBY_ASSERT(constant_id && "parser should have inserted 0it for 'it' local"); - - ID local = rb_make_temporary_id(local_index); - local_table_for_iseq->ids[local_index] = local; - st_insert(index_lookup_table, (st_data_t) constant_id, (st_data_t) local_index); - local_index++; - } - //********END OF STEP 3********** //********STEP 4********** @@ -7933,7 +8848,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } scope_node->index_lookup_table = index_lookup_table; iseq_calc_param_size(iseq); - iseq_set_local_table(iseq, local_table_for_iseq); + + if (ISEQ_BODY(iseq)->param.flags.forwardable) { + // We're treating `...` as a parameter so that frame + // pushing won't clobber it. + ISEQ_BODY(iseq)->param.size += 1; + } + + // FIXME: args? 
+ iseq_set_local_table(iseq, local_table_for_iseq, 0); scope_node->local_table_for_iseq_size = local_table_for_iseq->size; //********STEP 5************ @@ -8104,7 +9027,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case ISEQ_TYPE_METHOD: { + ISEQ_COMPILE_DATA(iseq)->root_node = (const void *) scope_node->body; PUSH_TRACE(ret, RUBY_EVENT_CALL); + if (scope_node->body) { PM_COMPILE((const pm_node_t *) scope_node->body); } @@ -8112,9 +9037,10 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, putnil); } + ISEQ_COMPILE_DATA(iseq)->root_node = (const void *) scope_node->body; PUSH_TRACE(ret, RUBY_EVENT_RETURN); - ISEQ_COMPILE_DATA(iseq)->last_line = body->location.code_location.end_pos.lineno; + ISEQ_COMPILE_DATA(iseq)->last_line = body->location.code_location.end_pos.lineno; break; } case ISEQ_TYPE_RESCUE: { @@ -8150,7 +9076,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, break; } - if (PM_NODE_TYPE_P(scope_node->ast_node, PM_CLASS_NODE)) { + if (PM_NODE_TYPE_P(scope_node->ast_node, PM_CLASS_NODE) || PM_NODE_TYPE_P(scope_node->ast_node, PM_MODULE_NODE)) { const pm_line_column_t end_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, scope_node->ast_node); ADD_TRACE(ret, RUBY_EVENT_END); ISEQ_COMPILE_DATA(iseq)->last_line = end_location.line; @@ -8174,7 +9100,38 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case PM_SHAREABLE_CONSTANT_NODE: { // A value that is being written to a constant that is being marked as // shared depending on the current lexical context. 
- PM_COMPILE(((const pm_shareable_constant_node_t *) node)->write); + const pm_shareable_constant_node_t *cast = (const pm_shareable_constant_node_t *) node; + + switch (PM_NODE_TYPE(cast->write)) { + case PM_CONSTANT_WRITE_NODE: + pm_compile_constant_write_node(iseq, (const pm_constant_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_AND_WRITE_NODE: + pm_compile_constant_and_write_node(iseq, (const pm_constant_and_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_OR_WRITE_NODE: + pm_compile_constant_or_write_node(iseq, (const pm_constant_or_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_OPERATOR_WRITE_NODE: + pm_compile_constant_operator_write_node(iseq, (const pm_constant_operator_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_WRITE_NODE: + pm_compile_constant_path_write_node(iseq, (const pm_constant_path_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_AND_WRITE_NODE: + pm_compile_constant_path_and_write_node(iseq, (const pm_constant_path_and_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_OR_WRITE_NODE: + pm_compile_constant_path_or_write_node(iseq, (const pm_constant_path_or_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + case PM_CONSTANT_PATH_OPERATOR_WRITE_NODE: + pm_compile_constant_path_operator_write_node(iseq, (const pm_constant_path_operator_write_node_t *) cast->write, cast->base.flags, &location, ret, popped, scope_node); + break; + default: + rb_bug("Unexpected node type for shareable constant write: %s", pm_node_type_to_str(PM_NODE_TYPE(cast->write))); + break; + } + return; } case PM_SINGLETON_CLASS_NODE: { @@ 
-8320,10 +9277,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, } PUSH_SEQ(ret, args); - PUSH_INSN2(ret, location, invokesuper, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); - PUSH_LABEL(ret, retry_end_l); - if (popped) PUSH_INSN(ret, location, pop); + if (ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq)->param.flags.forwardable) { + flags |= VM_CALL_FORWARDING; + PUSH_INSN2(ret, location, invokesuperforward, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); + } + else { + PUSH_INSN2(ret, location, invokesuper, new_callinfo(iseq, 0, argc, flags, keywords, current_block != NULL), current_block); + pm_compile_retry_end_label(iseq, ret, retry_end_l); + } + if (popped) PUSH_INSN(ret, location, pop); ISEQ_COMPILE_DATA(iseq)->current_block = previous_block; PUSH_CATCH_ENTRY(CATCH_TYPE_BREAK, retry_label, retry_end_l, current_block, retry_end_l); @@ -8379,7 +9342,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, consequent = ((const pm_else_node_t *) cast->consequent)->statements; } - pm_compile_conditional(iseq, &location, consequent, (const pm_node_t *) cast->statements, cast->predicate, ret, popped, scope_node); + pm_compile_conditional(iseq, &location, PM_UNLESS_NODE, (const pm_node_t *) cast, consequent, (const pm_node_t *) cast->statements, cast->predicate, ret, popped, scope_node); return; } case PM_UNTIL_NODE: { @@ -8389,7 +9352,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // bar until foo // ^^^^^^^^^^^^^ const pm_until_node_t *cast = (const pm_until_node_t *) node; - pm_compile_loop(iseq, &location, cast->base.flags, PM_UNTIL_NODE, cast->statements, cast->predicate, ret, popped, scope_node); + pm_compile_loop(iseq, &location, cast->base.flags, PM_UNTIL_NODE, (const pm_node_t *) cast, cast->statements, cast->predicate, ret, popped, scope_node); return; } case PM_WHILE_NODE: { @@ -8399,7 
+9362,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // bar while foo // ^^^^^^^^^^^^^ const pm_while_node_t *cast = (const pm_while_node_t *) node; - pm_compile_loop(iseq, &location, cast->base.flags, PM_WHILE_NODE, cast->statements, cast->predicate, ret, popped, scope_node); + pm_compile_loop(iseq, &location, cast->base.flags, PM_WHILE_NODE, (const pm_node_t *) cast, cast->statements, cast->predicate, ret, popped, scope_node); return; } case PM_X_STRING_NODE: { @@ -8427,7 +9390,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, case ISEQ_TYPE_TOP: case ISEQ_TYPE_MAIN: case ISEQ_TYPE_CLASS: - COMPILE_ERROR(ERROR_ARGS "Invalid yield"); + COMPILE_ERROR(iseq, location.line, "Invalid yield"); return; default: /* valid */; } @@ -8533,6 +9496,368 @@ pm_parse_result_free(pm_parse_result_t *result) pm_options_free(&result->options); } +/** An error that is going to be formatted into the output. */ +typedef struct { + /** A pointer to the diagnostic that was generated during parsing. */ + pm_diagnostic_t *error; + + /** The start line of the diagnostic message. */ + int32_t line; + + /** The column start of the diagnostic message. */ + uint32_t column_start; + + /** The column end of the diagnostic message. */ + uint32_t column_end; +} pm_parse_error_t; + +/** The format that will be used to format the errors into the output. */ +typedef struct { + /** The prefix that will be used for line numbers. */ + const char *number_prefix; + + /** The prefix that will be used for blank lines. */ + const char *blank_prefix; + + /** The divider that will be used between sections of source code. */ + const char *divider; + + /** The length of the blank prefix. */ + size_t blank_prefix_length; + + /** The length of the divider. 
*/ + size_t divider_length; +} pm_parse_error_format_t; + +#define PM_COLOR_GRAY "\033[38;5;102m" +#define PM_COLOR_RED "\033[1;31m" +#define PM_COLOR_RESET "\033[m" +#define PM_ERROR_TRUNCATE 30 + +static inline pm_parse_error_t * +pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) { + pm_parse_error_t *errors = xcalloc(error_list->size, sizeof(pm_parse_error_t)); + if (errors == NULL) return NULL; + + int32_t start_line = parser->start_line; + for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line); + pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line); + + // We're going to insert this error into the array in sorted order. We + // do this by finding the first error that has a line number greater + // than the current error and then inserting the current error before + // that one. + size_t index = 0; + while ( + (index < error_list->size) && + (errors[index].error != NULL) && + ( + (errors[index].line < start.line) || + ((errors[index].line == start.line) && (errors[index].column_start < start.column)) + ) + ) index++; + + // Now we're going to shift all of the errors after this one down one + // index to make room for the new error. + if (index + 1 < error_list->size) { + memmove(&errors[index + 1], &errors[index], sizeof(pm_parse_error_t) * (error_list->size - index - 1)); + } + + // Finally, we'll insert the error into the array. + uint32_t column_end; + if (start.line == end.line) { + column_end = end.column; + } else { + column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1); + } + + // Ensure we have at least one column of error. 
+ if (start.column == column_end) column_end++; + + errors[index] = (pm_parse_error_t) { + .error = error, + .line = start.line, + .column_start = start.column, + .column_end = column_end + }; + } + + return errors; +} + +static inline void +pm_parse_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, uint32_t column_start, uint32_t column_end, pm_buffer_t *buffer) { + int32_t line_delta = line - parser->start_line; + assert(line_delta >= 0); + + size_t index = (size_t) line_delta; + assert(index < newline_list->size); + + const uint8_t *start = &parser->start[newline_list->offsets[index]]; + const uint8_t *end; + + if (index >= newline_list->size - 1) { + end = parser->end; + } else { + end = &parser->start[newline_list->offsets[index + 1]]; + } + + pm_buffer_append_format(buffer, number_prefix, line); + + // Here we determine if we should truncate the end of the line. + bool truncate_end = false; + if ((column_end != 0) && ((end - (start + column_end)) >= PM_ERROR_TRUNCATE)) { + end = start + column_end + PM_ERROR_TRUNCATE; + truncate_end = true; + } + + // Here we determine if we should truncate the start of the line. + if (column_start >= PM_ERROR_TRUNCATE) { + pm_buffer_append_string(buffer, "... ", 4); + start += column_start; + } + + pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start)); + + if (truncate_end) { + pm_buffer_append_string(buffer, " ...\n", 5); + } else if (end == parser->end && end[-1] != '\n') { + pm_buffer_append_string(buffer, "\n", 1); + } +} + +/** + * Format the errors on the parser into the given buffer. + */ +static void +pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) { + assert(error_list->size != 0); + + // First, we're going to sort all of the errors by line number using an + // insertion sort into a newly allocated array. 
+ const int32_t start_line = parser->start_line; + const pm_newline_list_t *newline_list = &parser->newline_list; + + pm_parse_error_t *errors = pm_parse_errors_format_sort(parser, error_list, newline_list); + if (errors == NULL) return; + + // Now we're going to determine how we're going to format line numbers and + // blank lines based on the maximum number of digits in the line numbers + // that are going to be displayed. + pm_parse_error_format_t error_format; + int32_t first_line_number = errors[0].line; + int32_t last_line_number = errors[error_list->size - 1].line; + + // If we have a maximum line number that is negative, then we're going to + // use the absolute value for comparison but multiply by 10 to additionally + // have a column for the negative sign. + if (first_line_number < 0) first_line_number = (-first_line_number) * 10; + if (last_line_number < 0) last_line_number = (-last_line_number) * 10; + int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number; + + if (max_line_number < 10) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%1" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~\n" + }; + } + } else if (max_line_number < 100) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%2" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~\n" + }; + } + } else if (max_line_number < 1000) { + if (colorize) { + error_format = (pm_parse_error_format_t) { +
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%3" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~\n" + }; + } + } else if (max_line_number < 10000) { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%4" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~~\n" + }; + } + } else { + if (colorize) { + error_format = (pm_parse_error_format_t) { + .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET, + .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET, + .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n" + }; + } else { + error_format = (pm_parse_error_format_t) { + .number_prefix = "%5" PRIi32 " | ", + .blank_prefix = " | ", + .divider = " ~~~~~~~~\n" + }; + } + } + + error_format.blank_prefix_length = strlen(error_format.blank_prefix); + error_format.divider_length = strlen(error_format.divider); + + // Now we're going to iterate through every error in our error list and + // display it. While we're iterating, we will display some padding lines of + // the source before the error to give some context. We'll be careful not to + // display the same line twice in case the errors are close enough in the + // source. 
+ int32_t last_line = parser->start_line - 1; + uint32_t last_column_start = 0; + const pm_encoding_t *encoding = parser->encoding; + + for (size_t index = 0; index < error_list->size; index++) { + pm_parse_error_t *error = &errors[index]; + + // Here we determine how many lines of padding of the source to display, + // based on the difference from the last line that was displayed. + if (error->line - last_line > 1) { + if (error->line - last_line > 2) { + if ((index != 0) && (error->line - last_line > 3)) { + pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length); + } + + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, 0, 0, buffer); + } + + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, 0, 0, buffer); + } + + // If this is the first error or we're on a new line, then we'll display + // the line that has the error in it. + if ((index == 0) || (error->line != last_line)) { + if (colorize) { + pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12); + } else { + pm_buffer_append_string(buffer, "> ", 2); + } + + last_column_start = error->column_start; + + // Find the maximum column end of all the errors on this line. + uint32_t column_end = error->column_end; + for (size_t next_index = index + 1; next_index < error_list->size; next_index++) { + if (errors[next_index].line != error->line) break; + if (errors[next_index].column_end > column_end) column_end = errors[next_index].column_end; + } + + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, error->column_start, column_end, buffer); + } + + const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]]; + if (start == parser->end) pm_buffer_append_byte(buffer, '\n'); + + // Now we'll display the actual error message.
We'll do this by first + // putting the prefix to the line, then a bunch of blank spaces + // depending on the column, then as many carets as we need to display + // the width of the error, then the error message itself. + // + // Note that this doesn't take into account the width of the actual + // character when displayed in the terminal. For some East Asian + // languages or emoji, this means it can be thrown off pretty badly. We + // will need to solve this eventually. + pm_buffer_append_string(buffer, " ", 2); + pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length); + + size_t column = 0; + if (last_column_start >= PM_ERROR_TRUNCATE) { + pm_buffer_append_string(buffer, " ", 4); + column = last_column_start; + } + + while (column < error->column_start) { + pm_buffer_append_byte(buffer, ' '); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + } + + if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7); + pm_buffer_append_byte(buffer, '^'); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + + while (column < error->column_end) { + pm_buffer_append_byte(buffer, '~'); + + size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + column += (char_width == 0 ? 1 : char_width); + } + + if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3); + + if (inline_messages) { + pm_buffer_append_byte(buffer, ' '); + assert(error->error != NULL); + + const char *message = error->error->message; + pm_buffer_append_string(buffer, message, strlen(message)); + } + + pm_buffer_append_byte(buffer, '\n'); + + // Here we determine how many lines of padding to display after the + // error, depending on where the next error is in source.
+ last_line = error->line; + int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line; + + if (next_line - last_line > 1) { + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + } + + if (next_line - last_line > 1) { + pm_buffer_append_string(buffer, " ", 2); + pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + } + } + + // Finally, we'll free the array of errors that we allocated. + xfree(errors); +} + +#undef PM_ERROR_TRUNCATE +#undef PM_COLOR_GRAY +#undef PM_COLOR_RED +#undef PM_COLOR_RESET + /** * Check if the given source slice is valid UTF-8. The location represents the * location of the error, but the slice of the source will include the content @@ -8604,7 +9929,7 @@ pm_parse_process_error(const pm_parse_result_t *result) pm_list_node_t *list_node = (pm_list_node_t *) error; pm_list_t error_list = { .size = 1, .head = list_node, .tail = list_node }; - pm_parser_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false); + pm_parse_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false); } VALUE value = rb_exc_new(rb_eArgError, pm_buffer_value(&buffer), pm_buffer_length(&buffer)); @@ -8634,7 +9959,7 @@ pm_parse_process_error(const pm_parse_result_t *result) ); if (valid_utf8) { - pm_parser_errors_format(parser, &parser->error_list, &buffer, rb_stderr_tty_p(), true); + pm_parse_errors_format(parser, &parser->error_list, &buffer, rb_stderr_tty_p(), true); } else { for (const pm_diagnostic_t *error = head; error != NULL; error = (const pm_diagnostic_t *) error->node.next) { @@ -8643,7 +9968,8 @@ pm_parse_process_error(const pm_parse_result_t *result) } } - VALUE error = rb_exc_new(rb_eSyntaxError, pm_buffer_value(&buffer), pm_buffer_length(&buffer)); + VALUE message = rb_enc_str_new(pm_buffer_value(&buffer), 
pm_buffer_length(&buffer), result->node.encoding); + VALUE error = rb_exc_new_str(rb_eSyntaxError, message); rb_encoding *filepath_encoding = result->node.filepath_encoding != NULL ? result->node.filepath_encoding : rb_utf8_encoding(); VALUE path = rb_enc_str_new((const char *) pm_string_source(filepath), pm_string_length(filepath), filepath_encoding); @@ -8668,10 +9994,16 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) // freed regardless of whether or we return an error. pm_scope_node_t *scope_node = &result->node; rb_encoding *filepath_encoding = scope_node->filepath_encoding; + int coverage_enabled = scope_node->coverage_enabled; pm_scope_node_init(node, scope_node, NULL); scope_node->filepath_encoding = filepath_encoding; + scope_node->encoding = rb_enc_find(parser->encoding->name); + if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); + + scope_node->coverage_enabled = coverage_enabled; + // Emit all of the various warnings from the parse. const pm_diagnostic_t *warning; const char *warning_filepath = (const char *) pm_string_source(&parser->filepath); @@ -8680,10 +10012,10 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) int line = pm_location_line_number(parser, &warning->location); if (warning->level == PM_WARNING_LEVEL_VERBOSE) { - rb_compile_warning(warning_filepath, line, "%s", warning->message); + rb_enc_compile_warning(scope_node->encoding, warning_filepath, line, "%s", warning->message); } else { - rb_compile_warn(warning_filepath, line, "%s", warning->message); + rb_enc_compile_warn(scope_node->encoding, warning_filepath, line, "%s", warning->message); } } @@ -8698,9 +10030,6 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node) // Now set up the constant pool and intern all of the various constants into // their corresponding IDs. 
- scope_node->encoding = rb_enc_find(parser->encoding->name); - if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); - scope_node->parser = parser; scope_node->constants = calloc(parser->constant_pool.size, sizeof(ID)); @@ -8785,7 +10114,7 @@ pm_parse_file_script_lines(const pm_scope_node_t *scope_node, const pm_parser_t * be read. */ VALUE -pm_load_file(pm_parse_result_t *result, VALUE filepath) +pm_load_file(pm_parse_result_t *result, VALUE filepath, bool load_error) { if (!pm_string_mapped_init(&result->input, RSTRING_PTR(filepath))) { #ifdef _WIN32 @@ -8794,9 +10123,21 @@ pm_load_file(pm_parse_result_t *result, VALUE filepath) int e = errno; #endif - VALUE err = rb_syserr_new(e, RSTRING_PTR(filepath)); - RB_GC_GUARD(filepath); - return err; + VALUE error; + + if (load_error) { + VALUE message = rb_str_buf_new_cstr(strerror(e)); + rb_str_cat2(message, " -- "); + rb_str_append(message, filepath); + + error = rb_exc_new3(rb_eLoadError, message); + rb_ivar_set(error, rb_intern_const("@path"), filepath); + } else { + error = rb_syserr_new(e, RSTRING_PTR(filepath)); + RB_GC_GUARD(filepath); + } + + return error; } pm_options_frozen_string_literal_init(&result->options); @@ -8812,6 +10153,7 @@ pm_load_file(pm_parse_result_t *result, VALUE filepath) VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath) { + result->node.filepath_encoding = rb_enc_get(filepath); pm_options_filepath_set(&result->options, RSTRING_PTR(filepath)); RB_GC_GUARD(filepath); @@ -8843,7 +10185,7 @@ pm_parse_file(pm_parse_result_t *result, VALUE filepath) VALUE pm_load_parse_file(pm_parse_result_t *result, VALUE filepath) { - VALUE error = pm_load_file(result, filepath); + VALUE error = pm_load_file(result, filepath, false); if (NIL_P(error)) { error = pm_parse_file(result, filepath); } |