diff options
Diffstat (limited to 'prism_compile.c')
| -rw-r--r-- | prism_compile.c | 1973 |
1 files changed, 1123 insertions, 850 deletions
diff --git a/prism_compile.c b/prism_compile.c index 39fa6e25fb..45e1de8a9c 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -1,4 +1,7 @@ #include "prism.h" +#include "ruby/version.h" + +#include <fcntl.h> /** * This compiler defines its own concept of the location of a node. We do this @@ -101,6 +104,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node else { ADD_ELEM(seq, (LINK_ELEMENT *) new_insn_body(iseq, line, node_id, BIN(setlocal), 2, INT2FIX((idx) + VM_ENV_DATA_SIZE - 1), INT2FIX(level))); } + update_lvar_state(iseq, level, idx); if (level > 0) access_outer_variables(iseq, level, iseq_lvar_id(iseq, idx, level), Qtrue); } @@ -138,33 +142,127 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node #define PM_COMPILE_NOT_POPPED(node) \ pm_compile_node(iseq, (node), ret, false, scope_node) -#define PM_NODE_START_LOCATION(parser, node) \ - ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) +// Direct-indexed lookup table. -1 means "not present". +#define PM_INDEX_LOOKUP_TABLE_INIT { .values = NULL, .capacity = 0, .owned = false } -#define PM_NODE_END_LOCATION(parser, node) \ - ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) +static inline void +pm_index_lookup_table_init(pm_index_lookup_table_t *table, int constants_size, rb_iseq_t *iseq) +{ + int capacity = constants_size + PM_INDEX_LOOKUP_SPECIALS; + table->values = compile_data_alloc2_type(iseq, int, capacity); + memset(table->values, -1, capacity * sizeof(int)); + table->capacity = capacity; + table->owned = false; +} -#define PM_LOCATION_START_LOCATION(parser, location, id) \ - ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, (location)->start, (parser)->start_line), .node_id = id }) +/** + * Cached line lookup that avoids repeated binary searches. Since the compiler + * walks the AST roughly in source order, consecutive lookups tend to be for + * nearby byte offsets. We cache the last result index in the scope node and + * try a short linear probe from there before falling back to binary search. + */ +static inline pm_line_column_t +pm_line_offset_list_line_column_cached(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line, size_t *last_line) +{ + size_t hint = *last_line; + size_t size = list->size; + const uint32_t *offsets = list->offsets; -#define PM_NODE_START_LINE_COLUMN(parser, node) \ - pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line) + RUBY_ASSERT(hint < size); -#define PM_NODE_END_LINE_COLUMN(parser, node) \ - pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line) + /* Check if the cursor is on the same line as the hint. */ + if (offsets[hint] <= cursor) { + if (hint + 1 >= size || offsets[hint + 1] > cursor) { + *last_line = hint; + return ((pm_line_column_t) { + .line = ((int32_t) hint) + start_line, + .column = cursor - offsets[hint] + }); + } -#define PM_LOCATION_START_LINE_COLUMN(parser, location) \ - pm_newline_list_line_column(&(parser)->newline_list, (location)->start, (parser)->start_line) + /* Linear scan forward (up to 8 lines before giving up). */ + size_t limit = hint + 9; + if (limit > size) limit = size; + for (size_t idx = hint + 1; idx < limit; idx++) { + if (offsets[idx] > cursor) { + *last_line = idx - 1; + return ((pm_line_column_t) { + .line = ((int32_t) (idx - 1)) + start_line, + .column = cursor - offsets[idx - 1] + }); + } + if (offsets[idx] == cursor) { + *last_line = idx; + return ((pm_line_column_t) { ((int32_t) idx) + start_line, 0 }); + } + } + } + else { + /* Linear scan backward (up to 8 lines before giving up). */ + size_t limit = hint > 8 ? hint - 8 : 0; + for (size_t idx = hint; idx > limit; idx--) { + if (offsets[idx - 1] <= cursor) { + *last_line = idx - 1; + return ((pm_line_column_t) { + .line = ((int32_t) (idx - 1)) + start_line, + .column = cursor - offsets[idx - 1] + }); + } + } + } -static int -pm_node_line_number(const pm_parser_t *parser, const pm_node_t *node) + /* Fall back to binary search. */ + pm_line_column_t result = pm_line_offset_list_line_column(list, cursor, start_line); + *last_line = (size_t) (result.line - start_line); + return result; +} + +/** + * The same as pm_line_offset_list_line_column_cached, but returning only the + * line number. + */ +static inline int32_t +pm_line_offset_list_line_cached(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line, size_t *last_line) { - return (int) pm_newline_list_line(&parser->newline_list, node->location.start, parser->start_line); + return pm_line_offset_list_line_column_cached(list, cursor, start_line, last_line).line; } +#define PM_NODE_START_LOCATION(node) \ + ((pm_node_location_t) { .line = pm_line_offset_list_line_cached(scope_node->line_offsets, ((const pm_node_t *) (node))->location.start, scope_node->start_line, &scope_node->last_line), .node_id = ((const pm_node_t *) (node))->node_id }) + +#define PM_NODE_END_LOCATION(node) \ + ((pm_node_location_t) { .line = pm_line_offset_list_line_cached(scope_node->line_offsets, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, scope_node->start_line, &scope_node->last_line), .node_id = ((const pm_node_t *) (node))->node_id }) + +#define PM_LOCATION_START_LOCATION(location, id) \ + ((pm_node_location_t) { .line = pm_line_offset_list_line_cached(scope_node->line_offsets, (location)->start, scope_node->start_line, &scope_node->last_line), .node_id = id }) + +#define PM_NODE_START_LINE_COLUMN(node) \ + pm_line_offset_list_line_column_cached(scope_node->line_offsets, ((const pm_node_t *) (node))->location.start, scope_node->start_line, &scope_node->last_line) + +#define PM_NODE_END_LINE_COLUMN(node) \ + pm_line_offset_list_line_column_cached(scope_node->line_offsets, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, scope_node->start_line, &scope_node->last_line) + +#define PM_LOCATION_START_LINE_COLUMN(location) \ + pm_line_offset_list_line_column_cached(scope_node->line_offsets, (location)->start, scope_node->start_line, &scope_node->last_line) + static int pm_location_line_number(const pm_parser_t *parser, const pm_location_t *location) { - return (int) pm_newline_list_line(&parser->newline_list, location->start, parser->start_line); + return (int) pm_line_offset_list_line_column(pm_parser_line_offsets(parser), location->start, pm_parser_start_line(parser)).line; +} + +/** + * Cached variants that use the scope node's hint for fast lookups during + * compilation (where access patterns are roughly sequential). + */ +static inline int +pm_node_line_number_cached(const pm_node_t *node, pm_scope_node_t *scope_node) +{ + return (int) pm_line_offset_list_line_cached(scope_node->line_offsets, node->location.start, scope_node->start_line, &scope_node->last_line); +} + +static inline int +pm_location_line_number_cached(const pm_location_t *location, pm_scope_node_t *scope_node) { + return (int) pm_line_offset_list_line_cached(scope_node->line_offsets, location->start, scope_node->start_line, &scope_node->last_line); } /** @@ -179,24 +277,25 @@ parse_integer_value(const pm_integer_t *integer) result = UINT2NUM(integer->value); } else { - VALUE string = rb_str_new(NULL, integer->length * 8); - unsigned char *bytes = (unsigned char *) RSTRING_PTR(string); - - size_t offset = integer->length * 8; - for (size_t value_index = 0; value_index < integer->length; value_index++) { - uint32_t value = integer->values[value_index]; - - for (int index = 0; index < 8; index++) { - int byte = (value >> (4 * index)) & 0xf; - bytes[--offset] = byte < 10 ? byte + '0' : byte - 10 + 'a'; - } - } - - result = rb_funcall(string, rb_intern("to_i"), 1, UINT2NUM(16)); + // The pm_integer_t stores values as an array of uint32_t in + // least-significant-word-first order (base 2^32). We can convert + // directly to a Ruby Integer using rb_integer_unpack, avoiding the + // overhead of constructing a hex string and calling rb_funcall. + result = rb_integer_unpack( + integer->values, + integer->length, + sizeof(uint32_t), + 0, + INTEGER_PACK_LSWORD_FIRST | INTEGER_PACK_NATIVE_BYTE_ORDER + ); } if (integer->negative) { - result = rb_funcall(result, rb_intern("-@"), 0); + result = rb_int_uminus(result); + } + + if (!SPECIAL_CONST_P(result)) { + RB_OBJ_SET_SHAREABLE(result); // bignum } return result; @@ -217,7 +316,11 @@ parse_integer(const pm_integer_node_t *node) static VALUE parse_float(const pm_float_node_t *node) { - return DBL2NUM(node->value); + VALUE val = DBL2NUM(node->value); + if (!FLONUM_P(val)) { + RB_OBJ_SET_SHAREABLE(val); + } + return val; } /** @@ -231,7 +334,8 @@ parse_rational(const pm_rational_node_t *node) { VALUE numerator = parse_integer_value(&node->numerator); VALUE denominator = parse_integer_value(&node->denominator); - return rb_rational_new(numerator, denominator); + + return rb_ractor_make_shareable(rb_rational_new(numerator, denominator)); } /** @@ -258,10 +362,10 @@ parse_imaginary(const pm_imaginary_node_t *node) break; } default: - rb_bug("Unexpected numeric type on imaginary number %s\n", pm_node_type_to_str(PM_NODE_TYPE(node->numeric))); + rb_bug("Unexpected numeric type on imaginary number %s\n", pm_node_type(PM_NODE_TYPE(node->numeric))); } - return rb_complex_raw(INT2FIX(0), imaginary_part); + return RB_OBJ_SET_SHAREABLE(rb_complex_raw(INT2FIX(0), imaginary_part)); } static inline VALUE @@ -294,7 +398,7 @@ parse_string_encoded(const pm_node_t *node, const pm_string_t *string, rb_encodi } static inline VALUE -parse_static_literal_string(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *string) +parse_static_literal_string(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *string) { rb_encoding *encoding; @@ -312,8 +416,8 @@ parse_static_literal_string(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, rb_enc_str_coderange(value); if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) { - int line_number = pm_node_line_number(scope_node->parser, node); - value = rb_str_with_debug_created_info(value, rb_iseq_path(iseq), line_number); + int line_number = pm_node_line_number_cached(node, scope_node); + value = rb_ractor_make_shareable(rb_str_with_debug_created_info(value, rb_iseq_path(iseq), line_number)); } return value; @@ -357,7 +461,7 @@ parse_regexp_error(rb_iseq_t *iseq, int32_t line_number, const char *fmt, ...) } static VALUE -parse_regexp_string_part(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +parse_regexp_string_part(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) { // If we were passed an explicit regexp encoding, then we need to double // check that it's okay here for this fragment of the string. @@ -379,12 +483,12 @@ parse_regexp_string_part(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, con VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), encoding); VALUE error = rb_reg_check_preprocess(string); - if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, node), "%" PRIsVALUE, rb_obj_as_string(error)); + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number_cached(node, scope_node), "%" PRIsVALUE, rb_obj_as_string(error)); return string; } static VALUE -pm_static_literal_concat(rb_iseq_t *iseq, const pm_node_list_t *nodes, const pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding, bool top) +pm_static_literal_concat(rb_iseq_t *iseq, const pm_node_list_t *nodes, pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding, bool top) { VALUE current = Qnil; @@ -401,7 +505,7 @@ pm_static_literal_concat(rb_iseq_t *iseq, const pm_node_list_t *nodes, const pm_ else { string = parse_string_encoded(part, &((const pm_string_node_t *) part)->unescaped, scope_node->encoding); VALUE error = rb_reg_check_preprocess(string); - if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number(scope_node->parser, part), "%" PRIsVALUE, rb_obj_as_string(error)); + if (error != Qnil) parse_regexp_error(iseq, pm_node_line_number_cached(part, scope_node), "%" PRIsVALUE, rb_obj_as_string(error)); } } else { @@ -514,12 +618,12 @@ parse_regexp_encoding(const pm_scope_node_t *scope_node, const pm_node_t *node) } static VALUE -parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string) +parse_regexp(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, VALUE string) { VALUE errinfo = rb_errinfo(); - int32_t line_number = pm_node_line_number(scope_node->parser, node); - VALUE regexp = rb_reg_compile(string, parse_regexp_flags(node), (const char *) pm_string_source(&scope_node->parser->filepath), line_number); + int32_t line_number = pm_node_line_number_cached(node, scope_node); + VALUE regexp = rb_reg_compile(string, parse_regexp_flags(node), (const char *) pm_string_source(pm_parser_filepath(scope_node->parser)), line_number); if (NIL_P(regexp)) { VALUE message = rb_attr_get(rb_errinfo(), idMesg); @@ -529,22 +633,22 @@ parse_regexp(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t return Qnil; } - rb_obj_freeze(regexp); - return regexp; + return RB_OBJ_SET_SHAREABLE(rb_obj_freeze(regexp)); } static inline VALUE -parse_regexp_literal(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) +parse_regexp_literal(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, const pm_string_t *unescaped) { rb_encoding *regexp_encoding = parse_regexp_encoding(scope_node, node); if (regexp_encoding == NULL) regexp_encoding = scope_node->encoding; VALUE string = rb_enc_str_new((const char *) pm_string_source(unescaped), pm_string_length(unescaped), regexp_encoding); + RB_OBJ_SET_SHAREABLE(string); return parse_regexp(iseq, scope_node, node, string); } static inline VALUE -parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts) +parse_regexp_concat(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, const pm_node_list_t *parts) { rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? explicit_regexp_encoding : scope_node->encoding; @@ -556,7 +660,7 @@ parse_regexp_concat(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node); static int -pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding) +pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, rb_encoding *implicit_regexp_encoding, rb_encoding *explicit_regexp_encoding, bool mutable_result, bool frozen_result) { int stack_size = 0; size_t parts_size = parts->size; @@ -585,7 +689,7 @@ pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const } else { current_string = string_value; - if (index != 0) current_location = PM_NODE_END_LOCATION(scope_node->parser, part); + if (index != 0) current_location = PM_NODE_END_LOCATION(part); } } else { @@ -612,7 +716,7 @@ pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const } else { current_string = string_value; - current_location = PM_NODE_START_LOCATION(scope_node->parser, part); + current_location = PM_NODE_START_LOCATION(part); } } else { @@ -623,7 +727,7 @@ pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const if (explicit_regexp_encoding != NULL) { encoding = explicit_regexp_encoding; } - else if (scope_node->parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { + else if (pm_parser_encoding_us_ascii(scope_node->parser)) { encoding = rb_ascii8bit_encoding(); } else { @@ -647,7 +751,7 @@ pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const PM_COMPILE_NOT_POPPED(part); - const pm_node_location_t current_location = PM_NODE_START_LOCATION(scope_node->parser, part); + const pm_node_location_t current_location = PM_NODE_START_LOCATION(part); PUSH_INSN(ret, current_location, dup); { @@ -666,10 +770,15 @@ pm_interpolated_node_compile(rb_iseq_t *iseq, const pm_node_list_t *parts, const if (RTEST(current_string)) { current_string = rb_fstring(current_string); - if (stack_size == 0 && interpolated) { - PUSH_INSN1(ret, current_location, putstring, current_string); - } - else { + if (stack_size == 0) { + if (frozen_result) { + PUSH_INSN1(ret, current_location, putobject, current_string); + } else if (mutable_result || interpolated) { + PUSH_INSN1(ret, current_location, dupstring, current_string); + } else { + PUSH_INSN1(ret, current_location, dupchilledstring, current_string); + } + } else { PUSH_INSN1(ret, current_location, putobject, current_string); } @@ -690,7 +799,7 @@ pm_compile_regexp_dynamic(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_ rb_encoding *explicit_regexp_encoding = parse_regexp_encoding(scope_node, node); rb_encoding *implicit_regexp_encoding = explicit_regexp_encoding != NULL ? explicit_regexp_encoding : scope_node->encoding; - int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node, implicit_regexp_encoding, explicit_regexp_encoding); + int length = pm_interpolated_node_compile(iseq, parts, node_location, ret, popped, scope_node, implicit_regexp_encoding, explicit_regexp_encoding, false, false); PUSH_INSN2(ret, *node_location, toregexp, INT2FIX(parse_regexp_flags(node) & 0xFF), INT2FIX(length)); } @@ -717,7 +826,9 @@ static VALUE pm_static_literal_string(rb_iseq_t *iseq, VALUE string, int line_number) { if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) { - return rb_str_with_debug_created_info(string, rb_iseq_path(iseq), line_number); + VALUE str = rb_str_with_debug_created_info(string, rb_iseq_path(iseq), line_number); + RB_OBJ_SET_SHAREABLE(str); + return str; } else { return rb_fstring(string); @@ -730,7 +841,7 @@ pm_static_literal_string(rb_iseq_t *iseq, VALUE string, int line_number) * literal values can be compiled into a literal array. */ static VALUE -pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node) +pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, pm_scope_node_t *scope_node) { // Every node that comes into this function should already be marked as // static literal. If it's not, then we have a bug somewhere. @@ -746,7 +857,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n rb_ary_push(value, pm_static_literal_value(iseq, elements->nodes[index], scope_node)); } - OBJ_FREEZE(value); + RB_OBJ_SET_FROZEN_SHAREABLE(value); return value; } case PM_FALSE_NODE: @@ -765,11 +876,11 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n rb_ary_cat(array, pair, 2); } - VALUE value = rb_hash_new_with_size(elements->size); + VALUE value = rb_hash_alloc_fixed_size(Qfalse, elements->size); rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value); + RB_GC_GUARD(array); - value = rb_obj_hide(value); - OBJ_FREEZE(value); + RB_OBJ_SET_FROZEN_SHAREABLE(value); return value; } case PM_IMAGINARY_NODE: @@ -786,7 +897,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n } case PM_INTERPOLATED_STRING_NODE: { VALUE string = pm_static_literal_concat(iseq, &((const pm_interpolated_string_node_t *) node)->parts, scope_node, NULL, NULL, false); - int line_number = pm_node_line_number(scope_node->parser, node); + int line_number = pm_node_line_number_cached(node, scope_node); return pm_static_literal_string(iseq, string, line_number); } case PM_INTERPOLATED_SYMBOL_NODE: { @@ -814,7 +925,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n return pm_source_file_value(cast, scope_node); } case PM_SOURCE_LINE_NODE: - return INT2FIX(pm_node_line_number(scope_node->parser, node)); + return INT2FIX(pm_node_line_number_cached(node, scope_node)); case PM_STRING_NODE: { const pm_string_node_t *cast = (const pm_string_node_t *) node; return parse_static_literal_string(iseq, scope_node, node, &cast->unescaped); @@ -824,7 +935,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n case PM_TRUE_NODE: return Qtrue; default: - rb_bug("Don't have a literal value for node type %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Don't have a literal value for node type %s", pm_node_type(PM_NODE_TYPE(node))); return Qfalse; } } @@ -833,10 +944,10 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_n * A helper for converting a pm_location_t into a rb_code_location_t. */ static rb_code_location_t -pm_code_location(const pm_scope_node_t *scope_node, const pm_node_t *node) +pm_code_location(pm_scope_node_t *scope_node, const pm_node_t *node) { - const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, node); - const pm_line_column_t end_location = PM_NODE_END_LINE_COLUMN(scope_node->parser, node); + const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(node); + const pm_line_column_t end_location = PM_NODE_END_LINE_COLUMN(node); return (rb_code_location_t) { .beg_pos = { .lineno = start_location.line, .column = start_location.column }, @@ -853,12 +964,12 @@ pm_code_location(const pm_scope_node_t *scope_node, const pm_node_t *node) static void pm_compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_node_t *cond, - LABEL *then_label, LABEL *else_label, bool popped, pm_scope_node_t *scope_node); + LABEL *then_label, LABEL *else_label, pm_scope_node_t *scope_node); static void -pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LABEL *then_label, LABEL *else_label, bool popped, pm_scope_node_t *scope_node) +pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LABEL *then_label, LABEL *else_label, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, cond); + const pm_node_location_t location = PM_NODE_START_LOCATION(cond); DECL_ANCHOR(seq); @@ -866,17 +977,14 @@ pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LAB if (!then_label) then_label = label; else if (!else_label) else_label = label; - pm_compile_branch_condition(iseq, seq, cond, then_label, else_label, popped, scope_node); + pm_compile_branch_condition(iseq, seq, cond, then_label, else_label, scope_node); if (LIST_INSN_SIZE_ONE(seq)) { INSN *insn = (INSN *) ELEM_FIRST_INSN(FIRST_ELEMENT(seq)); if (insn->insn_id == BIN(jump) && (LABEL *)(insn->operands[0]) == label) return; } - if (!label->refcnt) { - if (popped) PUSH_INSN(ret, location, putnil); - } - else { + if (label->refcnt) { PUSH_LABEL(seq, label); } @@ -887,7 +995,7 @@ pm_compile_logical(rb_iseq_t *iseq, LINK_ANCHOR *const ret, pm_node_t *cond, LAB static void pm_compile_flip_flop_bound(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_node_location_t location = { .line = ISEQ_BODY(iseq)->location.first_lineno, .node_id = -1 }; + const pm_node_location_t location = PM_NODE_START_LOCATION(node); if (PM_NODE_TYPE_P(node, PM_INTEGER_NODE)) { PM_COMPILE_NOT_POPPED(node); @@ -906,7 +1014,7 @@ pm_compile_flip_flop_bound(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR * static void pm_compile_flip_flop(const pm_flip_flop_node_t *flip_flop_node, LABEL *else_label, LABEL *then_label, rb_iseq_t *iseq, const int lineno, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_node_location_t location = { .line = ISEQ_BODY(iseq)->location.first_lineno, .node_id = -1 }; + const pm_node_location_t location = { .line = lineno, .node_id = -1 }; LABEL *lend = NEW_LABEL(location.line); int again = !(flip_flop_node->base.flags & PM_RANGE_FLAGS_EXCLUDE_END); @@ -948,22 +1056,22 @@ pm_compile_flip_flop(const pm_flip_flop_node_t *flip_flop_node, LABEL *else_labe static void pm_compile_defined_expr(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, bool in_condition); static void -pm_compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_node_t *cond, LABEL *then_label, LABEL *else_label, bool popped, pm_scope_node_t *scope_node) +pm_compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_node_t *cond, LABEL *then_label, LABEL *else_label, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, cond); + const pm_node_location_t location = PM_NODE_START_LOCATION(cond); again: switch (PM_NODE_TYPE(cond)) { case PM_AND_NODE: { const pm_and_node_t *cast = (const pm_and_node_t *) cond; - pm_compile_logical(iseq, ret, cast->left, NULL, else_label, popped, scope_node); + pm_compile_logical(iseq, ret, cast->left, NULL, else_label, scope_node); cond = cast->right; goto again; } case PM_OR_NODE: { const pm_or_node_t *cast = (const pm_or_node_t *) cond; - pm_compile_logical(iseq, ret, cast->left, then_label, NULL, popped, scope_node); + pm_compile_logical(iseq, ret, cast->left, then_label, NULL, scope_node); cond = cast->right; goto again; @@ -984,11 +1092,11 @@ again: PUSH_INSNL(ret, location, jump, then_label); return; case PM_FLIP_FLOP_NODE: - pm_compile_flip_flop((const pm_flip_flop_node_t *) cond, else_label, then_label, iseq, location.line, ret, popped, scope_node); + pm_compile_flip_flop((const pm_flip_flop_node_t *) cond, else_label, then_label, iseq, location.line, ret, false, scope_node); return; case PM_DEFINED_NODE: { const pm_defined_node_t *cast = (const pm_defined_node_t *) cond; - pm_compile_defined_expr(iseq, cast->value, &location, ret, popped, scope_node, true); + pm_compile_defined_expr(iseq, cast->value, &location, ret, false, scope_node, true); break; } default: { @@ -1032,7 +1140,7 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_node_location_t *node_location, LABEL *end_label = NULL; DECL_ANCHOR(cond_seq); - pm_compile_branch_condition(iseq, cond_seq, predicate, then_label, else_label, false, scope_node); + pm_compile_branch_condition(iseq, cond_seq, predicate, then_label, else_label, scope_node); PUSH_SEQ(ret, cond_seq); rb_code_location_t conditional_location = { 0 }; @@ -1063,7 +1171,7 @@ pm_compile_conditional(rb_iseq_t *iseq, const pm_node_location_t *node_location, if (statements != NULL) { branch_location = pm_code_location(scope_node, (const pm_node_t *) statements); } else if (type == PM_IF_NODE) { - pm_line_column_t predicate_end = PM_NODE_END_LINE_COLUMN(scope_node->parser, predicate); + pm_line_column_t predicate_end = PM_NODE_END_LINE_COLUMN(predicate); branch_location = (rb_code_location_t) { .beg_pos = { .lineno = predicate_end.line, .column = predicate_end.column }, .end_pos = { .lineno = predicate_end.line, .column = predicate_end.column } @@ -1180,10 +1288,10 @@ pm_compile_loop(rb_iseq_t *iseq, const pm_node_location_t *node_location, pm_nod PUSH_LABEL(ret, next_label); if (type == PM_WHILE_NODE) { - pm_compile_branch_condition(iseq, ret, predicate, redo_label, end_label, popped, scope_node); + pm_compile_branch_condition(iseq, ret, predicate, redo_label, end_label, scope_node); } else if (type == PM_UNTIL_NODE) { - pm_compile_branch_condition(iseq, ret, predicate, end_label, redo_label, popped, scope_node); + pm_compile_branch_condition(iseq, ret, predicate, end_label, redo_label, scope_node); } PUSH_LABEL(ret, end_label); @@ -1212,14 +1320,15 @@ static pm_local_index_t pm_lookup_local_index(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, pm_constant_id_t constant_id, int start_depth) { pm_local_index_t lindex = { 0 }; - st_data_t local_index; + int local_index; int level; for (level = 0; level < start_depth; level++) { scope_node = scope_node->previous; } - while (!st_lookup(scope_node->index_lookup_table, constant_id, &local_index)) { + while (!pm_index_lookup_table_lookup(&scope_node->index_lookup_table, constant_id, &local_index)) + { level++; if (scope_node->previous) { @@ -1243,12 +1352,10 @@ pm_lookup_local_index(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, pm_con // We add a constants mapping on the scope_node which is a mapping from // these constant_id indexes to the CRuby IDs that they represent. // This helper method allows easy access to those IDs -static ID +static inline ID pm_constant_id_lookup(const pm_scope_node_t *scope_node, pm_constant_id_t constant_id) { - if (constant_id < 1 || constant_id > scope_node->parser->constant_pool.size) { - rb_bug("constant_id out of range: %u", (unsigned int)constant_id); - } + RUBY_ASSERT(constant_id >= 1 && constant_id <= pm_parser_constants_size(scope_node->parser)); return scope_node->constants[constant_id - 1]; } @@ -1257,31 +1364,46 @@ pm_new_child_iseq(rb_iseq_t *iseq, pm_scope_node_t *node, VALUE name, const rb_i { debugs("[new_child_iseq]> ---------------------------------------\n"); int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth; - int error_state; - rb_iseq_t *ret_iseq = pm_iseq_new_with_opt(node, name, + rb_iseq_t *ret_iseq = pm_iseq_build(node, name, rb_iseq_path(iseq), rb_iseq_realpath(iseq), line_no, parent, isolated_depth ? isolated_depth + 1 : 0, - type, ISEQ_COMPILE_DATA(iseq)->option, &error_state); - - if (error_state) { - RUBY_ASSERT(ret_iseq == NULL); - rb_jump_tag(error_state); - } + type, ISEQ_COMPILE_DATA(iseq)->option); debugs("[new_child_iseq]< ---------------------------------------\n"); return ret_iseq; } static int +pm_cpath_const_p(const pm_node_t *node) +{ + switch (PM_NODE_TYPE(node)) { + case PM_CONSTANT_READ_NODE: + return TRUE; + case PM_CONSTANT_PATH_NODE: + { + const pm_node_t *parent = ((const pm_constant_path_node_t *) node)->parent; + if (!parent) return TRUE; /* ::Foo */ + return pm_cpath_const_p(parent); + } + default: + return FALSE; + } +} + +static int pm_compile_class_path(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { if (PM_NODE_TYPE_P(node, PM_CONSTANT_PATH_NODE)) { const pm_node_t *parent = ((const pm_constant_path_node_t *) node)->parent; if (parent) { - /* Bar::Foo */ + /* Bar::Foo or expr::Foo */ PM_COMPILE(parent); - return VM_DEFINECLASS_FLAG_SCOPED; + int flags = VM_DEFINECLASS_FLAG_SCOPED; + if (!pm_cpath_const_p(parent)) { + flags |= VM_DEFINECLASS_FLAG_DYNAMIC_CREF; + } + return flags; } else { /* toplevel class ::Foo */ @@ -1361,7 +1483,7 @@ static void pm_compile_shareable_constant_value(rb_iseq_t *iseq, const pm_node_t static void pm_compile_hash_elements(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list_t *elements, const pm_node_flags_t shareability, VALUE path, bool argument, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); // If this element is not popped, then we need to create the hash on the // stack. Neighboring plain assoc nodes should be grouped together (either @@ -1434,10 +1556,10 @@ pm_compile_hash_elements(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l } index --; - VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2); + VALUE hash = rb_hash_alloc_fixed_size(Qfalse, RARRAY_LEN(ary) / 2); rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash); - hash = rb_obj_hide(hash); - OBJ_FREEZE(hash); + RB_GC_GUARD(ary); + RB_OBJ_SET_FROZEN_SHAREABLE(hash); // Emit optimized code. FLUSH_CHUNK; @@ -1761,9 +1883,14 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b break; } - orig_argc += 2; + if (has_splat) { + // If we already have a splat, we're concatenating to existing array + orig_argc += 1; + } else { + orig_argc += 2; + } - *flags |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_SPLAT_MUT | VM_CALL_ARGS_BLOCKARG | VM_CALL_KW_SPLAT; + *flags |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG | VM_CALL_KW_SPLAT; // Forwarding arguments nodes are treated as foo(*, **, &) // So foo(...) equals foo(*, **, &) and as such the local @@ -1772,7 +1899,13 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b // Push the * pm_local_index_t mult_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_MULT, 0); PUSH_GETLOCAL(ret, location, mult_local.index, mult_local.level); - PUSH_INSN1(ret, location, splatarray, Qtrue); + + if (has_splat) { + // If we already have a splat, we need to concatenate arrays + PUSH_INSN(ret, location, concattoarray); + } else { + PUSH_INSN1(ret, location, splatarray, Qfalse); + } // Push the ** pm_local_index_t pow_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_POW, 0); @@ -1781,7 +1914,6 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b // Push the & pm_local_index_t and_local = pm_lookup_local_index(iseq, scope_node, PM_CONSTANT_AND, 0); PUSH_INSN2(ret, location, getblockparamproxy, INT2FIX(and_local.index + VM_ENV_DATA_SIZE - 1), INT2FIX(and_local.level)); - PUSH_INSN(ret, location, splatkw); break; } @@ -1804,6 +1936,10 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b // foo(*a, b, c: :d) // foo(*a, b, **c) // + // If the next node is a forwarding argument: + // + // foo(*a, b, ...) + // // If the next node is NULL (we have hit the end): // // foo(*a, b) @@ -1826,6 +1962,10 @@ pm_setup_args_core(const pm_arguments_node_t *arguments_node, const pm_node_t *b PUSH_INSN(ret, location, concatarray); break; } + case PM_FORWARDING_ARGUMENTS_NODE: { + PUSH_INSN1(ret, location, pushtoarray, INT2FIX(post_splat_counter)); + break; + } default: break; } @@ -1854,7 +1994,6 @@ pm_setup_args_dup_rest_p(const pm_node_t *node) switch (PM_NODE_TYPE(node)) { case PM_BACK_REFERENCE_READ_NODE: case PM_CLASS_VARIABLE_READ_NODE: - case PM_CONSTANT_PATH_NODE: case PM_CONSTANT_READ_NODE: case PM_FALSE_NODE: case PM_FLOAT_NODE: @@ -1873,8 +2012,24 @@ pm_setup_args_dup_rest_p(const pm_node_t *node) case PM_SYMBOL_NODE: case PM_TRUE_NODE: return false; + case PM_CONSTANT_PATH_NODE: { + const pm_constant_path_node_t *cast = (const pm_constant_path_node_t *) node; + if (cast->parent != NULL) { + return pm_setup_args_dup_rest_p(cast->parent); + } + return false; + } case PM_IMPLICIT_NODE: return pm_setup_args_dup_rest_p(((const pm_implicit_node_t *) node)->value); + case PM_ARRAY_NODE: { + const pm_array_node_t *cast = (const pm_array_node_t *) node; + for (size_t index = 0; index < cast->elements.size; index++) { + if (pm_setup_args_dup_rest_p(cast->elements.nodes[index])) { + return true; + } + } + return false; + } default: return true; } @@ -2244,7 +2399,7 @@ pm_compile_index_control_flow_write_node(rb_iseq_t *iseq, const pm_node_t *node, // A forward declaration because this is the recursive function that handles // compiling a pattern. It can be reentered by nesting patterns, as in the case // of arrays or hashes. -static int pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *matched_label, LABEL *unmatched_label, bool in_single_pattern, bool in_alternation_pattern, bool use_deconstructed_cache, unsigned int base_index); +static int pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *matched_label, LABEL *unmatched_label, bool in_single_pattern, bool use_deconstructed_cache, unsigned int base_index); /** * This function generates the code to set up the error string and error_p @@ -2253,7 +2408,7 @@ static int pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, cons static int pm_compile_pattern_generic_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, VALUE message, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); LABEL *match_succeeded_label = NEW_LABEL(location.line); PUSH_INSN(ret, location, dup); @@ -2283,7 +2438,7 @@ pm_compile_pattern_generic_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, c static int pm_compile_pattern_length_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, VALUE message, VALUE length, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); LABEL *match_succeeded_label = NEW_LABEL(location.line); PUSH_INSN(ret, location, dup); @@ -2316,7 +2471,7 @@ pm_compile_pattern_length_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, co static int pm_compile_pattern_eqq_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); LABEL *match_succeeded_label = NEW_LABEL(location.line); PUSH_INSN(ret, location, dup); @@ -2350,10 +2505,10 @@ pm_compile_pattern_eqq_error(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const * label. */ static int -pm_compile_pattern_match(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *unmatched_label, bool in_single_pattern, bool in_alternation_pattern, bool use_deconstructed_cache, unsigned int base_index) +pm_compile_pattern_match(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *unmatched_label, bool in_single_pattern, bool use_deconstructed_cache, unsigned int base_index) { - LABEL *matched_label = NEW_LABEL(pm_node_line_number(scope_node->parser, node)); - CHECK(pm_compile_pattern(iseq, scope_node, node, ret, matched_label, unmatched_label, in_single_pattern, in_alternation_pattern, use_deconstructed_cache, base_index)); + LABEL *matched_label = NEW_LABEL(pm_node_line_number_cached(node, scope_node)); + CHECK(pm_compile_pattern(iseq, scope_node, node, ret, matched_label, unmatched_label, in_single_pattern, use_deconstructed_cache, base_index)); PUSH_LABEL(ret, matched_label); return COMPILE_OK; } @@ -2366,7 +2521,7 @@ pm_compile_pattern_match(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_ static int pm_compile_pattern_deconstruct(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *deconstruct_label, LABEL *match_failed_label, LABEL *deconstructed_label, LABEL *type_error_label, bool in_single_pattern, bool use_deconstructed_cache, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); if (use_deconstructed_cache) { PUSH_INSN1(ret, location, topn, INT2FIX(base_index + PM_PATTERN_BASE_INDEX_OFFSET_DECONSTRUCTED_CACHE)); @@ -2420,7 +2575,7 @@ pm_compile_pattern_deconstruct(rb_iseq_t *iseq, pm_scope_node_t *scope_node, con static int pm_compile_pattern_constant(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *match_failed_label, bool in_single_pattern, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); PUSH_INSN(ret, location, dup); PM_COMPILE_NOT_POPPED(node); @@ -2441,9 +2596,9 @@ pm_compile_pattern_constant(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const * responsible for compiling in those error raising instructions. */ static void -pm_compile_pattern_error_handler(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *done_label, bool popped) +pm_compile_pattern_error_handler(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *done_label, bool popped) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); LABEL *key_error_label = NEW_LABEL(location.line); LABEL *cleanup_label = NEW_LABEL(location.line); @@ -2500,9 +2655,9 @@ pm_compile_pattern_error_handler(rb_iseq_t *iseq, const pm_scope_node_t *scope_n * Compile a pattern matching expression. */ static int -pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *matched_label, LABEL *unmatched_label, bool in_single_pattern, bool in_alternation_pattern, bool use_deconstructed_cache, unsigned int base_index) +pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *matched_label, LABEL *unmatched_label, bool in_single_pattern, bool use_deconstructed_cache, unsigned int base_index) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); switch (PM_NODE_TYPE(node)) { case PM_ARRAY_PATTERN_NODE: { @@ -2560,7 +2715,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_INSN(ret, location, dup); PUSH_INSN1(ret, location, putobject, INT2FIX(index)); PUSH_SEND(ret, location, idAREF, INT2FIX(1)); - CHECK(pm_compile_pattern_match(iseq, scope_node, required, ret, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 1)); + CHECK(pm_compile_pattern_match(iseq, scope_node, required, ret, match_failed_label, in_single_pattern, false, base_index + 1)); } if (cast->rest != NULL) { @@ -2573,7 +2728,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_SEND(ret, location, idMINUS, INT2FIX(1)); PUSH_INSN1(ret, location, setn, INT2FIX(4)); PUSH_SEND(ret, location, idAREF, INT2FIX(2)); - CHECK(pm_compile_pattern_match(iseq, scope_node, ((const pm_splat_node_t *) cast->rest)->expression, ret, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 1)); + CHECK(pm_compile_pattern_match(iseq, scope_node, ((const pm_splat_node_t *) cast->rest)->expression, ret, match_failed_label, in_single_pattern, false, base_index + 1)); } else if (posts_size > 0) { PUSH_INSN(ret, location, dup); @@ -2593,7 +2748,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_INSN1(ret, location, topn, INT2FIX(3)); PUSH_SEND(ret, location, idPLUS, INT2FIX(1)); PUSH_SEND(ret, location, idAREF, INT2FIX(1)); - CHECK(pm_compile_pattern_match(iseq, scope_node, post, ret, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 1)); + CHECK(pm_compile_pattern_match(iseq, scope_node, post, ret, match_failed_label, in_single_pattern, false, base_index + 1)); } PUSH_INSN(ret, location, pop); @@ -2690,7 +2845,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t } PUSH_SEND(ret, location, idAREF, INT2FIX(1)); - CHECK(pm_compile_pattern_match(iseq, scope_node, cast->requireds.nodes[index], ret, next_loop_label, in_single_pattern, in_alternation_pattern, false, base_index + 4)); + CHECK(pm_compile_pattern_match(iseq, scope_node, cast->requireds.nodes[index], ret, next_loop_label, in_single_pattern, false, base_index + 4)); } const pm_splat_node_t *left = cast->left; @@ -2700,11 +2855,10 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_INSN1(ret, location, putobject, INT2FIX(0)); PUSH_INSN1(ret, location, topn, INT2FIX(2)); PUSH_SEND(ret, location, idAREF, INT2FIX(2)); - CHECK(pm_compile_pattern_match(iseq, scope_node, left->expression, ret, find_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 4)); + CHECK(pm_compile_pattern_match(iseq, scope_node, left->expression, ret, find_failed_label, in_single_pattern, false, base_index + 4)); } - RUBY_ASSERT(PM_NODE_TYPE_P(cast->right, PM_SPLAT_NODE)); - const pm_splat_node_t *right = (const pm_splat_node_t *) cast->right; + const pm_splat_node_t *right = cast->right; if (right->expression != NULL) { PUSH_INSN1(ret, location, topn, INT2FIX(3)); @@ -2713,7 +2867,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_SEND(ret, location, idPLUS, INT2FIX(1)); PUSH_INSN1(ret, location, topn, INT2FIX(3)); PUSH_SEND(ret, location, idAREF, INT2FIX(2)); - pm_compile_pattern_match(iseq, scope_node, right->expression, ret, find_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 4); + pm_compile_pattern_match(iseq, scope_node, right->expression, ret, find_failed_label, in_single_pattern, false, base_index + 4); } PUSH_INSNL(ret, location, jump, find_succeeded_label); @@ -2827,8 +2981,10 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t PUSH_INSN(ret, location, putnil); } else { + rb_obj_hide(keys); + RB_OBJ_SET_FROZEN_SHAREABLE(keys); PUSH_INSN1(ret, location, duparray, keys); - RB_OBJ_WRITTEN(iseq, Qundef, rb_obj_hide(keys)); + RB_OBJ_WRITTEN(iseq, Qundef, keys); } PUSH_SEND(ret, location, rb_intern("deconstruct_keys"), INT2FIX(1)); @@ -2864,6 +3020,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t { VALUE operand = rb_str_freeze(rb_sprintf("key not found: %+"PRIsVALUE, symbol)); + RB_OBJ_SET_SHAREABLE(operand); PUSH_INSN1(ret, location, putobject, operand); } @@ -2889,7 +3046,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t value = ((const pm_implicit_node_t *) value)->value; } - CHECK(pm_compile_pattern_match(iseq, scope_node, value, match_values, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 1)); + CHECK(pm_compile_pattern_match(iseq, scope_node, value, match_values, match_failed_label, in_single_pattern, false, base_index + 1)); } PUSH_SEQ(ret, match_values); @@ -2917,7 +3074,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t case PM_ASSOC_SPLAT_NODE: { const pm_assoc_splat_node_t *splat = (const pm_assoc_splat_node_t *) cast->rest; PUSH_INSN(ret, location, dup); - pm_compile_pattern_match(iseq, scope_node, splat->value, ret, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index + 1); + pm_compile_pattern_match(iseq, scope_node, splat->value, ret, match_failed_label, in_single_pattern, false, base_index + 1); break; } default: @@ -2962,8 +3119,8 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t LABEL *match_failed_label = NEW_LABEL(location.line); PUSH_INSN(ret, location, dup); - CHECK(pm_compile_pattern_match(iseq, scope_node, cast->value, ret, match_failed_label, in_single_pattern, in_alternation_pattern, use_deconstructed_cache, base_index + 1)); - CHECK(pm_compile_pattern(iseq, scope_node, (const pm_node_t *) cast->target, ret, matched_label, match_failed_label, in_single_pattern, in_alternation_pattern, false, base_index)); + CHECK(pm_compile_pattern_match(iseq, scope_node, cast->value, ret, match_failed_label, in_single_pattern, use_deconstructed_cache, base_index + 1)); + CHECK(pm_compile_pattern(iseq, scope_node, (const pm_node_t *) cast->target, ret, matched_label, match_failed_label, in_single_pattern, false, base_index)); PUSH_INSN(ret, location, putnil); PUSH_LABEL(ret, match_failed_label); @@ -2979,20 +3136,6 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t const pm_local_variable_target_node_t *cast = (const pm_local_variable_target_node_t *) node; pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, cast->name, cast->depth); - // If this local variable is being written from within an alternation - // pattern, then it cannot actually be added to the local table since - // it's ambiguous which value should be used. So instead we indicate - // this with a compile error. - if (in_alternation_pattern) { - ID id = pm_constant_id_lookup(scope_node, cast->name); - const char *name = rb_id2name(id); - - if (name && strlen(name) > 0 && name[0] != '_') { - COMPILE_ERROR(iseq, location.line, "illegal variable in alternative pattern (%"PRIsVALUE")", rb_id2str(id)); - return COMPILE_NG; - } - } - PUSH_SETLOCAL(ret, location, index.index, index.level); PUSH_INSNL(ret, location, jump, matched_label); break; @@ -3008,7 +3151,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // First, we're going to attempt to match against the left pattern. If // that pattern matches, then we'll skip matching the right pattern. PUSH_INSN(ret, location, dup); - CHECK(pm_compile_pattern(iseq, scope_node, cast->left, ret, matched_left_label, unmatched_left_label, in_single_pattern, true, use_deconstructed_cache, base_index + 1)); + CHECK(pm_compile_pattern(iseq, scope_node, cast->left, ret, matched_left_label, unmatched_left_label, in_single_pattern, use_deconstructed_cache, base_index + 1)); // If we get here, then we matched on the left pattern. In this case we // should pop out the duplicate value that we preemptively added to @@ -3021,7 +3164,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // If we get here, then we didn't match on the left pattern. In this // case we attempt to match against the right pattern. PUSH_LABEL(ret, unmatched_left_label); - CHECK(pm_compile_pattern(iseq, scope_node, cast->right, ret, matched_label, unmatched_label, in_single_pattern, true, use_deconstructed_cache, base_index)); + CHECK(pm_compile_pattern(iseq, scope_node, cast->right, ret, matched_label, unmatched_label, in_single_pattern, use_deconstructed_cache, base_index)); break; } case PM_PARENTHESES_NODE: @@ -3029,7 +3172,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // they do nothing since they can only wrap individual expressions and // not groups. In this case we'll recurse back into this same function // with the body of the parentheses. - return pm_compile_pattern(iseq, scope_node, ((const pm_parentheses_node_t *) node)->body, ret, matched_label, unmatched_label, in_single_pattern, in_alternation_pattern, use_deconstructed_cache, base_index); + return pm_compile_pattern(iseq, scope_node, ((const pm_parentheses_node_t *) node)->body, ret, matched_label, unmatched_label, in_single_pattern, use_deconstructed_cache, base_index); case PM_PINNED_EXPRESSION_NODE: // Pinned expressions are a way to match against the value of an // expression that should be evaluated at runtime. This looks like: @@ -3090,7 +3233,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // looks like: foo in ^@bar. To compile these, we compile the variable // that they hold. const pm_pinned_variable_node_t *cast = (const pm_pinned_variable_node_t *) node; - CHECK(pm_compile_pattern(iseq, scope_node, cast->variable, ret, matched_label, unmatched_label, in_single_pattern, in_alternation_pattern, true, base_index)); + CHECK(pm_compile_pattern(iseq, scope_node, cast->variable, ret, matched_label, unmatched_label, in_single_pattern, true, base_index)); break; } case PM_IF_NODE: @@ -3124,7 +3267,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t statement = cast->statements->body.nodes[0]; } - CHECK(pm_compile_pattern_match(iseq, scope_node, statement, ret, unmatched_label, in_single_pattern, in_alternation_pattern, use_deconstructed_cache, base_index)); + CHECK(pm_compile_pattern_match(iseq, scope_node, statement, ret, unmatched_label, in_single_pattern, use_deconstructed_cache, base_index)); PM_COMPILE_NOT_POPPED(predicate); if (in_single_pattern) { @@ -3167,7 +3310,7 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t // If we get here, then we have a node type that should not be in this // position. This would be a bug in the parser, because a different node // type should never have been created in this position in the tree. - rb_bug("Unexpected node type in pattern matching expression: %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Unexpected node type in pattern matching expression: %s", pm_node_type(PM_NODE_TYPE(node))); break; } @@ -3184,26 +3327,27 @@ pm_compile_pattern(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_node_t void pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_t *previous) { - // This is very important, otherwise the scope node could be seen as having - // certain flags set that _should not_ be set. - memset(scope, 0, sizeof(pm_scope_node_t)); + if (previous) { + // Copy inherited fields from the parent scope in one shot, then + // zero out the fields that are scope-specific. + *scope = *previous; + scope->locals = (pm_constant_id_list_t) { 0 }; + scope->parameters = NULL; + scope->body = NULL; + scope->local_table_for_iseq_size = 0; + scope->index_lookup_table = (pm_index_lookup_table_t) PM_INDEX_LOOKUP_TABLE_INIT; + scope->pre_execution_anchor = NULL; + } + else { + memset(scope, 0, sizeof(pm_scope_node_t)); + } scope->base.type = PM_SCOPE_NODE; scope->base.location.start = node->location.start; - scope->base.location.end = node->location.end; - + scope->base.location.length = node->location.length; scope->previous = previous; scope->ast_node = (pm_node_t *) node; - if (previous) { - scope->parser = previous->parser; - scope->encoding = previous->encoding; - scope->filepath_encoding = previous->filepath_encoding; - scope->constants = previous->constants; - scope->coverage_enabled = previous->coverage_enabled; - scope->script_lines = previous->script_lines; - } - switch (PM_NODE_TYPE(node)) { case PM_BLOCK_NODE: { const pm_block_node_t *cast = (const pm_block_node_t *) node; @@ -3231,7 +3375,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ if (cast->statements != NULL) { scope->base.location.start = cast->statements->base.location.start; - scope->base.location.end = cast->statements->base.location.end; + scope->base.location.length = cast->statements->base.location.length; } break; @@ -3251,13 +3395,6 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ scope->parameters = cast->parameters; scope->body = cast->body; scope->locals = cast->locals; - - if (cast->parameters != NULL) { - scope->base.location.start = cast->parameters->location.start; - } - else { - scope->base.location.start = cast->operator_loc.end; - } break; } case PM_MODULE_NODE: { @@ -3307,8 +3444,8 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ void pm_scope_node_destroy(pm_scope_node_t *scope_node) { - if (scope_node->index_lookup_table) { - st_free_table(scope_node->index_lookup_table); + if (scope_node->index_lookup_table.owned) { + xfree(scope_node->index_lookup_table.values); } } @@ -3377,7 +3514,7 @@ pm_iseq_builtin_function_name(const pm_scope_node_t *scope_node, const pm_node_t // Compile Primitive.attr! :leaf, ... static int -pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_node_location_t *node_location) +pm_compile_builtin_attr(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_arguments_node_t *arguments, const pm_node_location_t *node_location) { if (arguments == NULL) { COMPILE_ERROR(iseq, node_location->line, "attr!: no argument"); @@ -3387,7 +3524,7 @@ pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, cons const pm_node_t *argument; PM_NODE_LIST_FOREACH(&arguments->arguments, index, argument) { if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { - COMPILE_ERROR(iseq, node_location->line, "non symbol argument to attr!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to attr!: %s", pm_node_type(PM_NODE_TYPE(argument))); return COMPILE_NG; } @@ -3407,6 +3544,9 @@ pm_compile_builtin_attr(rb_iseq_t *iseq, const pm_scope_node_t *scope_node, cons // Let the iseq act like a C method in backtraces ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_C_TRACE; } + else if (strcmp(RSTRING_PTR(string), "without_interrupts") == 0) { + ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_WITHOUT_INTERRUPTS; + } else { COMPILE_ERROR(iseq, node_location->line, "unknown argument to attr!: %s", RSTRING_PTR(string)); return COMPILE_NG; @@ -3431,7 +3571,7 @@ pm_compile_builtin_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const pm_scope_n const pm_node_t *argument = arguments->arguments.nodes[0]; if (!PM_NODE_TYPE_P(argument, PM_SYMBOL_NODE)) { - COMPILE_ERROR(iseq, node_location->line, "non symbol argument to arg!: %s", pm_node_type_to_str(PM_NODE_TYPE(argument))); + COMPILE_ERROR(iseq, node_location->line, "non symbol argument to arg!: %s", pm_node_type(PM_NODE_TYPE(argument))); return COMPILE_NG; } @@ -3495,8 +3635,7 @@ pm_compile_builtin_mandatory_only_method(rb_iseq_t *iseq, pm_scope_node_t *scope pm_scope_node_t next_scope_node; pm_scope_node_init(&def.base, &next_scope_node, scope_node); - int error_state; - ISEQ_BODY(iseq)->mandatory_only_iseq = pm_iseq_new_with_opt( + const rb_iseq_t *mandatory_only_iseq = pm_iseq_build( &next_scope_node, rb_iseq_base_label(iseq), rb_iseq_path(iseq), @@ -3505,14 +3644,9 @@ pm_compile_builtin_mandatory_only_method(rb_iseq_t *iseq, pm_scope_node_t *scope NULL, 0, ISEQ_TYPE_METHOD, - ISEQ_COMPILE_DATA(iseq)->option, - &error_state + ISEQ_COMPILE_DATA(iseq)->option ); - - if (error_state) { - RUBY_ASSERT(ISEQ_BODY(iseq)->mandatory_only_iseq == NULL); - rb_jump_tag(error_state); - } + RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->mandatory_only_iseq, (VALUE)mandatory_only_iseq); pm_scope_node_destroy(&next_scope_node); return COMPILE_OK; @@ -3617,9 +3751,10 @@ static void pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, ID method_id, LABEL *start) { const pm_location_t *message_loc = &call_node->message_loc; - if (message_loc->start == NULL) message_loc = &call_node->base.location; + if (message_loc->length == 0) message_loc = &call_node->base.location; + + const pm_node_location_t location = PM_LOCATION_START_LOCATION(message_loc, call_node->base.node_id); - const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, call_node->base.node_id); LABEL *else_label = NEW_LABEL(location.line); LABEL *end_label = NEW_LABEL(location.line); LABEL *retry_end_l = NEW_LABEL(location.line); @@ -3630,19 +3765,37 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { if (PM_BRANCH_COVERAGE_P(iseq)) { - const uint8_t *cursors[3] = { - call_node->closing_loc.end, - call_node->arguments == NULL ? NULL : call_node->arguments->base.location.end, - call_node->message_loc.end - }; + uint32_t end_cursor = 0; + bool end_found = false; - const uint8_t *end_cursor = cursors[0]; - end_cursor = (end_cursor == NULL || cursors[1] == NULL) ? cursors[1] : (end_cursor > cursors[1] ? end_cursor : cursors[1]); - end_cursor = (end_cursor == NULL || cursors[2] == NULL) ? cursors[2] : (end_cursor > cursors[2] ? end_cursor : cursors[2]); - if (!end_cursor) end_cursor = call_node->closing_loc.end; + if (call_node->closing_loc.length > 0) { + uint32_t cursor = call_node->closing_loc.start + call_node->closing_loc.length; + end_cursor = cursor; + end_found = true; + } - const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, call_node); - const pm_line_column_t end_location = pm_newline_list_line_column(&scope_node->parser->newline_list, end_cursor, scope_node->parser->start_line); + if (call_node->arguments != NULL) { + uint32_t cursor = call_node->arguments->base.location.start + call_node->arguments->base.location.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } + + if (call_node->message_loc.length > 0) { + uint32_t cursor = call_node->message_loc.start + call_node->message_loc.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } + + if (!end_found) { + end_cursor = call_node->closing_loc.start + call_node->closing_loc.length; + } + + const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(call_node); + const pm_line_column_t end_location = pm_line_offset_list_line_column_cached(scope_node->line_offsets, end_cursor, scope_node->start_line, &scope_node->last_line); code_location = (rb_code_location_t) { .beg_pos = { .lineno = start_location.line, .column = start_location.column }, @@ -3658,6 +3811,8 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c add_trace_branch_coverage(iseq, ret, &code_location, node_id, 0, "then", branches); } + LINK_ELEMENT *opt_new_prelude = LAST_ELEMENT(ret); + int flags = 0; struct rb_callinfo_kwarg *kw_arg = NULL; @@ -3670,7 +3825,7 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c pm_scope_node_t next_scope_node; pm_scope_node_init(call_node->block, &next_scope_node, scope_node); - block_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, pm_node_line_number(scope_node->parser, call_node->block)); + block_iseq = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, pm_node_line_number_cached(call_node->block, scope_node)); pm_scope_node_destroy(&next_scope_node); ISEQ_COMPILE_DATA(iseq)->current_block = block_iseq; } @@ -3714,7 +3869,50 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c PUSH_INSN(ret, location, splatkw); } - PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg); + LABEL *not_basic_new = NEW_LABEL(location.line); + LABEL *not_basic_new_finish = NEW_LABEL(location.line); + + bool inline_new = ISEQ_COMPILE_DATA(iseq)->option->specialized_instruction && + method_id == rb_intern("new") && + call_node->block == NULL && + (flags & VM_CALL_ARGS_BLOCKARG) == 0; + + if (inline_new) { + if (LAST_ELEMENT(ret) == opt_new_prelude) { + PUSH_INSN(ret, location, putnil); + PUSH_INSN(ret, location, swap); + } + else { + ELEM_INSERT_NEXT(opt_new_prelude, &new_insn_body(iseq, location.line, location.node_id, BIN(swap), 0)->link); + ELEM_INSERT_NEXT(opt_new_prelude, &new_insn_body(iseq, location.line, location.node_id, BIN(putnil), 0)->link); + } + + // Jump unless the receiver uses the "basic" implementation of "new" + VALUE ci; + if (flags & VM_CALL_FORWARDING) { + ci = (VALUE)new_callinfo(iseq, method_id, orig_argc + 1, flags, kw_arg, 0); + } + else { + ci = (VALUE)new_callinfo(iseq, method_id, orig_argc, flags, kw_arg, 0); + } + + PUSH_INSN2(ret, location, opt_new, ci, not_basic_new); + LABEL_REF(not_basic_new); + // optimized path + PUSH_SEND_R(ret, location, rb_intern("initialize"), INT2FIX(orig_argc), block_iseq, INT2FIX(flags | VM_CALL_FCALL), kw_arg); + PUSH_INSNL(ret, location, jump, not_basic_new_finish); + + PUSH_LABEL(ret, not_basic_new); + // Fall back to normal send + PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg); + PUSH_INSN(ret, location, swap); + + PUSH_LABEL(ret, not_basic_new_finish); + PUSH_INSN(ret, location, pop); + } + else { + PUSH_SEND_R(ret, location, method_id, INT2FIX(orig_argc), block_iseq, INT2FIX(flags), kw_arg); + } if (block_iseq && ISEQ_BODY(block_iseq)->catch_table) { pm_compile_retry_end_label(iseq, ret, retry_end_l); @@ -3741,9 +3939,9 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c * node. */ static inline VALUE -pm_compile_back_reference_ref(const pm_back_reference_read_node_t *node) +pm_compile_back_reference_ref(const pm_scope_node_t *scope_node, const pm_back_reference_read_node_t *node) { - const char *type = (const char *) (node->base.location.start + 1); + const char *type = (const char *) (pm_parser_start(scope_node->parser) + node->base.location.start + 1); // Since a back reference is `$<char>`, Ruby represents the ID as an // rb_intern on the value after the `$`. @@ -4134,7 +4332,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l // defined?($+) // ^^ const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; - VALUE ref = pm_compile_back_reference_ref(cast); + VALUE ref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN(ret, location, putnil); PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_REF), ref, PUSH_VAL(DEFINED_GVAR)); @@ -4213,7 +4411,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l // If we have empty parentheses, then we want to return "nil". dtype = DEFINED_NIL; } - else if (PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE) && ((const pm_statements_node_t *) cast->body)->body.size == 1) { + else if (PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE) && !PM_NODE_FLAG_P(cast, PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS)) { // If we have a parentheses node that is wrapping a single statement // then we want to recurse down to that statement and compile it. pm_compile_defined_expr0(iseq, ((const pm_statements_node_t *) cast->body)->body.nodes[0], node_location, ret, popped, scope_node, in_condition, lfinish, false); @@ -4422,6 +4620,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l case PM_PARAMETERS_NODE: case PM_KEYWORD_REST_PARAMETER_NODE: case PM_NO_KEYWORDS_PARAMETER_NODE: + case PM_NO_BLOCK_PARAMETER_NODE: case PM_NUMBERED_PARAMETERS_NODE: case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: case PM_OPTIONAL_PARAMETER_NODE: @@ -4456,12 +4655,12 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l case PM_BLOCK_NODE: case PM_EMBEDDED_STATEMENTS_NODE: case PM_EMBEDDED_VARIABLE_NODE: - case PM_MISSING_NODE: + case PM_ERROR_RECOVERY_NODE: case PM_PRE_EXECUTION_NODE: case PM_PROGRAM_NODE: case PM_SCOPE_NODE: case PM_STATEMENTS_NODE: - rb_bug("Unreachable node in defined?: %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Unreachable node in defined?: %s", pm_node_type(PM_NODE_TYPE(node))); } RUBY_ASSERT(dtype != DEFINED_NOT_DEFINED); @@ -4566,33 +4765,7 @@ pm_add_ensure_iseq(LINK_ANCHOR *const ret, rb_iseq_t *iseq, int is_return, pm_sc PUSH_SEQ(ret, ensure); } -struct pm_local_table_insert_ctx { - pm_scope_node_t *scope_node; - rb_ast_id_table_t *local_table_for_iseq; - int local_index; -}; - -static int -pm_local_table_insert_func(st_data_t *key, st_data_t *value, st_data_t arg, int existing) -{ - if (!existing) { - pm_constant_id_t constant_id = (pm_constant_id_t) *key; - struct pm_local_table_insert_ctx * ctx = (struct pm_local_table_insert_ctx *) arg; - pm_scope_node_t *scope_node = ctx->scope_node; - rb_ast_id_table_t *local_table_for_iseq = ctx->local_table_for_iseq; - int local_index = ctx->local_index; - - ID local = pm_constant_id_lookup(scope_node, constant_id); - local_table_for_iseq->ids[local_index] = local; - - *value = (st_data_t)local_index; - - ctx->local_index++; - } - - return ST_CONTINUE; -} /** * Insert a local into the local table for the iseq. This is used to create the @@ -4600,24 +4773,23 @@ pm_local_table_insert_func(st_data_t *key, st_data_t *value, st_data_t arg, int * inserted are regular named locals, as opposed to special forwarding locals. */ static void -pm_insert_local_index(pm_constant_id_t constant_id, int local_index, st_table *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq, pm_scope_node_t *scope_node) +pm_insert_local_index(pm_constant_id_t constant_id, int local_index, pm_index_lookup_table_t *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq, pm_scope_node_t *scope_node) { RUBY_ASSERT((constant_id & PM_SPECIAL_CONSTANT_FLAG) == 0); ID local = pm_constant_id_lookup(scope_node, constant_id); local_table_for_iseq->ids[local_index] = local; - st_insert(index_lookup_table, (st_data_t) constant_id, (st_data_t) local_index); + pm_index_lookup_table_insert(index_lookup_table, constant_id, local_index); } /** - * Insert a local into the local table for the iseq that is a special forwarding - * local variable. + * Insert a special forwarding local (*, **, &, ...) into the local table. */ static void -pm_insert_local_special(ID local_name, int local_index, st_table *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq) +pm_insert_local_special(pm_constant_id_t special_id, ID local_name, int local_index, pm_index_lookup_table_t *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq) { local_table_for_iseq->ids[local_index] = local_name; - st_insert(index_lookup_table, (st_data_t) (local_name | PM_SPECIAL_CONSTANT_FLAG), (st_data_t) local_index); + pm_index_lookup_table_insert(index_lookup_table, special_id, local_index); } /** @@ -4627,7 +4799,7 @@ pm_insert_local_special(ID local_name, int local_index, st_table *index_lookup_t * local and index lookup tables and increments the local index as necessary. */ static int -pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_table *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq, pm_scope_node_t *scope_node, int local_index) +pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, pm_index_lookup_table_t *index_lookup_table, rb_ast_id_table_t *local_table_for_iseq, pm_scope_node_t *scope_node, int local_index) { for (size_t index = 0; index < node->lefts.size; index++) { const pm_node_t *left = node->lefts.nodes[index]; @@ -4680,9 +4852,9 @@ pm_compile_destructured_param_locals(const pm_multi_target_node_t *node, st_tabl * as a positional parameter in a method, block, or lambda definition. */ static inline void -pm_compile_destructured_param_write(rb_iseq_t *iseq, const pm_required_parameter_node_t *node, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node) +pm_compile_destructured_param_write(rb_iseq_t *iseq, const pm_required_parameter_node_t *node, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); pm_local_index_t index = pm_lookup_local_index(iseq, scope_node, node->name, 0); PUSH_SETLOCAL(ret, location, index.index, index.level); } @@ -4696,9 +4868,9 @@ pm_compile_destructured_param_write(rb_iseq_t *iseq, const pm_required_parameter * for this simplified case. */ static void -pm_compile_destructured_param_writes(rb_iseq_t *iseq, const pm_multi_target_node_t *node, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node) +pm_compile_destructured_param_writes(rb_iseq_t *iseq, const pm_multi_target_node_t *node, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); bool has_rest = (node->rest && PM_NODE_TYPE_P(node->rest, PM_SPLAT_NODE) && (((const pm_splat_node_t *) node->rest)->expression) != NULL); bool has_rights = node->rights.size > 0; @@ -4857,7 +5029,7 @@ pm_multi_target_state_update(pm_multi_target_state_t *state) previous = current; current = current->next; - xfree(previous); + SIZED_FREE(previous); } } @@ -4895,7 +5067,7 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR static void pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); switch (PM_NODE_TYPE(node)) { case PM_LOCAL_VARIABLE_TARGET_NODE: { @@ -5117,8 +5289,22 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons break; } + case PM_SPLAT_NODE: { + // Splat nodes capture all values into an array. They can be used + // as targets in assignments or for loops. + // + // for *x in []; end + // + const pm_splat_node_t *cast = (const pm_splat_node_t *) node; + + if (cast->expression != NULL) { + pm_compile_target_node(iseq, cast->expression, parents, writes, cleanup, scope_node, state); + } + + break; + } default: - rb_bug("Unexpected node type: %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Unexpected node type: %s", pm_node_type(PM_NODE_TYPE(node))); break; } } @@ -5131,7 +5317,7 @@ pm_compile_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *cons static void pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const parents, LINK_ANCHOR *const writes, LINK_ANCHOR *const cleanup, pm_scope_node_t *scope_node, pm_multi_target_state_t *state) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); const pm_node_list_t *lefts; const pm_node_t *rest; const pm_node_list_t *rights; @@ -5152,7 +5338,7 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR break; } default: - rb_bug("Unsupported node %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Unsupported node %s", pm_node_type(PM_NODE_TYPE(node))); break; } @@ -5214,7 +5400,7 @@ pm_compile_multi_target_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR static void pm_compile_for_node_index(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); switch (PM_NODE_TYPE(node)) { case PM_LOCAL_VARIABLE_TARGET_NODE: { @@ -5250,6 +5436,7 @@ pm_compile_for_node_index(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *c pm_multi_target_state_update(&state); break; } + case PM_SPLAT_NODE: case PM_MULTI_TARGET_NODE: { DECL_ANCHOR(writes); DECL_ANCHOR(cleanup); @@ -5285,12 +5472,18 @@ pm_compile_for_node_index(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *c PUSH_INSN(ret, location, pop); PUSH_LABEL(ret, not_single); + + if (PM_NODE_TYPE_P(node, PM_SPLAT_NODE)) { + const pm_splat_node_t *cast = (const pm_splat_node_t *) node; + PUSH_INSN2(ret, location, expandarray, INT2FIX(0), INT2FIX(cast->expression == NULL ? 0 : 1)); + } + PUSH_SEQ(ret, writes); PUSH_SEQ(ret, cleanup); break; } default: - rb_bug("Unexpected node type for index in for node: %s", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_bug("Unexpected node type for index in for node: %s", pm_node_type(PM_NODE_TYPE(node))); break; } } @@ -5298,8 +5491,6 @@ pm_compile_for_node_index(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *c static void pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_parser_t *parser = scope_node->parser; - LABEL *lstart = NEW_LABEL(node_location->line); LABEL *lend = NEW_LABEL(node_location->line); LABEL *lcont = NEW_LABEL(node_location->line); @@ -5311,7 +5502,7 @@ pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_node_lo &rescue_scope_node, rb_str_concat(rb_str_new2("rescue in "), ISEQ_BODY(iseq)->location.label), ISEQ_TYPE_RESCUE, - pm_node_line_number(parser, (const pm_node_t *) cast->rescue_clause) + pm_node_line_number_cached((const pm_node_t *) cast->rescue_clause, scope_node) ); pm_scope_node_destroy(&rescue_scope_node); @@ -5327,7 +5518,7 @@ pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_node_lo PM_COMPILE_NOT_POPPED((const pm_node_t *) cast->statements); } else { - const pm_node_location_t location = PM_NODE_START_LOCATION(parser, cast->rescue_clause); + const pm_node_location_t location = PM_NODE_START_LOCATION(cast->rescue_clause); PUSH_INSN(ret, location, putnil); } @@ -5350,12 +5541,11 @@ pm_compile_rescue(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_node_lo static void pm_compile_ensure(rb_iseq_t *iseq, const pm_begin_node_t *cast, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_parser_t *parser = scope_node->parser; const pm_statements_node_t *statements = cast->ensure_clause->statements; pm_node_location_t location; if (statements != NULL) { - location = PM_NODE_START_LOCATION(parser, statements); + location = PM_NODE_START_LOCATION(statements); } else { location = *node_location; @@ -5439,55 +5629,18 @@ pm_opt_str_freeze_p(const rb_iseq_t *iseq, const pm_call_node_t *node) } /** - * Returns true if the given call node can use the opt_aref_with optimization - * with the current iseq options. - */ -static inline bool -pm_opt_aref_with_p(const rb_iseq_t *iseq, const pm_call_node_t *node) -{ - return ( - !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION) && - node->arguments != NULL && - PM_NODE_TYPE_P((const pm_node_t *) node->arguments, PM_ARGUMENTS_NODE) && - ((const pm_arguments_node_t *) node->arguments)->arguments.size == 1 && - PM_NODE_TYPE_P(((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0], PM_STRING_NODE) && - node->block == NULL && - !PM_NODE_FLAG_P(((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0], PM_STRING_FLAGS_FROZEN) && - ISEQ_COMPILE_DATA(iseq)->option->specialized_instruction - ); -} - -/** - * Returns true if the given call node can use the opt_aset_with optimization - * with the current iseq options. - */ -static inline bool -pm_opt_aset_with_p(const rb_iseq_t *iseq, const pm_call_node_t *node) -{ - return ( - !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION) && - node->arguments != NULL && - PM_NODE_TYPE_P((const pm_node_t *) node->arguments, PM_ARGUMENTS_NODE) && - ((const pm_arguments_node_t *) node->arguments)->arguments.size == 2 && - PM_NODE_TYPE_P(((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0], PM_STRING_NODE) && - node->block == NULL && - !PM_NODE_FLAG_P(((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0], PM_STRING_FLAGS_FROZEN) && - ISEQ_COMPILE_DATA(iseq)->option->specialized_instruction - ); -} - -/** * Compile the instructions necessary to read a constant, based on the options * of the current iseq. */ static void -pm_compile_constant_read(rb_iseq_t *iseq, VALUE name, const pm_location_t *name_loc, uint32_t node_id, LINK_ANCHOR *const ret, const pm_scope_node_t *scope_node) +pm_compile_constant_read(rb_iseq_t *iseq, VALUE name, const pm_location_t *name_loc, uint32_t node_id, LINK_ANCHOR *const ret, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, name_loc, node_id); + const pm_node_location_t location = PM_LOCATION_START_LOCATION(name_loc, node_id); if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { ISEQ_BODY(iseq)->ic_size++; VALUE segments = rb_ary_new_from_args(1, name); + RB_OBJ_SET_SHAREABLE(segments); PUSH_INSN1(ret, location, opt_getconstant_path, segments); } else { @@ -5542,7 +5695,7 @@ pm_constant_path_parts(const pm_node_t *node, const pm_scope_node_t *scope_node) static void pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const prefix, LINK_ANCHOR *const body, bool popped, pm_scope_node_t *scope_node) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); switch (PM_NODE_TYPE(node)) { case PM_CONSTANT_READ_NODE: { @@ -5580,7 +5733,7 @@ pm_compile_constant_path(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *co * Return the object that will be pushed onto the stack for the given node. */ static VALUE -pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, const pm_scope_node_t *scope_node) +pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, pm_scope_node_t *scope_node) { switch (PM_NODE_TYPE(node)) { case PM_TRUE_NODE: @@ -5614,7 +5767,7 @@ pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, co } case PM_HASH_NODE: { const pm_hash_node_t *cast = (const pm_hash_node_t *) node; - VALUE result = rb_hash_new_capa(cast->elements.size); + VALUE result = rb_hash_alloc_fixed_size(rb_cHash, cast->elements.size); for (size_t index = 0; index < cast->elements.size; index++) { const pm_node_t *element = cast->elements.nodes[index]; @@ -5647,12 +5800,12 @@ pm_compile_shareable_constant_value(rb_iseq_t *iseq, const pm_node_t *node, cons { VALUE literal = pm_compile_shareable_constant_literal(iseq, node, scope_node); if (literal != Qundef) { - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); PUSH_INSN1(ret, location, putobject, literal); return; } - const pm_node_location_t location = PM_NODE_START_LOCATION(scope_node->parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); switch (PM_NODE_TYPE(node)) { case PM_ARRAY_NODE: { const pm_array_node_t *cast = (const pm_array_node_t *) node; @@ -5701,6 +5854,9 @@ pm_compile_shareable_constant_value(rb_iseq_t *iseq, const pm_node_t *node, cons if (shareability & PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL) { PUSH_INSN1(ret, location, putspecialobject, INT2FIX(VM_SPECIAL_OBJECT_VMCORE)); PUSH_SEQ(ret, value_seq); + if (!RB_OBJ_SHAREABLE_P(path)) { + RB_OBJ_SET_SHAREABLE(path); + } PUSH_INSN1(ret, location, putobject, path); PUSH_SEND_WITH_FLAG(ret, location, rb_intern("ensure_shareable"), INT2FIX(2), INT2FIX(VM_CALL_ARGS_SIMPLE)); } @@ -6114,7 +6270,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod body->param.flags.ambiguous_param0 = true; break; default: - rb_bug("Unexpected node type for parameters: %s", pm_node_type_to_str(PM_NODE_TYPE(scope_node->parameters))); + rb_bug("Unexpected node type for parameters: %s", pm_node_type(PM_NODE_TYPE(scope_node->parameters))); } } @@ -6139,8 +6295,9 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod // hidden variables and multi target nodes size_t locals_size = locals->size; - // Index lookup table buffer size is only the number of the locals - st_table *index_lookup_table = st_init_numtable(); + // Index lookup table buffer size is only the number of the locals. + // We'll initialize it after computing table_size below. + pm_index_lookup_table_t index_lookup_table = PM_INDEX_LOOKUP_TABLE_INIT; int table_size = (int) locals_size; @@ -6251,7 +6408,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod } } - if (parameters_node && parameters_node->block) { + if (parameters_node && parameters_node->block && PM_NODE_TYPE_P(parameters_node->block, PM_BLOCK_PARAMETER_NODE)) { const pm_block_parameter_node_t *block_node = (const pm_block_parameter_node_t *) parameters_node->block; if (PM_NODE_FLAG_P(block_node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER) || !block_node->name) { @@ -6264,6 +6421,10 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod rb_ast_id_table_t *local_table_for_iseq = ALLOCV(idtmp, sizeof(rb_ast_id_table_t) + table_size * sizeof(ID)); local_table_for_iseq->size = table_size; + // Init the direct-indexed lookup table. The capacity is based on the + // parser's constant pool size (for regular locals) plus special slots. + pm_index_lookup_table_init(&index_lookup_table, (int) pm_parser_constants_size(scope_node->parser), iseq); + //********END OF STEP 1********** //********STEP 2********** @@ -6316,13 +6477,13 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(param->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(param->name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } break; } default: - rb_bug("Unsupported node in requireds in parameters %s", pm_node_type_to_str(PM_NODE_TYPE(required))); + rb_bug("Unsupported node in requireds in parameters %s", pm_node_type(PM_NODE_TYPE(required))); } } @@ -6331,8 +6492,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod } if (scope_node->parameters != NULL && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { - ID local = rb_make_temporary_id(local_index); - local_table_for_iseq->ids[local_index++] = local; + local_table_for_iseq->ids[local_index++] = idItImplicit; } // def foo(a, (b, *c, d), e = 1, *f, g, (h, *i, j), k:, l: 1, **m, &n) @@ -6350,7 +6510,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } } } @@ -6376,14 +6536,14 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } } else { // def foo(a, (b, *c, d), e = 1, *, g, (h, *i, j), k:, l: 1, **m, &n) // ^ body->param.flags.anon_rest = true; - pm_insert_local_special(idMULT, local_index, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_MULT, idMULT, local_index, &index_lookup_table, local_table_for_iseq); } local_index++; @@ -6423,12 +6583,12 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(param->name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(param->name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } break; } default: - rb_bug("Unsupported node in posts in parameters %s", pm_node_type_to_str(PM_NODE_TYPE(post_node))); + rb_bug("Unsupported node in posts in parameters %s", pm_node_type(PM_NODE_TYPE(post_node))); } } } @@ -6458,7 +6618,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } local_index++; } @@ -6488,7 +6648,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } local_index++; } @@ -6550,12 +6710,12 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod local_table_for_iseq->ids[local_index] = local; } else { - pm_insert_local_index(constant_id, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(constant_id, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } } else { body->param.flags.anon_kwrest = true; - pm_insert_local_special(idPow, local_index, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_POW, idPow, local_index, &index_lookup_table, local_table_for_iseq); } local_index++; @@ -6569,7 +6729,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod body->param.rest_start = local_index; body->param.flags.has_rest = true; body->param.flags.anon_rest = true; - pm_insert_local_special(idMULT, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_MULT, idMULT, local_index++, &index_lookup_table, local_table_for_iseq); // Add the anonymous ** RUBY_ASSERT(!body->param.flags.has_kw); @@ -6578,46 +6738,58 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod body->param.flags.anon_kwrest = true; body->param.keyword = keyword = ZALLOC_N(struct rb_iseq_param_keyword, 1); keyword->rest_start = local_index; - pm_insert_local_special(idPow, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_POW, idPow, local_index++, &index_lookup_table, local_table_for_iseq); // Add the anonymous & body->param.block_start = local_index; body->param.flags.has_block = true; - pm_insert_local_special(idAnd, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_AND, idAnd, local_index++, &index_lookup_table, local_table_for_iseq); } // Add the ... - pm_insert_local_special(idDot3, local_index++, index_lookup_table, local_table_for_iseq); + pm_insert_local_special(PM_CONSTANT_DOT3, idDot3, local_index++, &index_lookup_table, local_table_for_iseq); break; } default: - rb_bug("node type %s not expected as keyword_rest", pm_node_type_to_str(PM_NODE_TYPE(parameters_node->keyword_rest))); + rb_bug("node type %s not expected as keyword_rest", pm_node_type(PM_NODE_TYPE(parameters_node->keyword_rest))); } } // def foo(a, (b, *c, d), e = 1, *f, g, (h, *i, j), k:, l: 1, **m, &n) // ^^ if (parameters_node->block) { - body->param.block_start = local_index; - body->param.flags.has_block = true; - iseq_set_use_block(iseq); + switch (PM_NODE_TYPE(parameters_node->block)) { + case PM_BLOCK_PARAMETER_NODE: { + body->param.block_start = local_index; + body->param.flags.has_block = true; - pm_constant_id_t name = ((const pm_block_parameter_node_t *) parameters_node->block)->name; + iseq_set_use_block(iseq); - if (name) { - if (PM_NODE_FLAG_P(parameters_node->block, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { - ID local = pm_constant_id_lookup(scope_node, name); - local_table_for_iseq->ids[local_index] = local; + pm_constant_id_t name = ((const pm_block_parameter_node_t *) parameters_node->block)->name; + + if (name) { + if (PM_NODE_FLAG_P(parameters_node->block, PM_PARAMETER_FLAGS_REPEATED_PARAMETER)) { + ID local = pm_constant_id_lookup(scope_node, name); + local_table_for_iseq->ids[local_index] = local; + } + else { + pm_insert_local_index(name, local_index, &index_lookup_table, local_table_for_iseq, scope_node); + } } else { - pm_insert_local_index(name, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_special(PM_CONSTANT_AND, idAnd, local_index, &index_lookup_table, local_table_for_iseq); } - } - else { - pm_insert_local_special(idAnd, local_index, index_lookup_table, local_table_for_iseq); - } - local_index++; + local_index++; + break; + } + case PM_NO_BLOCK_PARAMETER_NODE: { + body->param.flags.accepts_no_block = true; + break; + } + default: + rb_bug("node type %s not expected as block parameter", pm_node_type(PM_NODE_TYPE(parameters_node->block))); + } } } @@ -6641,7 +6813,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod const pm_node_t *required = requireds_list->nodes[i]; if (PM_NODE_TYPE_P(required, PM_MULTI_TARGET_NODE)) { - local_index = pm_compile_destructured_param_locals((const pm_multi_target_node_t *) required, index_lookup_table, local_table_for_iseq, scope_node, local_index); + local_index = pm_compile_destructured_param_locals((const pm_multi_target_node_t *) required, &index_lookup_table, local_table_for_iseq, scope_node, local_index); } } } @@ -6655,7 +6827,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod const pm_node_t *post = posts_list->nodes[i]; if (PM_NODE_TYPE_P(post, PM_MULTI_TARGET_NODE)) { - local_index = pm_compile_destructured_param_locals((const pm_multi_target_node_t *) post, index_lookup_table, local_table_for_iseq, scope_node, local_index); + local_index = pm_compile_destructured_param_locals((const pm_multi_target_node_t *) post, &index_lookup_table, local_table_for_iseq, scope_node, local_index); } } } @@ -6681,14 +6853,20 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod RUBY_ASSERT(0 < maximum && maximum <= 9); for (int i = 0; i < maximum; i++, local_index++) { const uint8_t param_name[] = { '_', '1' + i }; - pm_constant_id_t constant_id = pm_constant_pool_find(&scope_node->parser->constant_pool, param_name, 2); + pm_constant_id_t constant_id = pm_parser_constant_find(scope_node->parser, param_name, 2); RUBY_ASSERT(constant_id && "parser should fill in any gaps in numbered parameters"); - pm_insert_local_index(constant_id, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(constant_id, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } body->param.lead_num = maximum; body->param.flags.has_lead = true; } + // Fill in the anonymous `it` parameter, if it exists + if (scope_node->parameters && PM_NODE_TYPE_P(scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + body->param.lead_num = 1; + body->param.flags.has_lead = true; + } + //********END OF STEP 3********** //********STEP 4********** @@ -6700,7 +6878,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod if (block_locals && block_locals->size) { for (size_t i = 0; i < block_locals->size; i++, local_index++) { pm_constant_id_t constant_id = ((const pm_block_local_variable_node_t *) block_locals->nodes[i])->name; - pm_insert_local_index(constant_id, local_index, index_lookup_table, local_table_for_iseq, scope_node); + pm_insert_local_index(constant_id, local_index, &index_lookup_table, local_table_for_iseq, scope_node); } } @@ -6709,14 +6887,13 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod for (size_t i = 0; i < scope_node->locals.size; i++) { pm_constant_id_t constant_id = locals->ids[i]; if (constant_id) { - struct pm_local_table_insert_ctx ctx; - ctx.scope_node = scope_node; - ctx.local_table_for_iseq = local_table_for_iseq; - ctx.local_index = local_index; - - st_update(index_lookup_table, (st_data_t)constant_id, pm_local_table_insert_func, (st_data_t)&ctx); - - local_index = ctx.local_index; + int existing; + if (!pm_index_lookup_table_lookup(&index_lookup_table, constant_id, &existing)) { + ID local = pm_constant_id_lookup(scope_node, constant_id); + local_table_for_iseq->ids[local_index] = local; + pm_index_lookup_table_insert(&index_lookup_table, constant_id, local_index); + local_index++; + } } } } @@ -6724,10 +6901,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod //********END OF STEP 4********** // We set the index_lookup_table on the scope node so we can - // refer to the parameters correctly - if (scope_node->index_lookup_table) { - st_free_table(scope_node->index_lookup_table); - } + // refer to the parameters correctly. scope_node->index_lookup_table = index_lookup_table; iseq_calc_param_size(iseq); @@ -6739,6 +6913,8 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod // FIXME: args? iseq_set_local_table(iseq, local_table_for_iseq, 0); + iseq_set_parameters_lvar_state(iseq); + scope_node->local_table_for_iseq_size = local_table_for_iseq->size; if (keyword != NULL) { @@ -6806,7 +6982,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod // ^^ break; default: - rb_bug("Unexpected keyword parameter node type %s", pm_node_type_to_str(PM_NODE_TYPE(keyword_parameter_node))); + rb_bug("Unexpected keyword parameter node type %s", pm_node_type(PM_NODE_TYPE(keyword_parameter_node))); } } } @@ -6908,7 +7084,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod break; } case ISEQ_TYPE_ENSURE: { - const pm_node_location_t statements_location = (scope_node->body != NULL ? PM_NODE_START_LOCATION(scope_node->parser, scope_node->body) : location); + const pm_node_location_t statements_location = (scope_node->body != NULL ? PM_NODE_START_LOCATION(scope_node->body) : location); iseq_set_exception_local_table(iseq); if (scope_node->body != NULL) { @@ -6971,7 +7147,7 @@ pm_compile_scope_node(rb_iseq_t *iseq, pm_scope_node_t *scope_node, const pm_nod } if (PM_NODE_TYPE_P(scope_node->ast_node, PM_CLASS_NODE) || PM_NODE_TYPE_P(scope_node->ast_node, PM_MODULE_NODE)) { - const pm_node_location_t end_location = PM_NODE_END_LOCATION(scope_node->parser, scope_node->ast_node); + const pm_node_location_t end_location = PM_NODE_END_LOCATION(scope_node->ast_node); PUSH_TRACE(ret, RUBY_EVENT_END); ISEQ_COMPILE_DATA(iseq)->last_line = end_location.line; } @@ -6991,13 +7167,13 @@ pm_compile_alias_global_variable_node(rb_iseq_t *iseq, const pm_alias_global_var { const pm_location_t *name_loc = &node->new_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (pm_parser_start(scope_node->parser) + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } { const pm_location_t *name_loc = &node->old_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (pm_parser_start(scope_node->parser) + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } @@ -7043,6 +7219,7 @@ pm_compile_array_node(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list if (!popped) { if (elements->size) { VALUE value = pm_static_literal_value(iseq, node, scope_node); + RB_OBJ_SET_FROZEN_SHAREABLE(value); PUSH_INSN1(ret, *location, duparray, value); } else { @@ -7173,7 +7350,7 @@ pm_compile_array_node(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_list rb_ary_push(tmp_array, pm_static_literal_value(iseq, elements->nodes[index++], scope_node)); index--; // about to be incremented by for loop - OBJ_FREEZE(tmp_array); + RB_OBJ_SET_FROZEN_SHAREABLE(tmp_array); // Emit the optimized code. FLUSH_CHUNK; @@ -7275,9 +7452,9 @@ pm_compile_call_node(rb_iseq_t *iseq, const pm_call_node_t *node, LINK_ANCHOR *c ID method_id = pm_constant_id_lookup(scope_node, node->name); const pm_location_t *message_loc = &node->message_loc; - if (message_loc->start == NULL) message_loc = &node->base.location; + if (message_loc->length == 0) message_loc = &node->base.location; - const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, node->base.node_id); + const pm_node_location_t location = PM_LOCATION_START_LOCATION(message_loc, node->base.node_id); const char *builtin_func; if (UNLIKELY(iseq_has_builtin_function_table(iseq)) && (builtin_func = pm_iseq_builtin_function_name(scope_node, node->receiver, method_id)) != NULL) { @@ -7309,44 +7486,6 @@ pm_compile_call_node(rb_iseq_t *iseq, const pm_call_node_t *node, LINK_ANCHOR *c } break; } - case idAREF: { - if (pm_opt_aref_with_p(iseq, node)) { - const pm_string_node_t *string = (const pm_string_node_t *) ((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0]; - VALUE value = parse_static_literal_string(iseq, scope_node, (const pm_node_t *) string, &string->unescaped); - - PM_COMPILE_NOT_POPPED(node->receiver); - - const struct rb_callinfo *callinfo = new_callinfo(iseq, idAREF, 1, 0, NULL, FALSE); - PUSH_INSN2(ret, location, opt_aref_with, value, callinfo); - - if (popped) { - PUSH_INSN(ret, location, pop); - } - - return; - } - break; - } - case idASET: { - if (pm_opt_aset_with_p(iseq, node)) { - const pm_string_node_t *string = (const pm_string_node_t *) ((const pm_arguments_node_t *) node->arguments)->arguments.nodes[0]; - VALUE value = parse_static_literal_string(iseq, scope_node, (const pm_node_t *) string, &string->unescaped); - - PM_COMPILE_NOT_POPPED(node->receiver); - PM_COMPILE_NOT_POPPED(((const pm_arguments_node_t *) node->arguments)->arguments.nodes[1]); - - if (!popped) { - PUSH_INSN(ret, location, swap); - PUSH_INSN1(ret, location, topn, INT2FIX(1)); - } - - const struct rb_callinfo *callinfo = new_callinfo(iseq, idASET, 2, 0, NULL, FALSE); - PUSH_INSN2(ret, location, opt_aset_with, value, callinfo); - PUSH_INSN(ret, location, pop); - return; - } - break; - } } if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && !popped) { @@ -7435,10 +7574,9 @@ pm_compile_call_operator_write_node(rb_iseq_t *iseq, const pm_call_operator_writ * optimization entirely. */ static VALUE -pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t *node, LABEL *label, const pm_scope_node_t *scope_node) +pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t *node, LABEL *label, pm_scope_node_t *scope_node) { VALUE key = Qundef; - switch (PM_NODE_TYPE(node)) { case PM_FLOAT_NODE: { key = pm_static_literal_value(iseq, node, scope_node); @@ -7468,10 +7606,7 @@ pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t * return Qundef; } - if (NIL_P(rb_hash_lookup(dispatch, key))) { - rb_hash_aset(dispatch, key, ((VALUE) label) | 1); - } - + cdhash_aset_if_missing(dispatch, key, (VALUE)label); return dispatch; } @@ -7481,7 +7616,6 @@ pm_compile_case_node_dispatch(rb_iseq_t *iseq, VALUE dispatch, const pm_node_t * static inline void pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_location_t *node_location, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_parser_t *parser = scope_node->parser; const pm_node_location_t location = *node_location; const pm_node_list_t *conditions = &cast->conditions; @@ -7520,7 +7654,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ const pm_when_node_t *clause = (const pm_when_node_t *) conditions->nodes[clause_index]; const pm_node_list_t *conditions = &clause->conditions; - int clause_lineno = pm_node_line_number(parser, (const pm_node_t *) clause); + int clause_lineno = pm_node_line_number_cached((const pm_node_t *) clause, scope_node); LABEL *label = NEW_LABEL(clause_lineno); PUSH_LABEL(body_seq, label); @@ -7546,15 +7680,15 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ const pm_node_t *condition = conditions->nodes[condition_index]; if (PM_NODE_TYPE_P(condition, PM_SPLAT_NODE)) { - pm_node_location_t cond_location = PM_NODE_START_LOCATION(parser, condition); + pm_node_location_t cond_location = PM_NODE_START_LOCATION(condition); PUSH_INSN(cond_seq, cond_location, putnil); pm_compile_node(iseq, condition, cond_seq, false, scope_node); PUSH_INSN1(cond_seq, cond_location, checkmatch, INT2FIX(VM_CHECKMATCH_TYPE_WHEN | VM_CHECKMATCH_ARRAY)); PUSH_INSNL(cond_seq, cond_location, branchif, label); } else { - LABEL *next_label = NEW_LABEL(pm_node_line_number(parser, condition)); - pm_compile_branch_condition(iseq, cond_seq, condition, label, next_label, false, scope_node); + LABEL *next_label = NEW_LABEL(pm_node_line_number_cached(condition, scope_node)); + pm_compile_branch_condition(iseq, cond_seq, condition, label, next_label, scope_node); PUSH_LABEL(cond_seq, next_label); } } @@ -7611,8 +7745,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ // lookup to jump directly to the correct when clause body. VALUE dispatch = Qundef; if (ISEQ_COMPILE_DATA(iseq)->option->specialized_instruction) { - dispatch = rb_hash_new(); - RHASH_TBL_RAW(dispatch)->type = &cdhash_type; + dispatch = cdhash_new(0); } // We're going to loop through each of the conditions in the case @@ -7625,7 +7758,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ // node instructions later. for (size_t clause_index = 0; clause_index < conditions->size; clause_index++) { const pm_when_node_t *clause = (const pm_when_node_t *) conditions->nodes[clause_index]; - pm_node_location_t clause_location = PM_NODE_START_LOCATION(parser, (const pm_node_t *) clause); + pm_node_location_t clause_location = PM_NODE_START_LOCATION((const pm_node_t *) clause); const pm_node_list_t *conditions = &clause->conditions; LABEL *label = NEW_LABEL(clause_location.line); @@ -7635,7 +7768,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ // jumps into the body if it matches. for (size_t condition_index = 0; condition_index < conditions->size; condition_index++) { const pm_node_t *condition = conditions->nodes[condition_index]; - const pm_node_location_t condition_location = PM_NODE_START_LOCATION(parser, condition); + const pm_node_location_t condition_location = PM_NODE_START_LOCATION(condition); // If we haven't already abandoned the optimization, then // we're going to try to compile the condition into the @@ -7699,6 +7832,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ // optimization. if (dispatch != Qundef) { PUSH_INSN(ret, location, dup); + RB_OBJ_SET_SHAREABLE(dispatch); // it is special that the hash is shareable but not frozen, because compile.c modify them. This Hahs instance is not accessible so it is safe to leave it. PUSH_INSN2(ret, location, opt_case_dispatch, dispatch, else_label); LABEL_REF(else_label); } @@ -7710,7 +7844,7 @@ pm_compile_case_node(rb_iseq_t *iseq, const pm_case_node_t *cast, const pm_node_ PUSH_LABEL(ret, else_label); if (cast->else_clause != NULL) { - pm_node_location_t else_location = PM_NODE_START_LOCATION(parser, cast->else_clause->statements != NULL ? ((const pm_node_t *) cast->else_clause->statements) : ((const pm_node_t *) cast->else_clause)); + pm_node_location_t else_location = PM_NODE_START_LOCATION(cast->else_clause->statements != NULL ? ((const pm_node_t *) cast->else_clause->statements) : ((const pm_node_t *) cast->else_clause)); PUSH_INSN(ret, else_location, pop); // Establish branch coverage for the else clause. @@ -7799,8 +7933,8 @@ pm_compile_case_match_node(rb_iseq_t *iseq, const pm_case_match_node_t *node, co RUBY_ASSERT(PM_NODE_TYPE_P(condition, PM_IN_NODE)); const pm_in_node_t *in_node = (const pm_in_node_t *) condition; - const pm_node_location_t in_location = PM_NODE_START_LOCATION(scope_node->parser, in_node); - const pm_node_location_t pattern_location = PM_NODE_START_LOCATION(scope_node->parser, in_node->pattern); + const pm_node_location_t in_location = PM_NODE_START_LOCATION(in_node); + const pm_node_location_t pattern_location = PM_NODE_START_LOCATION(in_node->pattern); if (branch_id) { PUSH_INSN(body_seq, in_location, putnil); @@ -7827,7 +7961,7 @@ pm_compile_case_match_node(rb_iseq_t *iseq, const pm_case_match_node_t *node, co LABEL *next_pattern_label = NEW_LABEL(pattern_location.line); PUSH_INSN(cond_seq, pattern_location, dup); - pm_compile_pattern(iseq, scope_node, in_node->pattern, cond_seq, body_label, next_pattern_label, in_single_pattern, false, true, 2); + pm_compile_pattern(iseq, scope_node, in_node->pattern, cond_seq, body_label, next_pattern_label, in_single_pattern, true, 2); PUSH_LABEL(cond_seq, next_pattern_label); LABEL_UNREMOVABLE(next_pattern_label); } @@ -8075,7 +8209,7 @@ pm_compile_match_required_node(rb_iseq_t *iseq, const pm_match_required_node_t * // through the in_single_pattern parameter. We also indicate that the // value to compare against is 2 slots from the top of the stack (the // base_index parameter). - pm_compile_pattern(iseq, scope_node, node->pattern, ret, matched_label, unmatched_label, true, false, true, 2); + pm_compile_pattern(iseq, scope_node, node->pattern, ret, matched_label, unmatched_label, true, true, 2); // If the pattern did not match the value, then we're going to compile // in our error handler code. This will determine which error to raise @@ -8578,8 +8712,7 @@ pm_compile_yield_node(rb_iseq_t *iseq, const pm_yield_node_t *node, const pm_nod static void pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node) { - const pm_parser_t *parser = scope_node->parser; - const pm_node_location_t location = PM_NODE_START_LOCATION(parser, node); + const pm_node_location_t location = PM_NODE_START_LOCATION(node); int lineno = (int) location.line; if (PM_NODE_TYPE_P(node, PM_BEGIN_NODE) && (((const pm_begin_node_t *) node)->statements == NULL) && (((const pm_begin_node_t *) node)->rescue_clause != NULL)) { @@ -8587,7 +8720,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // has a rescue clause, then the other parser considers it as // starting on the same line as the rescue, as opposed to the // location of the begin keyword. We replicate that behavior here. - lineno = (int) PM_NODE_START_LINE_COLUMN(parser, ((const pm_begin_node_t *) node)->rescue_clause).line; + lineno = (int) PM_NODE_START_LINE_COLUMN(((const pm_begin_node_t *) node)->rescue_clause).line; } if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_NEWLINE) && ISEQ_COMPILE_DATA(iseq)->last_line != lineno) { @@ -8597,7 +8730,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, int event = RUBY_EVENT_LINE; ISEQ_COMPILE_DATA(iseq)->last_line = lineno; - if (ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { + if (lineno > 0 && ISEQ_COVERAGE(iseq) && ISEQ_LINE_COVERAGE(iseq)) { event |= RUBY_EVENT_COVERAGE_LINE; } PUSH_TRACE(ret, event); @@ -8682,7 +8815,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^ if (!popped) { const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; - VALUE backref = pm_compile_back_reference_ref(cast); + VALUE backref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN2(ret, location, getspecial, INT2FIX(1), backref); } @@ -8926,6 +9059,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache && ((parts = pm_constant_path_parts(node, scope_node)) != Qnil)) { ISEQ_BODY(iseq)->ic_size++; + RB_OBJ_SET_SHAREABLE(parts); PUSH_INSN1(ret, location, opt_getconstant_path, parts); } else { @@ -9540,16 +9674,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN1(ret, location, putobject, string); } else if (PM_NODE_FLAG_P(node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE)) { - PUSH_INSN1(ret, location, putstring, string); + PUSH_INSN1(ret, location, dupstring, string); } else { - PUSH_INSN1(ret, location, putchilledstring, string); + PUSH_INSN1(ret, location, dupchilledstring, string); } } } else { const pm_interpolated_string_node_t *cast = (const pm_interpolated_string_node_t *) node; - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL, PM_NODE_FLAG_P(cast, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE), PM_NODE_FLAG_P(cast, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN)); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); if (popped) PUSH_INSN(ret, location, pop); } @@ -9560,7 +9694,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // :"foo #{bar}" // ^^^^^^^^^^^^^ const pm_interpolated_symbol_node_t *cast = (const pm_interpolated_symbol_node_t *) node; - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, popped, scope_node, NULL, NULL, false, false); if (length > 1) { PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); @@ -9582,7 +9716,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN(ret, location, putself); - int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node, NULL, NULL); + int length = pm_interpolated_node_compile(iseq, &cast->parts, &location, ret, false, scope_node, NULL, NULL, false, false); if (length > 1) PUSH_INSN1(ret, location, concatstrings, INT2FIX(length)); PUSH_SEND_WITH_FLAG(ret, location, idBackquote, INT2NUM(1), INT2FIX(VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)); @@ -9594,7 +9728,19 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // -> { it } // ^^ if (!popped) { - PUSH_GETLOCAL(ret, location, scope_node->local_table_for_iseq_size, 0); + pm_scope_node_t *current_scope_node = scope_node; + int level = 0; + + while (current_scope_node) { + if (current_scope_node->parameters && PM_NODE_TYPE_P(current_scope_node->parameters, PM_IT_PARAMETERS_NODE)) { + PUSH_GETLOCAL(ret, location, current_scope_node->local_table_for_iseq_size, level); + return; + } + + current_scope_node = current_scope_node->previous; + level++; + } + rb_bug("Local `it` does not exist"); } return; @@ -9621,7 +9767,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_scope_node_t next_scope_node; pm_scope_node_init(node, &next_scope_node, scope_node); - int opening_lineno = pm_location_line_number(parser, &cast->opening_loc); + int opening_lineno = pm_location_line_number_cached(&cast->opening_loc, scope_node); const rb_iseq_t *block = NEW_CHILD_ISEQ(&next_scope_node, make_name_for_block(iseq), ISEQ_TYPE_BLOCK, opening_lineno); pm_scope_node_destroy(&next_scope_node); @@ -9751,7 +9897,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, LABEL *matched_label = NEW_LABEL(location.line); LABEL *unmatched_label = NEW_LABEL(location.line); LABEL *done_label = NEW_LABEL(location.line); - pm_compile_pattern(iseq, scope_node, cast->pattern, ret, matched_label, unmatched_label, false, false, true, 2); + pm_compile_pattern(iseq, scope_node, cast->pattern, ret, matched_label, unmatched_label, false, true, 2); // If the pattern did not match, then compile the necessary instructions // to handle pushing false onto the stack, then jump to the end. @@ -9798,8 +9944,8 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // when the call is executed. pm_compile_match_write_node(iseq, (const pm_match_write_node_t *) node, &location, ret, popped, scope_node); return; - case PM_MISSING_NODE: - rb_bug("A pm_missing_node_t should not exist in prism's AST."); + case PM_ERROR_RECOVERY_NODE: + rb_bug("A pm_error_recovery_node_t should not exist in prism's AST."); return; case PM_MODULE_NODE: { // module Foo; end @@ -9884,6 +10030,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } + case PM_NO_BLOCK_PARAMETER_NODE: { + // def foo(&nil); end + // ^^^^ + ISEQ_BODY(iseq)->param.flags.accepts_no_block = TRUE; + return; + } case PM_NO_KEYWORDS_PARAMETER_NODE: { // def foo(**nil); end // ^^^^^ @@ -10029,6 +10181,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, exclude_end ); + RB_OBJ_SET_SHAREABLE(val); PUSH_INSN1(ret, location, putobject, val); } } @@ -10092,7 +10245,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, &rescue_scope_node, rb_str_concat(rb_str_new2("rescue in "), ISEQ_BODY(iseq)->location.label), ISEQ_TYPE_RESCUE, - pm_node_line_number(parser, cast->rescue_expression) + pm_node_line_number_cached(cast->rescue_expression, scope_node) ); pm_scope_node_destroy(&rescue_scope_node); @@ -10181,7 +10334,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_compile_constant_path_operator_write_node(iseq, (const pm_constant_path_operator_write_node_t *) cast->write, shareability, &location, ret, popped, scope_node); break; default: - rb_bug("Unexpected node type for shareable constant write: %s", pm_node_type_to_str(PM_NODE_TYPE(cast->write))); + rb_bug("Unexpected node type for shareable constant write: %s", pm_node_type(PM_NODE_TYPE(cast->write))); break; } @@ -10202,7 +10355,17 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, ID singletonclass; CONST_ID(singletonclass, "singletonclass"); - PUSH_INSN3(ret, location, defineclass, ID2SYM(singletonclass), child_iseq, INT2FIX(VM_DEFINECLASS_TYPE_SINGLETON_CLASS)); + + /* `class << self` in a class body and `class << Foo` (constant + receiver) are stable. All other forms are potentially dynamic. */ + int sclass_flags = VM_DEFINECLASS_TYPE_SINGLETON_CLASS; + if (!(PM_NODE_TYPE_P(cast->expression, PM_SELF_NODE) && + ISEQ_BODY(iseq)->type == ISEQ_TYPE_CLASS) && + !pm_cpath_const_p(cast->expression)) { + sclass_flags |= VM_DEFINECLASS_FLAG_DYNAMIC_CREF; + } + + PUSH_INSN3(ret, location, defineclass, ID2SYM(singletonclass), child_iseq, INT2FIX(sclass_flags)); if (popped) PUSH_INSN(ret, location, pop); RB_OBJ_WRITTEN(iseq, Qundef, (VALUE) child_iseq); @@ -10229,10 +10392,10 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN1(ret, location, putobject, string); } else if (PM_NODE_FLAG_P(cast, PM_STRING_FLAGS_MUTABLE)) { - PUSH_INSN1(ret, location, putstring, string); + PUSH_INSN1(ret, location, dupstring, string); } else { - PUSH_INSN1(ret, location, putchilledstring, string); + PUSH_INSN1(ret, location, dupchilledstring, string); } } return; @@ -10286,10 +10449,10 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, PUSH_INSN1(ret, location, putobject, value); } else if (PM_NODE_FLAG_P(node, PM_STRING_FLAGS_MUTABLE)) { - PUSH_INSN1(ret, location, putstring, value); + PUSH_INSN1(ret, location, dupstring, value); } else { - PUSH_INSN1(ret, location, putchilledstring, value); + PUSH_INSN1(ret, location, dupchilledstring, value); } } return; @@ -10395,7 +10558,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, pm_compile_yield_node(iseq, (const pm_yield_node_t *) node, &location, ret, popped, scope_node); return; default: - rb_raise(rb_eNotImpError, "node type %s not implemented", pm_node_type_to_str(PM_NODE_TYPE(node))); + rb_raise(rb_eNotImpError, "node type %s not implemented", pm_node_type(PM_NODE_TYPE(node))); return; } } @@ -10457,6 +10620,15 @@ pm_iseq_compile_node(rb_iseq_t *iseq, pm_scope_node_t *node) return iseq_setup(iseq, ret); } +void +pm_parse_result_init(pm_parse_result_t *result) +{ + memset(result, 0, sizeof(pm_parse_result_t)); + result->arena = pm_arena_new(); + result->options = pm_options_new(); + pm_options_line_set(result->options, 1); +} + /** * Free the internal memory associated with a pm_parse_result_t struct. * Importantly this does not free the struct itself. @@ -10464,24 +10636,21 @@ pm_iseq_compile_node(rb_iseq_t *iseq, pm_scope_node_t *node) void pm_parse_result_free(pm_parse_result_t *result) { - if (result->node.ast_node != NULL) { - pm_node_destroy(&result->parser, result->node.ast_node); - } - if (result->parsed) { - xfree(result->node.constants); + SIZED_FREE_N(result->node.constants, pm_parser_constants_size(result->node.parser)); pm_scope_node_destroy(&result->node); } - pm_parser_free(&result->parser); - pm_string_free(&result->input); - pm_options_free(&result->options); + if (result->parser) pm_parser_free(result->parser); + pm_arena_free(result->arena); + if (result->source) pm_source_free(result->source); + pm_options_free(result->options); } /** An error that is going to be formatted into the output. */ typedef struct { /** A pointer to the diagnostic that was generated during parsing. */ - pm_diagnostic_t *error; + const pm_diagnostic_t *error; /** The start line of the diagnostic message. */ int32_t line; @@ -10517,123 +10686,151 @@ typedef struct { #define PM_COLOR_RESET "\033[m" #define PM_ERROR_TRUNCATE 30 -static inline pm_parse_error_t * -pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) { - pm_parse_error_t *errors = xcalloc(error_list->size, sizeof(pm_parse_error_t)); - if (errors == NULL) return NULL; +/** Context struct for collecting errors via callback. */ +typedef struct { + pm_parse_error_t *errors; + size_t count; + size_t capacity; + const pm_line_offset_list_t *line_offsets; + int32_t start_line; +} pm_error_collect_t; - int32_t start_line = parser->start_line; - for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { - pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line); - pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line); - - // We're going to insert this error into the array in sorted order. We - // do this by finding the first error that has a line number greater - // than the current error and then inserting the current error before - // that one. - size_t index = 0; - while ( - (index < error_list->size) && - (errors[index].error != NULL) && - ( - (errors[index].line < start.line) || - ((errors[index].line == start.line) && (errors[index].column_start < start.column)) - ) - ) index++; - - // Now we're going to shift all of the errors after this one down one - // index to make room for the new error. - if (index + 1 < error_list->size) { - memmove(&errors[index + 1], &errors[index], sizeof(pm_parse_error_t) * (error_list->size - index - 1)); - } - - // Finally, we'll insert the error into the array. - uint32_t column_end; - if (start.line == end.line) { - column_end = end.column; - } else { - column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1); - } +static void +pm_error_collect_callback(const pm_diagnostic_t *diagnostic, void *data) +{ + pm_error_collect_t *ctx = (pm_error_collect_t *) data; + pm_location_t loc = pm_diagnostic_location(diagnostic); - // Ensure we have at least one column of error. - if (start.column == column_end) column_end++; + pm_line_column_t start = pm_line_offset_list_line_column(ctx->line_offsets, loc.start, ctx->start_line); + pm_line_column_t end = pm_line_offset_list_line_column(ctx->line_offsets, loc.start + loc.length, ctx->start_line); - errors[index] = (pm_parse_error_t) { - .error = error, - .line = start.line, - .column_start = start.column, - .column_end = column_end - }; + uint32_t column_end; + if (start.line == end.line) { + column_end = end.column; + } else { + column_end = (uint32_t) (ctx->line_offsets->offsets[start.line - ctx->start_line + 1] - ctx->line_offsets->offsets[start.line - ctx->start_line] - 1); } - return errors; + // Ensure we have at least one column of error. + if (start.column == column_end) column_end++; + + // Insert into sorted position (insertion sort). + size_t index = 0; + while ( + (index < ctx->count) && + ( + (ctx->errors[index].line < start.line) || + ((ctx->errors[index].line == start.line) && (ctx->errors[index].column_start < start.column)) + ) + ) index++; + + if (index < ctx->count) { + memmove(&ctx->errors[index + 1], &ctx->errors[index], sizeof(pm_parse_error_t) * (ctx->count - index)); + } + + ctx->errors[index] = (pm_parse_error_t) { + .error = diagnostic, + .line = start.line, + .column_start = start.column, + .column_end = column_end + }; + ctx->count++; } -/* Append a literal string to the buffer. */ -#define pm_buffer_append_literal(buffer, str) pm_buffer_append_string(buffer, str, rb_strlen_lit(str)) +static inline pm_parse_error_t * +pm_parse_errors_format_sort(const pm_parser_t *parser, size_t error_count, const pm_line_offset_list_t *line_offsets) { + pm_parse_error_t *errors = xcalloc(error_count, sizeof(pm_parse_error_t)); + if (errors == NULL) return NULL; + + pm_error_collect_t ctx = { + .errors = errors, + .count = 0, + .capacity = error_count, + .line_offsets = line_offsets, + .start_line = pm_parser_start_line(parser) + }; + + pm_parser_errors_each(parser, pm_error_collect_callback, &ctx); + + return errors; +} static inline void -pm_parse_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, uint32_t column_start, uint32_t column_end, pm_buffer_t *buffer) { - int32_t line_delta = line - parser->start_line; +pm_parse_errors_format_line(const pm_parser_t *parser, const pm_line_offset_list_t *line_offsets, const char *number_prefix, int32_t line, uint32_t column_start, uint32_t column_end, VALUE buffer) { + int32_t line_delta = line - pm_parser_start_line(parser); assert(line_delta >= 0); size_t index = (size_t) line_delta; - assert(index < newline_list->size); + assert(index < line_offsets->size); - const uint8_t *start = &parser->start[newline_list->offsets[index]]; + const uint8_t *start = &pm_parser_start(parser)[line_offsets->offsets[index]]; const uint8_t *end; - if (index >= newline_list->size - 1) { - end = parser->end; + if (index >= line_offsets->size - 1) { + end = pm_parser_end(parser); } else { - end = &parser->start[newline_list->offsets[index + 1]]; + end = &pm_parser_start(parser)[line_offsets->offsets[index + 1]]; } - pm_buffer_append_format(buffer, number_prefix, line); + rb_str_catf(buffer, number_prefix, line); // Here we determine if we should truncate the end of the line. bool truncate_end = false; if ((column_end != 0) && ((end - (start + column_end)) >= PM_ERROR_TRUNCATE)) { - end = start + column_end + PM_ERROR_TRUNCATE; + const uint8_t *end_candidate = start + column_end + PM_ERROR_TRUNCATE; + + for (const uint8_t *ptr = start; ptr < end_candidate;) { + size_t char_width = pm_parser_encoding_char_width(parser, ptr, pm_parser_end(parser) - ptr); + + // If we failed to decode a character, then just bail out and + // truncate at the fixed width. + if (char_width == 0) break; + + // If this next character would go past the end candidate, + // then we need to truncate before it. + if (ptr + char_width > end_candidate) { + end_candidate = ptr; + break; + } + + ptr += char_width; + } + + end = end_candidate; truncate_end = true; } // Here we determine if we should truncate the start of the line. if (column_start >= PM_ERROR_TRUNCATE) { - pm_buffer_append_string(buffer, "... ", 4); + rb_str_cat(buffer, "... ", 4); start += column_start; } - pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start)); + rb_str_cat(buffer, (const char *) start, (size_t) (end - start)); if (truncate_end) { - pm_buffer_append_string(buffer, " ...\n", 5); - } else if (end == parser->end && end[-1] != '\n') { - pm_buffer_append_string(buffer, "\n", 1); + rb_str_cat(buffer, " ...\n", 5); + } else if (end == pm_parser_end(parser) && end[-1] != '\n') { + rb_str_cat(buffer, "\n", 1); } } /** - * Format the errors on the parser into the given buffer. + * Format a pre-sorted array of errors into the given buffer. */ static void -pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, int highlight, bool inline_messages) { - assert(error_list->size != 0); - - // First, we're going to sort all of the errors by line number using an - // insertion sort into a newly allocated array. - const int32_t start_line = parser->start_line; - const pm_newline_list_t *newline_list = &parser->newline_list; +pm_parse_errors_format_with(const pm_parser_t *parser, pm_parse_error_t *errors, size_t error_count, VALUE buffer, int highlight, bool inline_messages) { + assert(error_count != 0); - pm_parse_error_t *errors = pm_parse_errors_format_sort(parser, error_list, newline_list); - if (errors == NULL) return; + const int32_t start_line = pm_parser_start_line(parser); + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); // Now we're going to determine how we're going to format line numbers and // blank lines based on the maximum number of digits in the line numbers // that are going to be displaid. pm_parse_error_format_t error_format; int32_t first_line_number = errors[0].line; - int32_t last_line_number = errors[error_list->size - 1].line; + int32_t last_line_number = errors[error_count - 1].line; // If we have a maximum line number that is negative, then we're going to // use the absolute value for comparison but multiple by 10 to additionally @@ -10722,11 +10919,10 @@ pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, p // the source before the error to give some context. We'll be careful not to // display the same line twice in case the errors are close enough in the // source. - int32_t last_line = parser->start_line - 1; + int32_t last_line = pm_parser_start_line(parser) - 1; uint32_t last_column_start = 0; - const pm_encoding_t *encoding = parser->encoding; - for (size_t index = 0; index < error_list->size; index++) { + for (size_t index = 0; index < error_count; index++) { pm_parse_error_t *error = &errors[index]; // Here we determine how many lines of padding of the source to display, @@ -10734,42 +10930,42 @@ pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, p if (error->line - last_line > 1) { if (error->line - last_line > 2) { if ((index != 0) && (error->line - last_line > 3)) { - pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length); + rb_str_cat(buffer, error_format.divider, error_format.divider_length); } - pm_buffer_append_string(buffer, " ", 2); - pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, 0, 0, buffer); + rb_str_cat(buffer, " ", 2); + pm_parse_errors_format_line(parser, line_offsets, error_format.number_prefix, error->line - 2, 0, 0, buffer); } - pm_buffer_append_string(buffer, " ", 2); - pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, 0, 0, buffer); + rb_str_cat(buffer, " ", 2); + pm_parse_errors_format_line(parser, line_offsets, error_format.number_prefix, error->line - 1, 0, 0, buffer); } // If this is the first error or we're on a new line, then we'll display // the line that has the error in it. if ((index == 0) || (error->line != last_line)) { if (highlight > 1) { - pm_buffer_append_literal(buffer, PM_COLOR_RED "> " PM_COLOR_RESET); + rb_str_cat_cstr(buffer, PM_COLOR_RED "> " PM_COLOR_RESET); } else if (highlight > 0) { - pm_buffer_append_literal(buffer, PM_COLOR_BOLD "> " PM_COLOR_RESET); + rb_str_cat_cstr(buffer, PM_COLOR_BOLD "> " PM_COLOR_RESET); } else { - pm_buffer_append_literal(buffer, "> "); + rb_str_cat_cstr(buffer, "> "); } last_column_start = error->column_start; // Find the maximum column end of all the errors on this line. uint32_t column_end = error->column_end; - for (size_t next_index = index + 1; next_index < error_list->size; next_index++) { + for (size_t next_index = index + 1; next_index < error_count; next_index++) { if (errors[next_index].line != error->line) break; if (errors[next_index].column_end > column_end) column_end = errors[next_index].column_end; } - pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, error->column_start, column_end, buffer); + pm_parse_errors_format_line(parser, line_offsets, error_format.number_prefix, error->line, error->column_start, column_end, buffer); } - const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]]; - if (start == parser->end) pm_buffer_append_byte(buffer, '\n'); + const uint8_t *start = &pm_parser_start(parser)[line_offsets->offsets[error->line - start_line]]; + if (start == pm_parser_end(parser)) rb_str_cat(buffer, "\n", 1); // Now we'll display the actual error message. We'll do this by first // putting the prefix to the line, then a bunch of blank spaces @@ -10780,59 +10976,59 @@ pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, p // character when displaid in the terminal. For some east-asian // languages or emoji, this means it can be thrown off pretty badly. We // will need to solve this eventually. - pm_buffer_append_string(buffer, " ", 2); - pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length); + rb_str_cat(buffer, " ", 2); + rb_str_cat(buffer, error_format.blank_prefix, error_format.blank_prefix_length); size_t column = 0; if (last_column_start >= PM_ERROR_TRUNCATE) { - pm_buffer_append_string(buffer, " ", 4); + rb_str_cat(buffer, " ", 4); column = last_column_start; } while (column < error->column_start) { - pm_buffer_append_byte(buffer, ' '); + rb_str_cat(buffer, " ", 1); - size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + size_t char_width = pm_parser_encoding_char_width(parser, start + column, pm_parser_end(parser) - (start + column)); column += (char_width == 0 ? 1 : char_width); } - if (highlight > 1) pm_buffer_append_literal(buffer, PM_COLOR_RED); - else if (highlight > 0) pm_buffer_append_literal(buffer, PM_COLOR_BOLD); - pm_buffer_append_byte(buffer, '^'); + if (highlight > 1) rb_str_cat_cstr(buffer, PM_COLOR_RED); + else if (highlight > 0) rb_str_cat_cstr(buffer, PM_COLOR_BOLD); + rb_str_cat(buffer, "^", 1); - size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + size_t char_width = pm_parser_encoding_char_width(parser, start + column, pm_parser_end(parser) - (start + column)); column += (char_width == 0 ? 1 : char_width); while (column < error->column_end) { - pm_buffer_append_byte(buffer, '~'); + rb_str_cat(buffer, "~", 1); - size_t char_width = encoding->char_width(start + column, parser->end - (start + column)); + size_t char_width = pm_parser_encoding_char_width(parser, start + column, pm_parser_end(parser) - (start + column)); column += (char_width == 0 ? 1 : char_width); } - if (highlight > 0) pm_buffer_append_literal(buffer, PM_COLOR_RESET); + if (highlight > 0) rb_str_cat_cstr(buffer, PM_COLOR_RESET); if (inline_messages) { - pm_buffer_append_byte(buffer, ' '); + rb_str_cat(buffer, " ", 1); assert(error->error != NULL); - const char *message = error->error->message; - pm_buffer_append_string(buffer, message, strlen(message)); + const char *message = pm_diagnostic_message(error->error); + rb_str_cat(buffer, message, strlen(message)); } - pm_buffer_append_byte(buffer, '\n'); + rb_str_cat(buffer, "\n", 1); // Here we determine how many lines of padding to display after the // error, depending on where the next error is in source. last_line = error->line; int32_t next_line; - if (index == error_list->size - 1) { - next_line = (((int32_t) newline_list->size) + parser->start_line); + if (index == error_count - 1) { + next_line = (((int32_t) line_offsets->size) + pm_parser_start_line(parser)); // If the file ends with a newline, subtract one from our "next_line" // so that we don't output an extra line at the end of the file - if ((parser->start + newline_list->offsets[newline_list->size - 1]) == parser->end) { + if ((pm_parser_start(parser) + line_offsets->offsets[line_offsets->size - 1]) == pm_parser_end(parser)) { next_line--; } } @@ -10841,18 +11037,30 @@ pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, p } if (next_line - last_line > 1) { - pm_buffer_append_string(buffer, " ", 2); - pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + rb_str_cat(buffer, " ", 2); + pm_parse_errors_format_line(parser, line_offsets, error_format.number_prefix, ++last_line, 0, 0, buffer); } if (next_line - last_line > 1) { - pm_buffer_append_string(buffer, " ", 2); - pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer); + rb_str_cat(buffer, " ", 2); + pm_parse_errors_format_line(parser, line_offsets, error_format.number_prefix, ++last_line, 0, 0, buffer); } } - // Finally, we'll free the array of errors that we allocated. - xfree(errors); +} + +/** + * Format the errors on the parser into the given buffer. + */ +static void +pm_parse_errors_format(const pm_parser_t *parser, size_t error_count, VALUE buffer, int highlight, bool inline_messages) { + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + + pm_parse_error_t *errors = pm_parse_errors_format_sort(parser, error_count, line_offsets); + if (errors == NULL) return; + + pm_parse_errors_format_with(parser, errors, error_count, buffer, highlight, inline_messages); + SIZED_FREE_N(errors, error_count); } #undef PM_ERROR_TRUNCATE @@ -10867,23 +11075,136 @@ pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, p * as well. */ static bool -pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location) +pm_parse_process_error_utf8_p(const pm_parser_t *parser, pm_location_t location) { - const size_t start_line = pm_newline_list_line_column(&parser->newline_list, location->start, 1).line; - const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->end, 1).line; + const size_t start_line = pm_line_offset_list_line_column(pm_parser_line_offsets(parser), location.start, 1).line; + const size_t end_line = pm_line_offset_list_line_column(pm_parser_line_offsets(parser), location.start + location.length, 1).line; - const uint8_t *start = parser->start + parser->newline_list.offsets[start_line - 1]; - const uint8_t *end = ((end_line == parser->newline_list.size) ? parser->end : (parser->start + parser->newline_list.offsets[end_line])); - size_t width; + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + const uint8_t *start = pm_parser_start(parser) + line_offsets->offsets[start_line - 1]; + const uint8_t *end = ((end_line == line_offsets->size) ? pm_parser_end(parser) : (pm_parser_start(parser) + line_offsets->offsets[end_line])); + rb_encoding *utf8 = rb_utf8_encoding(); while (start < end) { - if ((width = pm_encoding_utf_8_char_width(start, end - start)) == 0) return false; - start += width; + int width = rb_enc_precise_mbclen((const char *) start, (const char *) end, utf8); + if (!MBCLEN_CHARFOUND_P(width)) return false; + start += MBCLEN_CHARFOUND_LEN(width); } return true; } +/** Context for the error processing callback used in pm_parse_process_error. */ +typedef struct { + const pm_parse_result_t *result; + const pm_parser_t *parser; + const pm_string_t *filepath; + VALUE buffer; + int highlight; + bool valid_utf8; + bool found_argument_error; + bool found_load_error; + VALUE early_return; + const pm_diagnostic_t *first_error; + size_t error_count; +} pm_process_error_ctx_t; + +static void +pm_process_error_check_callback(const pm_diagnostic_t *diagnostic, void *data) +{ + pm_process_error_ctx_t *ctx = (pm_process_error_ctx_t *) data; + pm_location_t loc = pm_diagnostic_location(diagnostic); + + if (ctx->first_error == NULL) ctx->first_error = diagnostic; + ctx->error_count++; + + switch (pm_diagnostic_error_level(diagnostic)) { + case PM_ERROR_LEVEL_SYNTAX: + if (ctx->valid_utf8 && !pm_parse_process_error_utf8_p(ctx->parser, loc)) { + ctx->valid_utf8 = false; + } + break; + case PM_ERROR_LEVEL_ARGUMENT: { + if (ctx->found_argument_error || ctx->found_load_error) break; + ctx->found_argument_error = true; + + int32_t line_number = (int32_t) pm_location_line_number(ctx->parser, &loc); + + rb_str_catf( + ctx->buffer, + "%.*s:%" PRIi32 ": %s", + (int) pm_string_length(ctx->filepath), + pm_string_source(ctx->filepath), + line_number, + pm_diagnostic_message(diagnostic) + ); + + if (pm_parse_process_error_utf8_p(ctx->parser, loc)) { + rb_str_cat(ctx->buffer, "\n", 1); + // Format just this one error. We construct a single-element sorted + // array manually and call the format function with count=1. + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(ctx->parser); + int32_t start_line = pm_parser_start_line(ctx->parser); + pm_line_column_t start_lc = pm_line_offset_list_line_column(line_offsets, loc.start, start_line); + pm_line_column_t end_lc = pm_line_offset_list_line_column(line_offsets, loc.start + loc.length, start_line); + + uint32_t col_end; + if (start_lc.line == end_lc.line) { + col_end = end_lc.column; + } else { + col_end = (uint32_t) (line_offsets->offsets[start_lc.line - start_line + 1] - line_offsets->offsets[start_lc.line - start_line] - 1); + } + if (start_lc.column == col_end) col_end++; + + pm_parse_error_t single_error = { + .error = diagnostic, + .line = start_lc.line, + .column_start = start_lc.column, + .column_end = col_end + }; + pm_parse_errors_format_with(ctx->parser, &single_error, 1, ctx->buffer, ctx->highlight, false); + } + + ctx->early_return = rb_exc_new_str(rb_eArgError, ctx->buffer); + break; + } + case PM_ERROR_LEVEL_LOAD: { + if (ctx->found_argument_error || ctx->found_load_error) break; + ctx->found_load_error = true; + + VALUE message = rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), rb_locale_encoding()); + VALUE value = rb_exc_new3(rb_eLoadError, message); + rb_ivar_set(value, rb_intern_const("@path"), Qnil); + ctx->early_return = value; + break; + } + } +} + +/** Callback for formatting non-UTF8 errors. */ +typedef struct { + const pm_parser_t *parser; + const pm_string_t *filepath; + VALUE buffer; + bool first; +} pm_error_simple_format_ctx_t; + +static void +pm_error_simple_format_callback(const pm_diagnostic_t *diagnostic, void *data) +{ + pm_error_simple_format_ctx_t *ctx = (pm_error_simple_format_ctx_t *) data; + pm_location_t loc = pm_diagnostic_location(diagnostic); + + if (!ctx->first) rb_str_cat(ctx->buffer, "\n", 1); + ctx->first = false; + + rb_str_catf(ctx->buffer, "%.*s:%" PRIi32 ": %s", + (int) pm_string_length(ctx->filepath), + pm_string_source(ctx->filepath), + (int32_t) pm_location_line_number(ctx->parser, &loc), + pm_diagnostic_message(diagnostic)); +} + /** * Generate an error object from the given parser that contains as much * information as possible about the errors that were encountered. @@ -10891,12 +11212,11 @@ pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *lo static VALUE pm_parse_process_error(const pm_parse_result_t *result) { - const pm_parser_t *parser = &result->parser; - const pm_diagnostic_t *head = (const pm_diagnostic_t *) parser->error_list.head; - bool valid_utf8 = true; + const pm_parser_t *parser = result->parser; + size_t error_count = pm_parser_errors_size(parser); - pm_buffer_t buffer = { 0 }; - const pm_string_t *filepath = &parser->filepath; + VALUE buffer = rb_str_buf_new(0); + const pm_string_t *filepath = pm_parser_filepath(parser); int highlight = rb_stderr_tty_p(); if (highlight) { @@ -10904,90 +11224,98 @@ pm_parse_process_error(const pm_parse_result_t *result) highlight = (no_color == NULL || no_color[0] == '\0') ? 2 : 1; } - for (const pm_diagnostic_t *error = head; error != NULL; error = (const pm_diagnostic_t *) error->node.next) { - switch (error->level) { - case PM_ERROR_LEVEL_SYNTAX: - // It is implicitly assumed that the error messages will be - // encodeable as UTF-8. Because of this, we can't include source - // examples that contain invalid byte sequences. So if any source - // examples include invalid UTF-8 byte sequences, we will skip - // showing source examples entirely. - if (valid_utf8 && !pm_parse_process_error_utf8_p(parser, &error->location)) { - valid_utf8 = false; - } - break; - case PM_ERROR_LEVEL_ARGUMENT: { - // Any errors with the level PM_ERROR_LEVEL_ARGUMENT take over as - // the only argument that gets raised. This is to allow priority - // messages that should be handled before anything else. - int32_t line_number = (int32_t) pm_location_line_number(parser, &error->location); - - pm_buffer_append_format( - &buffer, - "%.*s:%" PRIi32 ": %s", - (int) pm_string_length(filepath), - pm_string_source(filepath), - line_number, - error->message - ); - - if (pm_parse_process_error_utf8_p(parser, &error->location)) { - pm_buffer_append_byte(&buffer, '\n'); - - pm_list_node_t *list_node = (pm_list_node_t *) error; - pm_list_t error_list = { .size = 1, .head = list_node, .tail = list_node }; - - pm_parse_errors_format(parser, &error_list, &buffer, highlight, false); - } + // First pass: check for argument/load errors and UTF-8 validity. + pm_process_error_ctx_t ctx = { + .result = result, + .parser = parser, + .filepath = filepath, + .buffer = buffer, + .highlight = highlight, + .valid_utf8 = true, + .found_argument_error = false, + .found_load_error = false, + .early_return = Qundef, + .first_error = NULL, + .error_count = 0 + }; - VALUE value = rb_exc_new(rb_eArgError, pm_buffer_value(&buffer), pm_buffer_length(&buffer)); - pm_buffer_free(&buffer); + pm_parser_errors_each(parser, pm_process_error_check_callback, &ctx); - return value; - } - case PM_ERROR_LEVEL_LOAD: { - // Load errors are much simpler, because they don't include any of - // the source in them. We create the error directly from the - // message. - VALUE message = rb_enc_str_new_cstr(error->message, rb_locale_encoding()); - VALUE value = rb_exc_new3(rb_eLoadError, message); - rb_ivar_set(value, rb_intern_const("@path"), Qnil); - return value; - } - } + // If we found an argument or load error, return it immediately. + if (ctx.early_return != Qundef) { + return ctx.early_return; } - pm_buffer_append_format( - &buffer, + // Format the header line. + pm_location_t first_loc = pm_diagnostic_location(ctx.first_error); + rb_str_catf( + buffer, "%.*s:%" PRIi32 ": syntax error%s found\n", (int) pm_string_length(filepath), pm_string_source(filepath), - (int32_t) pm_location_line_number(parser, &head->location), - (parser->error_list.size > 1) ? "s" : "" + (int32_t) pm_location_line_number(parser, &first_loc), + (error_count > 1) ? "s" : "" ); - if (valid_utf8) { - pm_parse_errors_format(parser, &parser->error_list, &buffer, highlight, true); + if (ctx.valid_utf8) { + pm_parse_errors_format(parser, error_count, buffer, highlight, true); } else { - for (const pm_diagnostic_t *error = head; error != NULL; error = (const pm_diagnostic_t *) error->node.next) { - if (error != head) pm_buffer_append_byte(&buffer, '\n'); - pm_buffer_append_format(&buffer, "%.*s:%" PRIi32 ": %s", (int) pm_string_length(filepath), pm_string_source(filepath), (int32_t) pm_location_line_number(parser, &error->location), error->message); - } + pm_error_simple_format_ctx_t simple_ctx = { + .parser = parser, + .filepath = filepath, + .buffer = buffer, + .first = true + }; + pm_parser_errors_each(parser, pm_error_simple_format_callback, &simple_ctx); } - VALUE message = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), result->node.encoding); - VALUE error = rb_exc_new_str(rb_eSyntaxError, message); + rb_enc_associate(buffer, result->node.encoding); + VALUE error = rb_exc_new_str(rb_eSyntaxError, buffer); rb_encoding *filepath_encoding = result->node.filepath_encoding != NULL ? result->node.filepath_encoding : rb_utf8_encoding(); VALUE path = rb_enc_str_new((const char *) pm_string_source(filepath), pm_string_length(filepath), filepath_encoding); rb_ivar_set(error, rb_intern_const("@path"), path); - pm_buffer_free(&buffer); return error; } +/** Context for interning constants via callback. */ +typedef struct { + ID *constants; + rb_encoding *encoding; + size_t index; +} pm_intern_constants_ctx_t; + +static void +pm_intern_constants_callback(const pm_constant_t *constant, void *data) +{ + pm_intern_constants_ctx_t *ctx = (pm_intern_constants_ctx_t *) data; + ctx->constants[ctx->index++] = rb_intern3((const char *) pm_constant_start(constant), pm_constant_length(constant), ctx->encoding); +} + +/** Context for emitting warnings via callback. */ +typedef struct { + const pm_parser_t *parser; + rb_encoding *encoding; + const char *filepath; +} pm_warning_emit_ctx_t; + +static void +pm_warning_emit_callback(const pm_diagnostic_t *diagnostic, void *data) { + pm_warning_emit_ctx_t *ctx = (pm_warning_emit_ctx_t *) data; + pm_location_t loc = pm_diagnostic_location(diagnostic); + int line = pm_location_line_number(ctx->parser, &loc); + + if (pm_diagnostic_warning_level(diagnostic) == PM_WARNING_LEVEL_VERBOSE) { + rb_enc_compile_warning(ctx->encoding, ctx->filepath, line, "%s", pm_diagnostic_message(diagnostic)); + } + else { + rb_enc_compile_warn(ctx->encoding, ctx->filepath, line, "%s", pm_diagnostic_message(diagnostic)); + } +} + /** * Parse the parse result and raise a Ruby error if there are any syntax errors. * It returns an error if one should be raised. It is assumed that the parse @@ -10996,7 +11324,7 @@ pm_parse_process_error(const pm_parse_result_t *result) static VALUE pm_parse_process(pm_parse_result_t *result, pm_node_t *node, VALUE *script_lines) { - pm_parser_t *parser = &result->parser; + pm_parser_t *parser = result->parser; // First, set up the scope node so that the AST node is attached and can be // freed regardless of whether or we return an error. @@ -11007,42 +11335,37 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node, VALUE *script_lines pm_scope_node_init(node, scope_node, NULL); scope_node->filepath_encoding = filepath_encoding; - scope_node->encoding = rb_enc_find(parser->encoding->name); - if (!scope_node->encoding) rb_bug("Encoding not found %s!", parser->encoding->name); + const char *encoding_name = pm_parser_encoding_name(parser); + scope_node->encoding = rb_enc_find(encoding_name); + if (!scope_node->encoding) rb_bug("Encoding not found %s!", encoding_name); scope_node->coverage_enabled = coverage_enabled; // If RubyVM.keep_script_lines is set to true, then we need to create that // array of script lines here. if (script_lines != NULL) { - *script_lines = rb_ary_new_capa(parser->newline_list.size); + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + *script_lines = rb_ary_new_capa(line_offsets->size); - for (size_t index = 0; index < parser->newline_list.size; index++) { - size_t offset = parser->newline_list.offsets[index]; - size_t length = index == parser->newline_list.size - 1 ? ((size_t) (parser->end - (parser->start + offset))) : (parser->newline_list.offsets[index + 1] - offset); - rb_ary_push(*script_lines, rb_enc_str_new((const char *) parser->start + offset, length, scope_node->encoding)); + for (size_t index = 0; index < line_offsets->size; index++) { + size_t offset = line_offsets->offsets[index]; + size_t length = index == line_offsets->size - 1 ? ((size_t) (pm_parser_end(parser) - (pm_parser_start(parser) + offset))) : (line_offsets->offsets[index + 1] - offset); + rb_ary_push(*script_lines, rb_enc_str_new((const char *) pm_parser_start(parser) + offset, length, scope_node->encoding)); } scope_node->script_lines = script_lines; } // Emit all of the various warnings from the parse. - const pm_diagnostic_t *warning; - const char *warning_filepath = (const char *) pm_string_source(&parser->filepath); - - for (warning = (const pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (const pm_diagnostic_t *) warning->node.next) { - int line = pm_location_line_number(parser, &warning->location); - - if (warning->level == PM_WARNING_LEVEL_VERBOSE) { - rb_enc_compile_warning(scope_node->encoding, warning_filepath, line, "%s", warning->message); - } - else { - rb_enc_compile_warn(scope_node->encoding, warning_filepath, line, "%s", warning->message); - } - } + pm_warning_emit_ctx_t warning_ctx = { + .parser = parser, + .encoding = scope_node->encoding, + .filepath = (const char *) pm_string_source(pm_parser_filepath(parser)) + }; + pm_parser_warnings_each(parser, pm_warning_emit_callback, &warning_ctx); // If there are errors, raise an appropriate error and free the result. - if (parser->error_list.size > 0) { + if (pm_parser_errors_size(parser) > 0) { VALUE error = pm_parse_process_error(result); // TODO: We need to set the backtrace. @@ -11053,18 +11376,14 @@ pm_parse_process(pm_parse_result_t *result, pm_node_t *node, VALUE *script_lines // Now set up the constant pool and intern all of the various constants into // their corresponding IDs. scope_node->parser = parser; - scope_node->constants = xcalloc(parser->constant_pool.size, sizeof(ID)); + scope_node->options = result->options; + scope_node->line_offsets = pm_parser_line_offsets(parser); + scope_node->start_line = pm_parser_start_line(parser); + size_t constants_size = pm_parser_constants_size(parser); + scope_node->constants = constants_size ? xmalloc(constants_size * sizeof(ID)) : NULL; - for (uint32_t index = 0; index < parser->constant_pool.size; index++) { - pm_constant_t *constant = &parser->constant_pool.constants[index]; - scope_node->constants[index] = rb_intern3((const char *) constant->start, constant->length, scope_node->encoding); - } - - scope_node->index_lookup_table = st_init_numtable(); - pm_constant_id_list_t *locals = &scope_node->locals; - for (size_t index = 0; index < locals->size; index++) { - st_insert(scope_node->index_lookup_table, locals->ids[index], index); - } + pm_intern_constants_ctx_t intern_ctx = { .constants = scope_node->constants, .encoding = scope_node->encoding, .index = 0 }; + pm_parser_constants_each(parser, pm_intern_constants_callback, &intern_ctx); // If we got here, this is a success and we can return Qnil to indicate that // no error should be raised. @@ -11103,22 +11422,22 @@ pm_options_frozen_string_literal_init(pm_options_t *options) static inline VALUE pm_parse_file_script_lines(const pm_scope_node_t *scope_node, const pm_parser_t *parser) { - const pm_newline_list_t *newline_list = &parser->newline_list; - const char *start = (const char *) parser->start; - const char *end = (const char *) parser->end; + const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser); + const char *start = (const char *) pm_parser_start(parser); + const char *end = (const char *) pm_parser_end(parser); // If we end exactly on a newline, then there's no need to push on a final // segment. If we don't, then we need to push on the last offset up to the // end of the string. - size_t last_offset = newline_list->offsets[newline_list->size - 1]; + size_t last_offset = line_offsets->offsets[line_offsets->size - 1]; bool last_push = start + last_offset != end; // Create the ruby strings that represent the lines of the source. - VALUE lines = rb_ary_new_capa(newline_list->size - (last_push ? 0 : 1)); + VALUE lines = rb_ary_new_capa(line_offsets->size - (last_push ? 0 : 1)); - for (size_t index = 0; index < newline_list->size - 1; index++) { - size_t offset = newline_list->offsets[index]; - size_t length = newline_list->offsets[index + 1] - offset; + for (size_t index = 0; index < line_offsets->size - 1; index++) { + size_t offset = line_offsets->offsets[index]; + size_t length = line_offsets->offsets[index + 1] - offset; rb_ary_push(lines, rb_enc_str_new(start + offset, length, scope_node->encoding)); } @@ -11131,145 +11450,35 @@ pm_parse_file_script_lines(const pm_scope_node_t *scope_node, const pm_parser_t return lines; } -// This is essentially pm_string_mapped_init(), preferring to memory map the -// file, with additional handling for files that require blocking to properly -// read (e.g. pipes). -static pm_string_init_result_t -pm_read_file(pm_string_t *string, const char *filepath) -{ -#ifdef _WIN32 - // Open the file for reading. - int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0); - if (length == 0) return PM_STRING_INIT_ERROR_GENERIC; - - WCHAR *wfilepath = xmalloc(sizeof(WCHAR) * ((size_t) length)); - if ((wfilepath == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, wfilepath, length) == 0)) { - xfree(wfilepath); - return PM_STRING_INIT_ERROR_GENERIC; - } - - HANDLE file = CreateFileW(wfilepath, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); - if (file == INVALID_HANDLE_VALUE) { - pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC; - - if (GetLastError() == ERROR_ACCESS_DENIED) { - DWORD attributes = GetFileAttributesW(wfilepath); - if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = PM_STRING_INIT_ERROR_DIRECTORY; - } - } - - xfree(wfilepath); - return result; - } - - // Get the file size. - DWORD file_size = GetFileSize(file, NULL); - if (file_size == INVALID_FILE_SIZE) { - CloseHandle(file); - xfree(wfilepath); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (file_size == 0) { - CloseHandle(file); - xfree(wfilepath); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - // Create a mapping of the file. - HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); - if (mapping == NULL) { - CloseHandle(file); - xfree(wfilepath); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Map the file into memory. - uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - CloseHandle(file); - xfree(wfilepath); - - if (source == NULL) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size }; - return PM_STRING_INIT_SUCCESS; -#elif defined(_POSIX_MAPPED_FILES) - // Open the file for reading - const int open_mode = O_RDONLY | O_NONBLOCK; - int fd = open(filepath, open_mode); - if (fd == -1) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Ensure it is a file and not a directory - if (S_ISDIR(sb.st_mode)) { - close(fd); - return PM_STRING_INIT_ERROR_DIRECTORY; - } - - // We need to wait for data first before reading from pipes and character - // devices. To not block the entire VM, we need to release the GVL while - // reading. Use IO#read to do this and let the GC handle closing the FD. - if (S_ISFIFO(sb.st_mode) || S_ISCHR(sb.st_mode)) { - VALUE io = rb_io_fdopen((int) fd, open_mode, filepath); - rb_io_wait(io, RB_INT2NUM(RUBY_IO_READABLE), Qnil); - VALUE contents = rb_funcall(io, rb_intern("read"), 0); - - if (!RB_TYPE_P(contents, T_STRING)) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - long len = RSTRING_LEN(contents); - if (len < 0) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - size_t length = (size_t) len; - uint8_t *source = malloc(length); - memcpy(source, RSTRING_PTR(contents), length); - *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length }; - - return PM_STRING_INIT_SUCCESS; - } - - // mmap the file descriptor to virtually get the contents - size_t size = (size_t) sb.st_size; - uint8_t *source = NULL; +struct load_from_fd_args { + VALUE path; + VALUE io; + int open_mode; + int fd; +}; - if (size == 0) { - close(fd); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; +static VALUE +close_file(VALUE args) +{ + struct load_from_fd_args *arg = (void *)args; + if (arg->fd != -1) { + close(arg->fd); } - - source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - if (source == MAP_FAILED) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; + else if (!NIL_P(arg->io)) { + rb_io_close(arg->io); } + return Qnil; +} - close(fd); - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size }; - return PM_STRING_INIT_SUCCESS; -#else - return pm_string_file_init(string, filepath); -#endif +static VALUE +load_content(VALUE args) +{ + struct load_from_fd_args *arg = (void *)args; + VALUE io = rb_io_fdopen(arg->fd, arg->open_mode, RSTRING_PTR(arg->path)); + arg->io = io; + arg->fd = -1; + rb_io_wait(io, RB_INT2NUM(RUBY_IO_READABLE), Qnil); + return rb_funcall(io, rb_intern("read"), 0); } /** @@ -11279,17 +11488,46 @@ pm_read_file(pm_string_t *string, const char *filepath) VALUE pm_load_file(pm_parse_result_t *result, VALUE filepath, bool load_error) { - pm_string_init_result_t init_result = pm_read_file(&result->input, RSTRING_PTR(filepath)); + pm_source_init_result_t init_result; + result->source = pm_source_mapped_new(RSTRING_PTR(filepath), O_RDONLY | O_NONBLOCK, &init_result); - if (init_result == PM_STRING_INIT_SUCCESS) { - pm_options_frozen_string_literal_init(&result->options); + if (init_result == PM_SOURCE_INIT_SUCCESS) { + pm_options_frozen_string_literal_init(result->options); return Qnil; } int err; - if (init_result == PM_STRING_INIT_ERROR_DIRECTORY) { + + // For non-regular files (pipes, character devices), we need to read + // through Ruby IO to properly release the GVL while waiting for data. + if (init_result == PM_SOURCE_INIT_ERROR_NON_REGULAR) { + struct load_from_fd_args args = { + .path = filepath, + .open_mode = O_RDONLY | O_NONBLOCK, + .fd = rb_cloexec_open(RSTRING_PTR(filepath), args.open_mode, 0), + .io = Qnil, + }; + if (args.fd == -1) goto error_generic; + VALUE contents = rb_ensure(load_content, (VALUE)&args, close_file, (VALUE)&args); + + if (!RB_TYPE_P(contents, T_STRING)) goto error_generic; + + long len = RSTRING_LEN(contents); + if (len < 0) goto error_generic; + + size_t length = (size_t) len; + uint8_t *source_data = xmalloc(length); + memcpy(source_data, RSTRING_PTR(contents), length); + result->source = pm_source_owned_new(source_data, length); + + pm_options_frozen_string_literal_init(result->options); + return Qnil; + } + + if (init_result == PM_SOURCE_INIT_ERROR_DIRECTORY) { err = EISDIR; } else { +error_generic: #ifdef _WIN32 err = rb_w32_map_errno(GetLastError()); #else @@ -11323,11 +11561,13 @@ VALUE pm_parse_file(pm_parse_result_t *result, VALUE filepath, VALUE *script_lines) { result->node.filepath_encoding = rb_enc_get(filepath); - pm_options_filepath_set(&result->options, RSTRING_PTR(filepath)); + pm_options_filepath_set(result->options, RSTRING_PTR(filepath)); RB_GC_GUARD(filepath); - pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options); - pm_node_t *node = pm_parse(&result->parser); + pm_options_version_for_current_ruby_set(result->options); + + result->parser = pm_parser_new(result->arena, pm_source_source(result->source), pm_source_length(result->source), result->options); + pm_node_t *node = pm_parse(result->parser); VALUE error = pm_parse_process(result, node, script_lines); @@ -11340,7 +11580,7 @@ pm_parse_file(pm_parse_result_t *result, VALUE filepath, VALUE *script_lines) VALUE constant_script_lines = rb_const_get_at(rb_cObject, id_script_lines); if (RB_TYPE_P(constant_script_lines, T_HASH)) { - rb_hash_aset(constant_script_lines, filepath, pm_parse_file_script_lines(&result->node, &result->parser)); + rb_hash_aset(constant_script_lines, filepath, pm_parse_file_script_lines(&result->node, result->parser)); } } @@ -11376,20 +11616,36 @@ pm_parse_string(pm_parse_result_t *result, VALUE source, VALUE filepath, VALUE * return rb_exc_new_cstr(rb_eArgError, "invalid source encoding"); } - pm_options_frozen_string_literal_init(&result->options); - pm_string_constant_init(&result->input, RSTRING_PTR(source), RSTRING_LEN(source)); - pm_options_encoding_set(&result->options, rb_enc_name(encoding)); + pm_options_frozen_string_literal_init(result->options); + result->source = pm_source_constant_new((const uint8_t *) RSTRING_PTR(source), (size_t) RSTRING_LEN(source)); + pm_options_encoding_set(result->options, rb_enc_name(encoding)); result->node.filepath_encoding = rb_enc_get(filepath); - pm_options_filepath_set(&result->options, RSTRING_PTR(filepath)); + pm_options_filepath_set(result->options, RSTRING_PTR(filepath)); RB_GC_GUARD(filepath); - pm_parser_init(&result->parser, pm_string_source(&result->input), pm_string_length(&result->input), &result->options); - pm_node_t *node = pm_parse(&result->parser); + pm_options_version_for_current_ruby_set(result->options); + + result->parser = pm_parser_new(result->arena, pm_source_source(result->source), pm_source_length(result->source), result->options); + pm_node_t *node = pm_parse(result->parser); return pm_parse_process(result, node, script_lines); } +struct rb_stdin_wrapper { + VALUE rb_stdin; + int eof_seen; +}; + +static int +pm_parse_stdin_eof(void *stream) +{ + struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream; + return wrapped_stdin->eof_seen; +} + +VALUE rb_io_gets_limit_internal(VALUE io, long limit); + /** * An implementation of fgets that is suitable for use with Ruby IO objects. */ @@ -11398,7 +11654,9 @@ pm_parse_stdin_fgets(char *string, int size, void *stream) { RUBY_ASSERT(size > 0); - VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1)); + struct rb_stdin_wrapper * wrapped_stdin = (struct rb_stdin_wrapper *)stream; + + VALUE line = rb_io_gets_limit_internal(wrapped_stdin->rb_stdin, size - 1); if (NIL_P(line)) { return NULL; } @@ -11409,6 +11667,13 @@ pm_parse_stdin_fgets(char *string, int size, void *stream) memcpy(string, cstr, length); string[length] = '\0'; + // We're reading strings from stdin via gets. We'll assume that if the + // string is smaller than the requested length, and doesn't end with a + // newline, that we hit EOF. + if (length < (size - 1) && string[length - 1] != '\n') { + wrapped_stdin->eof_seen = 1; + } + return string; } @@ -11423,15 +11688,15 @@ void rb_reset_argf_lineno(long n); VALUE pm_parse_stdin(pm_parse_result_t *result) { - pm_options_frozen_string_literal_init(&result->options); + pm_options_frozen_string_literal_init(result->options); - pm_buffer_t buffer; - pm_node_t *node = pm_parse_stream(&result->parser, &buffer, (void *) rb_stdin, pm_parse_stdin_fgets, &result->options); + struct rb_stdin_wrapper wrapped_stdin = { + rb_stdin, + 0 + }; - // Copy the allocated buffer contents into the input string so that it gets - // freed. At this point we've handed over ownership, so we don't need to - // free the buffer itself. - pm_string_owned_init(&result->input, (uint8_t *) pm_buffer_value(&buffer), pm_buffer_length(&buffer)); + result->source = pm_source_stream_new((void *) &wrapped_stdin, pm_parse_stdin_fgets, pm_parse_stdin_eof); + pm_node_t *node = pm_parse_stream(&result->parser, result->arena, result->source, result->options); // When we're done parsing, we reset $. because we don't want the fact that // we went through an IO object to be visible to the user. @@ -11440,6 +11705,14 @@ pm_parse_stdin(pm_parse_result_t *result) return pm_parse_process(result, node, NULL); } +#define PM_VERSION_FOR_RELEASE(major, minor) PM_VERSION_FOR_RELEASE_IMPL(major, minor) +#define PM_VERSION_FOR_RELEASE_IMPL(major, minor) #major "." #minor + +void pm_options_version_for_current_ruby_set(pm_options_t *options) { + const char *version = PM_VERSION_FOR_RELEASE(RUBY_API_VERSION_MAJOR, RUBY_API_VERSION_MINOR); + pm_options_version_set(options, version, strlen(version)); +} + #undef NEW_ISEQ #define NEW_ISEQ OLD_ISEQ |
