diff options
Diffstat (limited to 'ruby_parser.c')
| -rw-r--r-- | ruby_parser.c | 275 |
1 files changed, 153 insertions, 122 deletions
diff --git a/ruby_parser.c b/ruby_parser.c index decc05f619..a96fc4974b 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -32,18 +32,14 @@ #include "vm_core.h" #include "symbol.h" -static int -is_ascii_string2(VALUE str) -{ - return is_ascii_string(str); -} +#define parser_encoding const void RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0) static VALUE syntax_error_append(VALUE exc, VALUE file, int line, int column, - void *enc, const char *fmt, va_list args) + parser_encoding *enc, const char *fmt, va_list args) { - return rb_syntax_error_append(exc, file, line, column, (rb_encoding *)enc, fmt, args); + return rb_syntax_error_append(exc, file, line, column, enc, fmt, args); } static int @@ -59,9 +55,9 @@ dvar_defined(ID id, const void *p) } static int -is_usascii_enc(void *enc) +is_usascii_enc(parser_encoding *enc) { - return rb_is_usascii_enc((rb_encoding *)enc); + return rb_is_usascii_enc(enc); } static int @@ -83,21 +79,21 @@ is_notop_id2(ID id) } static VALUE -enc_str_new(const char *ptr, long len, void *enc) +enc_str_new(const char *ptr, long len, parser_encoding *enc) { - return rb_enc_str_new(ptr, len, (rb_encoding *)enc); + return rb_enc_str_new(ptr, len, enc); } static int -enc_isalnum(OnigCodePoint c, void *enc) +enc_isalnum(OnigCodePoint c, parser_encoding *enc) { - return rb_enc_isalnum(c, (rb_encoding *)enc); + return rb_enc_isalnum(c, enc); } static int -enc_precise_mbclen(const char *p, const char *e, void *enc) +enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc) { - return rb_enc_precise_mbclen(p, e, (rb_encoding *)enc); + return rb_enc_precise_mbclen(p, e, enc); } static int @@ -113,87 +109,75 @@ mbclen_charfound_len(int len) } static const char * -enc_name(void *enc) +enc_name(parser_encoding *enc) { - return rb_enc_name((rb_encoding *)enc); + return rb_enc_name(enc); } static char * -enc_prev_char(const char *s, const char *p, const char *e, void *enc) +enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc) { - return rb_enc_prev_char(s, p, e, (rb_encoding *)enc); + return rb_enc_prev_char(s, p, e, enc); } -static void * +static parser_encoding * enc_get(VALUE obj) { - return (void *)rb_enc_get(obj); + return rb_enc_get(obj); } static int -enc_asciicompat(void *enc) +enc_asciicompat(parser_encoding *enc) { - return rb_enc_asciicompat((rb_encoding *)enc); + return rb_enc_asciicompat(enc); } -static void * +static parser_encoding * utf8_encoding(void) { - return (void *)rb_utf8_encoding(); + return rb_utf8_encoding(); } -static VALUE -enc_associate(VALUE obj, void *enc) -{ - return rb_enc_associate(obj, (rb_encoding *)enc); -} - -static void * +static parser_encoding * ascii8bit_encoding(void) { - return (void *)rb_ascii8bit_encoding(); + return rb_ascii8bit_encoding(); } static int -enc_codelen(int c, void *enc) +enc_codelen(int c, parser_encoding *enc) { - return rb_enc_codelen(c, (rb_encoding *)enc); + return rb_enc_codelen(c, enc); } static int -enc_mbcput(unsigned int c, void *buf, void *enc) +enc_mbcput(unsigned int c, void *buf, parser_encoding *enc) { - return rb_enc_mbcput(c, buf, (rb_encoding *)enc); + return rb_enc_mbcput(c, buf, enc); } -static void * +static parser_encoding * enc_from_index(int idx) { - return (void *)rb_enc_from_index(idx); + return rb_enc_from_index(idx); } static int -enc_isspace(OnigCodePoint c, void *enc) +enc_isspace(OnigCodePoint c, parser_encoding *enc) { - return rb_enc_isspace(c, (rb_encoding *)enc); + return rb_enc_isspace(c, enc); } static ID -intern3(const char *name, long len, void *enc) -{ - return rb_intern3(name, len, (rb_encoding *)enc); -} - -static void * -usascii_encoding(void) +intern3(const char *name, long len, parser_encoding *enc) { - return (void *)rb_usascii_encoding(); + return rb_intern3(name, len, enc); } static int -enc_symname_type(const char *name, long len, void *enc, unsigned int allowed_attrset) +enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset) { - return rb_enc_symname_type(name, len, (rb_encoding *)enc, allowed_attrset); + return rb_enc_symname_type(name, len, enc, allowed_attrset); } typedef struct { @@ -201,6 +185,7 @@ typedef struct { rb_encoding *enc; NODE *succ_block; const rb_code_location_t *loc; + rb_parser_assignable_func assignable; } reg_named_capture_assign_t; static int @@ -214,11 +199,12 @@ reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end, long len = name_end - name; const char *s = (const char *)name; - return rb_reg_named_capture_assign_iter_impl(p, s, len, (void *)enc, &arg->succ_block, loc); + return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc, arg->assignable); } static NODE * -reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc) +reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc, + rb_parser_assignable_func assignable) { reg_named_capture_assign_t arg; @@ -226,6 +212,7 @@ reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_lo arg.enc = rb_enc_get(regexp); arg.succ_block = 0; arg.loc = loc; + arg.assignable = assignable; onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg); if (!arg.succ_block) return 0; @@ -292,12 +279,6 @@ arg_error(void) return rb_eArgError; } -static rb_ast_t * -ast_new(VALUE nb) -{ - return IMEMO_NEW(rb_ast_t, imemo_ast, nb); -} - static VALUE static_id2sym(ID id) { @@ -305,25 +286,25 @@ static_id2sym(ID id) } static long -str_coderange_scan_restartable(const char *s, const char *e, void *enc, int *cr) +str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr) { - return rb_str_coderange_scan_restartable(s, e, (rb_encoding *)enc, cr); + return rb_str_coderange_scan_restartable(s, e, enc, cr); } static int -enc_mbminlen(void *enc) +enc_mbminlen(parser_encoding *enc) { - return rb_enc_mbminlen((rb_encoding *)enc); + return rb_enc_mbminlen(enc); } static bool -enc_isascii(OnigCodePoint c, void *enc) +enc_isascii(OnigCodePoint c, parser_encoding *enc) { - return rb_enc_isascii(c, (rb_encoding *)enc); + return rb_enc_isascii(c, enc); } static OnigCodePoint -enc_mbc_to_codepoint(const char *p, const char *e, void *enc) +enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc) { const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p); const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e); @@ -346,15 +327,11 @@ static const rb_parser_config_t rb_global_parser_config = { .nonempty_memcpy = nonempty_memcpy, .xmalloc_mul_add = rb_xmalloc_mul_add, - .ast_new = ast_new, - .compile_callback = rb_suppress_tracing, .reg_named_capture_assign = reg_named_capture_assign, .attr_get = rb_attr_get, - .ary_new = rb_ary_new, - .ary_push = rb_ary_push, .ary_new_from_args = rb_ary_new_from_args, .ary_unshift = rb_ary_unshift, @@ -373,26 +350,18 @@ static const rb_parser_config_t rb_global_parser_config = { .id2name = rb_id2name, .id2str = rb_id2str, .id2sym = rb_id2sym, - .sym2id = rb_sym2id, .str_catf = rb_str_catf, .str_cat_cstr = rb_str_cat_cstr, - .str_modify = rb_str_modify, - .str_set_len = rb_str_set_len, - .str_cat = rb_str_cat, .str_resize = rb_str_resize, .str_new = rb_str_new, .str_new_cstr = rb_str_new_cstr, .str_to_interned_str = rb_str_to_interned_str, - .is_ascii_string = is_ascii_string2, .enc_str_new = enc_str_new, .str_vcatf = rb_str_vcatf, - .string_value_cstr = rb_string_value_cstr, .rb_sprintf = rb_sprintf, .rstring_ptr = RSTRING_PTR, - .rstring_end = RSTRING_END, .rstring_len = RSTRING_LEN, - .obj_as_string = rb_obj_as_string, .int2num = rb_int2num_inline, @@ -415,7 +384,6 @@ static const rb_parser_config_t rb_global_parser_config = { .enc_get = enc_get, .enc_asciicompat = enc_asciicompat, .utf8_encoding = utf8_encoding, - .enc_associate = enc_associate, .ascii8bit_encoding = ascii8bit_encoding, .enc_codelen = enc_codelen, .enc_mbcput = enc_mbcput, @@ -424,8 +392,6 @@ static const rb_parser_config_t rb_global_parser_config = { .enc_isspace = enc_isspace, .enc_coderange_7bit = ENC_CODERANGE_7BIT, .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN, - .usascii_encoding = usascii_encoding, - .enc_coderange_broken = ENC_CODERANGE_BROKEN, .enc_mbminlen = enc_mbminlen, .enc_isascii = enc_isascii, .enc_mbc_to_codepoint = enc_mbc_to_codepoint, @@ -439,10 +405,9 @@ static const rb_parser_config_t rb_global_parser_config = { .errinfo = rb_errinfo, .set_errinfo = rb_set_errinfo, - .exc_raise = rb_exc_raise, .make_exception = rb_make_exception, - .sized_xfree = ruby_sized_xfree, + .sized_xfree = ruby_xfree_sized, .sized_realloc_n = ruby_sized_realloc_n, .gc_guard = gc_guard, .gc_mark = rb_gc_mark, @@ -526,6 +491,7 @@ parser_free(void *ptr) { struct ruby_parser *parser = (struct ruby_parser*)ptr; rb_ruby_parser_free(parser->parser_params); + xfree(parser); } static size_t @@ -630,8 +596,8 @@ rb_parser_keep_tokens(VALUE vparser) rb_ruby_parser_keep_tokens(parser->parser_params); } -VALUE -rb_parser_lex_get_str(struct lex_pointer_string *ptr_str) +rb_parser_string_t * +rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str) { char *beg, *end, *start; long len; @@ -641,20 +607,30 @@ rb_parser_lex_get_str(struct lex_pointer_string *ptr_str) len = RSTRING_LEN(s); start = beg; if (ptr_str->ptr) { - if (len == ptr_str->ptr) return Qnil; + if (len == ptr_str->ptr) return 0; beg += ptr_str->ptr; len -= ptr_str->ptr; } end = memchr(beg, '\n', len); if (end) len = ++end - beg; ptr_str->ptr += len; - return rb_str_subseq(s, beg - start, len); + return rb_str_to_parser_string(p, rb_str_subseq(s, beg - start, len)); } -static VALUE +static rb_parser_string_t * lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count) { - return rb_parser_lex_get_str((struct lex_pointer_string *)input); + return rb_parser_lex_get_str(p, (struct lex_pointer_string *)input); +} + +static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines); + +static rb_ast_t* +parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line) +{ + rb_ast_t *ast = rb_parser_compile(p, gets, fname, input, line); + parser_aset_script_lines_for(fname, ast->body.script_lines); + return ast; } static rb_ast_t* @@ -666,7 +642,7 @@ parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int lin parser->data.lex_str.str = str; parser->data.lex_str.ptr = 0; - return rb_parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line); + return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line); } static rb_encoding * @@ -694,15 +670,16 @@ parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int li VALUE rb_io_gets_internal(VALUE io); -static VALUE +static rb_parser_string_t * lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count) { VALUE io = (VALUE)input; - - return rb_io_gets_internal(io); + VALUE line = rb_io_gets_internal(io); + if (NIL_P(line)) return 0; + return rb_str_to_parser_string(p, line); } -static VALUE +static rb_parser_string_t * lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index) { VALUE array = (VALUE)data; @@ -712,8 +689,11 @@ lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index) if (!rb_enc_asciicompat(rb_enc_get(str))) { rb_raise(rb_eArgError, "invalid source encoding"); } + return rb_str_to_parser_string(p, str); + } + else { + return 0; } - return str; } static rb_ast_t* @@ -722,7 +702,7 @@ parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, in parser->type = lex_type_io; parser->data.lex_io.file = file; - return rb_parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start); + return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start); } static rb_ast_t* @@ -731,7 +711,7 @@ parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int s parser->type = lex_type_array; parser->data.lex_array.ary = array; - return rb_parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start); + return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start); } static rb_ast_t* @@ -739,72 +719,95 @@ parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_ { parser->type = lex_type_generic; - return rb_parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start); + return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start); +} + +static void +ast_free(void *ptr) +{ + rb_ast_t *ast = (rb_ast_t *)ptr; + rb_ast_free(ast); +} + +static const rb_data_type_t ast_data_type = { + "AST", + { + NULL, + ast_free, + NULL, // No dsize() because this object does not appear in ObjectSpace. + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY +}; + +static VALUE +ast_alloc(void) +{ + return TypedData_Wrap_Struct(0, &ast_data_type, NULL); } -rb_ast_t* +VALUE rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start) { struct ruby_parser *parser; - rb_ast_t *ast; + VALUE ast_value = ast_alloc(); TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser); - ast = parser_compile_file_path(parser, fname, file, start); + DATA_PTR(ast_value) = parser_compile_file_path(parser, fname, file, start); RB_GC_GUARD(vparser); - return ast; + return ast_value; } -rb_ast_t* +VALUE rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start) { struct ruby_parser *parser; - rb_ast_t *ast; + VALUE ast_value = ast_alloc(); TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser); - ast = parser_compile_array(parser, fname, array, start); + DATA_PTR(ast_value) = parser_compile_array(parser, fname, array, start); RB_GC_GUARD(vparser); - return ast; + return ast_value; } -rb_ast_t* +VALUE rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start) { struct ruby_parser *parser; - rb_ast_t *ast; + VALUE ast_value = ast_alloc(); TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser); - ast = parser_compile_generic(parser, lex_gets, fname, input, start); + DATA_PTR(ast_value) = parser_compile_generic(parser, lex_gets, fname, input, start); RB_GC_GUARD(vparser); - return ast; + return ast_value; } -rb_ast_t* +VALUE rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line) { struct ruby_parser *parser; - rb_ast_t *ast; + VALUE ast_value = ast_alloc(); TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser); - ast = parser_compile_string(parser, f, s, line); + DATA_PTR(ast_value) = parser_compile_string(parser, f, s, line); RB_GC_GUARD(vparser); - return ast; + return ast_value; } -rb_ast_t* +VALUE rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line) { struct ruby_parser *parser; - rb_ast_t *ast; + VALUE ast_value = ast_alloc(); TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser); - ast = parser_compile_string_path(parser, f, s, line); + DATA_PTR(ast_value) = parser_compile_string_path(parser, f, s, line); RB_GC_GUARD(vparser); - return ast; + return ast_value; } VALUE @@ -855,6 +858,7 @@ VALUE rb_parser_build_script_lines_from(rb_parser_ary_t *lines) { int i; + if (!lines) return Qnil; if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) { rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type); } @@ -869,7 +873,7 @@ rb_parser_build_script_lines_from(rb_parser_ary_t *lines) VALUE rb_str_new_parser_string(rb_parser_string_t *str) { - VALUE string = rb_enc_interned_str(str->ptr, str->len, str->enc); + VALUE string = rb_enc_literal_str(str->ptr, str->len, str->enc); rb_enc_str_coderange(string); return string; } @@ -1072,12 +1076,12 @@ rb_node_encoding_val(const NODE *node) return rb_enc_from_encoding(RNODE_ENCODING(node)->enc); } -void -rb_parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines) +static void +parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines) { VALUE hash, script_lines; ID script_lines_id; - if (NIL_P(path) || !lines || FIXNUM_P((VALUE)lines)) return; + if (NIL_P(path) || !lines) return; CONST_ID(script_lines_id, "SCRIPT_LINES__"); if (!rb_const_defined_at(rb_cObject, script_lines_id)) return; hash = rb_const_get_at(rb_cObject, script_lines_id); @@ -1086,3 +1090,30 @@ rb_parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines) script_lines = rb_parser_build_script_lines_from(lines); rb_hash_aset(hash, path, script_lines); } + +VALUE +rb_ruby_ast_new(const NODE *const root) +{ + rb_ast_t *ast; + VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, ast); +#ifdef UNIVERSAL_PARSER + ast->config = &rb_global_parser_config; +#endif + ast->body = (rb_ast_body_t){ + .root = root, + .frozen_string_literal = -1, + .coverage_enabled = -1, + .script_lines = NULL, + .line_count = 0, + }; + return ast_value; +} + +rb_ast_t * +rb_ruby_ast_data_get(VALUE ast_value) +{ + rb_ast_t *ast; + if (NIL_P(ast_value)) return NULL; + TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, ast); + return ast; +} |
