diff options
Diffstat (limited to 'ast.c')
| -rw-r--r-- | ast.c | 404 |
1 files changed, 383 insertions, 21 deletions
@@ -14,6 +14,7 @@ static VALUE rb_mAST; static VALUE rb_cNode; +static VALUE rb_cLocation; struct ASTNodeData { VALUE ast_value; @@ -31,9 +32,13 @@ static size_t node_memsize(const void *ptr) { struct ASTNodeData *data = (struct ASTNodeData *)ptr; - rb_ast_t *ast = rb_ruby_ast_data_get(data->ast_value); + size_t size = sizeof(struct ASTNodeData); + if (data->ast_value) { + rb_ast_t *ast = rb_ruby_ast_data_get(data->ast_value); + size += rb_ast_memsize(ast); + } - return sizeof(struct ASTNodeData) + rb_ast_memsize(ast); + return size; } static const rb_data_type_t rb_node_type = { @@ -43,6 +48,32 @@ static const rb_data_type_t rb_node_type = { RUBY_TYPED_FREE_IMMEDIATELY, }; +struct ASTLocationData { + int first_lineno; + int first_column; + int last_lineno; + int last_column; +}; + +static void +location_gc_mark(void *ptr) +{ +} + +static size_t +location_memsize(const void *ptr) +{ + return sizeof(struct ASTLocationData); +} + +static const rb_data_type_t rb_location_type = { + "AST/location", + {location_gc_mark, RUBY_TYPED_DEFAULT_FREE, location_memsize,}, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + + static VALUE rb_ast_node_alloc(VALUE klass); static void @@ -89,6 +120,16 @@ ast_parse_done(VALUE ast_value) } static VALUE +setup_vparser(VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) +{ + VALUE vparser = ast_parse_new(); + if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser); + if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); + if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); + return vparser; +} + +static VALUE ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { return rb_ast_parse_str(str, keep_script_lines, error_tolerant, keep_tokens); @@ -97,13 +138,9 @@ ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_scri static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens) { - VALUE ast_value; - + VALUE ast_value = Qnil; StringValue(str); - VALUE vparser = ast_parse_new(); - if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser); - if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); - if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); + VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens); ast_value = rb_parser_compile_string_path(vparser, Qnil, str, 1); return ast_parse_done(ast_value); } @@ -123,10 +160,7 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VAL f = rb_file_open_str(path, "r"); rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-")); - VALUE vparser = ast_parse_new(); - if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser); - if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); - if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); + VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens); ast_value = rb_parser_compile_file_path(vparser, Qnil, f, 1); rb_io_close(f); return ast_parse_done(ast_value); @@ -138,10 +172,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, V VALUE ast_value = Qnil; array = rb_check_array_type(array); - VALUE vparser = ast_parse_new(); - if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser); - if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser); - if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser); + VALUE vparser = setup_vparser(keep_script_lines, error_tolerant, keep_tokens); ast_value = rb_parser_compile_array(vparser, Qnil, array, 1); return ast_parse_done(ast_value); } @@ -334,6 +365,24 @@ dump_array(VALUE ast_value, const struct RNode_LIST *node) } static VALUE +dump_parser_array(VALUE ast_value, rb_parser_ary_t *p_ary) +{ + VALUE ary; + + if (p_ary->data_type != PARSER_ARY_DATA_NODE) { + rb_bug("unexpected rb_parser_ary_data_type: %d", p_ary->data_type); + } + + ary = rb_ary_new(); + + for (long i = 0; i < p_ary->len; i++) { + rb_ary_push(ary, NEW_CHILD(ast_value, p_ary->data[i])); + } + + return ary; +} + +static VALUE var_name(ID id) { if (!id) return Qnil; @@ -355,6 +404,19 @@ rest_arg(VALUE ast_value, const NODE *rest_arg) return NODE_NAMED_REST_P(rest_arg) ? NEW_CHILD(ast_value, rest_arg) : no_name_rest(); } +static ID +node_colon_name(const NODE *node) +{ + switch (nd_type(node)) { + case NODE_COLON2: + return RNODE_COLON2(node)->nd_mid; + case NODE_COLON3: + return RNODE_COLON3(node)->nd_mid; + default: + rb_bug("unexpected node: %s", ruby_node_name(nd_type(node))); + } +} + static VALUE node_children(VALUE ast_value, const NODE *node) { @@ -402,7 +464,7 @@ node_children(VALUE ast_value, const NODE *node) case NODE_RESCUE: return rb_ary_new_from_node_args(ast_value, 3, RNODE_RESCUE(node)->nd_head, RNODE_RESCUE(node)->nd_resq, RNODE_RESCUE(node)->nd_else); case NODE_RESBODY: - return rb_ary_new_from_node_args(ast_value, 3, RNODE_RESBODY(node)->nd_args, RNODE_RESBODY(node)->nd_body, RNODE_RESBODY(node)->nd_next); + return rb_ary_new_from_node_args(ast_value, 4, RNODE_RESBODY(node)->nd_args, RNODE_RESBODY(node)->nd_exc_var, RNODE_RESBODY(node)->nd_body, RNODE_RESBODY(node)->nd_next); case NODE_ENSURE: return rb_ary_new_from_node_args(ast_value, 2, RNODE_ENSURE(node)->nd_head, RNODE_ENSURE(node)->nd_ensr); case NODE_AND: @@ -448,7 +510,7 @@ node_children(VALUE ast_value, const NODE *node) if (RNODE_CDECL(node)->nd_vid) { return rb_ary_new_from_args(2, ID2SYM(RNODE_CDECL(node)->nd_vid), NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_value)); } - return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_else), ID2SYM(RNODE_COLON2(RNODE_CDECL(node)->nd_else)->nd_mid), NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_value)); + return rb_ary_new_from_args(3, NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_else), ID2SYM(node_colon_name(RNODE_CDECL(node)->nd_else)), NEW_CHILD(ast_value, RNODE_CDECL(node)->nd_value)); case NODE_OP_ASGN1: return rb_ary_new_from_args(4, NEW_CHILD(ast_value, RNODE_OP_ASGN1(node)->nd_recv), ID2SYM(RNODE_OP_ASGN1(node)->nd_mid), @@ -577,7 +639,7 @@ node_children(VALUE ast_value, const NODE *node) case NODE_VALIAS: return rb_ary_new_from_args(2, ID2SYM(RNODE_VALIAS(node)->nd_alias), ID2SYM(RNODE_VALIAS(node)->nd_orig)); case NODE_UNDEF: - return rb_ary_new_from_node_args(ast_value, 1, RNODE_UNDEF(node)->nd_undef); + return rb_ary_new_from_args(1, dump_parser_array(ast_value, RNODE_UNDEF(node)->nd_undefs)); case NODE_CLASS: return rb_ary_new_from_node_args(ast_value, 3, RNODE_CLASS(node)->nd_cpath, RNODE_CLASS(node)->nd_super, RNODE_CLASS(node)->nd_body); case NODE_MODULE: @@ -636,7 +698,7 @@ node_children(VALUE ast_value, const NODE *node) : var_name(ainfo->rest_arg)), (ainfo->no_kwarg ? Qfalse : NEW_CHILD(ast_value, (NODE *)ainfo->kw_args)), (ainfo->no_kwarg ? Qfalse : NEW_CHILD(ast_value, ainfo->kw_rest_arg)), - var_name(ainfo->block_arg)); + (ainfo->no_blockarg ? Qfalse : var_name(ainfo->block_arg))); } case NODE_SCOPE: { @@ -702,6 +764,249 @@ ast_node_children(rb_execution_context_t *ec, VALUE self) return node_children(data->ast_value, data->node); } +static int +null_loc_p(rb_code_location_t *loc) +{ + return (loc->beg_pos.lineno == 0 && loc->beg_pos.column == -1 && loc->end_pos.lineno == 0 && loc->end_pos.column == -1); +} + +static VALUE +location_new(rb_code_location_t *loc) +{ + VALUE obj; + struct ASTLocationData *data; + + if (null_loc_p(loc)) return Qnil; + + obj = TypedData_Make_Struct(rb_cLocation, struct ASTLocationData, &rb_location_type, data); + data->first_lineno = loc->beg_pos.lineno; + data->first_column = loc->beg_pos.column; + data->last_lineno = loc->end_pos.lineno; + data->last_column = loc->end_pos.column; + + return obj; +} + +static VALUE +node_locations(VALUE ast_value, const NODE *node) +{ + enum node_type type = nd_type(node); + switch (type) { + case NODE_ALIAS: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_ALIAS(node)->keyword_loc)); + case NODE_AND: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_AND(node)->operator_loc)); + case NODE_BLOCK_PASS: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_BLOCK_PASS(node)->operator_loc)); + case NODE_BREAK: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_BREAK(node)->keyword_loc)); + case NODE_CASE: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_CASE(node)->case_keyword_loc), + location_new(&RNODE_CASE(node)->end_keyword_loc)); + case NODE_CASE2: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_CASE2(node)->case_keyword_loc), + location_new(&RNODE_CASE2(node)->end_keyword_loc)); + case NODE_CASE3: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_CASE3(node)->case_keyword_loc), + location_new(&RNODE_CASE3(node)->end_keyword_loc)); + case NODE_CLASS: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_CLASS(node)->class_keyword_loc), + location_new(&RNODE_CLASS(node)->inheritance_operator_loc), + location_new(&RNODE_CLASS(node)->end_keyword_loc)); + case NODE_COLON2: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_COLON2(node)->delimiter_loc), + location_new(&RNODE_COLON2(node)->name_loc)); + case NODE_COLON3: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_COLON3(node)->delimiter_loc), + location_new(&RNODE_COLON3(node)->name_loc)); + case NODE_DEFINED: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_DEFINED(node)->keyword_loc)); + case NODE_DOT2: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_DOT2(node)->operator_loc)); + case NODE_DOT3: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_DOT3(node)->operator_loc)); + case NODE_EVSTR: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_EVSTR(node)->opening_loc), + location_new(&RNODE_EVSTR(node)->closing_loc)); + case NODE_FLIP2: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_FLIP2(node)->operator_loc)); + case NODE_FLIP3: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_FLIP3(node)->operator_loc)); + case NODE_FOR: + return rb_ary_new_from_args(5, + location_new(nd_code_loc(node)), + location_new(&RNODE_FOR(node)->for_keyword_loc), + location_new(&RNODE_FOR(node)->in_keyword_loc), + location_new(&RNODE_FOR(node)->do_keyword_loc), + location_new(&RNODE_FOR(node)->end_keyword_loc)); + case NODE_LAMBDA: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_LAMBDA(node)->operator_loc), + location_new(&RNODE_LAMBDA(node)->opening_loc), + location_new(&RNODE_LAMBDA(node)->closing_loc)); + case NODE_IF: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_IF(node)->if_keyword_loc), + location_new(&RNODE_IF(node)->then_keyword_loc), + location_new(&RNODE_IF(node)->end_keyword_loc)); + case NODE_IN: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_IN(node)->in_keyword_loc), + location_new(&RNODE_IN(node)->then_keyword_loc), + location_new(&RNODE_IN(node)->operator_loc)); + case NODE_MODULE: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_MODULE(node)->module_keyword_loc), + location_new(&RNODE_MODULE(node)->end_keyword_loc)); + case NODE_NEXT: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_NEXT(node)->keyword_loc)); + case NODE_OR: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_OR(node)->operator_loc)); + case NODE_OP_ASGN1: + return rb_ary_new_from_args(5, + location_new(nd_code_loc(node)), + location_new(&RNODE_OP_ASGN1(node)->call_operator_loc), + location_new(&RNODE_OP_ASGN1(node)->opening_loc), + location_new(&RNODE_OP_ASGN1(node)->closing_loc), + location_new(&RNODE_OP_ASGN1(node)->binary_operator_loc)); + case NODE_OP_ASGN2: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_OP_ASGN2(node)->call_operator_loc), + location_new(&RNODE_OP_ASGN2(node)->message_loc), + location_new(&RNODE_OP_ASGN2(node)->binary_operator_loc)); + case NODE_POSTEXE: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_POSTEXE(node)->keyword_loc), + location_new(&RNODE_POSTEXE(node)->opening_loc), + location_new(&RNODE_POSTEXE(node)->closing_loc)); + case NODE_REDO: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_REDO(node)->keyword_loc)); + case NODE_REGX: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_REGX(node)->opening_loc), + location_new(&RNODE_REGX(node)->content_loc), + location_new(&RNODE_REGX(node)->closing_loc)); + case NODE_RETURN: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_RETURN(node)->keyword_loc)); + + case NODE_SCLASS: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_SCLASS(node)->class_keyword_loc), + location_new(&RNODE_SCLASS(node)->operator_loc), + location_new(&RNODE_SCLASS(node)->end_keyword_loc)); + + case NODE_SPLAT: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_SPLAT(node)->operator_loc)); + case NODE_SUPER: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_SUPER(node)->keyword_loc), + location_new(&RNODE_SUPER(node)->lparen_loc), + location_new(&RNODE_SUPER(node)->rparen_loc)); + case NODE_UNDEF: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_UNDEF(node)->keyword_loc)); + case NODE_UNLESS: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_UNLESS(node)->keyword_loc), + location_new(&RNODE_UNLESS(node)->then_keyword_loc), + location_new(&RNODE_UNLESS(node)->end_keyword_loc)); + case NODE_VALIAS: + return rb_ary_new_from_args(2, + location_new(nd_code_loc(node)), + location_new(&RNODE_VALIAS(node)->keyword_loc)); + case NODE_WHEN: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_WHEN(node)->keyword_loc), + location_new(&RNODE_WHEN(node)->then_keyword_loc)); + case NODE_WHILE: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_WHILE(node)->keyword_loc), + location_new(&RNODE_WHILE(node)->closing_loc)); + case NODE_UNTIL: + return rb_ary_new_from_args(3, + location_new(nd_code_loc(node)), + location_new(&RNODE_UNTIL(node)->keyword_loc), + location_new(&RNODE_UNTIL(node)->closing_loc)); + case NODE_YIELD: + return rb_ary_new_from_args(4, + location_new(nd_code_loc(node)), + location_new(&RNODE_YIELD(node)->keyword_loc), + location_new(&RNODE_YIELD(node)->lparen_loc), + location_new(&RNODE_YIELD(node)->rparen_loc)); + case NODE_ARGS_AUX: + case NODE_LAST: + break; + default: + return rb_ary_new_from_args(1, location_new(nd_code_loc(node))); + } + + rb_bug("node_locations: unknown node: %s", ruby_node_name(type)); +} + +static VALUE +ast_node_locations(rb_execution_context_t *ec, VALUE self) +{ + struct ASTNodeData *data; + TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data); + + return node_locations(data->ast_value, data->node); +} + static VALUE ast_node_first_lineno(rb_execution_context_t *ec, VALUE self) { @@ -769,7 +1074,7 @@ ast_node_all_tokens(rb_execution_context_t *ec, VALUE self) token = rb_ary_new_from_args(4, INT2FIX(parser_token->id), ID2SYM(rb_intern(parser_token->type_name)), str, loc); rb_ary_push(all_tokens, token); } - rb_obj_freeze(all_tokens); + rb_ary_freeze(all_tokens); return all_tokens; } @@ -805,6 +1110,61 @@ ast_node_script_lines(rb_execution_context_t *ec, VALUE self) return rb_parser_build_script_lines_from(ret); } +static VALUE +ast_location_first_lineno(rb_execution_context_t *ec, VALUE self) +{ + struct ASTLocationData *data; + TypedData_Get_Struct(self, struct ASTLocationData, &rb_location_type, data); + + return INT2NUM(data->first_lineno); +} + +static VALUE +ast_location_first_column(rb_execution_context_t *ec, VALUE self) +{ + struct ASTLocationData *data; + TypedData_Get_Struct(self, struct ASTLocationData, &rb_location_type, data); + + return INT2NUM(data->first_column); +} + +static VALUE +ast_location_last_lineno(rb_execution_context_t *ec, VALUE self) +{ + struct ASTLocationData *data; + TypedData_Get_Struct(self, struct ASTLocationData, &rb_location_type, data); + + return INT2NUM(data->last_lineno); +} + +static VALUE +ast_location_last_column(rb_execution_context_t *ec, VALUE self) +{ + struct ASTLocationData *data; + TypedData_Get_Struct(self, struct ASTLocationData, &rb_location_type, data); + + return INT2NUM(data->last_column); +} + +static VALUE +ast_location_inspect(rb_execution_context_t *ec, VALUE self) +{ + VALUE str; + VALUE cname; + struct ASTLocationData *data; + TypedData_Get_Struct(self, struct ASTLocationData, &rb_location_type, data); + + cname = rb_class_path(rb_obj_class(self)); + str = rb_str_new2("#<"); + + rb_str_append(str, cname); + rb_str_catf(str, ":@%d:%d-%d:%d>", + data->first_lineno, data->first_column, + data->last_lineno, data->last_column); + + return str; +} + #include "ast.rbinc" void @@ -812,5 +1172,7 @@ Init_ast(void) { rb_mAST = rb_define_module_under(rb_cRubyVM, "AbstractSyntaxTree"); rb_cNode = rb_define_class_under(rb_mAST, "Node", rb_cObject); + rb_cLocation = rb_define_class_under(rb_mAST, "Location", rb_cObject); rb_undef_alloc_func(rb_cNode); + rb_undef_alloc_func(rb_cLocation); } |
