diff options
Diffstat (limited to 'parse.y')
| -rw-r--r-- | parse.y | 8550 |
1 files changed, 6430 insertions, 2120 deletions
@@ -3,10 +3,9 @@ parse.y - $Author$ - $Date$ created at: Fri May 28 18:02:42 JST 1993 - Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 1993-2007 Yukihiro Matsumoto **********************************************************************/ @@ -14,49 +13,35 @@ #define YYDEBUG 1 #define YYERROR_VERBOSE 1 -#ifndef YYSTACK_USE_ALLOCA #define YYSTACK_USE_ALLOCA 0 -#endif -#include "ruby.h" -#include "env.h" -#include "intern.h" +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/encoding.h" #include "node.h" -#include "st.h" +#include "parse.h" +#include "id.h" +#include "regenc.h" #include <stdio.h> #include <errno.h> #include <ctype.h> -#define YYMALLOC rb_parser_malloc -#define YYREALLOC rb_parser_realloc -#define YYCALLOC rb_parser_calloc -#define YYFREE rb_parser_free +#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) + +#define YYMALLOC(size) rb_parser_malloc(parser, size) +#define YYREALLOC(ptr, size) rb_parser_realloc(parser, ptr, size) +#define YYCALLOC(nelem, size) rb_parser_calloc(parser, nelem, size) +#define YYFREE(ptr) rb_parser_free(parser, ptr) #define malloc YYMALLOC #define realloc YYREALLOC #define calloc YYCALLOC #define free YYFREE -static void *rb_parser_malloc _((size_t)); -static void *rb_parser_realloc _((void *, size_t)); -static void *rb_parser_calloc _((size_t, size_t)); -static void rb_parser_free _((void *)); -#define yyparse ruby_yyparse -#define yylex ruby_yylex -#define yyerror ruby_yyerror -#define yylval ruby_yylval -#define yychar ruby_yychar -#define yydebug ruby_yydebug - -#define ID_SCOPE_SHIFT 3 -#define ID_SCOPE_MASK 0x07 -#define ID_LOCAL 0x01 -#define ID_INSTANCE 0x02 -#define ID_GLOBAL 0x03 -#define ID_ATTRSET 0x04 -#define ID_CONST 0x05 -#define ID_CLASS 0x06 -#define ID_JUNK 0x07 -#define ID_INTERNAL ID_JUNK +#ifndef RIPPER +static ID register_symid(ID, const char *, long, rb_encoding *); +#define REGISTER_SYMID(id, name) register_symid(id, name, strlen(name), enc) +#include "id.c" +#endif 
#define is_notop_id(id) ((id)>tLAST_TOKEN) #define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL) @@ -72,116 +57,427 @@ static void rb_parser_free _((void *)); ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \ ((id)&ID_SCOPE_MASK) == ID_CLASS)) -NODE *ruby_eval_tree_begin = 0; -NODE *ruby_eval_tree = 0; - -char *ruby_sourcefile; /* current source file */ -int ruby_sourceline; /* current line no. */ - -static int yylex(); -static int yyerror(); - -static enum lex_state { +enum lex_state_e { EXPR_BEG, /* ignore newline, +/- is a sign. */ EXPR_END, /* newline significant, +/- is an operator. */ + EXPR_ENDARG, /* ditto, and unbound braces. */ + EXPR_ENDFN, /* ditto, and unbound braces. */ EXPR_ARG, /* newline significant, +/- is an operator. */ EXPR_CMDARG, /* newline significant, +/- is an operator. */ - EXPR_ENDARG, /* newline significant, +/- is an operator. */ EXPR_MID, /* newline significant, +/- is an operator. */ EXPR_FNAME, /* ignore newline, no reserved words. */ EXPR_DOT, /* right after `.' or `::', no reserved words. */ EXPR_CLASS, /* immediate after `class', no here document. */ -} lex_state; -static NODE *lex_strterm; + EXPR_VALUE, /* alike EXPR_BEG but label is disallowed. 
*/ + EXPR_MAX_STATE +}; -#ifdef HAVE_LONG_LONG -typedef unsigned LONG_LONG stack_type; -#else -typedef unsigned long stack_type; -#endif +typedef VALUE stack_type; -#define BITSTACK_PUSH(stack, n) (stack = (stack<<1)|((n)&1)) -#define BITSTACK_POP(stack) (stack >>= 1) -#define BITSTACK_LEXPOP(stack) (stack = (stack >> 1) | (stack & 1)) -#define BITSTACK_SET_P(stack) (stack&1) +# define BITSTACK_PUSH(stack, n) (stack = (stack<<1)|((n)&1)) +# define BITSTACK_POP(stack) (stack = stack >> 1) +# define BITSTACK_LEXPOP(stack) (stack = (stack >> 1) | (stack & 1)) +# define BITSTACK_SET_P(stack) (stack&1) -static stack_type cond_stack = 0; #define COND_PUSH(n) BITSTACK_PUSH(cond_stack, n) #define COND_POP() BITSTACK_POP(cond_stack) #define COND_LEXPOP() BITSTACK_LEXPOP(cond_stack) #define COND_P() BITSTACK_SET_P(cond_stack) -static stack_type cmdarg_stack = 0; #define CMDARG_PUSH(n) BITSTACK_PUSH(cmdarg_stack, n) #define CMDARG_POP() BITSTACK_POP(cmdarg_stack) #define CMDARG_LEXPOP() BITSTACK_LEXPOP(cmdarg_stack) #define CMDARG_P() BITSTACK_SET_P(cmdarg_stack) -static int class_nest = 0; -static int in_single = 0; -static int in_def = 0; -static int compile_for_eval = 0; -static ID cur_mid = 0; -static int command_start = Qtrue; - -static NODE *cond(); -static NODE *logop(); -static int cond_negative(); - -static NODE *newline_node(); -static void fixpos(); - -static int value_expr0(); -static void void_expr0(); -static void void_stmts(); -static NODE *remove_begin(); -#define value_expr(node) value_expr0((node) = remove_begin(node)) -#define void_expr(node) void_expr0((node) = remove_begin(node)) +struct vtable { + ID *tbl; + int pos; + int capa; + struct vtable *prev; +}; + +struct local_vars { + struct vtable *args; + struct vtable *vars; + struct local_vars *prev; +}; + +#define DVARS_INHERIT ((void*)1) +#define DVARS_TOPSCOPE NULL +#define DVARS_SPECIAL_P(tbl) (!POINTER_P(tbl)) +#define POINTER_P(val) ((VALUE)(val) & ~(VALUE)3) + +static int +vtable_size(const struct 
vtable *tbl) +{ + if (POINTER_P(tbl)) { + return tbl->pos; + } + else { + return 0; + } +} + +#define VTBL_DEBUG 0 + +static struct vtable * +vtable_alloc(struct vtable *prev) +{ + struct vtable *tbl = ALLOC(struct vtable); + tbl->pos = 0; + tbl->capa = 8; + tbl->tbl = ALLOC_N(ID, tbl->capa); + tbl->prev = prev; + if (VTBL_DEBUG) printf("vtable_alloc: %p\n", (void *)tbl); + return tbl; +} + +static void +vtable_free(struct vtable *tbl) +{ + if (VTBL_DEBUG)printf("vtable_free: %p\n", (void *)tbl); + if (POINTER_P(tbl)) { + if (tbl->tbl) { + xfree(tbl->tbl); + } + xfree(tbl); + } +} + +static void +vtable_add(struct vtable *tbl, ID id) +{ + if (!POINTER_P(tbl)) { + rb_bug("vtable_add: vtable is not allocated (%p)", (void *)tbl); + } + if (VTBL_DEBUG) printf("vtable_add: %p, %s\n", (void *)tbl, rb_id2name(id)); + + if (tbl->pos == tbl->capa) { + tbl->capa = tbl->capa * 2; + REALLOC_N(tbl->tbl, ID, tbl->capa); + } + tbl->tbl[tbl->pos++] = id; +} + +static int +vtable_included(const struct vtable * tbl, ID id) +{ + int i; + + if (POINTER_P(tbl)) { + for (i = 0; i < tbl->pos; i++) { + if (tbl->tbl[i] == id) { + return 1; + } + } + } + return 0; +} + + +#ifndef RIPPER +typedef struct token_info { + const char *token; + int linenum; + int column; + int nonspc; + struct token_info *next; +} token_info; +#endif + +/* + Structure of Lexer Buffer: + + lex_pbeg tokp lex_p lex_pend + | | | | + |-----------+--------------+------------| + |<------------>| + token +*/ +struct parser_params { + int is_ripper; + NODE *heap; + + YYSTYPE *parser_yylval; + VALUE eofp; + + NODE *parser_lex_strterm; + enum lex_state_e parser_lex_state; + stack_type parser_cond_stack; + stack_type parser_cmdarg_stack; + int parser_class_nest; + int parser_paren_nest; + int parser_lpar_beg; + int parser_in_single; + int parser_in_def; + int parser_compile_for_eval; + VALUE parser_cur_mid; + int parser_in_defined; + char *parser_tokenbuf; + int parser_tokidx; + int parser_toksiz; + VALUE parser_lex_input; + 
VALUE parser_lex_lastline; + VALUE parser_lex_nextline; + const char *parser_lex_pbeg; + const char *parser_lex_p; + const char *parser_lex_pend; + int parser_heredoc_end; + int parser_command_start; + NODE *parser_deferred_nodes; + long parser_lex_gets_ptr; + VALUE (*parser_lex_gets)(struct parser_params*,VALUE); + struct local_vars *parser_lvtbl; + int parser_ruby__end__seen; + int line_count; + int has_shebang; + char *parser_ruby_sourcefile; /* current source file */ + int parser_ruby_sourceline; /* current line no. */ + rb_encoding *enc; + rb_encoding *utf8; + + int parser_yydebug; + +#ifndef RIPPER + /* Ruby core only */ + NODE *parser_eval_tree_begin; + NODE *parser_eval_tree; + VALUE debug_lines; + VALUE coverage; + int nerr; + + token_info *parser_token_info; +#else + /* Ripper only */ + VALUE parser_ruby_sourcefile_string; + const char *tokp; + VALUE delayed; + int delayed_line; + int delayed_col; + + VALUE value; + VALUE result; + VALUE parsing_thread; + int toplevel_p; +#endif +}; + +#define UTF8_ENC() (parser->utf8 ? 
parser->utf8 : \ + (parser->utf8 = rb_utf8_encoding())) +#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc) +#define STR_NEW0() rb_enc_str_new(0,0,parser->enc) +#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc) +#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc) +#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT) +#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), parser->enc) + +#ifdef YYMALLOC +void *rb_parser_malloc(struct parser_params *, size_t); +void *rb_parser_realloc(struct parser_params *, void *, size_t); +void *rb_parser_calloc(struct parser_params *, size_t, size_t); +void rb_parser_free(struct parser_params *, void *); +#endif + +static int parser_yyerror(struct parser_params*, const char*); +#define yyerror(msg) parser_yyerror(parser, msg) + +#define YYLEX_PARAM parser + +#define lex_strterm (parser->parser_lex_strterm) +#define lex_state (parser->parser_lex_state) +#define cond_stack (parser->parser_cond_stack) +#define cmdarg_stack (parser->parser_cmdarg_stack) +#define class_nest (parser->parser_class_nest) +#define paren_nest (parser->parser_paren_nest) +#define lpar_beg (parser->parser_lpar_beg) +#define in_single (parser->parser_in_single) +#define in_def (parser->parser_in_def) +#define compile_for_eval (parser->parser_compile_for_eval) +#define cur_mid (parser->parser_cur_mid) +#define in_defined (parser->parser_in_defined) +#define tokenbuf (parser->parser_tokenbuf) +#define tokidx (parser->parser_tokidx) +#define toksiz (parser->parser_toksiz) +#define lex_input (parser->parser_lex_input) +#define lex_lastline (parser->parser_lex_lastline) +#define lex_nextline (parser->parser_lex_nextline) +#define lex_pbeg (parser->parser_lex_pbeg) +#define lex_p (parser->parser_lex_p) +#define lex_pend (parser->parser_lex_pend) +#define heredoc_end (parser->parser_heredoc_end) +#define command_start (parser->parser_command_start) +#define deferred_nodes (parser->parser_deferred_nodes) +#define lex_gets_ptr 
(parser->parser_lex_gets_ptr) +#define lex_gets (parser->parser_lex_gets) +#define lvtbl (parser->parser_lvtbl) +#define ruby__end__seen (parser->parser_ruby__end__seen) +#define ruby_sourceline (parser->parser_ruby_sourceline) +#define ruby_sourcefile (parser->parser_ruby_sourcefile) +#define yydebug (parser->parser_yydebug) +#ifdef RIPPER +#else +#define ruby_eval_tree (parser->parser_eval_tree) +#define ruby_eval_tree_begin (parser->parser_eval_tree_begin) +#define ruby_debug_lines (parser->debug_lines) +#define ruby_coverage (parser->coverage) +#endif + +static int yylex(void*, void*); + +#ifndef RIPPER +#define yyparse ruby_yyparse -static NODE *block_append(); -static NODE *list_append(); -static NODE *list_concat(); -static NODE *arg_concat(); -static NODE *arg_prepend(); -static NODE *literal_concat(); -static NODE *new_evstr(); -static NODE *evstr2dstr(); -static NODE *call_op(); -static int in_defined = 0; - -static NODE *negate_lit(); -static NODE *ret_args(); -static NODE *arg_blk_pass(); -static NODE *new_call(); -static NODE *new_fcall(); -static NODE *new_super(); -static NODE *new_yield(); - -static NODE *gettable(); -static NODE *assignable(); -static NODE *aryset(); -static NODE *attrset(); -static void rb_backref_error(); -static NODE *node_assign(); - -static NODE *match_gen(); -static void local_push(); -static void local_pop(); -static int local_append(); -static int local_cnt(); -static int local_id(); -static ID *local_tbl(); -static ID internal_id(); - -static struct RVarmap *dyna_push(); -static void dyna_pop(); -static int dyna_in_block(); -static NODE *dyna_init(); - -static void top_local_init(); -static void top_local_setup(); - -#define RE_OPTION_ONCE 0x80 +static NODE* node_newnode(struct parser_params *, enum node_type, VALUE, VALUE, VALUE); +#define rb_node_newnode(type, a1, a2, a3) node_newnode(parser, type, a1, a2, a3) + +static NODE *cond_gen(struct parser_params*,NODE*); +#define cond(node) cond_gen(parser, node) +static NODE 
*logop_gen(struct parser_params*,enum node_type,NODE*,NODE*); +#define logop(type,node1,node2) logop_gen(parser, type, node1, node2) + +static NODE *newline_node(NODE*); +static void fixpos(NODE*,NODE*); + +static int value_expr_gen(struct parser_params*,NODE*); +static void void_expr_gen(struct parser_params*,NODE*); +static NODE *remove_begin(NODE*); +#define value_expr(node) value_expr_gen(parser, (node) = remove_begin(node)) +#define void_expr0(node) void_expr_gen(parser, (node)) +#define void_expr(node) void_expr0((node) = remove_begin(node)) +static void void_stmts_gen(struct parser_params*,NODE*); +#define void_stmts(node) void_stmts_gen(parser, node) +static void reduce_nodes_gen(struct parser_params*,NODE**); +#define reduce_nodes(n) reduce_nodes_gen(parser,n) +static void block_dup_check_gen(struct parser_params*,NODE*,NODE*); +#define block_dup_check(n1,n2) block_dup_check_gen(parser,n1,n2) + +static NODE *block_append_gen(struct parser_params*,NODE*,NODE*); +#define block_append(h,t) block_append_gen(parser,h,t) +static NODE *list_append_gen(struct parser_params*,NODE*,NODE*); +#define list_append(l,i) list_append_gen(parser,l,i) +static NODE *list_concat_gen(struct parser_params*,NODE*,NODE*); +#define list_concat(h,t) list_concat_gen(parser,h,t) +static NODE *arg_append_gen(struct parser_params*,NODE*,NODE*); +#define arg_append(h,t) arg_append_gen(parser,h,t) +static NODE *arg_concat_gen(struct parser_params*,NODE*,NODE*); +#define arg_concat(h,t) arg_concat_gen(parser,h,t) +static NODE *literal_concat_gen(struct parser_params*,NODE*,NODE*); +#define literal_concat(h,t) literal_concat_gen(parser,h,t) +static int literal_concat0(struct parser_params *, VALUE, VALUE); +static NODE *new_evstr_gen(struct parser_params*,NODE*); +#define new_evstr(n) new_evstr_gen(parser,n) +static NODE *evstr2dstr_gen(struct parser_params*,NODE*); +#define evstr2dstr(n) evstr2dstr_gen(parser,n) +static NODE *splat_array(NODE*); + +static NODE *call_bin_op_gen(struct 
parser_params*,NODE*,ID,NODE*); +#define call_bin_op(recv,id,arg1) call_bin_op_gen(parser, recv,id,arg1) +static NODE *call_uni_op_gen(struct parser_params*,NODE*,ID); +#define call_uni_op(recv,id) call_uni_op_gen(parser, recv,id) + +static NODE *new_args_gen(struct parser_params*,NODE*,NODE*,ID,NODE*,ID); +#define new_args(f,o,r,p,b) new_args_gen(parser, f,o,r,p,b) + +static NODE *negate_lit(NODE*); +static NODE *ret_args_gen(struct parser_params*,NODE*); +#define ret_args(node) ret_args_gen(parser, node) +static NODE *arg_blk_pass(NODE*,NODE*); +static NODE *new_yield_gen(struct parser_params*,NODE*); +#define new_yield(node) new_yield_gen(parser, node) + +static NODE *gettable_gen(struct parser_params*,ID); +#define gettable(id) gettable_gen(parser,id) +static NODE *assignable_gen(struct parser_params*,ID,NODE*); +#define assignable(id,node) assignable_gen(parser, id, node) + +static NODE *aryset_gen(struct parser_params*,NODE*,NODE*); +#define aryset(node1,node2) aryset_gen(parser, node1, node2) +static NODE *attrset_gen(struct parser_params*,NODE*,ID); +#define attrset(node,id) attrset_gen(parser, node, id) + +static void rb_backref_error_gen(struct parser_params*,NODE*); +#define rb_backref_error(n) rb_backref_error_gen(parser,n) +static NODE *node_assign_gen(struct parser_params*,NODE*,NODE*); +#define node_assign(node1, node2) node_assign_gen(parser, node1, node2) + +static NODE *match_op_gen(struct parser_params*,NODE*,NODE*); +#define match_op(node1,node2) match_op_gen(parser, node1, node2) + +static ID *local_tbl_gen(struct parser_params*); +#define local_tbl() local_tbl_gen(parser) + +static void fixup_nodes(NODE **); + +extern int rb_dvar_defined(ID); +extern int rb_local_defined(ID); +extern int rb_parse_in_eval(void); +extern int rb_parse_in_main(void); + +static VALUE reg_compile_gen(struct parser_params*, VALUE, int); +#define reg_compile(str,options) reg_compile_gen(parser, str, options) +static void reg_fragment_setenc_gen(struct parser_params*, 
VALUE, int); +#define reg_fragment_setenc(str,options) reg_fragment_setenc_gen(parser, str, options) +static int reg_fragment_check_gen(struct parser_params*, VALUE, int); +#define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options) +static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); +#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match) + +#define get_id(id) (id) +#define get_value(val) (val) +#else +#define remove_begin(node) (node) +#define rb_dvar_defined(id) 0 +#define rb_local_defined(id) 0 +static ID ripper_get_id(VALUE); +#define get_id(id) ripper_get_id(id) +static VALUE ripper_get_value(VALUE); +#define get_value(val) ripper_get_value(val) +static VALUE assignable_gen(struct parser_params*,VALUE); +#define assignable(lhs,node) assignable_gen(parser, lhs) +#endif /* !RIPPER */ + +static ID formal_argument_gen(struct parser_params*, ID); +#define formal_argument(id) formal_argument_gen(parser, id) +static ID shadowing_lvar_gen(struct parser_params*,ID); +#define shadowing_lvar(name) shadowing_lvar_gen(parser, name) +static void new_bv_gen(struct parser_params*,ID); +#define new_bv(id) new_bv_gen(parser, id) + +static void local_push_gen(struct parser_params*,int); +#define local_push(top) local_push_gen(parser,top) +static void local_pop_gen(struct parser_params*); +#define local_pop() local_pop_gen(parser) +static int local_var_gen(struct parser_params*, ID); +#define local_var(id) local_var_gen(parser, id) /* no trailing ';': must stay usable as an expression */ +static int arg_var_gen(struct parser_params*, ID); +#define arg_var(id) arg_var_gen(parser, id) +static int local_id_gen(struct parser_params*, ID); +#define local_id(id) local_id_gen(parser, id) +static ID internal_id_gen(struct parser_params*); +#define internal_id() internal_id_gen(parser) + +static const struct vtable *dyna_push_gen(struct parser_params *); +#define dyna_push() dyna_push_gen(parser) +static void dyna_pop_gen(struct 
parser_params*, const struct vtable *); +#define dyna_pop(node) dyna_pop_gen(parser, node) +static int dyna_in_block_gen(struct parser_params*); +#define dyna_in_block() dyna_in_block_gen(parser) +#define dyna_var(id) local_var(id) +static int dvar_defined_gen(struct parser_params*,ID); +#define dvar_defined(id) dvar_defined_gen(parser, id) +static int dvar_curr_gen(struct parser_params*,ID); +#define dvar_curr(id) dvar_curr_gen(parser, id) + +static int lvar_defined_gen(struct parser_params*, ID); +#define lvar_defined(id) lvar_defined_gen(parser, id) + +#define RE_OPTION_ONCE (1<<16) +#define RE_OPTION_ENCODING_SHIFT 8 +#define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT) +#define RE_OPTION_ENCODING_IDX(o) (((o)>>RE_OPTION_ENCODING_SHIFT)&0xff) +#define RE_OPTION_ENCODING_NONE(o) ((o)&RE_OPTION_ARG_ENCODING_NONE) +#define RE_OPTION_MASK 0xff +#define RE_OPTION_ARG_ENCODING_NONE 32 #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ @@ -193,7 +489,110 @@ static void top_local_setup(); #define nd_term(node) SIGN_EXTEND((node)->u2.id, CHAR_BIT*2) #endif #define nd_paren(node) (char)((node)->u2.id >> CHAR_BIT*2) -#define nd_nest u3.id +#define nd_nest u3.cnt + +/****** Ripper *******/ + +#ifdef RIPPER +#define RIPPER_VERSION "0.1.0" + +#include "eventids1.c" +#include "eventids2.c" +static ID ripper_id_gets; + +static VALUE ripper_dispatch0(struct parser_params*,ID); +static VALUE ripper_dispatch1(struct parser_params*,ID,VALUE); +static VALUE ripper_dispatch2(struct parser_params*,ID,VALUE,VALUE); +static VALUE ripper_dispatch3(struct parser_params*,ID,VALUE,VALUE,VALUE); +static VALUE ripper_dispatch4(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE); +static VALUE ripper_dispatch5(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE,VALUE); + +#define dispatch0(n) ripper_dispatch0(parser, TOKEN_PASTE(ripper_id_, n)) +#define dispatch1(n,a) ripper_dispatch1(parser, TOKEN_PASTE(ripper_id_, n), a) +#define 
dispatch2(n,a,b) ripper_dispatch2(parser, TOKEN_PASTE(ripper_id_, n), a, b) +#define dispatch3(n,a,b,c) ripper_dispatch3(parser, TOKEN_PASTE(ripper_id_, n), a, b, c) +#define dispatch4(n,a,b,c,d) ripper_dispatch4(parser, TOKEN_PASTE(ripper_id_, n), a, b, c, d) +#define dispatch5(n,a,b,c,d,e) ripper_dispatch5(parser, TOKEN_PASTE(ripper_id_, n), a, b, c, d, e) + +#define yyparse ripper_yyparse + +#define ripper_intern(s) ID2SYM(rb_intern(s)) +static VALUE ripper_id2sym(ID); +#ifdef __GNUC__ +#define ripper_id2sym(id) ((id) < 256 && rb_ispunct(id) ? \ + ID2SYM(id) : ripper_id2sym(id)) +#endif + +#define arg_new() dispatch0(args_new) +#define arg_add(l,a) dispatch2(args_add, l, a) +#define arg_add_star(l,a) dispatch2(args_add_star, l, a) +#define arg_add_block(l,b) dispatch2(args_add_block, l, b) +#define arg_add_optblock(l,b) ((b)==Qundef? l : dispatch2(args_add_block, l, b)) +#define bare_assoc(v) dispatch1(bare_assoc_hash, v) +#define arg_add_assocs(l,b) arg_add(l, bare_assoc(b)) + +#define args2mrhs(a) dispatch1(mrhs_new_from_args, a) +#define mrhs_new() dispatch0(mrhs_new) +#define mrhs_add(l,a) dispatch2(mrhs_add, l, a) +#define mrhs_add_star(l,a) dispatch2(mrhs_add_star, l, a) + +#define mlhs_new() dispatch0(mlhs_new) +#define mlhs_add(l,a) dispatch2(mlhs_add, l, a) +#define mlhs_add_star(l,a) dispatch2(mlhs_add_star, l, a) + +#define params_new(pars, opts, rest, pars2, blk) \ + dispatch5(params, pars, opts, rest, pars2, blk) + +#define blockvar_new(p,v) dispatch2(block_var, p, v) +#define blockvar_add_star(l,a) dispatch2(block_var_add_star, l, a) +#define blockvar_add_block(l,a) dispatch2(block_var_add_block, l, a) + +#define method_optarg(m,a) ((a)==Qundef ? m : dispatch2(method_add_arg,m,a)) +#define method_arg(m,a) dispatch2(method_add_arg,m,a) +#define method_add_block(m,b) dispatch2(method_add_block, m, b) + +#define escape_Qundef(x) ((x)==Qundef ? 
Qnil : (x)) + +#define FIXME 0 + +#endif /* RIPPER */ + +#ifndef RIPPER +# define ifndef_ripper(x) x +#else +# define ifndef_ripper(x) +#endif + +#ifndef RIPPER +# define rb_warn0(fmt) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt) +# define rb_warnI(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt, a) +# define rb_warnS(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, fmt, a) +# define rb_warning0(fmt) rb_compile_warning(ruby_sourcefile, ruby_sourceline, fmt) +# define rb_warningS(fmt,a) rb_compile_warning(ruby_sourcefile, ruby_sourceline, fmt, a) +#else +# define rb_warn0(fmt) ripper_warn0(parser, fmt) +# define rb_warnI(fmt,a) ripper_warnI(parser, fmt, a) +# define rb_warnS(fmt,a) ripper_warnS(parser, fmt, a) +# define rb_warning0(fmt) ripper_warning0(parser, fmt) +# define rb_warningS(fmt,a) ripper_warningS(parser, fmt, a) +static void ripper_warn0(struct parser_params*, const char*); +static void ripper_warnI(struct parser_params*, const char*, int); +#if 0 +static void ripper_warnS(struct parser_params*, const char*, const char*); +#endif +static void ripper_warning0(struct parser_params*, const char*); +static void ripper_warningS(struct parser_params*, const char*, const char*); +#endif + +#ifdef RIPPER +static void ripper_compile_error(struct parser_params*, const char *fmt, ...); +# define rb_compile_error ripper_compile_error +# define compile_error ripper_compile_error +# define PARSER_ARG parser, +#else +# define compile_error parser->nerr++,rb_compile_error +# define PARSER_ARG ruby_sourcefile, ruby_sourceline, +#endif /* Older versions of Yacc set YYMAXDEPTH to a very low value by default (150, for instance). 
This is too low for Ruby to parse some files, such as @@ -204,94 +603,123 @@ static void top_local_setup(); #endif #endif +#ifndef RIPPER +static void token_info_push(struct parser_params*, const char *token); +static void token_info_pop(struct parser_params*, const char *token); +#define token_info_push(token) (RTEST(ruby_verbose) ? token_info_push(parser, token) : (void)0) +#define token_info_pop(token) (RTEST(ruby_verbose) ? token_info_pop(parser, token) : (void)0) +#else +#define token_info_push(token) /* nothing */ +#define token_info_pop(token) /* nothing */ +#endif %} +%pure_parser +%parse-param {struct parser_params *parser} + %union { + VALUE val; NODE *node; ID id; int num; - struct RVarmap *vars; -} - -%token kCLASS - kMODULE - kDEF - kUNDEF - kBEGIN - kRESCUE - kENSURE - kEND - kIF - kUNLESS - kTHEN - kELSIF - kELSE - kCASE - kWHEN - kWHILE - kUNTIL - kFOR - kBREAK - kNEXT - kREDO - kRETRY - kIN - kDO - kDO_COND - kDO_BLOCK - kRETURN - kYIELD - kSUPER - kSELF - kNIL - kTRUE - kFALSE - kAND - kOR - kNOT - kIF_MOD - kUNLESS_MOD - kWHILE_MOD - kUNTIL_MOD - kRESCUE_MOD - kALIAS - kDEFINED - klBEGIN - klEND - k__LINE__ - k__FILE__ - -%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR -%token <node> tINTEGER tFLOAT tSTRING_CONTENT + const struct vtable *vars; +} + +/*%%%*/ +%token +/*% +%token <val> +%*/ + keyword_class + keyword_module + keyword_def + keyword_undef + keyword_begin + keyword_rescue + keyword_ensure + keyword_end + keyword_if + keyword_unless + keyword_then + keyword_elsif + keyword_else + keyword_case + keyword_when + keyword_while + keyword_until + keyword_for + keyword_break + keyword_next + keyword_redo + keyword_retry + keyword_in + keyword_do + keyword_do_cond + keyword_do_block + keyword_do_LAMBDA + keyword_return + keyword_yield + keyword_super + keyword_self + keyword_nil + keyword_true + keyword_false + keyword_and + keyword_or + keyword_not + modifier_if + modifier_unless + modifier_while + modifier_until + modifier_rescue + 
keyword_alias + keyword_defined + keyword_BEGIN + keyword_END + keyword__LINE__ + keyword__FILE__ + keyword__ENCODING__ + +%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL +%token <node> tINTEGER tFLOAT tSTRING_CONTENT tCHAR %token <node> tNTH_REF tBACK_REF %token <num> tREGEXP_END %type <node> singleton strings string string1 xstring regexp -%type <node> string_contents xstring_contents string_content +%type <node> string_contents xstring_contents regexp_contents string_content %type <node> words qwords word_list qword_list word %type <node> literal numeric dsym cpath +%type <node> top_compstmt top_stmts top_stmt %type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call %type <node> expr_value arg_value primary_value %type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure -%type <node> args when_args call_args call_args2 open_args paren_args opt_paren_args +%type <node> args call_args opt_call_args +%type <node> paren_args opt_paren_args %type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs %type <node> mrhs superclass block_call block_command -%type <node> f_arglist f_args f_optarg f_opt f_rest_arg f_block_arg opt_f_block_arg -%type <node> assoc_list assocs assoc undef_list backref string_dvar -%type <node> block_var opt_block_var brace_block cmd_brace_block do_block lhs none fitem -%type <node> mlhs mlhs_head mlhs_basic mlhs_entry mlhs_item mlhs_node +%type <node> f_block_optarg f_block_opt +%type <node> f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list f_margs +%type <node> assoc_list assocs assoc undef_list backref string_dvar for_var +%type <node> block_param opt_block_param block_param_def f_opt +%type <node> bv_decls opt_bv_decl bvar +%type <node> lambda f_larglist lambda_body +%type <node> brace_block cmd_brace_block do_block lhs none fitem +%type <node> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner %type <id> fsym variable 
sym symbol operation operation2 operation3 -%type <id> cname fname op -%type <num> f_norm_arg f_arg -%token tUPLUS /* unary+ */ -%token tUMINUS /* unary- */ +%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg +/*%%%*/ +/*% +%type <val> program reswords then do dot_or_colon +%*/ +%token tUPLUS /* unary+ */ +%token tUMINUS /* unary- */ %token tPOW /* ** */ -%token tCMP /* <=> */ -%token tEQ /* == */ -%token tEQQ /* === */ -%token tNEQ /* != */ -%token tGEQ /* >= */ -%token tLEQ /* <= */ +%token tCMP /* <=> */ +%token tEQ /* == */ +%token tEQQ /* === */ +%token tNEQ /* != */ +%token tGEQ /* >= */ +%token tLEQ /* <= */ %token tANDOP tOROP /* && and || */ %token tMATCH tNMATCH /* =~ and !~ */ %token tDOT2 tDOT3 /* .. and ... */ @@ -309,8 +737,9 @@ static void top_local_setup(); %token tLBRACE_ARG /* { */ %token tSTAR /* * */ %token tAMPER /* & */ +%token tLAMBDA /* -> */ %token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG -%token tSTRING_DBEG tSTRING_DVAR tSTRING_END +%token tSTRING_DBEG tSTRING_DVAR tSTRING_END tLAMBEG /* * precedence table @@ -319,12 +748,12 @@ static void top_local_setup(); %nonassoc tLOWEST %nonassoc tLBRACE_ARG -%nonassoc kIF_MOD kUNLESS_MOD kWHILE_MOD kUNTIL_MOD -%left kOR kAND -%right kNOT -%nonassoc kDEFINED +%nonassoc modifier_if modifier_unless modifier_while modifier_until +%left keyword_or keyword_and +%right keyword_not +%nonassoc keyword_defined %right '=' tOP_ASGN -%left kRESCUE_MOD +%left modifier_rescue %right '?' ':' %nonassoc tDOT2 tDOT3 %left tOROP @@ -340,19 +769,33 @@ static void top_local_setup(); %right tPOW %right '!' 
'~' tUPLUS +%nonassoc idNULL +%nonassoc idRespond_to +%nonassoc idIFUNC +%nonassoc idCFUNC +%nonassoc id_core_set_method_alias +%nonassoc id_core_set_variable_alias +%nonassoc id_core_undef_method +%nonassoc id_core_define_method +%nonassoc id_core_define_singleton_method +%nonassoc id_core_set_postexe + %token tLAST_TOKEN %% program : { lex_state = EXPR_BEG; - top_local_init(); - if (ruby_class == rb_cObject) class_nest = 0; - else class_nest = 1; + /*%%%*/ + local_push(compile_for_eval || rb_parse_in_main()); + /*% + local_push(0); + %*/ } - compstmt + top_compstmt { + /*%%%*/ if ($2 && !compile_for_eval) { - /* last expression should not be void */ + /* last expression should not be void */ if (nd_type($2) != NODE_BLOCK) void_expr($2); else { NODE *node = $2; @@ -362,9 +805,79 @@ program : { void_expr(node->nd_head); } } - ruby_eval_tree = block_append(ruby_eval_tree, $2); - top_local_setup(); - class_nest = 0; + ruby_eval_tree = NEW_SCOPE(0, block_append(ruby_eval_tree, $2)); + /*% + $$ = $2; + parser->result = dispatch1(program, $$); + %*/ + local_pop(); + } + ; + +top_compstmt : top_stmts opt_terms + { + /*%%%*/ + void_stmts($1); + fixup_nodes(&deferred_nodes); + /*% + %*/ + $$ = $1; + } + ; + +top_stmts : none + { + /*%%%*/ + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), + dispatch0(void_stmt)); + %*/ + } + | top_stmt + { + /*%%%*/ + $$ = newline_node($1); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); + %*/ + } + | top_stmts terms top_stmt + { + /*%%%*/ + $$ = block_append($1, newline_node($3)); + /*% + $$ = dispatch2(stmts_add, $1, $3); + %*/ + } + | error top_stmt + { + $$ = remove_begin($2); + } + ; + +top_stmt : stmt + | keyword_BEGIN + { + if (in_def || in_single) { + yyerror("BEGIN in method"); + } + /*%%%*/ + /* local_push(0); */ + /*% + %*/ + } + '{' top_compstmt '}' + { + /*%%%*/ + ruby_eval_tree_begin = block_append(ruby_eval_tree_begin, + $4); + /* NEW_PREEXE($4)); */ + /* local_pop(); */ + $$ = 
NEW_BEGIN(0); + /*% + $$ = dispatch1(BEGIN, $4); + %*/ } ; @@ -373,36 +886,69 @@ bodystmt : compstmt opt_else opt_ensure { - $$ = $1; + /*%%%*/ + $$ = $1; if ($2) { $$ = NEW_RESCUE($1, $2, $3); } else if ($3) { - rb_warn("else without rescue is useless"); + rb_warn0("else without rescue is useless"); $$ = block_append($$, $3); } if ($4) { - $$ = NEW_ENSURE($$, $4); + if ($$) { + $$ = NEW_ENSURE($$, $4); + } + else { + $$ = block_append($4, NEW_NIL()); + } } fixpos($$, $1); + /*% + $$ = dispatch4(bodystmt, + escape_Qundef($1), + escape_Qundef($2), + escape_Qundef($3), + escape_Qundef($4)); + %*/ } ; compstmt : stmts opt_terms { + /*%%%*/ void_stmts($1); - $$ = $1; + fixup_nodes(&deferred_nodes); + /*% + %*/ + $$ = $1; } ; stmts : none + { + /*%%%*/ + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), + dispatch0(void_stmt)); + %*/ + } | stmt { + /*%%%*/ $$ = newline_node($1); + /*% + $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); + %*/ } | stmts terms stmt { + /*%%%*/ $$ = block_append($1, newline_node($3)); + /*% + $$ = dispatch2(stmts_add, $1, $3); + %*/ } | error stmt { @@ -410,111 +956,138 @@ stmts : none } ; -stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem +stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem { - $$ = NEW_ALIAS($2, $4); + /*%%%*/ + $$ = NEW_ALIAS($2, $4); + /*% + $$ = dispatch2(alias, $2, $4); + %*/ } - | kALIAS tGVAR tGVAR + | keyword_alias tGVAR tGVAR { - $$ = NEW_VALIAS($2, $3); + /*%%%*/ + $$ = NEW_VALIAS($2, $3); + /*% + $$ = dispatch2(var_alias, $2, $3); + %*/ } - | kALIAS tGVAR tBACK_REF + | keyword_alias tGVAR tBACK_REF { - char buf[3]; - - sprintf(buf, "$%c", (char)$3->nd_nth); - $$ = NEW_VALIAS($2, rb_intern(buf)); + /*%%%*/ + char buf[2]; + buf[0] = '$'; + buf[1] = (char)$3->nd_nth; + $$ = NEW_VALIAS($2, rb_intern2(buf, 2)); + /*% + $$ = dispatch2(var_alias, $2, $3); + %*/ } - | kALIAS tGVAR tNTH_REF + | keyword_alias tGVAR tNTH_REF { - yyerror("can't make alias for the number variables"); - $$ 
= 0; + /*%%%*/ + yyerror("can't make alias for the number variables"); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(var_alias, $2, $3); + $$ = dispatch1(alias_error, $$); + %*/ } - | kUNDEF undef_list + | keyword_undef undef_list { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(undef, $2); + %*/ } - | stmt kIF_MOD expr_value + | stmt modifier_if expr_value { + /*%%%*/ $$ = NEW_IF(cond($3), remove_begin($1), 0); - fixpos($$, $3); - if (cond_negative(&$$->nd_cond)) { - $$->nd_else = $$->nd_body; - $$->nd_body = 0; - } + fixpos($$, $3); + /*% + $$ = dispatch2(if_mod, $3, $1); + %*/ } - | stmt kUNLESS_MOD expr_value + | stmt modifier_unless expr_value { + /*%%%*/ $$ = NEW_UNLESS(cond($3), remove_begin($1), 0); - fixpos($$, $3); - if (cond_negative(&$$->nd_cond)) { - $$->nd_body = $$->nd_else; - $$->nd_else = 0; - } + fixpos($$, $3); + /*% + $$ = dispatch2(unless_mod, $3, $1); + %*/ } - | stmt kWHILE_MOD expr_value + | stmt modifier_while expr_value { + /*%%%*/ if ($1 && nd_type($1) == NODE_BEGIN) { $$ = NEW_WHILE(cond($3), $1->nd_body, 0); } else { $$ = NEW_WHILE(cond($3), $1, 1); } - if (cond_negative(&$$->nd_cond)) { - nd_set_type($$, NODE_UNTIL); - } + /*% + $$ = dispatch2(while_mod, $3, $1); + %*/ } - | stmt kUNTIL_MOD expr_value + | stmt modifier_until expr_value { + /*%%%*/ if ($1 && nd_type($1) == NODE_BEGIN) { $$ = NEW_UNTIL(cond($3), $1->nd_body, 0); } else { $$ = NEW_UNTIL(cond($3), $1, 1); } - if (cond_negative(&$$->nd_cond)) { - nd_set_type($$, NODE_WHILE); - } + /*% + $$ = dispatch2(until_mod, $3, $1); + %*/ } - | stmt kRESCUE_MOD stmt + | stmt modifier_rescue stmt { + /*%%%*/ NODE *resq = NEW_RESBODY(0, remove_begin($3), 0); $$ = NEW_RESCUE(remove_begin($1), resq, 0); + /*% + $$ = dispatch2(rescue_mod, $3, $1); + %*/ } - | klBEGIN - { - if (in_def || in_single) { - yyerror("BEGIN in method"); - } - local_push(0); - } - '{' compstmt '}' - { - ruby_eval_tree_begin = block_append(ruby_eval_tree_begin, - NEW_PREEXE($4)); - local_pop(); - $$ = 0; - } - | klEND '{' 
compstmt '}' + | keyword_END '{' compstmt '}' { if (in_def || in_single) { - rb_warn("END in method; use at_exit"); + rb_warn0("END in method; use at_exit"); } - - $$ = NEW_ITER(0, NEW_POSTEXE(), $3); + /*%%%*/ + $$ = NEW_POSTEXE(NEW_NODE( + NODE_SCOPE, 0 /* tbl */, $3 /* body */, 0 /* args */)); + /*% + $$ = dispatch1(END, $3); + %*/ } | lhs '=' command_call { + /*%%%*/ + value_expr($3); $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ } | mlhs '=' command_call { + /*%%%*/ value_expr($3); - $1->nd_value = ($1->nd_head) ? NEW_TO_ARY($3) : NEW_ARRAY($3); + $1->nd_value = $3; $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); + %*/ } | var_lhs tOP_ASGN command_call { + /*%%%*/ value_expr($3); if ($1) { ID vid = $1->nd_vid; @@ -531,20 +1104,24 @@ stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem } else { $$ = $1; - $$->nd_value = call_op(gettable(vid),$2,1,$3); + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); } } else { - $$ = 0; + $$ = NEW_BEGIN(0); } + /*% + $$ = dispatch3(opassign, $1, $2, $3); + %*/ } - | primary_value '[' aref_args ']' tOP_ASGN command_call + | primary_value '[' opt_call_args rbracket tOP_ASGN command_call { - NODE *args; + /*%%%*/ + NODE *args; value_expr($6); if (!$3) $3 = NEW_ZARRAY(); - args = arg_concat($6, $3); + args = arg_concat($3, $6); if ($5 == tOROP) { $5 = 0; } @@ -552,10 +1129,15 @@ stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem $5 = 1; } $$ = NEW_OP_ASGN1($1, $5, args); - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + $$ = dispatch3(opassign, $$, $5, $6); + %*/ } | primary_value '.' 
tIDENTIFIER tOP_ASGN command_call { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -564,10 +1146,15 @@ stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ } | primary_value '.' tCONSTANT tOP_ASGN command_call { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -576,10 +1163,26 @@ stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call + { + /*%%%*/ + yyerror("constant re-assignment"); + $$ = 0; + /*% + $$ = dispatch2(const_path_field, $1, $3); + $$ = dispatch3(opassign, $$, $4, $5); + $$ = dispatch1(assign_error, $$); + %*/ } | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -588,216 +1191,406 @@ stmt : kALIAS fitem {lex_state = EXPR_FNAME;} fitem $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = dispatch3(field, $1, ripper_intern("::"), $3); + $$ = dispatch3(opassign, $$, $4, $5); + %*/ } | backref tOP_ASGN command_call { - rb_backref_error($1); - $$ = 0; + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(assign, dispatch1(var_field, $1), $3); + $$ = dispatch1(assign_error, $$); + %*/ } | lhs '=' mrhs { - $$ = node_assign($1, NEW_SVALUE($3)); + /*%%%*/ + value_expr($3); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ } | mlhs '=' arg_value { - $1->nd_value = ($1->nd_head) ? 
NEW_TO_ARY($3) : NEW_ARRAY($3); + /*%%%*/ + $1->nd_value = $3; $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); + %*/ } | mlhs '=' mrhs { + /*%%%*/ $1->nd_value = $3; $$ = $1; + /*% + $$ = dispatch2(massign, $1, $3); + %*/ } | expr ; expr : command_call - | expr kAND expr + | expr keyword_and expr { + /*%%%*/ $$ = logop(NODE_AND, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("and"), $3); + %*/ } - | expr kOR expr + | expr keyword_or expr { + /*%%%*/ $$ = logop(NODE_OR, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("or"), $3); + %*/ } - | kNOT expr + | keyword_not opt_nl expr { - $$ = NEW_NOT(cond($2)); + /*%%%*/ + $$ = call_uni_op(cond($3), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), $3); + %*/ } | '!' command_call { - $$ = NEW_NOT(cond($2)); + /*%%%*/ + $$ = call_uni_op(cond($2), '!'); + /*% + $$ = dispatch2(unary, ripper_id2sym('!'), $2); + %*/ } | arg ; expr_value : expr { - value_expr($$); + /*%%%*/ + value_expr($1); $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ } ; command_call : command | block_command - | kRETURN call_args - { - $$ = NEW_RETURN(ret_args($2)); - } - | kBREAK call_args - { - $$ = NEW_BREAK(ret_args($2)); - } - | kNEXT call_args - { - $$ = NEW_NEXT(ret_args($2)); - } ; block_command : block_call | block_call '.' 
operation2 command_args { - $$ = new_call($1, $3, $4); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_arg($$, $4); + %*/ } | block_call tCOLON2 operation2 command_args { - $$ = new_call($1, $3, $4); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + $$ = method_arg($$, $4); + %*/ } ; cmd_brace_block : tLBRACE_ARG { - $<vars>$ = dyna_push(); - $<num>1 = ruby_sourceline; + $<vars>1 = dyna_push(); + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ } - opt_block_var {$<vars>$ = ruby_dyna_vars;} + opt_block_param compstmt '}' { - $$ = NEW_ITER($3, 0, dyna_init($5, $<vars>4)); - nd_set_line($$, $<num>1); - dyna_pop($<vars>2); + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $<num>2); + /*% + $$ = dispatch2(brace_block, escape_Qundef($3), $4); + %*/ + dyna_pop($<vars>1); } ; command : operation command_args %prec tLOWEST { - $$ = new_fcall($1, $2); - fixpos($$, $2); - } + /*%%%*/ + $$ = NEW_FCALL($1, $2); + fixpos($$, $2); + /*% + $$ = dispatch2(command, $1, $2); + %*/ + } | operation command_args cmd_brace_block { - $$ = new_fcall($1, $2); - if ($3) { - if (nd_type($$) == NODE_BLOCK_PASS) { - rb_compile_error("both block arg and actual block given"); - } - $3->nd_iter = $$; - $$ = $3; - } - fixpos($$, $2); - } + /*%%%*/ + block_dup_check($2,$3); + $3->nd_iter = NEW_FCALL($1, $2); + $$ = $3; + fixpos($$, $2); + /*% + $$ = dispatch2(command, $1, $2); + $$ = method_add_block($$, $3); + %*/ + } | primary_value '.' operation2 command_args %prec tLOWEST { - $$ = new_call($1, $3, $4); - fixpos($$, $1); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); + %*/ } | primary_value '.' 
operation2 command_args cmd_brace_block { - $$ = new_call($1, $3, $4); - if ($5) { - if (nd_type($$) == NODE_BLOCK_PASS) { - rb_compile_error("both block arg and actual block given"); - } - $5->nd_iter = $$; - $$ = $5; - } - fixpos($$, $1); + /*%%%*/ + block_dup_check($4,$5); + $5->nd_iter = NEW_CALL($1, $3, $4); + $$ = $5; + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); + $$ = method_add_block($$, $5); + %*/ } | primary_value tCOLON2 operation2 command_args %prec tLOWEST { - $$ = new_call($1, $3, $4); - fixpos($$, $1); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_intern("::"), $3, $4); + %*/ } | primary_value tCOLON2 operation2 command_args cmd_brace_block { - $$ = new_call($1, $3, $4); - if ($5) { - if (nd_type($$) == NODE_BLOCK_PASS) { - rb_compile_error("both block arg and actual block given"); - } - $5->nd_iter = $$; - $$ = $5; - } - fixpos($$, $1); + /*%%%*/ + block_dup_check($4,$5); + $5->nd_iter = NEW_CALL($1, $3, $4); + $$ = $5; + fixpos($$, $1); + /*% + $$ = dispatch4(command_call, $1, ripper_intern("::"), $3, $4); + $$ = method_add_block($$, $5); + %*/ } - | kSUPER command_args + | keyword_super command_args { - $$ = new_super($2); - fixpos($$, $2); + /*%%%*/ + $$ = NEW_SUPER($2); + fixpos($$, $2); + /*% + $$ = dispatch1(super, $2); + %*/ } - | kYIELD command_args + | keyword_yield command_args { + /*%%%*/ $$ = new_yield($2); - fixpos($$, $2); + fixpos($$, $2); + /*% + $$ = dispatch1(yield, $2); + %*/ + } + | keyword_return call_args + { + /*%%%*/ + $$ = NEW_RETURN(ret_args($2)); + /*% + $$ = dispatch1(return, $2); + %*/ + } + | keyword_break call_args + { + /*%%%*/ + $$ = NEW_BREAK(ret_args($2)); + /*% + $$ = dispatch1(break, $2); + %*/ + } + | keyword_next call_args + { + /*%%%*/ + $$ = NEW_NEXT(ret_args($2)); + /*% + $$ = dispatch1(next, $2); + %*/ } ; mlhs : mlhs_basic - | tLPAREN mlhs_entry ')' + | tLPAREN mlhs_inner rparen { + /*%%%*/ $$ = $2; + /*% 
+ $$ = dispatch1(mlhs_paren, $2); + %*/ } ; -mlhs_entry : mlhs_basic - | tLPAREN mlhs_entry ')' +mlhs_inner : mlhs_basic + | tLPAREN mlhs_inner rparen { + /*%%%*/ $$ = NEW_MASGN(NEW_LIST($2), 0); + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ } ; mlhs_basic : mlhs_head { + /*%%%*/ $$ = NEW_MASGN($1, 0); + /*% + $$ = $1; + %*/ } | mlhs_head mlhs_item { + /*%%%*/ $$ = NEW_MASGN(list_append($1,$2), 0); + /*% + $$ = mlhs_add($1, $2); + %*/ } | mlhs_head tSTAR mlhs_node { + /*%%%*/ $$ = NEW_MASGN($1, $3); + /*% + $$ = mlhs_add_star($1, $3); + %*/ + } + | mlhs_head tSTAR mlhs_node ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG($3,$5)); + /*% + $1 = mlhs_add_star($1, $3); + $$ = mlhs_add($1, $5); + %*/ } | mlhs_head tSTAR { + /*%%%*/ $$ = NEW_MASGN($1, -1); + /*% + $$ = mlhs_add_star($1, Qnil); + %*/ + } + | mlhs_head tSTAR ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG(-1, $4)); + /*% + $1 = mlhs_add_star($1, Qnil); + $$ = mlhs_add($1, $4); + %*/ } | tSTAR mlhs_node { + /*%%%*/ $$ = NEW_MASGN(0, $2); + /*% + $$ = mlhs_add_star(mlhs_new(), $2); + %*/ + } + | tSTAR mlhs_node ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG($2,$4)); + /*% + $2 = mlhs_add_star(mlhs_new(), $2); + $$ = mlhs_add($2, $4); + %*/ } | tSTAR { + /*%%%*/ $$ = NEW_MASGN(0, -1); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + | tSTAR ',' mlhs_post + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + $$ = mlhs_add($$, $3); + %*/ } ; mlhs_item : mlhs_node - | tLPAREN mlhs_entry ')' + | tLPAREN mlhs_inner rparen { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ } ; mlhs_head : mlhs_item ',' { + /*%%%*/ $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ } | mlhs_head mlhs_item ',' { + /*%%%*/ $$ = list_append($1, $2); + /*% + $$ = mlhs_add($1, $2); + %*/ + } + ; + +mlhs_post : mlhs_item + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ + } + | 
mlhs_post ',' mlhs_item + { + /*%%%*/ + $$ = list_append($1, $3); + /*% + $$ = mlhs_add($1, $3); + %*/ } ; @@ -805,98 +1598,184 @@ mlhs_node : variable { $$ = assignable($1, 0); } - | primary_value '[' aref_args ']' + | primary_value '[' opt_call_args rbracket { + /*%%%*/ $$ = aryset($1, $3); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + %*/ } | primary_value '.' tIDENTIFIER { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ } | primary_value tCOLON2 tIDENTIFIER { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch2(const_path_field, $1, $3); + %*/ } | primary_value '.' tCONSTANT { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ } | primary_value tCOLON2 tCONSTANT { + /*%%%*/ if (in_def || in_single) yyerror("dynamic constant assignment"); $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); + /*% + if (in_def || in_single) + yyerror("dynamic constant assignment"); + $$ = dispatch2(const_path_field, $1, $3); + %*/ } | tCOLON3 tCONSTANT { + /*%%%*/ if (in_def || in_single) yyerror("dynamic constant assignment"); $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); + /*% + $$ = dispatch1(top_const_field, $2); + %*/ } | backref { - rb_backref_error($1); - $$ = 0; + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $1); + $$ = dispatch1(assign_error, $$); + %*/ } ; lhs : variable { $$ = assignable($1, 0); + /*%%%*/ + if (!$$) $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $$); + %*/ } - | primary_value '[' aref_args ']' + | primary_value '[' opt_call_args rbracket { + /*%%%*/ $$ = aryset($1, $3); + /*% + $$ = dispatch2(aref_field, $1, escape_Qundef($3)); + %*/ } | primary_value '.' 
tIDENTIFIER { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ } | primary_value tCOLON2 tIDENTIFIER { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_intern("::"), $3); + %*/ } | primary_value '.' tCONSTANT { + /*%%%*/ $$ = attrset($1, $3); + /*% + $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); + %*/ } | primary_value tCOLON2 tCONSTANT { + /*%%%*/ if (in_def || in_single) yyerror("dynamic constant assignment"); $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); + /*% + $$ = dispatch2(const_path_field, $1, $3); + if (in_def || in_single) { + $$ = dispatch1(assign_error, $$); + } + %*/ } | tCOLON3 tCONSTANT { + /*%%%*/ if (in_def || in_single) yyerror("dynamic constant assignment"); $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); + /*% + $$ = dispatch1(top_const_field, $2); + if (in_def || in_single) { + $$ = dispatch1(assign_error, $$); + } + %*/ } | backref { - rb_backref_error($1); - $$ = 0; + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(assign_error, $1); + %*/ } ; cname : tIDENTIFIER { + /*%%%*/ yyerror("class/module name must be CONSTANT"); + /*% + $$ = dispatch1(class_name_error, $1); + %*/ } | tCONSTANT ; cpath : tCOLON3 cname { + /*%%%*/ $$ = NEW_COLON3($2); + /*% + $$ = dispatch1(top_const_ref, $2); + %*/ } | cname { + /*%%%*/ $$ = NEW_COLON2(0, $$); + /*% + $$ = dispatch1(const_ref, $1); + %*/ } | primary_value tCOLON2 cname { + /*%%%*/ $$ = NEW_COLON2($1, $3); + /*% + $$ = dispatch2(const_path_ref, $1, $3); + %*/ } ; @@ -905,13 +1784,17 @@ fname : tIDENTIFIER | tFID | op { - lex_state = EXPR_END; + lex_state = EXPR_ENDFN; $$ = $1; } | reswords { - lex_state = EXPR_END; + lex_state = EXPR_ENDFN; + /*%%%*/ $$ = $<id>1; + /*% + $$ = $1; + %*/ } ; @@ -921,68 +1804,100 @@ fsym : fname fitem : fsym { + /*%%%*/ $$ = NEW_LIT(ID2SYM($1)); + /*% + $$ = dispatch1(symbol_literal, $1); + %*/ } | dsym ; undef_list : fitem { + /*%%%*/ $$ = NEW_UNDEF($1); + /*% + $$ = rb_ary_new3(1, 
$1); + %*/ } | undef_list ',' {lex_state = EXPR_FNAME;} fitem { + /*%%%*/ $$ = block_append($1, NEW_UNDEF($4)); + /*% + rb_ary_push($1, $4); + %*/ } ; -op : '|' { $$ = '|'; } - | '^' { $$ = '^'; } - | '&' { $$ = '&'; } - | tCMP { $$ = tCMP; } - | tEQ { $$ = tEQ; } - | tEQQ { $$ = tEQQ; } - | tMATCH { $$ = tMATCH; } - | '>' { $$ = '>'; } - | tGEQ { $$ = tGEQ; } - | '<' { $$ = '<'; } - | tLEQ { $$ = tLEQ; } - | tLSHFT { $$ = tLSHFT; } - | tRSHFT { $$ = tRSHFT; } - | '+' { $$ = '+'; } - | '-' { $$ = '-'; } - | '*' { $$ = '*'; } - | tSTAR { $$ = '*'; } - | '/' { $$ = '/'; } - | '%' { $$ = '%'; } - | tPOW { $$ = tPOW; } - | '~' { $$ = '~'; } - | tUPLUS { $$ = tUPLUS; } - | tUMINUS { $$ = tUMINUS; } - | tAREF { $$ = tAREF; } - | tASET { $$ = tASET; } - | '`' { $$ = '`'; } - ; - -reswords : k__LINE__ | k__FILE__ | klBEGIN | klEND - | kALIAS | kAND | kBEGIN | kBREAK | kCASE | kCLASS | kDEF - | kDEFINED | kDO | kELSE | kELSIF | kEND | kENSURE | kFALSE - | kFOR | kIN | kMODULE | kNEXT | kNIL | kNOT - | kOR | kREDO | kRESCUE | kRETRY | kRETURN | kSELF | kSUPER - | kTHEN | kTRUE | kUNDEF | kWHEN | kYIELD - | kIF | kUNLESS | kWHILE | kUNTIL +op : '|' { ifndef_ripper($$ = '|'); } + | '^' { ifndef_ripper($$ = '^'); } + | '&' { ifndef_ripper($$ = '&'); } + | tCMP { ifndef_ripper($$ = tCMP); } + | tEQ { ifndef_ripper($$ = tEQ); } + | tEQQ { ifndef_ripper($$ = tEQQ); } + | tMATCH { ifndef_ripper($$ = tMATCH); } + | tNMATCH { ifndef_ripper($$ = tNMATCH); } + | '>' { ifndef_ripper($$ = '>'); } + | tGEQ { ifndef_ripper($$ = tGEQ); } + | '<' { ifndef_ripper($$ = '<'); } + | tLEQ { ifndef_ripper($$ = tLEQ); } + | tNEQ { ifndef_ripper($$ = tNEQ); } + | tLSHFT { ifndef_ripper($$ = tLSHFT); } + | tRSHFT { ifndef_ripper($$ = tRSHFT); } + | '+' { ifndef_ripper($$ = '+'); } + | '-' { ifndef_ripper($$ = '-'); } + | '*' { ifndef_ripper($$ = '*'); } + | tSTAR { ifndef_ripper($$ = '*'); } + | '/' { ifndef_ripper($$ = '/'); } + | '%' { ifndef_ripper($$ = '%'); } + | tPOW { ifndef_ripper($$ = tPOW); 
} + | '!' { ifndef_ripper($$ = '!'); } + | '~' { ifndef_ripper($$ = '~'); } + | tUPLUS { ifndef_ripper($$ = tUPLUS); } + | tUMINUS { ifndef_ripper($$ = tUMINUS); } + | tAREF { ifndef_ripper($$ = tAREF); } + | tASET { ifndef_ripper($$ = tASET); } + | '`' { ifndef_ripper($$ = '`'); } + ; + +reswords : keyword__LINE__ | keyword__FILE__ | keyword__ENCODING__ + | keyword_BEGIN | keyword_END + | keyword_alias | keyword_and | keyword_begin + | keyword_break | keyword_case | keyword_class | keyword_def + | keyword_defined | keyword_do | keyword_else | keyword_elsif + | keyword_end | keyword_ensure | keyword_false + | keyword_for | keyword_in | keyword_module | keyword_next + | keyword_nil | keyword_not | keyword_or | keyword_redo + | keyword_rescue | keyword_retry | keyword_return | keyword_self + | keyword_super | keyword_then | keyword_true | keyword_undef + | keyword_when | keyword_yield | keyword_if | keyword_unless + | keyword_while | keyword_until ; arg : lhs '=' arg { + /*%%%*/ + value_expr($3); $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, $3); + %*/ } - | lhs '=' arg kRESCUE_MOD arg + | lhs '=' arg modifier_rescue arg { - $$ = node_assign($1, NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0)); + /*%%%*/ + value_expr($3); + $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); + $$ = node_assign($1, $3); + /*% + $$ = dispatch2(assign, $1, dispatch2(rescue_mod, $3, $5)); + %*/ } | var_lhs tOP_ASGN arg { + /*%%%*/ value_expr($3); if ($1) { ID vid = $1->nd_vid; @@ -999,20 +1914,60 @@ arg : lhs '=' arg } else { $$ = $1; - $$->nd_value = call_op(gettable(vid),$2,1,$3); + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); } } else { - $$ = 0; + $$ = NEW_BEGIN(0); } + /*% + $$ = dispatch3(opassign, $1, $2, $3); + %*/ } - | primary_value '[' aref_args ']' tOP_ASGN arg + | var_lhs tOP_ASGN arg modifier_rescue arg { - NODE *args; + /*%%%*/ + value_expr($3); + $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); + if ($1) { + ID vid = $1->nd_vid; + if ($2 == tOROP) { + $1->nd_value 
= $3; + $$ = NEW_OP_ASGN_OR(gettable(vid), $1); + if (is_asgn_or_id(vid)) { + $$->nd_aid = vid; + } + } + else if ($2 == tANDOP) { + $1->nd_value = $3; + $$ = NEW_OP_ASGN_AND(gettable(vid), $1); + } + else { + $$ = $1; + $$->nd_value = NEW_CALL(gettable(vid), $2, NEW_LIST($3)); + } + } + else { + $$ = NEW_BEGIN(0); + } + /*% + $3 = dispatch2(rescue_mod, $3, $5); + $$ = dispatch3(opassign, $1, $2, $3); + %*/ + } + | primary_value '[' opt_call_args rbracket tOP_ASGN arg + { + /*%%%*/ + NODE *args; value_expr($6); if (!$3) $3 = NEW_ZARRAY(); - args = arg_concat($6, $3); + if (nd_type($3) == NODE_BLOCK_PASS) { + args = NEW_ARGSCAT($3, $6); + } + else { + args = arg_concat($3, $6); + } if ($5 == tOROP) { $5 = 0; } @@ -1020,10 +1975,15 @@ arg : lhs '=' arg $5 = 1; } $$ = NEW_OP_ASGN1($1, $5, args); - fixpos($$, $1); + fixpos($$, $1); + /*% + $1 = dispatch2(aref_field, $1, escape_Qundef($3)); + $$ = dispatch3(opassign, $1, $5, $6); + %*/ } | primary_value '.' tIDENTIFIER tOP_ASGN arg { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -1032,10 +1992,15 @@ arg : lhs '=' arg $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ } | primary_value '.' 
tCONSTANT tOP_ASGN arg { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -1044,10 +2009,15 @@ arg : lhs '=' arg $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_id2sym('.'), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ } | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg { + /*%%%*/ value_expr($5); if ($4 == tOROP) { $4 = 0; @@ -1056,179 +2026,329 @@ arg : lhs '=' arg $4 = 1; } $$ = NEW_OP_ASGN2($1, $3, $4, $5); - fixpos($$, $1); + fixpos($$, $1); + /*% + $1 = dispatch3(field, $1, ripper_intern("::"), $3); + $$ = dispatch3(opassign, $1, $4, $5); + %*/ } | primary_value tCOLON2 tCONSTANT tOP_ASGN arg { + /*%%%*/ yyerror("constant re-assignment"); - $$ = 0; + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch2(const_path_field, $1, $3); + $$ = dispatch3(opassign, $$, $4, $5); + $$ = dispatch1(assign_error, $$); + %*/ } | tCOLON3 tCONSTANT tOP_ASGN arg { + /*%%%*/ yyerror("constant re-assignment"); - $$ = 0; + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(top_const_field, $2); + $$ = dispatch3(opassign, $$, $3, $4); + $$ = dispatch1(assign_error, $$); + %*/ } | backref tOP_ASGN arg { - rb_backref_error($1); - $$ = 0; + /*%%%*/ + rb_backref_error($1); + $$ = NEW_BEGIN(0); + /*% + $$ = dispatch1(var_field, $1); + $$ = dispatch3(opassign, $$, $2, $3); + $$ = dispatch1(assign_error, $$); + %*/ } | arg tDOT2 arg { + /*%%%*/ value_expr($1); value_expr($3); + $$ = NEW_DOT2($1, $3); if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { - $1->nd_lit = rb_range_new($1->nd_lit, $3->nd_lit, Qfalse); - $$ = $1; - } - else { - $$ = NEW_DOT2($1, $3); + deferred_nodes = list_append(deferred_nodes, $$); } + /*% + $$ = dispatch2(dot2, $1, $3); + %*/ } | arg tDOT3 arg { + /*%%%*/ value_expr($1); value_expr($3); + $$ = NEW_DOT3($1, $3); if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { - $1->nd_lit = 
rb_range_new($1->nd_lit, $3->nd_lit, Qtrue); - $$ = $1; - } - else { - $$ = NEW_DOT3($1, $3); + deferred_nodes = list_append(deferred_nodes, $$); } + /*% + $$ = dispatch2(dot3, $1, $3); + %*/ } | arg '+' arg { - $$ = call_op($1, '+', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '+', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('+'), $3); + %*/ } | arg '-' arg { - $$ = call_op($1, '-', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '-', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('-'), $3); + %*/ } | arg '*' arg { - $$ = call_op($1, '*', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '*', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('*'), $3); + %*/ } | arg '/' arg { - $$ = call_op($1, '/', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '/', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('/'), $3); + %*/ } | arg '%' arg { - $$ = call_op($1, '%', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '%', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('%'), $3); + %*/ } | arg tPOW arg { - $$ = call_op($1, tPOW, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tPOW, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("**"), $3); + %*/ } | tUMINUS_NUM tINTEGER tPOW arg { - $$ = call_op(call_op($2, tPOW, 1, $4), tUMINUS, 0, 0); + /*%%%*/ + $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); + /*% + $$ = dispatch3(binary, $2, ripper_intern("**"), $4); + $$ = dispatch2(unary, ripper_intern("-@"), $$); + %*/ } | tUMINUS_NUM tFLOAT tPOW arg { - $$ = call_op(call_op($2, tPOW, 1, $4), tUMINUS, 0, 0); + /*%%%*/ + $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); + /*% + $$ = dispatch3(binary, $2, ripper_intern("**"), $4); + $$ = dispatch2(unary, ripper_intern("-@"), $$); + %*/ } | tUPLUS arg { - if ($2 && nd_type($2) == NODE_LIT) { - $$ = $2; - } - else { - $$ = call_op($2, tUPLUS, 0, 0); - } + /*%%%*/ + $$ = call_uni_op($2, tUPLUS); + /*% + $$ = dispatch2(unary, ripper_intern("+@"), $2); + %*/ } | tUMINUS arg { - $$ = call_op($2, tUMINUS, 0, 0); + /*%%%*/ + $$ = call_uni_op($2, tUMINUS); + /*% + $$ = 
dispatch2(unary, ripper_intern("-@"), $2); + %*/ } | arg '|' arg { - $$ = call_op($1, '|', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '|', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('|'), $3); + %*/ } | arg '^' arg { - $$ = call_op($1, '^', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '^', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('^'), $3); + %*/ } | arg '&' arg { - $$ = call_op($1, '&', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '&', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('&'), $3); + %*/ } | arg tCMP arg { - $$ = call_op($1, tCMP, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tCMP, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<=>"), $3); + %*/ } | arg '>' arg { - $$ = call_op($1, '>', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '>', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('>'), $3); + %*/ } | arg tGEQ arg { - $$ = call_op($1, tGEQ, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tGEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern(">="), $3); + %*/ } | arg '<' arg { - $$ = call_op($1, '<', 1, $3); + /*%%%*/ + $$ = call_bin_op($1, '<', $3); + /*% + $$ = dispatch3(binary, $1, ID2SYM('<'), $3); + %*/ } | arg tLEQ arg { - $$ = call_op($1, tLEQ, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tLEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<="), $3); + %*/ } | arg tEQ arg { - $$ = call_op($1, tEQ, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("=="), $3); + %*/ } | arg tEQQ arg { - $$ = call_op($1, tEQQ, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tEQQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("==="), $3); + %*/ } | arg tNEQ arg { - $$ = NEW_NOT(call_op($1, tEQ, 1, $3)); + /*%%%*/ + $$ = call_bin_op($1, tNEQ, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("!="), $3); + %*/ } | arg tMATCH arg { - $$ = match_gen($1, $3); + /*%%%*/ + $$ = match_op($1, $3); + if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { + $$ = reg_named_capture_assign($1->nd_lit, $$); + } + /*% 
+ $$ = dispatch3(binary, $1, ripper_intern("=~"), $3); + %*/ } | arg tNMATCH arg { - $$ = NEW_NOT(match_gen($1, $3)); + /*%%%*/ + $$ = call_bin_op($1, tNMATCH, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("!~"), $3); + %*/ } | '!' arg { - $$ = NEW_NOT(cond($2)); + /*%%%*/ + $$ = call_uni_op(cond($2), '!'); + /*% + $$ = dispatch2(unary, ID2SYM('!'), $2); + %*/ } | '~' arg { - $$ = call_op($2, '~', 0, 0); + /*%%%*/ + $$ = call_uni_op($2, '~'); + /*% + $$ = dispatch2(unary, ID2SYM('~'), $2); + %*/ } | arg tLSHFT arg { - $$ = call_op($1, tLSHFT, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tLSHFT, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("<<"), $3); + %*/ } | arg tRSHFT arg { - $$ = call_op($1, tRSHFT, 1, $3); + /*%%%*/ + $$ = call_bin_op($1, tRSHFT, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern(">>"), $3); + %*/ } | arg tANDOP arg { + /*%%%*/ $$ = logop(NODE_AND, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("&&"), $3); + %*/ } | arg tOROP arg { + /*%%%*/ $$ = logop(NODE_OR, $1, $3); + /*% + $$ = dispatch3(binary, $1, ripper_intern("||"), $3); + %*/ } - | kDEFINED opt_nl {in_defined = 1;} arg + | keyword_defined opt_nl {in_defined = 1;} arg { - in_defined = 0; + /*%%%*/ + in_defined = 0; $$ = NEW_DEFINED($4); + /*% + in_defined = 0; + $$ = dispatch1(defined, $4); + %*/ } - | arg '?' arg ':' arg + | arg '?' 
arg opt_nl ':' arg { - $$ = NEW_IF(cond($1), $3, $5); - fixpos($$, $1); + /*%%%*/ + value_expr($1); + $$ = NEW_IF(cond($1), $3, $6); + fixpos($$, $1); + /*% + $$ = dispatch3(ifop, $1, $3, $6); + %*/ } | primary { @@ -1238,54 +2358,46 @@ arg : lhs '=' arg arg_value : arg { + /*%%%*/ value_expr($1); $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ } ; aref_args : none - | command opt_nl - { - rb_warn("parenthesize argument(s) for future version"); - $$ = NEW_LIST($1); - } | args trailer { $$ = $1; } - | args ',' tSTAR arg opt_nl + | args ',' assocs trailer { - value_expr($4); - $$ = arg_concat($1, $4); + /*%%%*/ + $$ = arg_append($1, NEW_HASH($3)); + /*% + $$ = arg_add_assocs($1, $3); + %*/ } | assocs trailer { + /*%%%*/ $$ = NEW_LIST(NEW_HASH($1)); - } - | tSTAR arg opt_nl - { - value_expr($2); - $$ = NEW_NEWLINE(NEW_SPLAT($2)); + /*% + $$ = arg_add_assocs(arg_new(), $1); + %*/ } ; -paren_args : '(' none ')' - { - $$ = $2; - } - | '(' call_args opt_nl ')' +paren_args : '(' opt_call_args rparen { + /*%%%*/ $$ = $2; - } - | '(' block_call opt_nl ')' - { - rb_warn("parenthesize argument for future version"); - $$ = NEW_LIST($2); - } - | '(' args ',' block_call opt_nl ')' - { - rb_warn("parenthesize argument for future version"); - $$ = list_append($2, $4); + /*% + $$ = dispatch1(arg_paren, escape_Qundef($2)); + %*/ } ; @@ -1293,131 +2405,74 @@ opt_paren_args : none | paren_args ; +opt_call_args : none + | call_args + ; + call_args : command { - rb_warn("parenthesize argument(s) for future version"); + /*%%%*/ + value_expr($1); $$ = NEW_LIST($1); + /*% + $$ = arg_add(arg_new(), $1); + %*/ } | args opt_block_arg { + /*%%%*/ $$ = arg_blk_pass($1, $2); - } - | args ',' tSTAR arg_value opt_block_arg - { - $$ = arg_concat($1, $4); - $$ = arg_blk_pass($$, $5); + /*% + $$ = arg_add_optblock($1, $2); + %*/ } | assocs opt_block_arg { + /*%%%*/ $$ = NEW_LIST(NEW_HASH($1)); $$ = arg_blk_pass($$, $2); - } - | assocs ',' tSTAR arg_value opt_block_arg - { - $$ = 
arg_concat(NEW_LIST(NEW_HASH($1)), $4); - $$ = arg_blk_pass($$, $5); + /*% + $$ = arg_add_assocs(arg_new(), $1); + $$ = arg_add_optblock($$, $2); + %*/ } | args ',' assocs opt_block_arg { - $$ = list_append($1, NEW_HASH($3)); + /*%%%*/ + $$ = arg_append($1, NEW_HASH($3)); $$ = arg_blk_pass($$, $4); - } - | args ',' assocs ',' tSTAR arg opt_block_arg - { - value_expr($6); - $$ = arg_concat(list_append($1, NEW_HASH($3)), $6); - $$ = arg_blk_pass($$, $7); - } - | tSTAR arg_value opt_block_arg - { - $$ = arg_blk_pass(NEW_SPLAT($2), $3); + /*% + $$ = arg_add_optblock(arg_add_assocs($1, $3), $4); + %*/ } | block_arg - ; - -call_args2 : arg_value ',' args opt_block_arg - { - $$ = arg_blk_pass(list_concat(NEW_LIST($1),$3), $4); - } - | arg_value ',' block_arg - { - $$ = arg_blk_pass($1, $3); - } - | arg_value ',' tSTAR arg_value opt_block_arg - { - $$ = arg_concat(NEW_LIST($1), $4); - $$ = arg_blk_pass($$, $5); - } - | arg_value ',' args ',' tSTAR arg_value opt_block_arg + /*%c%*/ + /*%c { - $$ = arg_concat(list_concat(NEW_LIST($1),$3), $6); - $$ = arg_blk_pass($$, $7); + $$ = arg_add_block(arg_new(), $1); } - | assocs opt_block_arg - { - $$ = NEW_LIST(NEW_HASH($1)); - $$ = arg_blk_pass($$, $2); - } - | assocs ',' tSTAR arg_value opt_block_arg - { - $$ = arg_concat(NEW_LIST(NEW_HASH($1)), $4); - $$ = arg_blk_pass($$, $5); - } - | arg_value ',' assocs opt_block_arg - { - $$ = list_append(NEW_LIST($1), NEW_HASH($3)); - $$ = arg_blk_pass($$, $4); - } - | arg_value ',' args ',' assocs opt_block_arg - { - $$ = list_append(list_concat(NEW_LIST($1),$3), NEW_HASH($5)); - $$ = arg_blk_pass($$, $6); - } - | arg_value ',' assocs ',' tSTAR arg_value opt_block_arg - { - $$ = arg_concat(list_append(NEW_LIST($1), NEW_HASH($3)), $6); - $$ = arg_blk_pass($$, $7); - } - | arg_value ',' args ',' assocs ',' tSTAR arg_value opt_block_arg - { - $$ = arg_concat(list_append(list_concat(NEW_LIST($1), $3), NEW_HASH($5)), $8); - $$ = arg_blk_pass($$, $9); - } - | tSTAR arg_value opt_block_arg - { - 
$$ = arg_blk_pass(NEW_SPLAT($2), $3); - } - | block_arg + %*/ ; command_args : { - $<num>$ = cmdarg_stack; + $<val>$ = cmdarg_stack; CMDARG_PUSH(1); } - open_args + call_args { /* CMDARG_POP() */ - cmdarg_stack = $<num>1; - $$ = $2; - } - ; - -open_args : call_args - | tLPAREN_ARG {lex_state = EXPR_ENDARG;} ')' - { - rb_warn("don't put space before argument parentheses"); - $$ = 0; - } - | tLPAREN_ARG call_args2 {lex_state = EXPR_ENDARG;} ')' - { - rb_warn("don't put space before argument parentheses"); + cmdarg_stack = $<val>1; $$ = $2; } ; block_arg : tAMPER arg_value { + /*%%%*/ $$ = NEW_BLOCK_PASS($2); + /*% + $$ = $2; + %*/ } ; @@ -1425,30 +2480,98 @@ opt_block_arg : ',' block_arg { $$ = $2; } + | ',' + { + $$ = 0; + } | none + { + $$ = 0; + } ; -args : arg_value +args : arg_value { + /*%%%*/ $$ = NEW_LIST($1); + /*% + $$ = arg_add(arg_new(), $1); + %*/ + } + | tSTAR arg_value + { + /*%%%*/ + $$ = NEW_SPLAT($2); + /*% + $$ = arg_add_star(arg_new(), $2); + %*/ } | args ',' arg_value { - $$ = list_append($1, $3); + /*%%%*/ + NODE *n1; + if ((n1 = splat_array($1)) != 0) { + $$ = list_append(n1, $3); + } + else { + $$ = arg_append($1, $3); + } + /*% + $$ = arg_add($1, $3); + %*/ + } + | args ',' tSTAR arg_value + { + /*%%%*/ + NODE *n1; + if ((nd_type($4) == NODE_ARRAY) && (n1 = splat_array($1)) != 0) { + $$ = list_concat(n1, $4); + } + else { + $$ = arg_concat($1, $4); + } + /*% + $$ = arg_add_star($1, $4); + %*/ } ; mrhs : args ',' arg_value { - $$ = list_append($1, $3); + /*%%%*/ + NODE *n1; + if ((n1 = splat_array($1)) != 0) { + $$ = list_append(n1, $3); + } + else { + $$ = arg_append($1, $3); + } + /*% + $$ = mrhs_add(args2mrhs($1), $3); + %*/ } | args ',' tSTAR arg_value { - $$ = arg_concat($1, $4); + /*%%%*/ + NODE *n1; + if (nd_type($4) == NODE_ARRAY && + (n1 = splat_array($1)) != 0) { + $$ = list_concat(n1, $4); + } + else { + $$ = arg_concat($1, $4); + } + /*% + $$ = mrhs_add_star(args2mrhs($1), $4); + %*/ } | tSTAR arg_value { + /*%%%*/ $$ = 
NEW_SPLAT($2); + /*% + $$ = mrhs_add_star(mrhs_new(), $2); + %*/ } ; @@ -1462,220 +2585,374 @@ primary : literal | backref | tFID { + /*%%%*/ $$ = NEW_FCALL($1, 0); + /*% + $$ = method_arg(dispatch1(fcall, $1), arg_new()); + %*/ } - | kBEGIN + | k_begin { - $<num>1 = ruby_sourceline; + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ } bodystmt - kEND + k_end { - if ($3 == NULL) + /*%%%*/ + if ($3 == NULL) { $$ = NEW_NIL(); - else + } + else { + if (nd_type($3) == NODE_RESCUE || + nd_type($3) == NODE_ENSURE) + nd_set_line($3, $<num>2); $$ = NEW_BEGIN($3); - nd_set_line($$, $<num>1); + } + nd_set_line($$, $<num>2); + /*% + $$ = dispatch1(begin, $3); + %*/ } - | tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} opt_nl ')' + | tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} rparen { - rb_warning("(...) interpreted as grouped expression"); + rb_warning0("(...) interpreted as grouped expression"); + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(paren, $2); + %*/ } | tLPAREN compstmt ')' { - if (!$2) $$ = NEW_NIL(); - else $$ = $2; + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(paren, $2); + %*/ } | primary_value tCOLON2 tCONSTANT { + /*%%%*/ $$ = NEW_COLON2($1, $3); + /*% + $$ = dispatch2(const_path_ref, $1, $3); + %*/ } | tCOLON3 tCONSTANT { + /*%%%*/ $$ = NEW_COLON3($2); - } - | primary_value '[' aref_args ']' - { - if ($1 && nd_type($1) == NODE_SELF) - $$ = NEW_FCALL(tAREF, $3); - else - $$ = NEW_CALL($1, tAREF, $3); - fixpos($$, $1); + /*% + $$ = dispatch1(top_const_ref, $2); + %*/ } | tLBRACK aref_args ']' { - if ($2 == 0) { + /*%%%*/ + if ($2 == 0) { $$ = NEW_ZARRAY(); /* zero length array*/ } else { $$ = $2; } + /*% + $$ = dispatch1(array, escape_Qundef($2)); + %*/ } | tLBRACE assoc_list '}' { + /*%%%*/ $$ = NEW_HASH($2); + /*% + $$ = dispatch1(hash, escape_Qundef($2)); + %*/ } - | kRETURN + | keyword_return { + /*%%%*/ $$ = NEW_RETURN(0); + /*% + $$ = dispatch0(return0); + %*/ } - | kYIELD '(' call_args ')' + | keyword_yield '(' call_args rparen { + /*%%%*/ $$ = new_yield($3); + 
/*% + $$ = dispatch1(yield, dispatch1(paren, $3)); + %*/ } - | kYIELD '(' ')' + | keyword_yield '(' rparen { + /*%%%*/ $$ = NEW_YIELD(0, Qfalse); + /*% + $$ = dispatch1(yield, dispatch1(paren, arg_new())); + %*/ } - | kYIELD + | keyword_yield { + /*%%%*/ $$ = NEW_YIELD(0, Qfalse); + /*% + $$ = dispatch0(yield0); + %*/ } - | kDEFINED opt_nl '(' {in_defined = 1;} expr ')' + | keyword_defined opt_nl '(' {in_defined = 1;} expr rparen { - in_defined = 0; + /*%%%*/ + in_defined = 0; $$ = NEW_DEFINED($5); + /*% + in_defined = 0; + $$ = dispatch1(defined, $5); + %*/ + } + | keyword_not '(' expr rparen + { + /*%%%*/ + $$ = call_uni_op(cond($3), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), $3); + %*/ + } + | keyword_not '(' rparen + { + /*%%%*/ + $$ = call_uni_op(cond(NEW_NIL()), '!'); + /*% + $$ = dispatch2(unary, ripper_intern("not"), Qnil); + %*/ } | operation brace_block { + /*%%%*/ $2->nd_iter = NEW_FCALL($1, 0); $$ = $2; fixpos($2->nd_iter, $2); + /*% + $$ = method_arg(dispatch1(fcall, $1), arg_new()); + $$ = method_add_block($$, $2); + %*/ } | method_call | method_call brace_block { - if ($1 && nd_type($1) == NODE_BLOCK_PASS) { - rb_compile_error("both block arg and actual block given"); - } + /*%%%*/ + block_dup_check($1->nd_args, $2); $2->nd_iter = $1; $$ = $2; - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = method_add_block($1, $2); + %*/ + } + | tLAMBDA lambda + { + $$ = $2; } - | kIF expr_value then + | k_if expr_value then compstmt if_tail - kEND + k_end { + /*%%%*/ $$ = NEW_IF(cond($2), $4, $5); - fixpos($$, $2); - if (cond_negative(&$$->nd_cond)) { - NODE *tmp = $$->nd_body; - $$->nd_body = $$->nd_else; - $$->nd_else = tmp; - } + fixpos($$, $2); + /*% + $$ = dispatch3(if, $2, $4, escape_Qundef($5)); + %*/ } - | kUNLESS expr_value then + | k_unless expr_value then compstmt opt_else - kEND + k_end { + /*%%%*/ $$ = NEW_UNLESS(cond($2), $4, $5); - fixpos($$, $2); - if (cond_negative(&$$->nd_cond)) { - NODE *tmp = $$->nd_body; - $$->nd_body = 
$$->nd_else; - $$->nd_else = tmp; - } + fixpos($$, $2); + /*% + $$ = dispatch3(unless, $2, $4, escape_Qundef($5)); + %*/ } - | kWHILE {COND_PUSH(1);} expr_value do {COND_POP();} + | k_while {COND_PUSH(1);} expr_value do {COND_POP();} compstmt - kEND + k_end { + /*%%%*/ $$ = NEW_WHILE(cond($3), $6, 1); - fixpos($$, $3); - if (cond_negative(&$$->nd_cond)) { - nd_set_type($$, NODE_UNTIL); - } + fixpos($$, $3); + /*% + $$ = dispatch2(while, $3, $6); + %*/ } - | kUNTIL {COND_PUSH(1);} expr_value do {COND_POP();} + | k_until {COND_PUSH(1);} expr_value do {COND_POP();} compstmt - kEND + k_end { + /*%%%*/ $$ = NEW_UNTIL(cond($3), $6, 1); - fixpos($$, $3); - if (cond_negative(&$$->nd_cond)) { - nd_set_type($$, NODE_WHILE); - } + fixpos($$, $3); + /*% + $$ = dispatch2(until, $3, $6); + %*/ } - | kCASE expr_value opt_terms + | k_case expr_value opt_terms case_body - kEND + k_end { + /*%%%*/ $$ = NEW_CASE($2, $4); - fixpos($$, $2); - } - | kCASE opt_terms case_body kEND - { - $$ = $3; - } - | kCASE opt_terms kELSE compstmt kEND - { - $$ = $4; - } - | kFOR block_var kIN {COND_PUSH(1);} expr_value do {COND_POP();} + fixpos($$, $2); + /*% + $$ = dispatch2(case, $2, $4); + %*/ + } + | k_case opt_terms case_body k_end + { + /*%%%*/ + $$ = NEW_CASE(0, $3); + /*% + $$ = dispatch2(case, Qnil, $3); + %*/ + } + | k_for for_var keyword_in + {COND_PUSH(1);} + expr_value do + {COND_POP();} compstmt - kEND - { - $$ = NEW_FOR($2, $5, $8); - fixpos($$, $2); + k_end + { + /*%%%*/ + /* + * for a, b, c in e + * #=> + * e.each{|*x| a, b, c = x + * + * for a in e + * #=> + * e.each{|x| a, = x} + */ + ID id = internal_id(); + ID *tbl = ALLOC_N(ID, 2); + NODE *m = NEW_ARGS_AUX(0, 0); + NODE *args, *scope; + + if (nd_type($2) == NODE_MASGN) { + /* if args.length == 1 && args[0].kind_of?(Array) + * args = args[0] + * end + */ + NODE *one = NEW_LIST(NEW_LIT(INT2FIX(1))); + NODE *zero = NEW_LIST(NEW_LIT(INT2FIX(0))); + m->nd_next = block_append( + NEW_IF( + NEW_NODE(NODE_AND, + 
NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("length"), 0), + rb_intern("=="), one), + NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero), + rb_intern("kind_of?"), NEW_LIST(NEW_LIT(rb_cArray))), + 0), + NEW_DASGN_CURR(id, + NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero)), + 0), + node_assign($2, NEW_DVAR(id))); + + args = new_args(m, 0, id, 0, 0); + } + else { + if (nd_type($2) == NODE_LASGN || + nd_type($2) == NODE_DASGN || + nd_type($2) == NODE_DASGN_CURR) { + $2->nd_value = NEW_DVAR(id); + m->nd_plen = 1; + m->nd_next = $2; + args = new_args(m, 0, 0, 0, 0); + } + else { + m->nd_next = node_assign(NEW_MASGN(NEW_LIST($2), 0), NEW_DVAR(id)); + args = new_args(m, 0, id, 0, 0); + } + } + scope = NEW_NODE(NODE_SCOPE, tbl, $8, args); + tbl[0] = 1; tbl[1] = id; + $$ = NEW_FOR(0, $5, scope); + fixpos($$, $2); + /*% + $$ = dispatch3(for, $2, $5, $8); + %*/ } - | kCLASS cpath superclass + | k_class cpath superclass { if (in_def || in_single) yyerror("class definition in method body"); - class_nest++; local_push(0); - $<num>$ = ruby_sourceline; + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ } bodystmt - kEND + k_end { - $$ = NEW_CLASS($2, $5, $3); - nd_set_line($$, $<num>4); - local_pop(); - class_nest--; + /*%%%*/ + $$ = NEW_CLASS($2, $5, $3); + nd_set_line($$, $<num>4); + /*% + $$ = dispatch3(class, $2, $3, $5); + %*/ + local_pop(); } - | kCLASS tLSHFT expr + | k_class tLSHFT expr { $<num>$ = in_def; - in_def = 0; + in_def = 0; } term { - $<num>$ = in_single; - in_single = 0; - class_nest++; + $<num>$ = in_single; + in_single = 0; local_push(0); } bodystmt - kEND + k_end { - $$ = NEW_SCLASS($3, $7); - fixpos($$, $3); - local_pop(); - class_nest--; - in_def = $<num>4; - in_single = $<num>6; + /*%%%*/ + $$ = NEW_SCLASS($3, $7); + fixpos($$, $3); + /*% + $$ = dispatch2(sclass, $3, $7); + %*/ + local_pop(); + in_def = $<num>4; + in_single = $<num>6; } - | kMODULE cpath + | k_module cpath { if (in_def || in_single) yyerror("module definition in method body"); - 
class_nest++; local_push(0); - $<num>$ = ruby_sourceline; + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ } bodystmt - kEND + k_end { - $$ = NEW_MODULE($2, $4); - nd_set_line($$, $<num>3); - local_pop(); - class_nest--; + /*%%%*/ + $$ = NEW_MODULE($2, $4); + nd_set_line($$, $<num>3); + /*% + $$ = dispatch2(module, $2, $4); + %*/ + local_pop(); } - | kDEF fname + | k_def fname { $<id>$ = cur_mid; cur_mid = $2; @@ -1684,206 +2961,768 @@ primary : literal } f_arglist bodystmt - kEND + k_end { - if (!$5) $5 = NEW_NIL(); - $$ = NEW_DEFN($2, $4, $5, NOEX_PRIVATE); - fixpos($$, $4); - local_pop(); + /*%%%*/ + NODE *body = remove_begin($5); + reduce_nodes(&body); + $$ = NEW_DEFN($2, $4, body, NOEX_PRIVATE); + nd_set_line($$, $<num>1); + /*% + $$ = dispatch3(def, $2, $4, $5); + %*/ + local_pop(); in_def--; cur_mid = $<id>3; } - | kDEF singleton dot_or_colon {lex_state = EXPR_FNAME;} fname + | k_def singleton dot_or_colon {lex_state = EXPR_FNAME;} fname { in_single++; + lex_state = EXPR_ENDFN; /* force for args */ local_push(0); - lex_state = EXPR_END; /* force for args */ } f_arglist bodystmt - kEND + k_end { - $$ = NEW_DEFS($2, $5, $7, $8); - fixpos($$, $2); - local_pop(); + /*%%%*/ + NODE *body = remove_begin($8); + reduce_nodes(&body); + $$ = NEW_DEFS($2, $5, $7, body); + nd_set_line($$, $<num>1); + /*% + $$ = dispatch5(defs, $2, $3, $5, $7, $8); + %*/ + local_pop(); in_single--; } - | kBREAK + | keyword_break { + /*%%%*/ $$ = NEW_BREAK(0); + /*% + $$ = dispatch1(break, arg_new()); + %*/ } - | kNEXT + | keyword_next { + /*%%%*/ $$ = NEW_NEXT(0); + /*% + $$ = dispatch1(next, arg_new()); + %*/ } - | kREDO + | keyword_redo { + /*%%%*/ $$ = NEW_REDO(); + /*% + $$ = dispatch0(redo); + %*/ } - | kRETRY + | keyword_retry { + /*%%%*/ $$ = NEW_RETRY(); + /*% + $$ = dispatch0(retry); + %*/ } ; -primary_value : primary +primary_value : primary { + /*%%%*/ value_expr($1); $$ = $1; + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ + } + ; + +k_begin : keyword_begin + { + 
token_info_push("begin"); + } + ; + +k_if : keyword_if + { + token_info_push("if"); + } + ; + +k_unless : keyword_unless + { + token_info_push("unless"); + } + ; + +k_while : keyword_while + { + token_info_push("while"); + } + ; + +k_until : keyword_until + { + token_info_push("until"); + } + ; + +k_case : keyword_case + { + token_info_push("case"); + } + ; + +k_for : keyword_for + { + token_info_push("for"); + } + ; + +k_class : keyword_class + { + token_info_push("class"); + } + ; + +k_module : keyword_module + { + token_info_push("module"); + } + ; + +k_def : keyword_def + { + token_info_push("def"); + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ + } + ; + +k_end : keyword_end + { + token_info_pop("end"); } ; then : term - | ':' - | kTHEN - | term kTHEN + /*%c%*/ + /*%c + { $$ = Qnil; } + %*/ + | keyword_then + | term keyword_then + /*%c%*/ + /*%c + { $$ = $2; } + %*/ ; do : term - | ':' - | kDO_COND + /*%c%*/ + /*%c + { $$ = Qnil; } + %*/ + | keyword_do_cond ; if_tail : opt_else - | kELSIF expr_value then + | keyword_elsif expr_value then compstmt if_tail { + /*%%%*/ $$ = NEW_IF(cond($2), $4, $5); - fixpos($$, $2); + fixpos($$, $2); + /*% + $$ = dispatch3(elsif, $2, $4, escape_Qundef($5)); + %*/ } ; opt_else : none - | kELSE compstmt + | keyword_else compstmt { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(else, $2); + %*/ } ; -block_var : lhs +for_var : lhs | mlhs ; -opt_block_var : none - | '|' /* none */ '|' +f_marg : f_norm_arg + { + $$ = assignable($1, 0); + /*%%%*/ + /*% + $$ = dispatch1(mlhs_paren, $$); + %*/ + } + | tLPAREN f_margs rparen + { + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +f_marg_list : f_marg + { + /*%%%*/ + $$ = NEW_LIST($1); + /*% + $$ = mlhs_add(mlhs_new(), $1); + %*/ + } + | f_marg_list ',' f_marg + { + /*%%%*/ + $$ = list_append($1, $3); + /*% + $$ = mlhs_add($1, $3); + %*/ + } + ; + +f_margs : f_marg_list + { + /*%%%*/ + $$ = NEW_MASGN($1, 0); + /*% + $$ = $1; + %*/ + } + | f_marg_list ',' tSTAR 
f_norm_arg + { + $$ = assignable($4, 0); + /*%%%*/ + $$ = NEW_MASGN($1, $$); + /*% + $$ = mlhs_add_star($1, $$); + %*/ + } + | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list + { + $$ = assignable($4, 0); + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG($$, $6)); + /*% + $$ = mlhs_add_star($1, $$); + %*/ + } + | f_marg_list ',' tSTAR + { + /*%%%*/ + $$ = NEW_MASGN($1, -1); + /*% + $$ = mlhs_add_star($1, Qnil); + %*/ + } + | f_marg_list ',' tSTAR ',' f_marg_list + { + /*%%%*/ + $$ = NEW_MASGN($1, NEW_POSTARG(-1, $5)); + /*% + $$ = mlhs_add_star($1, $5); + %*/ + } + | tSTAR f_norm_arg + { + $$ = assignable($2, 0); + /*%%%*/ + $$ = NEW_MASGN(0, $$); + /*% + $$ = mlhs_add_star(mlhs_new(), $$); + %*/ + } + | tSTAR f_norm_arg ',' f_marg_list + { + $$ = assignable($2, 0); + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG($$, $4)); + /*% + #if 0 + TODO: Check me + #endif + $$ = mlhs_add_star($$, $4); + %*/ + } + | tSTAR + { + /*%%%*/ + $$ = NEW_MASGN(0, -1); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + | tSTAR ',' f_marg_list + { + /*%%%*/ + $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); + /*% + $$ = mlhs_add_star(mlhs_new(), Qnil); + %*/ + } + ; + +block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, $5, 0, $6); + /*% + $$ = params_new($1, $3, $5, Qnil, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, $5, $7, $8); + /*% + $$ = params_new($1, $3, $5, $7, escape_Qundef($8)); + %*/ + } + | f_arg ',' f_block_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, 0, $4); + /*% + $$ = params_new($1, $3, Qnil, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_block_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, $5, $6); + /*% + $$ = params_new($1, $3, Qnil, $5, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, 0, $4); + /*% + $$ = params_new($1, 
Qnil, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' + { + /*%%%*/ + $$ = new_args($1, 0, 1, 0, 0); + /*% + $$ = params_new($1, Qnil, Qnil, Qnil, Qnil); + dispatch1(excessed_comma, $$); + %*/ + } + | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, $5, $6); + /*% + $$ = params_new($1, Qnil, $3, $5, escape_Qundef($6)); + %*/ + } + | f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, 0, 0, $2); + /*% + $$ = params_new($1, Qnil,Qnil, Qnil, escape_Qundef($2)); + %*/ + } + | f_block_optarg ',' f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, 0, $4); + /*% + $$ = params_new(Qnil, $1, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, $5, $6); + /*% + $$ = params_new(Qnil, $1, $3, $5, escape_Qundef($6)); + %*/ + } + | f_block_optarg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, 0, $2); + /*% + $$ = params_new(Qnil, $1, Qnil, Qnil,escape_Qundef($2)); + %*/ + } + | f_block_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, $3, $4); + /*% + $$ = params_new(Qnil, $1, Qnil, $3, escape_Qundef($4)); + %*/ + } + | f_rest_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, 0, $2); + /*% + $$ = params_new(Qnil, Qnil, $1, Qnil, escape_Qundef($2)); + %*/ + } + | f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, $3, $4); + /*% + $$ = params_new(Qnil, Qnil, $1, $3, escape_Qundef($4)); + %*/ + } + | f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, 0, 0, $1); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, $1); + %*/ + } + ; + +opt_block_param : none + | block_param_def + { + command_start = TRUE; + } + ; + +block_param_def : '|' opt_bv_decl '|' { - $$ = (NODE*)1; + /*%%%*/ + $$ = 0; + /*% + $$ = blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil), + escape_Qundef($2)); + %*/ } | tOROP { - $$ = (NODE*)1; + /*%%%*/ + $$ = 0; + /*% + $$ = 
blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil), + Qnil); + %*/ + } + | '|' block_param opt_bv_decl '|' + { + /*%%%*/ + $$ = $2; + /*% + $$ = blockvar_new(escape_Qundef($2), escape_Qundef($3)); + %*/ + } + ; + + +opt_bv_decl : none + | ';' bv_decls + { + /*%%%*/ + $$ = 0; + /*% + $$ = $2; + %*/ + } + ; + +bv_decls : bvar + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + %*/ + | bv_decls ',' bvar + /*%c%*/ + /*%c + { + rb_ary_push($$, $3); + } + %*/ + ; + +bvar : tIDENTIFIER + { + new_bv(get_id($1)); + /*%%%*/ + /*% + $$ = get_value($1); + %*/ + } + | f_bad_arg + { + $$ = 0; + } + ; + +lambda : { + $<vars>$ = dyna_push(); + } + { + $<num>$ = lpar_beg; + lpar_beg = ++paren_nest; + } + f_larglist + lambda_body + { + lpar_beg = $<num>2; + /*%%%*/ + $$ = $3; + $$->nd_body = NEW_SCOPE($3->nd_head, $4); + /*% + $$ = dispatch2(lambda, $3, $4); + %*/ + dyna_pop($<vars>1); + } + ; + +f_larglist : '(' f_args opt_bv_decl rparen + { + /*%%%*/ + $$ = NEW_LAMBDA($2); + /*% + $$ = dispatch1(paren, $2); + %*/ } - | '|' block_var '|' + | f_args + { + /*%%%*/ + $$ = NEW_LAMBDA($1); + /*% + $$ = $1; + %*/ + } + ; + +lambda_body : tLAMBEG compstmt '}' + { + $$ = $2; + } + | keyword_do_LAMBDA compstmt keyword_end { $$ = $2; } ; -do_block : kDO_BLOCK +do_block : keyword_do_block { - $<vars>$ = dyna_push(); - $<num>1 = ruby_sourceline; + $<vars>1 = dyna_push(); + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% %*/ } - opt_block_var {$<vars>$ = ruby_dyna_vars;} + opt_block_param compstmt - kEND + keyword_end { - $$ = NEW_ITER($3, 0, dyna_init($5, $<vars>4)); - nd_set_line($$, $<num>1); - dyna_pop($<vars>2); + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $<num>2); + /*% + $$ = dispatch2(do_block, escape_Qundef($3), $4); + %*/ + dyna_pop($<vars>1); } ; block_call : command do_block { - if ($1 && nd_type($1) == NODE_BLOCK_PASS) { - rb_compile_error("both block arg and actual block given"); + /*%%%*/ + if (nd_type($1) == NODE_YIELD) { + compile_error(PARSER_ARG "block given to yield"); + 
} + else { + block_dup_check($1->nd_args, $2); } $2->nd_iter = $1; $$ = $2; - fixpos($$, $1); + fixpos($$, $1); + /*% + $$ = method_add_block($1, $2); + %*/ } | block_call '.' operation2 opt_paren_args { - $$ = new_call($1, $3, $4); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_optarg($$, $4); + %*/ } | block_call tCOLON2 operation2 opt_paren_args { - $$ = new_call($1, $3, $4); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + $$ = method_optarg($$, $4); + %*/ } ; method_call : operation paren_args { - $$ = new_fcall($1, $2); - fixpos($$, $2); + /*%%%*/ + $$ = NEW_FCALL($1, $2); + fixpos($$, $2); + /*% + $$ = method_arg(dispatch1(fcall, $1), $2); + %*/ } | primary_value '.' operation2 opt_paren_args { - $$ = new_call($1, $3, $4); - fixpos($$, $1); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_optarg($$, $4); + %*/ } | primary_value tCOLON2 operation2 paren_args { - $$ = new_call($1, $3, $4); - fixpos($$, $1); + /*%%%*/ + $$ = NEW_CALL($1, $3, $4); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); + $$ = method_optarg($$, $4); + %*/ } | primary_value tCOLON2 operation3 { - $$ = new_call($1, $3, 0); + /*%%%*/ + $$ = NEW_CALL($1, $3, 0); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), $3); + %*/ + } + | primary_value '.' 
paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, rb_intern("call"), $3); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_id2sym('.'), + ripper_intern("call")); + $$ = method_optarg($$, $3); + %*/ + } + | primary_value tCOLON2 paren_args + { + /*%%%*/ + $$ = NEW_CALL($1, rb_intern("call"), $3); + fixpos($$, $1); + /*% + $$ = dispatch3(call, $1, ripper_intern("::"), + ripper_intern("call")); + $$ = method_optarg($$, $3); + %*/ } - | kSUPER paren_args + | keyword_super paren_args { - $$ = new_super($2); + /*%%%*/ + $$ = NEW_SUPER($2); + /*% + $$ = dispatch1(super, $2); + %*/ } - | kSUPER + | keyword_super { + /*%%%*/ $$ = NEW_ZSUPER(); + /*% + $$ = dispatch0(zsuper); + %*/ + } + | primary_value '[' opt_call_args rbracket + { + /*%%%*/ + if ($1 && nd_type($1) == NODE_SELF) + $$ = NEW_FCALL(tAREF, $3); + else + $$ = NEW_CALL($1, tAREF, $3); + fixpos($$, $1); + /*% + $$ = dispatch2(aref, $1, escape_Qundef($3)); + %*/ } ; brace_block : '{' { - $<vars>$ = dyna_push(); - $<num>1 = ruby_sourceline; + $<vars>1 = dyna_push(); + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ } - opt_block_var {$<vars>$ = ruby_dyna_vars;} + opt_block_param compstmt '}' { - $$ = NEW_ITER($3, 0, dyna_init($5, $<vars>4)); - nd_set_line($$, $<num>1); - dyna_pop($<vars>2); - } - | kDO - { - $<vars>$ = dyna_push(); - $<num>1 = ruby_sourceline; - } - opt_block_var {$<vars>$ = ruby_dyna_vars;} - compstmt kEND - { - $$ = NEW_ITER($3, 0, dyna_init($5, $<vars>4)); - nd_set_line($$, $<num>1); - dyna_pop($<vars>2); + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $<num>2); + /*% + $$ = dispatch2(brace_block, escape_Qundef($3), $4); + %*/ + dyna_pop($<vars>1); + } + | keyword_do + { + $<vars>1 = dyna_push(); + /*%%%*/ + $<num>$ = ruby_sourceline; + /*% + %*/ + } + opt_block_param + compstmt keyword_end + { + /*%%%*/ + $$ = NEW_ITER($3,$4); + nd_set_line($$, $<num>2); + /*% + $$ = dispatch2(do_block, escape_Qundef($3), $4); + %*/ + dyna_pop($<vars>1); } ; -case_body : kWHEN when_args then +case_body 
: keyword_when args then compstmt cases { + /*%%%*/ $$ = NEW_WHEN($2, $4, $5); - } - ; -when_args : args - | args ',' tSTAR arg_value - { - $$ = list_append($1, NEW_WHEN($4, 0, 0)); - } - | tSTAR arg_value - { - $$ = NEW_LIST(NEW_WHEN($2, 0, 0)); + /*% + $$ = dispatch3(when, $2, $4, escape_Qundef($5)); + %*/ } ; @@ -1891,25 +3730,44 @@ cases : opt_else | case_body ; -opt_rescue : kRESCUE exc_list exc_var then +opt_rescue : keyword_rescue exc_list exc_var then compstmt opt_rescue { - if ($3) { - $3 = node_assign($3, NEW_GVAR(rb_intern("$!"))); + /*%%%*/ + if ($3) { + $3 = node_assign($3, NEW_ERRINFO()); $5 = block_append($3, $5); } $$ = NEW_RESBODY($2, $5, $6); - fixpos($$, $2?$2:$5); + fixpos($$, $2?$2:$5); + /*% + $$ = dispatch4(rescue, + escape_Qundef($2), + escape_Qundef($3), + escape_Qundef($5), + escape_Qundef($6)); + %*/ } | none ; exc_list : arg_value { + /*%%%*/ $$ = NEW_LIST($1); + /*% + $$ = rb_ary_new3(1, $1); + %*/ } | mrhs + { + /*%%%*/ + if (!($$ = splat_array($1))) $$ = $1; + /*% + $$ = $1; + %*/ + } | none ; @@ -1920,13 +3778,13 @@ exc_var : tASSOC lhs | none ; -opt_ensure : kENSURE compstmt +opt_ensure : keyword_ensure compstmt { - if ($2) - $$ = $2; - else - /* place holder */ - $$ = NEW_NIL(); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(ensure, $2); + %*/ } | none ; @@ -1934,42 +3792,60 @@ opt_ensure : kENSURE compstmt literal : numeric | symbol { + /*%%%*/ $$ = NEW_LIT(ID2SYM($1)); + /*% + $$ = dispatch1(symbol_literal, $1); + %*/ } | dsym ; strings : string { + /*%%%*/ NODE *node = $1; if (!node) { - node = NEW_STR(rb_str_new(0, 0)); + node = NEW_STR(STR_NEW0()); } else { node = evstr2dstr(node); } $$ = node; + /*% + $$ = $1; + %*/ } ; -string : string1 +string : tCHAR + | string1 | string string1 { + /*%%%*/ $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(string_concat, $1, $2); + %*/ } ; string1 : tSTRING_BEG string_contents tSTRING_END { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(string_literal, $2); + %*/ } ; xstring : tXSTRING_BEG 
xstring_contents tSTRING_END { + /*%%%*/ NODE *node = $2; if (!node) { - node = NEW_XSTR(rb_str_new(0, 0)); + node = NEW_XSTR(STR_NEW0()); } else { switch (nd_type(node)) { @@ -1980,33 +3856,36 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END nd_set_type(node, NODE_DXSTR); break; default: - node = NEW_NODE(NODE_DXSTR, rb_str_new(0, 0), 1, NEW_LIST(node)); + node = NEW_NODE(NODE_DXSTR, Qnil, 1, NEW_LIST(node)); break; } } $$ = node; + /*% + $$ = dispatch1(xstring_literal, $2); + %*/ } ; -regexp : tREGEXP_BEG xstring_contents tREGEXP_END +regexp : tREGEXP_BEG regexp_contents tREGEXP_END { + /*%%%*/ int options = $3; NODE *node = $2; + NODE *list, *prev; if (!node) { - node = NEW_LIT(rb_reg_new("", 0, options & ~RE_OPTION_ONCE)); + node = NEW_LIT(reg_compile(STR_NEW0(), options)); } else switch (nd_type(node)) { case NODE_STR: { VALUE src = node->nd_lit; nd_set_type(node, NODE_LIT); - node->nd_lit = rb_reg_new(RSTRING(src)->ptr, - RSTRING(src)->len, - options & ~RE_OPTION_ONCE); + node->nd_lit = reg_compile(src, options); } break; default: - node = NEW_NODE(NODE_DSTR, rb_str_new(0, 0), 1, NEW_LIST(node)); + node = NEW_NODE(NODE_DSTR, STR_NEW0(), 1, NEW_LIST(node)); case NODE_DSTR: if (options & RE_OPTION_ONCE) { nd_set_type(node, NODE_DREGX_ONCE); @@ -2014,77 +3893,207 @@ regexp : tREGEXP_BEG xstring_contents tREGEXP_END else { nd_set_type(node, NODE_DREGX); } - node->nd_cflag = options & ~RE_OPTION_ONCE; + node->nd_cflag = options & RE_OPTION_MASK; + if (!NIL_P(node->nd_lit)) reg_fragment_check(node->nd_lit, options); + for (list = (prev = node)->nd_next; list; list = list->nd_next) { + if (nd_type(list->nd_head) == NODE_STR) { + VALUE tail = list->nd_head->nd_lit; + if (reg_fragment_check(tail, options) && prev && !NIL_P(prev->nd_lit)) { + VALUE lit = prev == node ? 
prev->nd_lit : prev->nd_head->nd_lit; + if (!literal_concat0(parser, lit, tail)) { + node = 0; + break; + } + rb_str_resize(tail, 0); + prev->nd_next = list->nd_next; + rb_gc_force_recycle((VALUE)list->nd_head); + rb_gc_force_recycle((VALUE)list); + list = prev; + } + else { + prev = list; + } + } + else { + prev = 0; + } + } + if (!node->nd_next) { + VALUE src = node->nd_lit; + nd_set_type(node, NODE_LIT); + node->nd_lit = reg_compile(src, options); + } break; } $$ = node; + /*% + $$ = dispatch2(regexp_literal, $2, $3); + %*/ } ; words : tWORDS_BEG ' ' tSTRING_END { + /*%%%*/ $$ = NEW_ZARRAY(); + /*% + $$ = dispatch0(words_new); + $$ = dispatch1(array, $$); + %*/ } | tWORDS_BEG word_list tSTRING_END { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(array, $2); + %*/ } ; word_list : /* none */ { + /*%%%*/ $$ = 0; + /*% + $$ = dispatch0(words_new); + %*/ } | word_list word ' ' { + /*%%%*/ $$ = list_append($1, evstr2dstr($2)); + /*% + $$ = dispatch2(words_add, $1, $2); + %*/ } ; word : string_content + /*%c%*/ + /*%c + { + $$ = dispatch0(word_new); + $$ = dispatch2(word_add, $$, $1); + } + %*/ | word string_content { + /*%%%*/ $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(word_add, $1, $2); + %*/ } ; qwords : tQWORDS_BEG ' ' tSTRING_END { + /*%%%*/ $$ = NEW_ZARRAY(); + /*% + $$ = dispatch0(qwords_new); + $$ = dispatch1(array, $$); + %*/ } | tQWORDS_BEG qword_list tSTRING_END { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(array, $2); + %*/ } ; qword_list : /* none */ { + /*%%%*/ $$ = 0; + /*% + $$ = dispatch0(qwords_new); + %*/ } | qword_list tSTRING_CONTENT ' ' { + /*%%%*/ $$ = list_append($1, $2); + /*% + $$ = dispatch2(qwords_add, $1, $2); + %*/ } ; string_contents : /* none */ { + /*%%%*/ $$ = 0; + /*% + $$ = dispatch0(string_content); + %*/ } | string_contents string_content { + /*%%%*/ $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(string_add, $1, $2); + %*/ } ; xstring_contents: /* none */ { + /*%%%*/ $$ = 0; + /*% + $$ = dispatch0(xstring_new); + %*/ } | 
xstring_contents string_content { + /*%%%*/ $$ = literal_concat($1, $2); + /*% + $$ = dispatch2(xstring_add, $1, $2); + %*/ + } + ; + +regexp_contents: /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = dispatch0(regexp_new); + %*/ + } + | regexp_contents string_content + { + /*%%%*/ + NODE *head = $1, *tail = $2; + if (!head) { + $$ = tail; + } + else if (!tail) { + $$ = head; + } + else { + switch (nd_type(head)) { + case NODE_STR: + nd_set_type(head, NODE_DSTR); + break; + case NODE_DSTR: + break; + default: + head = list_append(NEW_DSTR(Qnil), head); + break; + } + $$ = list_append(head, tail); + } + /*% + $$ = dispatch2(regexp_add, $1, $2); + %*/ } ; @@ -2097,40 +4106,75 @@ string_content : tSTRING_CONTENT } string_dvar { + /*%%%*/ + lex_strterm = $<node>2; + $$ = NEW_EVSTR($3); + /*% lex_strterm = $<node>2; - $$ = NEW_EVSTR($3); + $$ = dispatch1(string_dvar, $3); + %*/ } | tSTRING_DBEG { + $<val>1 = cond_stack; + $<val>$ = cmdarg_stack; + cond_stack = 0; + cmdarg_stack = 0; + } + { $<node>$ = lex_strterm; lex_strterm = 0; lex_state = EXPR_BEG; - COND_PUSH(0); - CMDARG_PUSH(0); } compstmt '}' { - lex_strterm = $<node>2; - COND_LEXPOP(); - CMDARG_LEXPOP(); - if (($$ = $3) && nd_type($$) == NODE_NEWLINE) { - $$ = $$->nd_next; - rb_gc_force_recycle((VALUE)$3); - } - $$ = new_evstr($$); + cond_stack = $<val>1; + cmdarg_stack = $<val>2; + lex_strterm = $<node>3; + /*%%%*/ + if ($4) $4->flags &= ~NODE_FL_NEWLINE; + $$ = new_evstr($4); + /*% + $$ = dispatch1(string_embexpr, $4); + %*/ } ; -string_dvar : tGVAR {$$ = NEW_GVAR($1);} - | tIVAR {$$ = NEW_IVAR($1);} - | tCVAR {$$ = NEW_CVAR($1);} +string_dvar : tGVAR + { + /*%%%*/ + $$ = NEW_GVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + | tIVAR + { + /*%%%*/ + $$ = NEW_IVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } + | tCVAR + { + /*%%%*/ + $$ = NEW_CVAR($1); + /*% + $$ = dispatch1(var_ref, $1); + %*/ + } | backref ; symbol : tSYMBEG sym { - lex_state = EXPR_END; + lex_state = EXPR_END; + /*%%%*/ $$ = 
$2; + /*% + $$ = dispatch1(symbol, $2); + %*/ } ; @@ -2142,10 +4186,10 @@ sym : fname dsym : tSYMBEG xstring_contents tSTRING_END { - lex_state = EXPR_END; + lex_state = EXPR_END; + /*%%%*/ if (!($$ = $2)) { - $$ = NEW_NIL(); - yyerror("empty symbol literal"); + $$ = NEW_LIT(ID2SYM(rb_intern(""))); } else { VALUE lit; @@ -2156,33 +4200,37 @@ dsym : tSYMBEG xstring_contents tSTRING_END break; case NODE_STR: lit = $$->nd_lit; - if (RSTRING(lit)->len == 0) { - yyerror("empty symbol literal"); - break; - } - if (strlen(RSTRING(lit)->ptr) == RSTRING(lit)->len) { - $$->nd_lit = ID2SYM(rb_intern(RSTRING($$->nd_lit)->ptr)); - nd_set_type($$, NODE_LIT); - break; - } - /* fall through */ + $$->nd_lit = ID2SYM(rb_intern_str(lit)); + nd_set_type($$, NODE_LIT); + break; default: - $$ = NEW_NODE(NODE_DSYM, rb_str_new(0, 0), 1, NEW_LIST($$)); + $$ = NEW_NODE(NODE_DSYM, Qnil, 1, NEW_LIST($$)); break; } } + /*% + $$ = dispatch1(dyna_symbol, $2); + %*/ } ; -numeric : tINTEGER +numeric : tINTEGER | tFLOAT | tUMINUS_NUM tINTEGER %prec tLOWEST { + /*%%%*/ $$ = negate_lit($2); + /*% + $$ = dispatch2(unary, ripper_intern("-@"), $2); + %*/ } | tUMINUS_NUM tFLOAT %prec tLOWEST { + /*%%%*/ $$ = negate_lit($2); + /*% + $$ = dispatch2(unary, ripper_intern("-@"), $2); + %*/ } ; @@ -2191,23 +4239,32 @@ variable : tIDENTIFIER | tGVAR | tCONSTANT | tCVAR - | kNIL {$$ = kNIL;} - | kSELF {$$ = kSELF;} - | kTRUE {$$ = kTRUE;} - | kFALSE {$$ = kFALSE;} - | k__FILE__ {$$ = k__FILE__;} - | k__LINE__ {$$ = k__LINE__;} + | keyword_nil {ifndef_ripper($$ = keyword_nil);} + | keyword_self {ifndef_ripper($$ = keyword_self);} + | keyword_true {ifndef_ripper($$ = keyword_true);} + | keyword_false {ifndef_ripper($$ = keyword_false);} + | keyword__FILE__ {ifndef_ripper($$ = keyword__FILE__);} + | keyword__LINE__ {ifndef_ripper($$ = keyword__LINE__);} + | keyword__ENCODING__ {ifndef_ripper($$ = keyword__ENCODING__);} ; var_ref : variable { - $$ = gettable($1); + /*%%%*/ + if (!($$ = gettable($1))) $$ = 
NEW_BEGIN(0); + /*% + $$ = dispatch1(var_ref, $1); + %*/ } ; var_lhs : variable { $$ = assignable($1, 0); + /*%%%*/ + /*% + $$ = dispatch1(var_field, $$); + %*/ } ; @@ -2217,7 +4274,11 @@ backref : tNTH_REF superclass : term { + /*%%%*/ $$ = 0; + /*% + $$ = Qnil; + %*/ } | '<' { @@ -2227,14 +4288,27 @@ superclass : term { $$ = $3; } - | error term {yyerrok; $$ = 0;} + | error term + { + /*%%%*/ + yyerrok; + $$ = 0; + /*% + yyerrok; + $$ = Qnil; + %*/ + } ; -f_arglist : '(' f_args opt_nl ')' +f_arglist : '(' f_args rparen { + /*%%%*/ $$ = $2; + /*% + $$ = dispatch1(paren, $2); + %*/ lex_state = EXPR_BEG; - command_start = Qtrue; + command_start = TRUE; } | f_args term { @@ -2244,94 +4318,289 @@ f_arglist : '(' f_args opt_nl ')' f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg { - $$ = block_append(NEW_ARGS($1, $3, $5), $6); + /*%%%*/ + $$ = new_args($1, $3, $5, 0, $6); + /*% + $$ = params_new($1, $3, $5, Qnil, escape_Qundef($6)); + %*/ + } + | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, $5, $7, $8); + /*% + $$ = params_new($1, $3, $5, $7, escape_Qundef($8)); + %*/ } | f_arg ',' f_optarg opt_f_block_arg { - $$ = block_append(NEW_ARGS($1, $3, 0), $4); + /*%%%*/ + $$ = new_args($1, $3, 0, 0, $4); + /*% + $$ = params_new($1, $3, Qnil, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, $3, 0, $5, $6); + /*% + $$ = params_new($1, $3, Qnil, $5, escape_Qundef($6)); + %*/ } | f_arg ',' f_rest_arg opt_f_block_arg { - $$ = block_append(NEW_ARGS($1, 0, $3), $4); + /*%%%*/ + $$ = new_args($1, 0, $3, 0, $4); + /*% + $$ = params_new($1, Qnil, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args($1, 0, $3, $5, $6); + /*% + $$ = params_new($1, Qnil, $3, $5, escape_Qundef($6)); + %*/ } | f_arg opt_f_block_arg { - $$ = block_append(NEW_ARGS($1, 0, 0), $2); + /*%%%*/ + $$ = new_args($1, 0, 
0, 0, $2); + /*% + $$ = params_new($1, Qnil, Qnil, Qnil,escape_Qundef($2)); + %*/ } | f_optarg ',' f_rest_arg opt_f_block_arg { - $$ = block_append(NEW_ARGS(0, $1, $3), $4); + /*%%%*/ + $$ = new_args(0, $1, $3, 0, $4); + /*% + $$ = params_new(Qnil, $1, $3, Qnil, escape_Qundef($4)); + %*/ + } + | f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, $3, $5, $6); + /*% + $$ = params_new(Qnil, $1, $3, $5, escape_Qundef($6)); + %*/ } | f_optarg opt_f_block_arg { - $$ = block_append(NEW_ARGS(0, $1, 0), $2); + /*%%%*/ + $$ = new_args(0, $1, 0, 0, $2); + /*% + $$ = params_new(Qnil, $1, Qnil, Qnil,escape_Qundef($2)); + %*/ + } + | f_optarg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, $1, 0, $3, $4); + /*% + $$ = params_new(Qnil, $1, Qnil, $3, escape_Qundef($4)); + %*/ } | f_rest_arg opt_f_block_arg { - $$ = block_append(NEW_ARGS(0, 0, $1), $2); + /*%%%*/ + $$ = new_args(0, 0, $1, 0, $2); + /*% + $$ = params_new(Qnil, Qnil, $1, Qnil,escape_Qundef($2)); + %*/ + } + | f_rest_arg ',' f_arg opt_f_block_arg + { + /*%%%*/ + $$ = new_args(0, 0, $1, $3, $4); + /*% + $$ = params_new(Qnil, Qnil, $1, $3, escape_Qundef($4)); + %*/ } | f_block_arg { - $$ = block_append(NEW_ARGS(0, 0, 0), $1); + /*%%%*/ + $$ = new_args(0, 0, 0, 0, $1); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, $1); + %*/ } | /* none */ { - $$ = NEW_ARGS(0, 0, 0); + /*%%%*/ + $$ = new_args(0, 0, 0, 0, 0); + /*% + $$ = params_new(Qnil, Qnil, Qnil, Qnil, Qnil); + %*/ } ; -f_norm_arg : tCONSTANT +f_bad_arg : tCONSTANT { + /*%%%*/ yyerror("formal argument cannot be a constant"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ } - | tIVAR + | tIVAR { - yyerror("formal argument cannot be an instance variable"); + /*%%%*/ + yyerror("formal argument cannot be an instance variable"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ } - | tGVAR + | tGVAR { - yyerror("formal argument cannot be a global variable"); + /*%%%*/ + yyerror("formal argument cannot be a 
global variable"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ } - | tCVAR + | tCVAR { - yyerror("formal argument cannot be a class variable"); + /*%%%*/ + yyerror("formal argument cannot be a class variable"); + $$ = 0; + /*% + $$ = dispatch1(param_error, $1); + %*/ } + ; + +f_norm_arg : f_bad_arg | tIDENTIFIER { - if (!is_local_id($1)) - yyerror("formal argument must be local variable"); - else if (local_id($1)) - yyerror("duplicate argument name"); - local_cnt($1); - $$ = 1; + formal_argument(get_id($1)); + $$ = $1; } ; -f_arg : f_norm_arg - | f_arg ',' f_norm_arg +f_arg_item : f_norm_arg + { + arg_var(get_id($1)); + /*%%%*/ + $$ = NEW_ARGS_AUX($1, 1); + /*% + $$ = get_value($1); + %*/ + } + | tLPAREN f_margs rparen { - $$ += 1; + ID tid = internal_id(); + arg_var(tid); + /*%%%*/ + if (dyna_in_block()) { + $2->nd_value = NEW_DVAR(tid); + } + else { + $2->nd_value = NEW_LVAR(tid); + } + $$ = NEW_ARGS_AUX(tid, 1); + $$->nd_next = $2; + /*% + $$ = dispatch1(mlhs_paren, $2); + %*/ + } + ; + +f_arg : f_arg_item + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + c%*/ + | f_arg ',' f_arg_item + { + /*%%%*/ + $$ = $1; + $$->nd_plen++; + $$->nd_next = block_append($$->nd_next, $3->nd_next); + rb_gc_force_recycle((VALUE)$3); + /*% + $$ = rb_ary_push($1, $3); + %*/ } ; f_opt : tIDENTIFIER '=' arg_value { - if (!is_local_id($1)) - yyerror("formal argument must be local variable"); - else if (local_id($1)) - yyerror("duplicate optional argument name"); + arg_var(formal_argument(get_id($1))); + $$ = assignable($1, $3); + /*%%%*/ + $$ = NEW_OPT_ARG(0, $$); + /*% + $$ = rb_assoc_new($$, $3); + %*/ + } + ; + +f_block_opt : tIDENTIFIER '=' primary_value + { + arg_var(formal_argument(get_id($1))); $$ = assignable($1, $3); + /*%%%*/ + $$ = NEW_OPT_ARG(0, $$); + /*% + $$ = rb_assoc_new($$, $3); + %*/ + } + ; + +f_block_optarg : f_block_opt + { + /*%%%*/ + $$ = $1; + /*% + $$ = rb_ary_new3(1, $1); + %*/ + } + | f_block_optarg ',' f_block_opt + { + /*%%%*/ + NODE *opts 
= $1; + + while (opts->nd_next) { + opts = opts->nd_next; + } + opts->nd_next = $3; + $$ = $1; + /*% + $$ = rb_ary_push($1, $3); + %*/ } ; f_optarg : f_opt { - $$ = NEW_BLOCK($1); - $$->nd_end = $$; + /*%%%*/ + $$ = $1; + /*% + $$ = rb_ary_new3(1, $1); + %*/ } | f_optarg ',' f_opt { - $$ = block_append($1, $3); + /*%%%*/ + NODE *opts = $1; + + while (opts->nd_next) { + opts = opts->nd_next; + } + opts->nd_next = $3; + $$ = $1; + /*% + $$ = rb_ary_push($1, $3); + %*/ } ; @@ -2341,23 +4610,25 @@ restarg_mark : '*' f_rest_arg : restarg_mark tIDENTIFIER { + /*%%%*/ if (!is_local_id($2)) yyerror("rest argument must be local variable"); - else if (local_id($2)) - yyerror("duplicate rest argument name"); - if (dyna_in_block()) { - rb_dvar_push($2, Qnil); - } - $$ = assignable($2, 0); + /*% %*/ + arg_var(shadowing_lvar(get_id($2))); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(rest_param, $2); + %*/ } | restarg_mark { - if (dyna_in_block()) { - $$ = NEW_DASGN_CURR(internal_id(), 0); - } - else { - $$ = NEW_NODE(NODE_LASGN,0,0,local_append(0)); - } + /*%%%*/ + $$ = internal_id(); + arg_var($$); + /*% + $$ = dispatch1(rest_param, Qnil); + %*/ } ; @@ -2367,11 +4638,18 @@ blkarg_mark : '&' f_block_arg : blkarg_mark tIDENTIFIER { + /*%%%*/ if (!is_local_id($2)) yyerror("block argument must be local variable"); - else if (local_id($2)) - yyerror("duplicate block argument name"); - $$ = NEW_BLOCK_ARG($2); + else if (!dyna_in_block() && local_id($2)) + yyerror("duplicated block argument name"); + /*% %*/ + arg_var(shadowing_lvar(get_id($2))); + /*%%%*/ + $$ = $2; + /*% + $$ = dispatch1(blockarg, $2); + %*/ } ; @@ -2380,15 +4658,28 @@ opt_f_block_arg : ',' f_block_arg $$ = $2; } | none + { + /*%%%*/ + $$ = 0; + /*% + $$ = Qundef; + %*/ + } ; singleton : var_ref { + /*%%%*/ + value_expr($1); $$ = $1; - value_expr($$); + if (!$$) $$ = NEW_NIL(); + /*% + $$ = $1; + %*/ } - | '(' {lex_state = EXPR_BEG;} expr opt_nl ')' + | '(' {lex_state = EXPR_BEG;} expr rparen { + /*%%%*/ if ($3 == 0) 
{ yyerror("can't define singleton method for ()."); } @@ -2409,33 +4700,55 @@ singleton : var_ref } } $$ = $3; + /*% + $$ = dispatch1(paren, $3); + %*/ } ; assoc_list : none | assocs trailer { + /*%%%*/ $$ = $1; - } - | args trailer - { - if ($1->nd_alen%2 != 0) { - yyerror("odd number list for Hash"); - } - $$ = $1; + /*% + $$ = dispatch1(assoclist_from_args, $1); + %*/ } ; assocs : assoc + /*%c%*/ + /*%c + { + $$ = rb_ary_new3(1, $1); + } + %*/ | assocs ',' assoc { + /*%%%*/ $$ = list_concat($1, $3); + /*% + $$ = rb_ary_push($1, $3); + %*/ } ; assoc : arg_value tASSOC arg_value { + /*%%%*/ $$ = list_append(NEW_LIST($1), $3); + /*% + $$ = dispatch2(assoc_new, $1, $3); + %*/ + } + | tLABEL arg_value + { + /*%%%*/ + $$ = list_append(NEW_LIST(NEW_LIT(ID2SYM($1))), $2); + /*% + $$ = dispatch2(assoc_new, $1, $2); + %*/ } ; @@ -2456,7 +4769,15 @@ operation3 : tIDENTIFIER ; dot_or_colon : '.' + /*%c%*/ + /*%c + { $$ = $<val>1; } + %*/ | tCOLON2 + /*%c%*/ + /*%c + { $$ = $<val>1; } + %*/ ; opt_terms : /* none */ @@ -2467,6 +4788,12 @@ opt_nl : /* none */ | '\n' ; +rparen : opt_nl ')' + ; + +rbracket : opt_nl ']' + ; + trailer : /* none */ | '\n' | ',' @@ -2480,15 +4807,123 @@ terms : term | terms ';' {yyerrok;} ; -none : /* none */ {$$ = 0;} +none : /* none */ + { + /*%%%*/ + $$ = 0; + /*% + $$ = Qundef; + %*/ + } ; %% -#ifdef yystacksize -#undef YYMALLOC +# undef parser +# undef yylex +# undef yylval +# define yylval (*((YYSTYPE*)(parser->parser_yylval))) + +static int parser_regx_options(struct parser_params*); +static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); +static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); +static int parser_parse_string(struct parser_params*,NODE*); +static int parser_here_document(struct parser_params*,NODE*); + + +# define nextc() parser_nextc(parser) +# define pushback(c) parser_pushback(parser, c) +# define newtok() parser_newtok(parser) +# define tokspace(n) 
parser_tokspace(parser, n) +# define tokadd(c) parser_tokadd(parser, c) +# define tok_hex(numlen) parser_tok_hex(parser, numlen) +# define read_escape(flags,e) parser_read_escape(parser, flags, e) +# define tokadd_escape(e) parser_tokadd_escape(parser, e) +# define regx_options() parser_regx_options(parser) +# define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,f,t,p,n,e) +# define parse_string(n) parser_parse_string(parser,n) +# define tokaddmbc(c, enc) parser_tokaddmbc(parser, c, enc) +# define here_document(n) parser_here_document(parser,n) +# define heredoc_identifier() parser_heredoc_identifier(parser) +# define heredoc_restore(n) parser_heredoc_restore(parser,n) +# define whole_match_p(e,l,i) parser_whole_match_p(parser,e,l,i) + +#ifndef RIPPER +# define set_yylval_str(x) yylval.node = NEW_STR(x) +# define set_yylval_num(x) yylval.num = x +# define set_yylval_id(x) yylval.id = x +# define set_yylval_name(x) yylval.id = x +# define set_yylval_literal(x) yylval.node = NEW_LIT(x) +# define set_yylval_node(x) yylval.node = x +# define yylval_id() yylval.id +#else +static inline VALUE +ripper_yylval_id(ID x) +{ + return (VALUE)NEW_LASGN(x, ID2SYM(x)); +} +# define set_yylval_str(x) (void)(x) +# define set_yylval_num(x) (void)(x) +# define set_yylval_id(x) (void)(x) +# define set_yylval_name(x) (void)(yylval.val = ripper_yylval_id(x)) +# define set_yylval_literal(x) (void)(x) +# define set_yylval_node(x) (void)(x) +# define yylval_id() yylval.id #endif -#include "regex.h" -#include "util.h" +#ifndef RIPPER +#define ripper_flush(p) (void)(p) +#else +#define ripper_flush(p) (p->tokp = p->parser_lex_p) + +#define yylval_rval *(RB_TYPE_P(yylval.val, T_NODE) ? 
&yylval.node->nd_rval : &yylval.val) + +static int +ripper_has_scan_event(struct parser_params *parser) +{ + + if (lex_p < parser->tokp) rb_raise(rb_eRuntimeError, "lex_p < tokp"); + return lex_p > parser->tokp; +} + +static VALUE +ripper_scan_event_val(struct parser_params *parser, int t) +{ + VALUE str = STR_NEW(parser->tokp, lex_p - parser->tokp); + VALUE rval = ripper_dispatch1(parser, ripper_token2eventid(t), str); + ripper_flush(parser); + return rval; +} + +static void +ripper_dispatch_scan_event(struct parser_params *parser, int t) +{ + if (!ripper_has_scan_event(parser)) return; + yylval_rval = ripper_scan_event_val(parser, t); +} + +static void +ripper_dispatch_ignored_scan_event(struct parser_params *parser, int t) +{ + if (!ripper_has_scan_event(parser)) return; + (void)ripper_scan_event_val(parser, t); +} + +static void +ripper_dispatch_delayed_token(struct parser_params *parser, int t) +{ + int saved_line = ruby_sourceline; + const char *saved_tokp = parser->tokp; + + ruby_sourceline = parser->delayed_line; + parser->tokp = lex_pbeg + parser->delayed_col; + yylval_rval = ripper_dispatch1(parser, ripper_token2eventid(t), parser->delayed); + parser->delayed = Qnil; + ruby_sourceline = saved_line; + parser->tokp = saved_tokp; +} +#endif /* RIPPER */ + +#include "ruby/regex.h" +#include "ruby/util.h" /* We remove any previous definition of `SIGN_EXTEND_CHAR', since ours (we hope) works properly with all combinations of @@ -2501,30 +4936,98 @@ none : /* none */ {$$ = 0;} /* As in Harbison and Steele. 
*/ # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) #endif -#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_' || ismbchar(c))) -static char *tokenbuf = NULL; -static int tokidx, toksiz = 0; +#define parser_encoding_name() (parser->enc->name) +#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) +#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) +#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) +#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) + +#define parser_isascii() ISASCII(*(lex_p-1)) + +#ifndef RIPPER +static int +token_info_get_column(struct parser_params *parser, const char *token) +{ + int column = 1; + const char *p, *pend = lex_p - strlen(token); + for (p = lex_pbeg; p < pend; p++) { + if (*p == '\t') { + column = (((column - 1) / 8) + 1) * 8; + } + column++; + } + return column; +} + +static int +token_info_has_nonspaces(struct parser_params *parser, const char *token) +{ + const char *p, *pend = lex_p - strlen(token); + for (p = lex_pbeg; p < pend; p++) { + if (*p != ' ' && *p != '\t') { + return 1; + } + } + return 0; +} + +#undef token_info_push +static void +token_info_push(struct parser_params *parser, const char *token) +{ + token_info *ptinfo; + + if (compile_for_eval) return; + ptinfo = ALLOC(token_info); + ptinfo->token = token; + ptinfo->linenum = ruby_sourceline; + ptinfo->column = token_info_get_column(parser, token); + ptinfo->nonspc = token_info_has_nonspaces(parser, token); + ptinfo->next = parser->parser_token_info; + + parser->parser_token_info = ptinfo; +} + +#undef token_info_pop +static void +token_info_pop(struct parser_params *parser, const char *token) +{ + int linenum; + token_info *ptinfo = parser->parser_token_info; -#define LEAVE_BS 1 + if (!ptinfo) return; + parser->parser_token_info = ptinfo->next; + if (token_info_get_column(parser, token) == 
ptinfo->column) { /* OK */ + goto finish; + } + linenum = ruby_sourceline; + if (linenum == ptinfo->linenum) { /* SKIP */ + goto finish; + } + if (token_info_has_nonspaces(parser, token) || ptinfo->nonspc) { /* SKIP */ + goto finish; + } + rb_compile_warning(ruby_sourcefile, linenum, + "mismatched indentations at '%s' with '%s' at %d", + token, ptinfo->token, ptinfo->linenum); -static VALUE (*lex_gets)(); /* gets function */ -static VALUE lex_input; /* non-nil if File */ -static VALUE lex_lastline; /* gc protect */ -static char *lex_pbeg; -static char *lex_p; -static char *lex_pend; + finish: + xfree(ptinfo); +} +#endif /* RIPPER */ static int -yyerror(msg) - const char *msg; +parser_yyerror(struct parser_params *parser, const char *msg) { +#ifndef RIPPER const int max_line_margin = 30; const char *p, *pe; char *buf; - int len, i; + long len; + int i; - rb_compile_error("%s", msg); + compile_error(PARSER_ARG "%s", msg); p = lex_p; while (lex_pbeg <= p) { if (*p == '\n') break; @@ -2544,13 +5047,12 @@ yyerror(msg) const char *pre = "", *post = ""; if (len > max_line_margin * 2 + 10) { - int re_mbc_startpos _((const char *, int, int, int)); - if ((len = lex_p - p) > max_line_margin) { - p = p + re_mbc_startpos(p, len, len - max_line_margin, 0); + if (lex_p - p > max_line_margin) { + p = rb_enc_prev_char(p, lex_p - max_line_margin, pe, rb_enc_get(lex_lastline)); pre = "..."; } - if ((len = pe - lex_p) > max_line_margin) { - pe = lex_p + re_mbc_startpos(lex_p, len, max_line_margin, 1); + if (pe - lex_p > max_line_margin) { + pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, pe, rb_enc_get(lex_lastline)); post = "..."; } len = pe - p; @@ -2560,7 +5062,7 @@ yyerror(msg) buf[len] = '\0'; rb_compile_error_append("%s%s%s", pre, buf, post); - i = lex_p - p; + i = (int)(lex_p - p); p2 = buf; pe = buf + len; while (p2 < pe) { @@ -2569,179 +5071,333 @@ yyerror(msg) } buf[i] = '^'; buf[i+1] = '\0'; - rb_compile_error_append("%s", buf); + rb_compile_error_append("%s%s", pre, 
buf); } - +#else + dispatch1(parse_error, STR_NEW2(msg)); +#endif /* !RIPPER */ return 0; } -static int heredoc_end; +static void parser_prepare(struct parser_params *parser); -int ruby_in_compile = 0; -int ruby__end__seen; +#ifndef RIPPER +VALUE ruby_suppress_tracing(VALUE (*func)(VALUE, int), VALUE arg, int always); -static VALUE ruby_debug_lines; -#ifdef YYMALLOC -static NODE *parser_heap; -#endif +static VALUE +debug_lines(const char *f) +{ + ID script_lines; + CONST_ID(script_lines, "SCRIPT_LINES__"); + if (rb_const_defined_at(rb_cObject, script_lines)) { + VALUE hash = rb_const_get_at(rb_cObject, script_lines); + if (TYPE(hash) == T_HASH) { + VALUE fname = rb_str_new2(f); + VALUE lines = rb_ary_new(); + rb_hash_aset(hash, fname, lines); + return lines; + } + } + return 0; +} -static NODE* -yycompile(f, line) - char *f; - int line; +static VALUE +coverage(const char *f, int n) { - int n; - NODE *node = 0; - struct RVarmap *vp, *vars = ruby_dyna_vars; + extern VALUE rb_get_coverages(void); + VALUE coverages = rb_get_coverages(); + if (RTEST(coverages) && RBASIC(coverages)->klass == 0) { + VALUE fname = rb_str_new2(f); + VALUE lines = rb_ary_new2(n); + int i; + RBASIC(lines)->klass = 0; + for (i = 0; i < n; i++) RARRAY_PTR(lines)[i] = Qnil; + RARRAY(lines)->as.heap.len = n; + rb_hash_aset(coverages, fname, lines); + return lines; + } + return 0; +} - ruby_in_compile = 1; - if (!compile_for_eval && rb_safe_level() == 0 && - rb_const_defined(rb_cObject, rb_intern("SCRIPT_LINES__"))) { - VALUE hash, fname; +static int +e_option_supplied(struct parser_params *parser) +{ + return strcmp(ruby_sourcefile, "-e") == 0; +} - hash = rb_const_get(rb_cObject, rb_intern("SCRIPT_LINES__")); - if (TYPE(hash) == T_HASH) { - fname = rb_str_new2(f); - ruby_debug_lines = rb_ary_new(); - rb_hash_aset(hash, fname, ruby_debug_lines); - } - if (line > 1) { - VALUE str = rb_str_new(0,0); - while (line > 1) { +static VALUE +yycompile0(VALUE arg, int tracing) +{ + int n; + NODE *tree; + 
struct parser_params *parser = (struct parser_params *)arg; + + if (!compile_for_eval && rb_safe_level() == 0) { + ruby_debug_lines = debug_lines(ruby_sourcefile); + if (ruby_debug_lines && ruby_sourceline > 0) { + VALUE str = STR_NEW0(); + n = ruby_sourceline; + do { rb_ary_push(ruby_debug_lines, str); - line--; - } + } while (--n); + } + + if (!e_option_supplied(parser)) { + ruby_coverage = coverage(ruby_sourcefile, ruby_sourceline); } } - ruby__end__seen = 0; - ruby_eval_tree = 0; - ruby_eval_tree_begin = 0; - heredoc_end = 0; - lex_strterm = 0; - ruby_current_node = 0; - ruby_sourcefile = rb_source_filename(f); - n = yyparse(); + parser_prepare(parser); + deferred_nodes = 0; + n = yyparse((void*)parser); ruby_debug_lines = 0; + ruby_coverage = 0; compile_for_eval = 0; - ruby_in_compile = 0; - cond_stack = 0; - cmdarg_stack = 0; - command_start = 1; - class_nest = 0; - in_single = 0; - in_def = 0; - cur_mid = 0; - - vp = ruby_dyna_vars; - ruby_dyna_vars = vars; + lex_strterm = 0; - while (vp && vp != vars) { - struct RVarmap *tmp = vp; - vp = vp->next; - rb_gc_force_recycle((VALUE)tmp); + lex_p = lex_pbeg = lex_pend = 0; + lex_lastline = lex_nextline = 0; + if (parser->nerr) { + return 0; } - if (n == 0) node = ruby_eval_tree; - if (ruby_nerrs) ruby_eval_tree_begin = 0; - return node; + tree = ruby_eval_tree; + if (!tree) { + tree = NEW_NIL(); + } + else if (ruby_eval_tree_begin) { + tree->nd_body = NEW_PRELUDE(ruby_eval_tree_begin, tree->nd_body); + } + return (VALUE)tree; +} + +static NODE* +yycompile(struct parser_params *parser, const char *f, int line) +{ + ruby_sourcefile = ruby_strdup(f); + ruby_sourceline = line - 1; + return (NODE *)ruby_suppress_tracing(yycompile0, (VALUE)parser, TRUE); } +#endif /* !RIPPER */ -static int lex_gets_ptr; +static rb_encoding * +must_be_ascii_compatible(VALUE s) +{ + rb_encoding *enc = rb_enc_get(s); + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eArgError, "invalid source encoding"); + } + return enc; +} static VALUE 
-lex_get_str(s) - VALUE s; +lex_get_str(struct parser_params *parser, VALUE s) { char *beg, *end, *pend; + rb_encoding *enc = must_be_ascii_compatible(s); - beg = RSTRING(s)->ptr; + beg = RSTRING_PTR(s); if (lex_gets_ptr) { - if (RSTRING(s)->len == lex_gets_ptr) return Qnil; + if (RSTRING_LEN(s) == lex_gets_ptr) return Qnil; beg += lex_gets_ptr; } - pend = RSTRING(s)->ptr + RSTRING(s)->len; + pend = RSTRING_PTR(s) + RSTRING_LEN(s); end = beg; while (end < pend) { if (*end++ == '\n') break; } - lex_gets_ptr = end - RSTRING(s)->ptr; - return rb_str_new(beg, end - beg); + lex_gets_ptr = end - RSTRING_PTR(s); + return rb_enc_str_new(beg, end - beg, enc); } static VALUE -lex_getline() +lex_getline(struct parser_params *parser) { - VALUE line = (*lex_gets)(lex_input); - if (ruby_debug_lines && !NIL_P(line)) { + VALUE line = (*parser->parser_lex_gets)(parser, parser->parser_lex_input); + if (NIL_P(line)) return line; + must_be_ascii_compatible(line); +#ifndef RIPPER + if (ruby_debug_lines) { + rb_enc_associate(line, parser->enc); rb_ary_push(ruby_debug_lines, line); } + if (ruby_coverage) { + rb_ary_push(ruby_coverage, Qnil); + } +#endif return line; } -NODE* -rb_compile_string(f, s, line) - const char *f; - VALUE s; - int line; +static const rb_data_type_t parser_data_type; + +#ifndef RIPPER +static NODE* +parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) { + struct parser_params *parser; + NODE *node; + volatile VALUE tmp; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); lex_gets = lex_get_str; lex_gets_ptr = 0; lex_input = s; lex_pbeg = lex_p = lex_pend = 0; - ruby_sourceline = line - 1; - compile_for_eval = ruby_in_eval; + compile_for_eval = rb_parse_in_eval(); + + node = yycompile(parser, f, line); + tmp = vparser; /* prohibit tail call optimization */ + + return node; +} + +NODE* +rb_compile_string(const char *f, VALUE s, int line) +{ + must_be_ascii_compatible(s); + return 
parser_compile_string(rb_parser_new(), f, s, line); +} - return yycompile(f, line); +NODE* +rb_parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) +{ + must_be_ascii_compatible(s); + return parser_compile_string(vparser, f, s, line); } NODE* -rb_compile_cstr(f, s, len, line) - const char *f, *s; - int len, line; +rb_compile_cstr(const char *f, const char *s, int len, int line) { - return rb_compile_string(f, rb_str_new(s, len), line); + VALUE str = rb_str_new(s, len); + return parser_compile_string(rb_parser_new(), f, str, line); } NODE* -rb_compile_file(f, file, start) - const char *f; - VALUE file; - int start; +rb_parser_compile_cstr(volatile VALUE vparser, const char *f, const char *s, int len, int line) { - lex_gets = rb_io_gets; + VALUE str = rb_str_new(s, len); + return parser_compile_string(vparser, f, str, line); +} + +static VALUE +lex_io_gets(struct parser_params *parser, VALUE io) +{ + return rb_io_gets(io); +} + +NODE* +rb_compile_file(const char *f, VALUE file, int start) +{ + VALUE volatile vparser = rb_parser_new(); + + return rb_parser_compile_file(vparser, f, file, start); +} + +NODE* +rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int start) +{ + struct parser_params *parser; + volatile VALUE tmp; + NODE *node; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); + lex_gets = lex_io_gets; lex_input = file; lex_pbeg = lex_p = lex_pend = 0; - ruby_sourceline = start - 1; + compile_for_eval = rb_parse_in_eval(); + + node = yycompile(parser, f, start); + tmp = vparser; /* prohibit tail call optimization */ + + return node; +} +#endif /* !RIPPER */ + +#define STR_FUNC_ESCAPE 0x01 +#define STR_FUNC_EXPAND 0x02 +#define STR_FUNC_REGEXP 0x04 +#define STR_FUNC_QWORDS 0x08 +#define STR_FUNC_SYMBOL 0x10 +#define STR_FUNC_INDENT 0x20 + +enum string_type { + str_squote = (0), + str_dquote = (STR_FUNC_EXPAND), + str_xquote = (STR_FUNC_EXPAND), + str_regexp = 
(STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), + str_sword = (STR_FUNC_QWORDS), + str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), + str_ssym = (STR_FUNC_SYMBOL), + str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND) +}; + +static VALUE +parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0) +{ + VALUE str; + + str = rb_enc_str_new(p, n, enc); + if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) { + if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { + } + else if (enc0 == rb_usascii_encoding() && enc != rb_utf8_encoding()) { + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + } - return yycompile(f, start); + return str; } +#define lex_goto_eol(parser) (parser->parser_lex_p = parser->parser_lex_pend) +#define peek(c) (lex_p < lex_pend && (c) == *lex_p) + static inline int -nextc() +parser_nextc(struct parser_params *parser) { int c; if (lex_p == lex_pend) { - if (lex_input) { - VALUE v = lex_getline(); + VALUE v = lex_nextline; + lex_nextline = 0; + if (!v) { + if (parser->eofp) + return -1; - if (NIL_P(v)) return -1; + if (!lex_input || NIL_P(v = lex_getline(parser))) { + parser->eofp = Qtrue; + lex_goto_eol(parser); + return -1; + } + } + { +#ifdef RIPPER + if (parser->tokp < lex_pend) { + if (NIL_P(parser->delayed)) { + parser->delayed = rb_str_buf_new(1024); + rb_str_buf_cat(parser->delayed, + parser->tokp, lex_pend - parser->tokp); + parser->delayed_line = ruby_sourceline; + parser->delayed_col = (int)(parser->tokp - lex_pbeg); + } + else { + rb_str_buf_cat(parser->delayed, + parser->tokp, lex_pend - parser->tokp); + } + } +#endif if (heredoc_end > 0) { ruby_sourceline = heredoc_end; heredoc_end = 0; } ruby_sourceline++; - lex_pbeg = lex_p = RSTRING(v)->ptr; - lex_pend = lex_p + RSTRING(v)->len; + parser->line_count++; + lex_pbeg = lex_p = RSTRING_PTR(v); + lex_pend = lex_p + RSTRING_LEN(v); + ripper_flush(parser); lex_lastline = v; } - else { - lex_lastline = 0; - return -1; - } } c = (unsigned char)*lex_p++; - if (c == 
'\r' && lex_p < lex_pend && *lex_p == '\n') { + if (c == '\r' && peek('\n')) { lex_p++; c = '\n'; } @@ -2750,15 +5406,16 @@ nextc() } static void -pushback(c) - int c; +parser_pushback(struct parser_params *parser, int c) { if (c == -1) return; lex_p--; + if (lex_p > lex_pbeg && lex_p[0] == '\n' && lex_p[-1] == '\r') { + lex_p--; + } } #define was_bol() (lex_p == lex_pbeg + 1) -#define peek(c) (lex_p != lex_pend && (c) == *lex_p) #define tokfix() (tokenbuf[tokidx]='\0') #define tok() tokenbuf @@ -2766,7 +5423,7 @@ pushback(c) #define toklast() (tokidx>0?tokenbuf[tokidx-1]:0) static char* -newtok() +parser_newtok(struct parser_params *parser) { tokidx = 0; if (!tokenbuf) { @@ -2780,11 +5437,22 @@ newtok() return tokenbuf; } +static char * +parser_tokspace(struct parser_params *parser, int n) +{ + tokidx += n; + + if (tokidx >= toksiz) { + do {toksiz *= 2;} while (toksiz < tokidx); + REALLOC_N(tokenbuf, char, toksiz); + } + return &tokenbuf[tokidx-n]; +} + static void -tokadd(c) - char c; +parser_tokadd(struct parser_params *parser, int c) { - tokenbuf[tokidx++] = c; + tokenbuf[tokidx++] = (char)c; if (tokidx >= toksiz) { toksiz *= 2; REALLOC_N(tokenbuf, char, toksiz); @@ -2792,10 +5460,103 @@ tokadd(c) } static int -read_escape() +parser_tok_hex(struct parser_params *parser, size_t *numlen) { int c; + c = scan_hex(lex_p, 2, numlen); + if (!*numlen) { + yyerror("invalid hex escape"); + return 0; + } + lex_p += *numlen; + return c; +} + +#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n)) + +static int +parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp, + int string_literal, int symbol_literal, int regexp_literal) +{ + /* + * If string_literal is true, then we allow multiple codepoints + * in \u{}, and add the codepoints to the current token. 
+ * Otherwise we're parsing a character literal and return a single + * codepoint without adding it + */ + + int codepoint; + size_t numlen; + + if (regexp_literal) { tokadd('\\'); tokadd('u'); } + + if (peek('{')) { /* handle \u{...} form */ + do { + if (regexp_literal) { tokadd(*lex_p); } + nextc(); + codepoint = scan_hex(lex_p, 6, &numlen); + if (numlen == 0) { + yyerror("invalid Unicode escape"); + return 0; + } + if (codepoint > 0x10ffff) { + yyerror("invalid Unicode codepoint (too large)"); + return 0; + } + lex_p += numlen; + if (regexp_literal) { + tokcopy((int)numlen); + } + else if (codepoint >= 0x80) { + *encp = UTF8_ENC(); + if (string_literal) tokaddmbc(codepoint, *encp); + } + else if (string_literal) { + tokadd(codepoint); + } + } while (string_literal && (peek(' ') || peek('\t'))); + + if (!peek('}')) { + yyerror("unterminated Unicode escape"); + return 0; + } + + if (regexp_literal) { tokadd('}'); } + nextc(); + } + else { /* handle \uxxxx form */ + codepoint = scan_hex(lex_p, 4, &numlen); + if (numlen < 4) { + yyerror("invalid Unicode escape"); + return 0; + } + lex_p += 4; + if (regexp_literal) { + tokcopy(4); + } + else if (codepoint >= 0x80) { + *encp = UTF8_ENC(); + if (string_literal) tokaddmbc(codepoint, *encp); + } + else if (string_literal) { + tokadd(codepoint); + } + } + + return codepoint; +} + +#define ESCAPE_CONTROL 1 +#define ESCAPE_META 2 + +static int +parser_read_escape(struct parser_params *parser, int flags, + rb_encoding **encp) +{ + int c; + size_t numlen; + switch (c = nextc()) { case '\\': /* Backslash */ return c; @@ -2823,26 +5584,16 @@ read_escape() case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': - { - int numlen; - - pushback(c); - c = scan_oct(lex_p, 3, &numlen); - lex_p += numlen; - } + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + pushback(c); + c = scan_oct(lex_p, 3, &numlen); + lex_p += numlen; return c; case 'x': /* hex constant */ - { - int numlen; - - c 
= scan_hex(lex_p, 2, &numlen); - if (numlen == 0) { - yyerror("Invalid escape character syntax"); - return 0; - } - lex_p += numlen; - } + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; + c = tok_hex(&numlen); + if (numlen == 0) return 0; return c; case 'b': /* backspace */ @@ -2852,32 +5603,34 @@ read_escape() return ' '; case 'M': + if (flags & ESCAPE_META) goto eof; if ((c = nextc()) != '-') { - yyerror("Invalid escape character syntax"); pushback(c); - return '\0'; + goto eof; } if ((c = nextc()) == '\\') { - return read_escape() | 0x80; + if (peek('u')) goto eof; + return read_escape(flags|ESCAPE_META, encp) | 0x80; } - else if (c == -1) goto eof; + else if (c == -1 || !ISASCII(c)) goto eof; else { return ((c & 0xff) | 0x80); } case 'C': if ((c = nextc()) != '-') { - yyerror("Invalid escape character syntax"); pushback(c); - return '\0'; + goto eof; } case 'c': + if (flags & ESCAPE_CONTROL) goto eof; if ((c = nextc())== '\\') { - c = read_escape(); + if (peek('u')) goto eof; + c = read_escape(flags|ESCAPE_CONTROL, encp); } else if (c == '?') return 0177; - else if (c == -1) goto eof; + else if (c == -1 || !ISASCII(c)) goto eof; return c & 0x9f; eof: @@ -2890,74 +5643,71 @@ read_escape() } } +static void +parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc) +{ + int len = rb_enc_codelen(c, enc); + rb_enc_mbcput(c, tokspace(len), enc); +} + static int -tokadd_escape(term) - int term; +parser_tokadd_escape(struct parser_params *parser, rb_encoding **encp) { int c; + int flags = 0; + size_t numlen; + first: switch (c = nextc()) { case '\n': return 0; /* just ignore */ case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; { - int i; - - tokadd('\\'); - tokadd(c); - for (i=0; i<2; i++) { - c = nextc(); - if (c == -1) goto eof; - if (c < '0' || '7' < c) { - pushback(c); - break; - } - tokadd(c); - } + ruby_scan_oct(--lex_p, 3, &numlen); + if 
(numlen == 0) goto eof; + lex_p += numlen; + tokcopy((int)numlen + 1); } return 0; case 'x': /* hex constant */ + if (flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof; { - int numlen; - - tokadd('\\'); - tokadd(c); - scan_hex(lex_p, 2, &numlen); - if (numlen == 0) { - yyerror("Invalid escape character syntax"); - return -1; - } - while (numlen--) - tokadd(nextc()); + tok_hex(&numlen); + if (numlen == 0) goto eof; + tokcopy((int)numlen + 2); } return 0; case 'M': + if (flags & ESCAPE_META) goto eof; if ((c = nextc()) != '-') { - yyerror("Invalid escape character syntax"); pushback(c); - return 0; + goto eof; } - tokadd('\\'); tokadd('M'); tokadd('-'); + tokcopy(3); + flags |= ESCAPE_META; goto escaped; case 'C': + if (flags & ESCAPE_CONTROL) goto eof; if ((c = nextc()) != '-') { - yyerror("Invalid escape character syntax"); pushback(c); - return 0; + goto eof; } - tokadd('\\'); tokadd('C'); tokadd('-'); + tokcopy(3); goto escaped; case 'c': - tokadd('\\'); tokadd('c'); + if (flags & ESCAPE_CONTROL) goto eof; + tokcopy(2); + flags |= ESCAPE_CONTROL; escaped: if ((c = nextc()) == '\\') { - return tokadd_escape(term); + goto first; } else if (c == -1) goto eof; tokadd(c); @@ -2969,92 +5719,102 @@ tokadd_escape(term) return -1; default: - if (c != '\\' || c != term) - tokadd('\\'); + tokadd('\\'); tokadd(c); } return 0; } +extern int rb_char_to_option_kcode(int c, int *option, int *kcode); + static int -regx_options() +parser_regx_options(struct parser_params *parser) { - char kcode = 0; + int kcode = 0; + int kopt = 0; int options = 0; - int c; + int c, opt, kc; newtok(); while (c = nextc(), ISALPHA(c)) { - switch (c) { - case 'i': - options |= RE_OPTION_IGNORECASE; - break; - case 'x': - options |= RE_OPTION_EXTENDED; - break; - case 'm': - options |= RE_OPTION_MULTILINE; - break; - case 'o': - options |= RE_OPTION_ONCE; - break; - case 'n': - kcode = 16; - break; - case 'e': - kcode = 32; - break; - case 's': - kcode = 48; - break; - case 'u': - kcode = 64; - break; - 
default: + if (c == 'o') { + options |= RE_OPTION_ONCE; + } + else if (rb_char_to_option_kcode(c, &opt, &kc)) { + if (kc >= 0) { + if (kc != rb_ascii8bit_encindex()) kcode = c; + kopt = opt; + } + else { + options |= opt; + } + } + else { tokadd(c); - break; - } + } } + options |= kopt; pushback(c); if (toklen()) { tokfix(); - rb_compile_error("unknown regexp option%s - %s", - toklen() > 1 ? "s" : "", tok()); + compile_error(PARSER_ARG "unknown regexp option%s - %s", + toklen() > 1 ? "s" : "", tok()); } - return options | kcode; + return options | RE_OPTION_ENCODING(kcode); } -#define STR_FUNC_ESCAPE 0x01 -#define STR_FUNC_EXPAND 0x02 -#define STR_FUNC_REGEXP 0x04 -#define STR_FUNC_QWORDS 0x08 -#define STR_FUNC_SYMBOL 0x10 -#define STR_FUNC_INDENT 0x20 - -enum string_type { - str_squote = (0), - str_dquote = (STR_FUNC_EXPAND), - str_xquote = (STR_FUNC_EXPAND), - str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), - str_sword = (STR_FUNC_QWORDS), - str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), - str_ssym = (STR_FUNC_SYMBOL), - str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND), -}; - static void -dispose_string(str) - VALUE str; +dispose_string(VALUE str) { - xfree(RSTRING(str)->ptr); + /* TODO: should use another API? 
*/ + if (RBASIC(str)->flags & RSTRING_NOEMBED) + xfree(RSTRING_PTR(str)); rb_gc_force_recycle(str); } static int -tokadd_string(func, term, paren, nest) - int func, term, paren, *nest; +parser_tokadd_mbchar(struct parser_params *parser, int c) +{ + int len = parser_precise_mbclen(); + if (!MBCLEN_CHARFOUND_P(len)) { + compile_error(PARSER_ARG "invalid multibyte char (%s)", parser_encoding_name()); + return -1; + } + tokadd(c); + lex_p += --len; + if (len > 0) tokcopy(len); + return c; +} + +#define tokadd_mbchar(c) parser_tokadd_mbchar(parser, c) + +static int +parser_tokadd_string(struct parser_params *parser, + int func, int term, int paren, long *nest, + rb_encoding **encp) { int c; + int has_nonascii = 0; + rb_encoding *enc = *encp; + char *errbuf = 0; + static const char mixed_msg[] = "%s mixed within %s source"; + +#define mixed_error(enc1, enc2) if (!errbuf) { \ + size_t len = sizeof(mixed_msg) - 4; \ + len += strlen(rb_enc_name(enc1)); \ + len += strlen(rb_enc_name(enc2)); \ + errbuf = ALLOCA_N(char, len); \ + snprintf(errbuf, len, mixed_msg, \ + rb_enc_name(enc1), \ + rb_enc_name(enc2)); \ + yyerror(errbuf); \ + } +#define mixed_escape(beg, enc1, enc2) do { \ + const char *pos = lex_p; \ + lex_p = beg; \ + mixed_error(enc1, enc2); \ + lex_p = pos; \ + } while (0) while ((c = nextc()) != -1) { if (paren && c == paren) { @@ -3075,6 +5835,7 @@ tokadd_string(func, term, paren, nest) } } else if (c == '\\') { + const char *beg = lex_p - 1; c = nextc(); switch (c) { case '\n': @@ -3087,45 +5848,67 @@ tokadd_string(func, term, paren, nest) if (func & STR_FUNC_ESCAPE) tokadd(c); break; + case 'u': + if ((func & STR_FUNC_EXPAND) == 0) { + tokadd('\\'); + break; + } + parser_tokadd_utf8(parser, &enc, 1, + func & STR_FUNC_SYMBOL, + func & STR_FUNC_REGEXP); + if (has_nonascii && enc != *encp) { + mixed_escape(beg, enc, *encp); + } + continue; + default: if (func & STR_FUNC_REGEXP) { pushback(c); - if (tokadd_escape(term) < 0) + if ((c = tokadd_escape(&enc)) < 0) 
return -1; + if (has_nonascii && enc != *encp) { + mixed_escape(beg, enc, *encp); + } continue; } else if (func & STR_FUNC_EXPAND) { pushback(c); if (func & STR_FUNC_ESCAPE) tokadd('\\'); - c = read_escape(); + c = read_escape(0, &enc); } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { /* ignore backslashed spaces in %w */ } else if (c != term && !(paren && c == paren)) { tokadd('\\'); + pushback(c); + continue; } } } - else if (ismbchar(c)) { - int i, len = mbclen(c)-1; - - for (i = 0; i < len; i++) { - tokadd(c); - c = nextc(); + else if (!parser_isascii()) { + has_nonascii = 1; + if (enc != *encp) { + mixed_error(enc, *encp); + continue; } + if (tokadd_mbchar(c) == -1) return -1; + continue; } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { pushback(c); break; } - if (!c && (func & STR_FUNC_SYMBOL)) { - func &= ~STR_FUNC_SYMBOL; - rb_compile_error("symbol cannot contain '\\0'"); - continue; - } + if (c & 0x80) { + has_nonascii = 1; + if (enc != *encp) { + mixed_error(enc, *encp); + continue; + } + } tokadd(c); } + *encp = enc; return c; } @@ -3133,13 +5916,13 @@ tokadd_string(func, term, paren, nest) rb_node_newnode(NODE_STRTERM, (func), (term) | ((paren) << (CHAR_BIT * 2)), 0) static int -parse_string(quote) - NODE *quote; +parser_parse_string(struct parser_params *parser, NODE *quote) { - int func = quote->nd_func; + int func = (int)quote->nd_func; int term = nd_term(quote); int paren = nd_paren(quote); int c, space = 0; + rb_encoding *enc = parser->enc; if (func == -1) return tSTRING_END; c = nextc(); @@ -3153,7 +5936,7 @@ parse_string(quote) return ' '; } if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; - yylval.num = regx_options(); + set_yylval_num(regx_options()); return tREGEXP_END; } if (space) { @@ -3173,21 +5956,43 @@ parse_string(quote) tokadd('#'); } pushback(c); - if (tokadd_string(func, term, paren, "e->nd_nest) == -1) { + if (tokadd_string(func, term, paren, "e->nd_nest, + &enc) == -1) { ruby_sourceline = nd_line(quote); - 
rb_compile_error("unterminated string meets end of file"); - return tSTRING_END; + if (func & STR_FUNC_REGEXP) { + if (parser->eofp) + compile_error(PARSER_ARG "unterminated regexp meets end of file"); + return tREGEXP_END; + } + else { + if (parser->eofp) + compile_error(PARSER_ARG "unterminated string meets end of file"); + return tSTRING_END; + } } tokfix(); - yylval.node = NEW_STR(rb_str_new(tok(), toklen())); + set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); + +#ifdef RIPPER + if (!NIL_P(parser->delayed)){ + ptrdiff_t len = lex_p - parser->tokp; + if (len > 0) { + rb_enc_str_buf_cat(parser->delayed, parser->tokp, len, enc); + } + ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + parser->tokp = lex_p; + } +#endif + return tSTRING_CONTENT; } static int -heredoc_identifier() +parser_heredoc_identifier(struct parser_params *parser) { - int c = nextc(), term, func = 0, len; + int c = nextc(), term, func = 0; + long len; if (c == '-') { c = nextc(); @@ -3205,17 +6010,16 @@ heredoc_identifier() tokadd(func); term = c; while ((c = nextc()) != -1 && c != term) { - len = mbclen(c); - do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1); + if (tokadd_mbchar(c) == -1) return 0; } if (c == -1) { - rb_compile_error("unterminated here document identifier"); + compile_error(PARSER_ARG "unterminated here document identifier"); return 0; } break; default: - if (!is_identchar(c)) { + if (!parser_is_identchar()) { pushback(c); if (func & STR_FUNC_INDENT) { pushback('-'); @@ -3226,71 +6030,90 @@ heredoc_identifier() term = '"'; tokadd(func |= str_dquote); do { - len = mbclen(c); - do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1); - } while ((c = nextc()) != -1 && is_identchar(c)); + if (tokadd_mbchar(c) == -1) return 0; + } while ((c = nextc()) != -1 && parser_is_identchar()); pushback(c); break; } tokfix(); +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tHEREDOC_BEG); +#endif len = lex_p - lex_pbeg; - lex_p = lex_pend; + lex_goto_eol(parser); 
lex_strterm = rb_node_newnode(NODE_HEREDOC, - rb_str_new(tok(), toklen()), /* nd_lit */ + STR_NEW(tok(), toklen()), /* nd_lit */ len, /* nd_nth */ lex_lastline); /* nd_orig */ + nd_set_line(lex_strterm, ruby_sourceline); + ripper_flush(parser); return term == '`' ? tXSTRING_BEG : tSTRING_BEG; } static void -heredoc_restore(here) - NODE *here; +parser_heredoc_restore(struct parser_params *parser, NODE *here) { - VALUE line = here->nd_orig; + VALUE line; + + line = here->nd_orig; lex_lastline = line; - lex_pbeg = RSTRING(line)->ptr; - lex_pend = lex_pbeg + RSTRING(line)->len; + lex_pbeg = RSTRING_PTR(line); + lex_pend = lex_pbeg + RSTRING_LEN(line); lex_p = lex_pbeg + here->nd_nth; heredoc_end = ruby_sourceline; ruby_sourceline = nd_line(here); dispose_string(here->nd_lit); rb_gc_force_recycle((VALUE)here); + ripper_flush(parser); } static int -whole_match_p(eos, len, indent) - char *eos; - int len, indent; +parser_whole_match_p(struct parser_params *parser, + const char *eos, long len, int indent) { - char *p = lex_pbeg; - int n; + const char *p = lex_pbeg; + long n; if (indent) { while (*p && ISSPACE(*p)) p++; } - n= lex_pend - (p + len); - if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return Qfalse; - if (strncmp(eos, p, len) == 0) return Qtrue; - return Qfalse; + n = lex_pend - (p + len); + if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return FALSE; + return strncmp(eos, p, len) == 0; } static int -here_document(here) - NODE *here; +parser_here_document(struct parser_params *parser, NODE *here) { int c, func, indent = 0; - char *eos, *p, *pend; + const char *eos, *p, *pend; long len; VALUE str = 0; + rb_encoding *enc = parser->enc; - eos = RSTRING(here->nd_lit)->ptr; - len = RSTRING(here->nd_lit)->len - 1; + eos = RSTRING_PTR(here->nd_lit); + len = RSTRING_LEN(here->nd_lit) - 1; indent = (func = *eos++) & STR_FUNC_INDENT; if ((c = nextc()) == -1) { error: - rb_compile_error("can't find string \"%s\" anywhere before EOF", eos); + 
compile_error(PARSER_ARG "can't find string \"%s\" anywhere before EOF", eos); +#ifdef RIPPER + if (NIL_P(parser->delayed)) { + ripper_dispatch_scan_event(parser, tSTRING_CONTENT); + } + else { + if (str || + ((len = lex_p - parser->tokp) > 0 && + (str = STR_NEW3(parser->tokp, len, enc, func), 1))) { + rb_str_append(parser->delayed, str); + } + ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + } + lex_goto_eol(parser); +#endif + restore: heredoc_restore(lex_strterm); lex_strterm = 0; return 0; @@ -3302,7 +6125,7 @@ here_document(here) if (!(func & STR_FUNC_EXPAND)) { do { - p = RSTRING(lex_lastline)->ptr; + p = RSTRING_PTR(lex_lastline); pend = lex_pend; if (pend > p) { switch (pend[-1]) { @@ -3318,9 +6141,9 @@ here_document(here) if (str) rb_str_cat(str, p, pend - p); else - str = rb_str_new(p, pend - p); + str = STR_NEW(p, pend - p); if (pend < lex_pend) rb_str_cat(str, "\n", 1); - lex_p = lex_pend; + lex_goto_eol(parser); if (nextc() == -1) { if (str) dispose_string(str); goto error; @@ -3328,6 +6151,7 @@ here_document(here) } while (!whole_match_p(eos, len, indent)); } else { + /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ newtok(); if (c == '#') { switch (c = nextc()) { @@ -3342,39 +6166,380 @@ here_document(here) } do { pushback(c); - if ((c = tokadd_string(func, '\n', 0, NULL)) == -1) goto error; + if ((c = tokadd_string(func, '\n', 0, NULL, &enc)) == -1) { + if (parser->eofp) goto error; + goto restore; + } if (c != '\n') { - yylval.node = NEW_STR(rb_str_new(tok(), toklen())); + set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); return tSTRING_CONTENT; } tokadd(nextc()); + /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ if ((c = nextc()) == -1) goto error; } while (!whole_match_p(eos, len, indent)); - str = rb_str_new(tok(), toklen()); + str = STR_NEW3(tok(), toklen(), enc, func); } +#ifdef RIPPER + if (!NIL_P(parser->delayed)) + ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + lex_goto_eol(parser); + 
ripper_dispatch_ignored_scan_event(parser, tHEREDOC_END); +#endif heredoc_restore(lex_strterm); lex_strterm = NEW_STRTERM(-1, 0, 0); - yylval.node = NEW_STR(str); + set_yylval_str(str); return tSTRING_CONTENT; } #include "lex.c" static void -arg_ambiguous() +arg_ambiguous_gen(struct parser_params *parser) +{ +#ifndef RIPPER + rb_warning0("ambiguous first argument; put parentheses or even spaces"); +#else + dispatch0(arg_ambiguous); +#endif +} +#define arg_ambiguous() (arg_ambiguous_gen(parser), 1) + +static ID +formal_argument_gen(struct parser_params *parser, ID lhs) +{ +#ifndef RIPPER + if (!is_local_id(lhs)) + yyerror("formal argument must be local variable"); +#endif + shadowing_lvar(lhs); + return lhs; +} + +static int +lvar_defined_gen(struct parser_params *parser, ID id) +{ + return (dyna_in_block() && dvar_defined(id)) || local_id(id); +} + +/* emacsen -*- hack */ +static long +parser_encode_length(struct parser_params *parser, const char *name, long len) +{ + long nlen; + + if (len > 5 && name[nlen = len - 5] == '-') { + if (rb_memcicmp(name + nlen + 1, "unix", 4) == 0) + return nlen; + } + if (len > 4 && name[nlen = len - 4] == '-') { + if (rb_memcicmp(name + nlen + 1, "dos", 3) == 0) + return nlen; + if (rb_memcicmp(name + nlen + 1, "mac", 3) == 0 && + !(len == 8 && rb_memcicmp(name, "utf8-mac", len) == 0)) + /* exclude UTF8-MAC because the encoding named "UTF8" doesn't exist in Ruby */ + return nlen; + } + return len; +} + +static void +parser_set_encode(struct parser_params *parser, const char *name) +{ + int idx = rb_enc_find_index(name); + rb_encoding *enc; + VALUE excargs[3]; + + if (idx < 0) { + VALUE rb_make_backtrace(void); + VALUE rb_make_exception(int, VALUE*); + + excargs[1] = rb_sprintf("unknown encoding name: %s", name); + error: + excargs[0] = rb_eArgError; + excargs[2] = rb_make_backtrace(); + rb_ary_unshift(excargs[2], rb_sprintf("%s:%d", ruby_sourcefile, ruby_sourceline)); + rb_exc_raise(rb_make_exception(3, excargs)); + } + enc = 
rb_enc_from_index(idx); + if (!rb_enc_asciicompat(enc)) { + excargs[1] = rb_sprintf("%s is not ASCII compatible", rb_enc_name(enc)); + goto error; + } + parser->enc = enc; +#ifndef RIPPER + if (ruby_debug_lines) { + long i, n = RARRAY_LEN(ruby_debug_lines); + const VALUE *p = RARRAY_PTR(ruby_debug_lines); + for (i = 0; i < n; ++i) { + rb_enc_associate_index(*p, idx); + } + } +#endif +} + +static int +comment_at_top(struct parser_params *parser) +{ + const char *p = lex_pbeg, *pend = lex_p - 1; + if (parser->line_count != (parser->has_shebang ? 2 : 1)) return 0; + while (p < pend) { + if (!ISSPACE(*p)) return 0; + p++; + } + return 1; +} + +#ifndef RIPPER +typedef long (*rb_magic_comment_length_t)(struct parser_params *parser, const char *name, long len); +typedef void (*rb_magic_comment_setter_t)(struct parser_params *parser, const char *name, const char *val); + +static void +magic_comment_encoding(struct parser_params *parser, const char *name, const char *val) { - rb_warning("ambiguous first argument; put parentheses or even spaces"); + if (!comment_at_top(parser)) { + return; + } + parser_set_encode(parser, val); +} + +struct magic_comment { + const char *name; + rb_magic_comment_setter_t func; + rb_magic_comment_length_t length; +}; + +static const struct magic_comment magic_comments[] = { + {"coding", magic_comment_encoding, parser_encode_length}, + {"encoding", magic_comment_encoding, parser_encode_length}, +}; +#endif + +static const char * +magic_comment_marker(const char *str, long len) +{ + long i = 2; + + while (i < len) { + switch (str[i]) { + case '-': + if (str[i-1] == '*' && str[i-2] == '-') { + return str + i + 1; + } + i += 2; + break; + case '*': + if (i + 1 >= len) return 0; + if (str[i+1] != '-') { + i += 4; + } + else if (str[i-1] != '-') { + i += 2; + } + else { + return str + i + 2; + } + break; + default: + i += 3; + break; + } + } + return 0; +} + +static int +parser_magic_comment(struct parser_params *parser, const char *str, long len) +{ 
+ VALUE name = 0, val = 0; + const char *beg, *end, *vbeg, *vend; +#define str_copy(_s, _p, _n) ((_s) \ + ? (rb_str_resize((_s), (_n)), \ + MEMCPY(RSTRING_PTR(_s), (_p), char, (_n)), (_s)) \ + : ((_s) = STR_NEW((_p), (_n)))) + + if (len <= 7) return FALSE; + if (!(beg = magic_comment_marker(str, len))) return FALSE; + if (!(end = magic_comment_marker(beg, str + len - beg))) return FALSE; + str = beg; + len = end - beg - 3; + + /* %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*" */ + while (len > 0) { +#ifndef RIPPER + const struct magic_comment *p = magic_comments; +#endif + char *s; + int i; + long n = 0; + + for (; len > 0 && *str; str++, --len) { + switch (*str) { + case '\'': case '"': case ':': case ';': + continue; + } + if (!ISSPACE(*str)) break; + } + for (beg = str; len > 0; str++, --len) { + switch (*str) { + case '\'': case '"': case ':': case ';': + break; + default: + if (ISSPACE(*str)) break; + continue; + } + break; + } + for (end = str; len > 0 && ISSPACE(*str); str++, --len); + if (!len) break; + if (*str != ':') continue; + + do str++; while (--len > 0 && ISSPACE(*str)); + if (!len) break; + if (*str == '"') { + for (vbeg = ++str; --len > 0 && *str != '"'; str++) { + if (*str == '\\') { + --len; + ++str; + } + } + vend = str; + if (len) { + --len; + ++str; + } + } + else { + for (vbeg = str; len > 0 && *str != '"' && *str != ';' && !ISSPACE(*str); --len, str++); + vend = str; + } + while (len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++; + + n = end - beg; + str_copy(name, beg, n); + s = RSTRING_PTR(name); + for (i = 0; i < n; ++i) { + if (s[i] == '-') s[i] = '_'; + } +#ifndef RIPPER + do { + if (STRNCASECMP(p->name, s, n) == 0) { + n = vend - vbeg; + if (p->length) { + n = (*p->length)(parser, vbeg, n); + } + str_copy(val, vbeg, n); + (*p->func)(parser, s, RSTRING_PTR(val)); + break; + } + } while (++p < magic_comments + numberof(magic_comments)); +#else + dispatch2(magic_comment, name, val); +#endif + } + + return 
TRUE; +} + +static void +set_file_encoding(struct parser_params *parser, const char *str, const char *send) +{ + int sep = 0; + const char *beg = str; + VALUE s; + + for (;;) { + if (send - str <= 6) return; + switch (str[6]) { + case 'C': case 'c': str += 6; continue; + case 'O': case 'o': str += 5; continue; + case 'D': case 'd': str += 4; continue; + case 'I': case 'i': str += 3; continue; + case 'N': case 'n': str += 2; continue; + case 'G': case 'g': str += 1; continue; + case '=': case ':': + sep = 1; + str += 6; + break; + default: + str += 6; + if (ISSPACE(*str)) break; + continue; + } + if (STRNCASECMP(str-6, "coding", 6) == 0) break; + } + for (;;) { + do { + if (++str >= send) return; + } while (ISSPACE(*str)); + if (sep) break; + if (*str != '=' && *str != ':') return; + sep = 1; + str++; + } + beg = str; + while ((*str == '-' || *str == '_' || ISALNUM(*str)) && ++str < send); + s = rb_str_new(beg, parser_encode_length(parser, beg, str - beg)); + parser_set_encode(parser, RSTRING_PTR(s)); + rb_str_resize(s, 0); +} + +static void +parser_prepare(struct parser_params *parser) +{ + int c = nextc(); + switch (c) { + case '#': + if (peek('!')) parser->has_shebang = 1; + break; + case 0xef: /* UTF-8 BOM marker */ + if (lex_pend - lex_p >= 2 && + (unsigned char)lex_p[0] == 0xbb && + (unsigned char)lex_p[1] == 0xbf) { + parser->enc = rb_utf8_encoding(); + lex_p += 2; + lex_pbeg = lex_p; + return; + } + break; + case EOF: + return; + } + pushback(c); + parser->enc = rb_enc_get(lex_lastline); } #define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG) +#define IS_END() (lex_state == EXPR_END || lex_state == EXPR_ENDARG || lex_state == EXPR_ENDFN) +#define IS_BEG() (lex_state == EXPR_BEG || lex_state == EXPR_MID || lex_state == EXPR_VALUE || lex_state == EXPR_CLASS) +#define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) + +#ifndef RIPPER +#define ambiguous_operator(op, syn) ( \ + rb_warning0("`"op"' after local variable is interpreted as binary 
operator"), \ + rb_warning0("even though it seems like "syn"")) +#else +#define ambiguous_operator(op, syn) dispatch2(operator_ambiguous, ripper_intern(op), rb_str_new_cstr(syn)) +#endif +#define warn_balanced(op, syn) \ + (last_state != EXPR_CLASS && last_state != EXPR_DOT && \ + last_state != EXPR_FNAME && last_state != EXPR_ENDFN && \ + last_state != EXPR_ENDARG && \ + space_seen && !ISSPACE(c) && \ + (ambiguous_operator(op, syn), 0)) static int -yylex() +parser_yylex(struct parser_params *parser) { register int c; int space_seen = 0; int cmd_state; - enum lex_state last_state; + enum lex_state_e last_state; + rb_encoding *enc; + int mb; +#ifdef RIPPER + int fallthru = FALSE; +#endif if (lex_strterm) { int token; @@ -3396,8 +6561,9 @@ yylex() return token; } cmd_state = command_start; - command_start = Qfalse; + command_start = FALSE; retry: + last_state = lex_state; switch (c = nextc()) { case '\0': /* NUL */ case '\004': /* ^D */ @@ -3408,14 +6574,35 @@ yylex() /* white spaces */ case ' ': case '\t': case '\f': case '\r': case '\13': /* '\v' */ - space_seen++; + space_seen = 1; +#ifdef RIPPER + while ((c = nextc())) { + switch (c) { + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + break; + default: + goto outofloop; + } + } + outofloop: + pushback(c); + ripper_dispatch_scan_event(parser, tSP); +#endif goto retry; case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { - if (c == -1) - return 0; + /* no magic_comment in shebang line */ + if (!parser_magic_comment(parser, lex_p, lex_pend - lex_p)) { + if (comment_at_top(parser)) { + set_file_encoding(parser, lex_p, lex_pend); + } } + lex_p = lex_pend; +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tCOMMENT); + fallthru = TRUE; +#endif /* fall through */ case '\n': switch (lex_state) { @@ -3423,18 +6610,52 @@ yylex() case EXPR_FNAME: case EXPR_DOT: case EXPR_CLASS: + case EXPR_VALUE: +#ifdef RIPPER + if (!fallthru) { + ripper_dispatch_scan_event(parser, tIGNORED_NL); + } + 
fallthru = FALSE; +#endif goto retry; default: break; } - command_start = Qtrue; + while ((c = nextc())) { + switch (c) { + case ' ': case '\t': case '\f': case '\r': + case '\13': /* '\v' */ + space_seen = 1; + break; + case '.': { + if ((c = nextc()) != '.') { + pushback(c); + pushback('.'); + goto retry; + } + } + default: + --ruby_sourceline; + lex_nextline = lex_lastline; + case -1: /* EOF no decrement*/ + lex_goto_eol(parser); +#ifdef RIPPER + if (c != -1) { + parser->tokp = lex_p; + } +#endif + goto normal_newline; + } + } + normal_newline: + command_start = TRUE; lex_state = EXPR_BEG; return '\n'; case '*': if ((c = nextc()) == '*') { if ((c = nextc()) == '=') { - yylval.id = tPOW; + set_yylval_id(tPOW); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -3443,19 +6664,20 @@ yylex() } else { if (c == '=') { - yylval.id = '*'; + set_yylval_id('*'); lex_state = EXPR_BEG; return tOP_ASGN; } pushback(c); - if (IS_ARG() && space_seen && !ISSPACE(c)){ - rb_warning("`*' interpreted as argument prefix"); + if (IS_SPCARG(c)) { + rb_warning0("`*' interpreted as argument prefix"); c = tSTAR; } - else if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + else if (IS_BEG()) { c = tSTAR; } else { + warn_balanced("*", "argument prefix"); c = '*'; } } @@ -3468,8 +6690,17 @@ yylex() return c; case '!': - lex_state = EXPR_BEG; - if ((c = nextc()) == '=') { + c = nextc(); + if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { + lex_state = EXPR_ARG; + if (c == '@') { + return '!'; + } + } + else { + lex_state = EXPR_BEG; + } + if (c == '=') { return tNEQ; } if (c == '~') { @@ -3482,11 +6713,23 @@ yylex() if (was_bol()) { /* skip embedded rd document */ if (strncmp(lex_p, "begin", 5) == 0 && ISSPACE(lex_p[5])) { +#ifdef RIPPER + int first_p = TRUE; + + lex_goto_eol(parser); + ripper_dispatch_scan_event(parser, tEMBDOC_BEG); +#endif for (;;) { - lex_p = lex_pend; + lex_goto_eol(parser); +#ifdef RIPPER + if (!first_p) { + ripper_dispatch_scan_event(parser, tEMBDOC); + } + first_p 
= FALSE; +#endif c = nextc(); if (c == -1) { - rb_compile_error("embedded document meets end of file"); + compile_error(PARSER_ARG "embedded document meets end of file"); return 0; } if (c != '=') continue; @@ -3495,7 +6738,10 @@ yylex() break; } } - lex_p = lex_pend; + lex_goto_eol(parser); +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tEMBDOC_END); +#endif goto retry; } } @@ -3523,12 +6769,12 @@ yylex() return '='; case '<': + last_state = lex_state; c = nextc(); if (c == '<' && - lex_state != EXPR_END && lex_state != EXPR_DOT && - lex_state != EXPR_ENDARG && lex_state != EXPR_CLASS && + !IS_END() && (!IS_ARG() || space_seen)) { int token = heredoc_identifier(); if (token) return token; @@ -3548,11 +6794,12 @@ yylex() } if (c == '<') { if ((c = nextc()) == '=') { - yylval.id = tLSHFT; + set_yylval_id(tLSHFT); lex_state = EXPR_BEG; return tOP_ASGN; } pushback(c); + warn_balanced("<<", "here document"); return tLSHFT; } pushback(c); @@ -3570,7 +6817,7 @@ yylex() } if (c == '>') { if ((c = nextc()) == '=') { - yylval.id = tRSHFT; + set_yylval_id(tRSHFT); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -3586,7 +6833,7 @@ yylex() case '`': if (lex_state == EXPR_FNAME) { - lex_state = EXPR_END; + lex_state = EXPR_ENDFN; return c; } if (lex_state == EXPR_DOT) { @@ -3604,17 +6851,17 @@ yylex() return tSTRING_BEG; case '?': - if (lex_state == EXPR_END || lex_state == EXPR_ENDARG) { - lex_state = EXPR_BEG; + if (IS_END()) { + lex_state = EXPR_VALUE; return '?'; } c = nextc(); if (c == -1) { - rb_compile_error("incomplete character syntax"); + compile_error(PARSER_ARG "incomplete character syntax"); return 0; } - if (ISSPACE(c)){ - if (!IS_ARG()){ + if (rb_enc_isspace(c, parser->enc)) { + if (!IS_ARG()) { int c2 = 0; switch (c) { case ' ': @@ -3637,34 +6884,52 @@ yylex() break; } if (c2) { - rb_warn("invalid character syntax; use ?\\%c", c2); + rb_warnI("invalid character syntax; use ?\\%c", c2); } } ternary: pushback(c); - lex_state = EXPR_BEG; + lex_state = EXPR_VALUE; 
return '?'; } - else if (ismbchar(c)) { - rb_warn("multibyte character literal not supported yet; use ?\\%.3o", c); - goto ternary; + newtok(); + enc = parser->enc; + if (!parser_isascii()) { + if (tokadd_mbchar(c) == -1) return 0; } - else if ((ISALNUM(c) || c == '_') && lex_p < lex_pend && is_identchar(*lex_p)) { + else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && + lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) { goto ternary; } - else if (c == '\\') { - c = read_escape(); - } - c &= 0xff; + else if (c == '\\') { + if (peek('u')) { + nextc(); + c = parser_tokadd_utf8(parser, &enc, 0, 0, 0); + if (0x80 <= c) { + tokaddmbc(c, enc); + } + else { + tokadd(c); + } + } + else { + c = read_escape(0, &enc); + tokadd(c); + } + } + else { + tokadd(c); + } + tokfix(); + set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0)); lex_state = EXPR_END; - yylval.node = NEW_LIT(INT2FIX(c)); - return tINTEGER; + return tCHAR; case '&': if ((c = nextc()) == '&') { lex_state = EXPR_BEG; if ((c = nextc()) == '=') { - yylval.id = tANDOP; + set_yylval_id(tANDOP); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -3672,19 +6937,20 @@ yylex() return tANDOP; } else if (c == '=') { - yylval.id = '&'; + set_yylval_id('&'); lex_state = EXPR_BEG; return tOP_ASGN; } pushback(c); - if (IS_ARG() && space_seen && !ISSPACE(c)){ - rb_warning("`&' interpreted as argument prefix"); + if (IS_SPCARG(c)) { + rb_warning0("`&' interpreted as argument prefix"); c = tAMPER; } - else if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + else if (IS_BEG()) { c = tAMPER; } else { + warn_balanced("&", "argument prefix"); c = '&'; } switch (lex_state) { @@ -3699,7 +6965,7 @@ yylex() if ((c = nextc()) == '|') { lex_state = EXPR_BEG; if ((c = nextc()) == '=') { - yylval.id = tOROP; + set_yylval_id(tOROP); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -3707,7 +6973,7 @@ yylex() return tOROP; } if (c == '=') { - yylval.id = '|'; + set_yylval_id('|'); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -3731,16 
+6997,14 @@ yylex() return '+'; } if (c == '=') { - yylval.id = '+'; + set_yylval_id('+'); lex_state = EXPR_BEG; return tOP_ASGN; } - if (lex_state == EXPR_BEG || lex_state == EXPR_MID || - (IS_ARG() && space_seen && !ISSPACE(c))) { - if (IS_ARG()) arg_ambiguous(); + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) { lex_state = EXPR_BEG; pushback(c); - if (ISDIGIT(c)) { + if (c != -1 && ISDIGIT(c)) { c = '+'; goto start_num; } @@ -3748,6 +7012,7 @@ yylex() } lex_state = EXPR_BEG; pushback(c); + warn_balanced("+", "unary operator"); return '+'; case '-': @@ -3761,22 +7026,25 @@ yylex() return '-'; } if (c == '=') { - yylval.id = '-'; + set_yylval_id('-'); lex_state = EXPR_BEG; return tOP_ASGN; } - if (lex_state == EXPR_BEG || lex_state == EXPR_MID || - (IS_ARG() && space_seen && !ISSPACE(c))) { - if (IS_ARG()) arg_ambiguous(); + if (c == '>') { + lex_state = EXPR_ARG; + return tLAMBDA; + } + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) { lex_state = EXPR_BEG; pushback(c); - if (ISDIGIT(c)) { + if (c != -1 && ISDIGIT(c)) { return tUMINUS_NUM; } return tUMINUS; } lex_state = EXPR_BEG; pushback(c); + warn_balanced("-", "unary operator"); return '-'; case '.': @@ -3789,7 +7057,7 @@ yylex() return tDOT2; } pushback(c); - if (ISDIGIT(c)) { + if (c != -1 && ISDIGIT(c)) { yyerror("no .<digit> floating literal anymore; put 0 before dot"); } lex_state = EXPR_DOT; @@ -3809,12 +7077,13 @@ yylex() c = nextc(); } if (c == '0') { +#define no_digits() do {yyerror("numeric literal without digits"); return 0;} while (0) int start = toklen(); c = nextc(); if (c == 'x' || c == 'X') { /* hexadecimal */ c = nextc(); - if (ISXDIGIT(c)) { + if (c != -1 && ISXDIGIT(c)) { do { if (c == '_') { if (nondigit) break; @@ -3829,10 +7098,10 @@ yylex() pushback(c); tokfix(); if (toklen() == start) { - yyerror("numeric literal without digits"); + no_digits(); } else if (nondigit) goto trailing_uc; - yylval.node = NEW_LIT(rb_cstr_to_inum(tok(), 16, Qfalse)); + 
set_yylval_literal(rb_cstr_to_inum(tok(), 16, FALSE)); return tINTEGER; } if (c == 'b' || c == 'B') { @@ -3853,16 +7122,16 @@ yylex() pushback(c); tokfix(); if (toklen() == start) { - yyerror("numeric literal without digits"); + no_digits(); } else if (nondigit) goto trailing_uc; - yylval.node = NEW_LIT(rb_cstr_to_inum(tok(), 2, Qfalse)); + set_yylval_literal(rb_cstr_to_inum(tok(), 2, FALSE)); return tINTEGER; } if (c == 'd' || c == 'D') { /* decimal */ c = nextc(); - if (ISDIGIT(c)) { + if (c != -1 && ISDIGIT(c)) { do { if (c == '_') { if (nondigit) break; @@ -3877,10 +7146,10 @@ yylex() pushback(c); tokfix(); if (toklen() == start) { - yyerror("numeric literal without digits"); + no_digits(); } else if (nondigit) goto trailing_uc; - yylval.node = NEW_LIT(rb_cstr_to_inum(tok(), 10, Qfalse)); + set_yylval_literal(rb_cstr_to_inum(tok(), 10, FALSE)); return tINTEGER; } if (c == '_') { @@ -3890,8 +7159,8 @@ yylex() if (c == 'o' || c == 'O') { /* prefixed octal */ c = nextc(); - if (c == '_') { - yyerror("numeric literal without digits"); + if (c == -1 || c == '_' || !ISDIGIT(c)) { + no_digits(); } } if (c >= '0' && c <= '7') { @@ -3903,7 +7172,8 @@ yylex() nondigit = c; continue; } - if (c < '0' || c > '7') break; + if (c < '0' || c > '9') break; + if (c > '7') goto invalid_octal; nondigit = 0; tokadd(c); } while ((c = nextc()) != -1); @@ -3911,7 +7181,7 @@ yylex() pushback(c); tokfix(); if (nondigit) goto trailing_uc; - yylval.node = NEW_LIT(rb_cstr_to_inum(tok(), 8, Qfalse)); + set_yylval_literal(rb_cstr_to_inum(tok(), 8, FALSE)); return tINTEGER; } if (nondigit) { @@ -3920,14 +7190,15 @@ yylex() } } if (c > '7' && c <= '9') { - yyerror("Illegal octal digit"); + invalid_octal: + yyerror("Invalid octal digit"); } else if (c == '.' 
|| c == 'e' || c == 'E') { tokadd('0'); } else { pushback(c); - yylval.node = NEW_LIT(INT2FIX(0)); + set_yylval_literal(INT2FIX(0)); return tINTEGER; } } @@ -3947,7 +7218,7 @@ yylex() } else { int c0 = nextc(); - if (!ISDIGIT(c0)) { + if (c0 == -1 || !ISDIGIT(c0)) { pushback(c0); goto decode_num; } @@ -3993,47 +7264,51 @@ yylex() decode_num: pushback(c); - tokfix(); if (nondigit) { char tmp[30]; trailing_uc: - sprintf(tmp, "trailing `%c' in number", nondigit); + snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit); yyerror(tmp); } + tokfix(); if (is_float) { double d = strtod(tok(), 0); if (errno == ERANGE) { - rb_warn("Float %s out of range", tok()); + rb_warningS("Float %s out of range", tok()); errno = 0; } - yylval.node = NEW_LIT(rb_float_new(d)); + set_yylval_literal(DBL2NUM(d)); return tFLOAT; } - yylval.node = NEW_LIT(rb_cstr_to_inum(tok(), 10, Qfalse)); + set_yylval_literal(rb_cstr_to_inum(tok(), 10, FALSE)); return tINTEGER; } + case ')': case ']': + paren_nest--; case '}': - case ')': COND_LEXPOP(); CMDARG_LEXPOP(); - lex_state = EXPR_END; + if (c == ')') + lex_state = EXPR_ENDFN; + else + lex_state = EXPR_ENDARG; return c; case ':': c = nextc(); if (c == ':') { - if (lex_state == EXPR_BEG || lex_state == EXPR_MID || - lex_state == EXPR_CLASS || (IS_ARG() && space_seen)) { + if (IS_BEG() || lex_state == EXPR_CLASS || IS_SPCARG(-1)) { lex_state = EXPR_BEG; return tCOLON3; } lex_state = EXPR_DOT; return tCOLON2; } - if (lex_state == EXPR_END || lex_state == EXPR_ENDARG || ISSPACE(c)) { + if (IS_END() || ISSPACE(c)) { pushback(c); + warn_balanced(":", "symbol literal"); lex_state = EXPR_BEG; return ':'; } @@ -4052,22 +7327,20 @@ yylex() return tSYMBEG; case '/': - if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + if (IS_BEG()) { lex_strterm = NEW_STRTERM(str_regexp, '/', 0); return tREGEXP_BEG; } if ((c = nextc()) == '=') { - yylval.id = '/'; + set_yylval_id('/'); lex_state = EXPR_BEG; return tOP_ASGN; } pushback(c); - if (IS_ARG() && 
space_seen) { - if (!ISSPACE(c)) { - arg_ambiguous(); - lex_strterm = NEW_STRTERM(str_regexp, '/', 0); - return tREGEXP_BEG; - } + if (IS_SPCARG(c)) { + arg_ambiguous(); + lex_strterm = NEW_STRTERM(str_regexp, '/', 0); + return tREGEXP_BEG; } switch (lex_state) { case EXPR_FNAME: case EXPR_DOT: @@ -4075,11 +7348,12 @@ yylex() default: lex_state = EXPR_BEG; break; } + warn_balanced("/", "regexp literal"); return '/'; case '^': if ((c = nextc()) == '=') { - yylval.id = '^'; + set_yylval_id('^'); lex_state = EXPR_BEG; return tOP_ASGN; } @@ -4093,45 +7367,41 @@ yylex() return '^'; case ';': - command_start = Qtrue; + lex_state = EXPR_BEG; + command_start = TRUE; + return ';'; + case ',': lex_state = EXPR_BEG; - return c; + return ','; case '~': if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { if ((c = nextc()) != '@') { pushback(c); } + lex_state = EXPR_ARG; } - switch (lex_state) { - case EXPR_FNAME: case EXPR_DOT: - lex_state = EXPR_ARG; break; - default: - lex_state = EXPR_BEG; break; + else { + lex_state = EXPR_BEG; } return '~'; case '(': - command_start = Qtrue; - if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + if (IS_BEG()) { c = tLPAREN; } - else if (space_seen) { - if (lex_state == EXPR_CMDARG) { - c = tLPAREN_ARG; - } - else if (lex_state == EXPR_ARG) { - rb_warn("don't put space before argument parentheses"); - c = '('; - } + else if (IS_SPCARG(-1)) { + c = tLPAREN_ARG; } + paren_nest++; COND_PUSH(0); CMDARG_PUSH(0); lex_state = EXPR_BEG; return c; case '[': + paren_nest++; if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) { lex_state = EXPR_ARG; if ((c = nextc()) == ']') { @@ -4144,7 +7414,7 @@ yylex() pushback(c); return '['; } - else if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + else if (IS_BEG()) { c = tLBRACK; } else if (IS_ARG() && space_seen) { @@ -4156,7 +7426,15 @@ yylex() return c; case '{': - if (IS_ARG() || lex_state == EXPR_END) + if (lpar_beg && lpar_beg == paren_nest) { + lex_state = EXPR_BEG; + lpar_beg = 0; + 
--paren_nest; + COND_PUSH(0); + CMDARG_PUSH(0); + return tLAMBEG; + } + if (IS_ARG() || lex_state == EXPR_END || lex_state == EXPR_ENDFN) c = '{'; /* block (primary) */ else if (lex_state == EXPR_ENDARG) c = tLBRACE_ARG; /* block (expr) */ @@ -4165,37 +7443,41 @@ yylex() COND_PUSH(0); CMDARG_PUSH(0); lex_state = EXPR_BEG; + if (c != tLBRACE) command_start = TRUE; return c; case '\\': c = nextc(); if (c == '\n') { space_seen = 1; +#ifdef RIPPER + ripper_dispatch_scan_event(parser, tSP); +#endif goto retry; /* skip \\n */ } pushback(c); return '\\'; case '%': - if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { + if (IS_BEG()) { int term; int paren; c = nextc(); quotation: - if (!ISALNUM(c)) { + if (c == -1 || !ISALNUM(c)) { term = c; c = 'Q'; } else { term = nextc(); - if (ISALNUM(term) || ismbchar(term)) { + if (rb_enc_isalnum(term, parser->enc) || !parser_isascii()) { yyerror("unknown type of %string"); return 0; } } if (c == -1 || term == -1) { - rb_compile_error("unterminated quoted string meets end of file"); + compile_error(PARSER_ARG "unterminated quoted string meets end of file"); return 0; } paren = term; @@ -4245,11 +7527,11 @@ yylex() } } if ((c = nextc()) == '=') { - yylval.id = '%'; + set_yylval_id('%'); lex_state = EXPR_BEG; return tOP_ASGN; } - if (IS_ARG() && space_seen && !ISSPACE(c)) { + if (IS_SPCARG(c)) { goto quotation; } switch (lex_state) { @@ -4259,17 +7541,17 @@ yylex() lex_state = EXPR_BEG; break; } pushback(c); + warn_balanced("%%", "string literal"); return '%'; case '$': - last_state = lex_state; lex_state = EXPR_END; newtok(); c = nextc(); switch (c) { case '_': /* $_: last read line string */ c = nextc(); - if (is_identchar(c)) { + if (parser_is_identchar()) { tokadd('$'); tokadd('_'); break; @@ -4278,8 +7560,6 @@ yylex() c = '_'; /* fall through */ case '~': /* $~: match-data */ - local_cnt(c); - /* fall through */ case '*': /* $*: argv */ case '$': /* $$: pid */ case '?': /* $?: last status */ @@ -4298,23 +7578,22 @@ yylex() 
tokadd('$'); tokadd(c); tokfix(); - yylval.id = rb_intern(tok()); + set_yylval_name(rb_intern(tok())); return tGVAR; case '-': tokadd('$'); tokadd(c); c = nextc(); - if (is_identchar(c)) { - tokadd(c); + if (parser_is_identchar()) { + if (tokadd_mbchar(c) == -1) return 0; } else { pushback(c); } gvar: tokfix(); - yylval.id = rb_intern(tok()); - /* xxx shouldn't check if valid option variable */ + set_yylval_name(rb_intern(tok())); return tGVAR; case '&': /* $&: last match */ @@ -4326,7 +7605,7 @@ yylex() tokadd(c); goto gvar; } - yylval.node = NEW_BACK_REF(c); + set_yylval_node(NEW_BACK_REF(c)); return tBACK_REF; case '1': case '2': case '3': @@ -4336,15 +7615,15 @@ yylex() do { tokadd(c); c = nextc(); - } while (ISDIGIT(c)); + } while (c != -1 && ISDIGIT(c)); pushback(c); if (last_state == EXPR_FNAME) goto gvar; tokfix(); - yylval.node = NEW_NTH_REF(atoi(tok()+1)); + set_yylval_node(NEW_NTH_REF(atoi(tok()+1))); return tNTH_REF; default: - if (!is_identchar(c)) { + if (!parser_is_identchar()) { pushback(c); return '$'; } @@ -4361,16 +7640,16 @@ yylex() tokadd('@'); c = nextc(); } - if (ISDIGIT(c)) { + if (c != -1 && ISDIGIT(c)) { if (tokidx == 1) { - rb_compile_error("`@%c' is not allowed as an instance variable name", c); + compile_error(PARSER_ARG "`@%c' is not allowed as an instance variable name", c); } else { - rb_compile_error("`@@%c' is not allowed as a class variable name", c); + compile_error(PARSER_ARG "`@@%c' is not allowed as a class variable name", c); } return 0; } - if (!is_identchar(c)) { + if (!parser_is_identchar()) { pushback(c); return '@'; } @@ -4379,15 +7658,21 @@ yylex() case '_': if (was_bol() && whole_match_p("__END__", 7, 0)) { ruby__end__seen = 1; - lex_lastline = 0; + parser->eofp = Qtrue; +#ifndef RIPPER return -1; +#else + lex_goto_eol(parser); + ripper_dispatch_scan_event(parser, k__END__); + return 0; +#endif } newtok(); break; default: - if (!is_identchar(c)) { - rb_compile_error("Invalid char `\\%03o' in expression", c); + if 
(!parser_is_identchar()) { + rb_compile_error(PARSER_ARG "Invalid char `\\x%02X' in expression", c); goto retry; } @@ -4395,23 +7680,23 @@ yylex() break; } + mb = ENC_CODERANGE_7BIT; do { - tokadd(c); - if (ismbchar(c)) { - int i, len = mbclen(c)-1; - - for (i = 0; i < len; i++) { - c = nextc(); - tokadd(c); - } - } + if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN; + if (tokadd_mbchar(c) == -1) return 0; c = nextc(); - } while (is_identchar(c)); - if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) { - tokadd(c); - } - else { + } while (parser_is_identchar()); + switch (tok()[0]) { + case '@': case '$': pushback(c); + break; + default: + if ((c == '!' || c == '?') && !peek('=')) { + tokadd(c); + } + else { + pushback(c); + } } tokfix(); @@ -4456,27 +7741,42 @@ yylex() } } - if (lex_state != EXPR_DOT) { - struct kwtable *kw; + if ((lex_state == EXPR_BEG && !cmd_state) || + IS_ARG()) { + if (peek(':') && !(lex_p + 1 < lex_pend && lex_p[1] == ':')) { + lex_state = EXPR_BEG; + nextc(); + set_yylval_name(TOK_INTERN(!ENC_SINGLE(mb))); + return tLABEL; + } + } + if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) { + const struct kwtable *kw; /* See if it is a reserved word. 
*/ kw = rb_reserved_word(tok(), toklen()); if (kw) { - enum lex_state state = lex_state; + enum lex_state_e state = lex_state; lex_state = kw->state; if (state == EXPR_FNAME) { - yylval.id = rb_intern(kw->name); + set_yylval_name(rb_intern(kw->name)); return kw->id[0]; } - if (kw->id[0] == kDO) { - if (COND_P()) return kDO_COND; + if (kw->id[0] == keyword_do) { + command_start = TRUE; + if (lpar_beg && lpar_beg == paren_nest) { + lpar_beg = 0; + --paren_nest; + return keyword_do_LAMBDA; + } + if (COND_P()) return keyword_do_cond; if (CMDARG_P() && state != EXPR_CMDARG) - return kDO_BLOCK; - if (state == EXPR_ENDARG) - return kDO_BLOCK; - return kDO; + return keyword_do_block; + if (state == EXPR_ENDARG || state == EXPR_BEG) + return keyword_do_block; + return keyword_do; } - if (state == EXPR_BEG) + if (state == EXPR_BEG || state == EXPR_VALUE) return kw->id[0]; else { if (kw->id[0] != kw->id[1]) @@ -4486,11 +7786,9 @@ yylex() } } - if (lex_state == EXPR_BEG || - lex_state == EXPR_MID || + if (IS_BEG() || lex_state == EXPR_DOT || - lex_state == EXPR_ARG || - lex_state == EXPR_CMDARG) { + IS_ARG()) { if (cmd_state) { lex_state = EXPR_CMDARG; } @@ -4498,119 +7796,121 @@ yylex() lex_state = EXPR_ARG; } } + else if (lex_state == EXPR_FNAME) { + lex_state = EXPR_ENDFN; + } else { lex_state = EXPR_END; } } - yylval.id = rb_intern(tok()); - if (is_local_id(yylval.id) && - last_state != EXPR_DOT && - ((dyna_in_block() && rb_dvar_defined(yylval.id)) || local_id(yylval.id))) { - lex_state = EXPR_END; - } + { + ID ident = TOK_INTERN(!ENC_SINGLE(mb)); + + set_yylval_name(ident); + if (last_state != EXPR_DOT && is_local_id(ident) && lvar_defined(ident)) { + lex_state = EXPR_END; + } + } return result; } } -NODE* -rb_node_newnode(type, a0, a1, a2) - enum node_type type; - VALUE a0, a1, a2; +#if YYPURE +static int +yylex(void *lval, void *p) +#else +yylex(void *p) +#endif { - NODE *n = (NODE*)rb_newobj(); + struct parser_params *parser = (struct parser_params*)p; + int t; - 
n->flags |= T_NODE; - nd_set_type(n, type); - nd_set_line(n, ruby_sourceline); - n->nd_file = ruby_sourcefile; +#if YYPURE + parser->parser_yylval = lval; + parser->parser_yylval->val = Qundef; +#endif + t = parser_yylex(parser); +#ifdef RIPPER + if (!NIL_P(parser->delayed)) { + ripper_dispatch_delayed_token(parser, t); + return t; + } + if (t != 0) + ripper_dispatch_scan_event(parser, t); +#endif - n->u1.value = a0; - n->u2.value = a1; - n->u3.value = a2; + return t; +} +#ifndef RIPPER +static NODE* +node_newnode(struct parser_params *parser, enum node_type type, VALUE a0, VALUE a1, VALUE a2) +{ + NODE *n = (rb_node_newnode)(type, a0, a1, a2); + nd_set_line(n, ruby_sourceline); return n; } -static enum node_type -nodetype(node) /* for debug */ - NODE *node; +enum node_type +nodetype(NODE *node) /* for debug */ { return (enum node_type)nd_type(node); } -static int -nodeline(node) - NODE *node; +int +nodeline(NODE *node) { return nd_line(node); } static NODE* -newline_node(node) - NODE *node; +newline_node(NODE *node) { - NODE *nl = 0; if (node) { - int line; - if (nd_type(node) == NODE_NEWLINE) return node; - line = nd_line(node); node = remove_begin(node); - nl = NEW_NEWLINE(node); - nd_set_line(nl, line); - nl->nd_nth = line; + node->flags |= NODE_FL_NEWLINE; } - return nl; + return node; } static void -fixpos(node, orig) - NODE *node, *orig; +fixpos(NODE *node, NODE *orig) { if (!node) return; if (!orig) return; if (orig == (NODE*)1) return; - node->nd_file = orig->nd_file; nd_set_line(node, nd_line(orig)); } static void -parser_warning(node, mesg) - NODE *node; - const char *mesg; +parser_warning(struct parser_params *parser, NODE *node, const char *mesg) { - int line = ruby_sourceline; - ruby_sourceline = nd_line(node); - rb_warning("%s", mesg); - ruby_sourceline = line; + rb_compile_warning(ruby_sourcefile, nd_line(node), "%s", mesg); } +#define parser_warning(node, mesg) parser_warning(parser, node, mesg) static void -parser_warn(node, mesg) - NODE *node; - 
const char *mesg; +parser_warn(struct parser_params *parser, NODE *node, const char *mesg) { - int line = ruby_sourceline; - ruby_sourceline = nd_line(node); - rb_warn("%s", mesg); - ruby_sourceline = line; + rb_compile_warn(ruby_sourcefile, nd_line(node), "%s", mesg); } +#define parser_warn(node, mesg) parser_warn(parser, node, mesg) static NODE* -block_append(head, tail) - NODE *head, *tail; +block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) { - NODE *end, *h = head; + NODE *end, *h = head, *nd; if (tail == 0) return head; - again: if (h == 0) return tail; switch (nd_type(h)) { - case NODE_NEWLINE: - h = h->nd_next; - goto again; case NODE_LIT: case NODE_STR: + case NODE_SELF: + case NODE_TRUE: + case NODE_FALSE: + case NODE_NIL: parser_warning(h, "unused literal ignored"); return tail; default: @@ -4624,25 +7924,20 @@ block_append(head, tail) break; } - if (RTEST(ruby_verbose)) { - NODE *nd = end->nd_head; - newline: - switch (nd_type(nd)) { - case NODE_RETURN: - case NODE_BREAK: - case NODE_NEXT: - case NODE_REDO: - case NODE_RETRY: + nd = end->nd_head; + switch (nd_type(nd)) { + case NODE_RETURN: + case NODE_BREAK: + case NODE_NEXT: + case NODE_REDO: + case NODE_RETRY: + if (RTEST(ruby_verbose)) { parser_warning(nd, "statement not reached"); - break; - - case NODE_NEWLINE: - nd = nd->nd_next; - goto newline; - - default: - break; } + break; + + default: + break; } if (nd_type(tail) != NODE_BLOCK) { @@ -4656,8 +7951,7 @@ block_append(head, tail) /* append item to the list */ static NODE* -list_append(list, item) - NODE *list, *item; +list_append_gen(struct parser_params *parser, NODE *list, NODE *item) { NODE *last; @@ -4677,8 +7971,7 @@ list_append(list, item) /* concat two lists */ static NODE* -list_concat(head, tail) - NODE *head, *tail; +list_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) { NODE *last; @@ -4701,10 +7994,25 @@ list_concat(head, tail) return head; } +static int +literal_concat0(struct parser_params 
*parser, VALUE head, VALUE tail) +{ + if (NIL_P(tail)) return 1; + if (!rb_enc_compatible(head, tail)) { + compile_error(PARSER_ARG "string literal encodings differ (%s / %s)", + rb_enc_name(rb_enc_get(head)), + rb_enc_name(rb_enc_get(tail))); + rb_str_resize(head, 0); + rb_str_resize(tail, 0); + return 0; + } + rb_str_buf_append(head, tail); + return 1; +} + /* concat two string literals */ static NODE * -literal_concat(head, tail) - NODE *head, *tail; +literal_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) { enum node_type htype; @@ -4713,13 +8021,18 @@ literal_concat(head, tail) htype = nd_type(head); if (htype == NODE_EVSTR) { - NODE *node = NEW_DSTR(rb_str_new(0, 0)); + NODE *node = NEW_DSTR(Qnil); head = list_append(node, head); } switch (nd_type(tail)) { case NODE_STR: if (htype == NODE_STR) { - rb_str_concat(head->nd_lit, tail->nd_lit); + if (!literal_concat0(parser, head->nd_lit, tail->nd_lit)) { + error: + rb_gc_force_recycle((VALUE)head); + rb_gc_force_recycle((VALUE)tail); + return 0; + } rb_gc_force_recycle((VALUE)tail); } else { @@ -4729,11 +8042,18 @@ literal_concat(head, tail) case NODE_DSTR: if (htype == NODE_STR) { - rb_str_concat(head->nd_lit, tail->nd_lit); + if (!literal_concat0(parser, head->nd_lit, tail->nd_lit)) + goto error; tail->nd_lit = head->nd_lit; rb_gc_force_recycle((VALUE)head); head = tail; } + else if (NIL_P(tail->nd_lit)) { + head->nd_alen += tail->nd_alen - 1; + head->nd_next->nd_end->nd_next = tail->nd_next; + head->nd_next->nd_end = tail->nd_next->nd_end; + rb_gc_force_recycle((VALUE)tail); + } else { nd_set_type(tail, NODE_ARRAY); tail->nd_head = NEW_STR(tail->nd_lit); @@ -4753,59 +8073,46 @@ literal_concat(head, tail) } static NODE * -evstr2dstr(node) - NODE *node; +evstr2dstr_gen(struct parser_params *parser, NODE *node) { if (nd_type(node) == NODE_EVSTR) { - node = list_append(NEW_DSTR(rb_str_new(0, 0)), node); + node = list_append(NEW_DSTR(Qnil), node); } return node; } static NODE * -new_evstr(node) - 
NODE *node; +new_evstr_gen(struct parser_params *parser, NODE *node) { NODE *head = node; - again: if (node) { switch (nd_type(node)) { case NODE_STR: case NODE_DSTR: case NODE_EVSTR: return node; - case NODE_NEWLINE: - node = node->nd_next; - goto again; } } return NEW_EVSTR(head); } static NODE * -call_op(recv, id, narg, arg1) - NODE *recv; - ID id; - int narg; - NODE *arg1; +call_bin_op_gen(struct parser_params *parser, NODE *recv, ID id, NODE *arg1) { value_expr(recv); - if (narg == 1) { - value_expr(arg1); - arg1 = NEW_LIST(arg1); - } - else { - arg1 = 0; - } - return NEW_CALL(recv, id, arg1); + value_expr(arg1); + return NEW_CALL(recv, id, NEW_LIST(arg1)); } -static NODE* -match_gen(node1, node2) - NODE *node1; - NODE *node2; +static NODE * +call_uni_op_gen(struct parser_params *parser, NODE *recv, ID id) { - local_cnt('~'); + value_expr(recv); + return NEW_CALL(recv, id, 0); +} +static NODE* +match_op_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ value_expr(node1); value_expr(node2); if (node1) { @@ -4838,36 +8145,34 @@ match_gen(node1, node2) } static NODE* -gettable(id) - ID id; +gettable_gen(struct parser_params *parser, ID id) { - if (id == kSELF) { + if (id == keyword_self) { return NEW_SELF(); } - else if (id == kNIL) { + else if (id == keyword_nil) { return NEW_NIL(); } - else if (id == kTRUE) { + else if (id == keyword_true) { return NEW_TRUE(); } - else if (id == kFALSE) { + else if (id == keyword_false) { return NEW_FALSE(); } - else if (id == k__FILE__) { - return NEW_STR(rb_str_new2(ruby_sourcefile)); + else if (id == keyword__FILE__) { + return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile), + rb_filesystem_encoding())); } - else if (id == k__LINE__) { + else if (id == keyword__LINE__) { return NEW_LIT(INT2FIX(ruby_sourceline)); } + else if (id == keyword__ENCODING__) { + return NEW_LIT(rb_enc_from_encoding(parser->enc)); + } else if (is_local_id(id)) { - if (dyna_in_block() && rb_dvar_defined(id)) 
return NEW_DVAR(id); + if (dyna_in_block() && dvar_defined(id)) return NEW_DVAR(id); if (local_id(id)) return NEW_LVAR(id); /* method call without arguments */ -#if 0 - /* Rite will warn this */ - rb_warn("ambiguous identifier; %s() or self.%s is better for method call", - rb_id2name(id), rb_id2name(id)); -#endif return NEW_VCALL(id); } else if (is_global_id(id)) { @@ -4882,86 +8187,149 @@ gettable(id) else if (is_class_id(id)) { return NEW_CVAR(id); } - rb_compile_error("identifier %s is not valid", rb_id2name(id)); + compile_error(PARSER_ARG "identifier %s is not valid to get", rb_id2name(id)); return 0; } +#endif /* !RIPPER */ -static VALUE dyna_var_lookup _((ID id)); - +#ifdef RIPPER +static VALUE +assignable_gen(struct parser_params *parser, VALUE lhs) +#else static NODE* -assignable(id, val) - ID id; - NODE *val; +assignable_gen(struct parser_params *parser, ID id, NODE *val) +#endif { - value_expr(val); - if (id == kSELF) { +#ifdef RIPPER + ID id = get_id(lhs); +# define assignable_result(x) get_value(lhs) +# define parser_yyerror(parser, x) dispatch1(assign_error, lhs) +#else +# define assignable_result(x) x +#endif + if (!id) return assignable_result(0); + if (id == keyword_self) { yyerror("Can't change the value of self"); } - else if (id == kNIL) { + else if (id == keyword_nil) { yyerror("Can't assign to nil"); } - else if (id == kTRUE) { + else if (id == keyword_true) { yyerror("Can't assign to true"); } - else if (id == kFALSE) { + else if (id == keyword_false) { yyerror("Can't assign to false"); } - else if (id == k__FILE__) { + else if (id == keyword__FILE__) { yyerror("Can't assign to __FILE__"); } - else if (id == k__LINE__) { + else if (id == keyword__LINE__) { yyerror("Can't assign to __LINE__"); } + else if (id == keyword__ENCODING__) { + yyerror("Can't assign to __ENCODING__"); + } else if (is_local_id(id)) { - if (rb_dvar_curr(id)) { - return NEW_DASGN_CURR(id, val); - } - else if (dyna_var_lookup(id)) { - return NEW_DASGN(id, val); - } - else 
if (local_id(id) || !dyna_in_block()) { - return NEW_LASGN(id, val); + if (dyna_in_block()) { + if (dvar_curr(id)) { + return assignable_result(NEW_DASGN_CURR(id, val)); + } + else if (dvar_defined(id)) { + return assignable_result(NEW_DASGN(id, val)); + } + else if (local_id(id)) { + return assignable_result(NEW_LASGN(id, val)); + } + else { + dyna_var(id); + return assignable_result(NEW_DASGN_CURR(id, val)); + } } - else{ - rb_dvar_push(id, Qnil); - return NEW_DASGN_CURR(id, val); + else { + if (!local_id(id)) { + local_var(id); + } + return assignable_result(NEW_LASGN(id, val)); } } else if (is_global_id(id)) { - return NEW_GASGN(id, val); + return assignable_result(NEW_GASGN(id, val)); } else if (is_instance_id(id)) { - return NEW_IASGN(id, val); + return assignable_result(NEW_IASGN(id, val)); } else if (is_const_id(id)) { - if (in_def || in_single) - yyerror("dynamic constant assignment"); - return NEW_CDECL(id, val, 0); + if (!in_def && !in_single) + return assignable_result(NEW_CDECL(id, val, 0)); + yyerror("dynamic constant assignment"); } else if (is_class_id(id)) { - if (in_def || in_single) return NEW_CVASGN(id, val); - return NEW_CVDECL(id, val); + return assignable_result(NEW_CVASGN(id, val)); } else { - rb_compile_error("identifier %s is not valid", rb_id2name(id)); + compile_error(PARSER_ARG "identifier %s is not valid to set", rb_id2name(id)); } - return 0; + return assignable_result(0); +#undef assignable_result +#undef parser_yyerror +} + +static ID +shadowing_lvar_gen(struct parser_params *parser, ID name) +{ + ID uscore; + + CONST_ID(uscore, "_"); + if (uscore == name) return name; + if (dyna_in_block()) { + if (dvar_curr(name)) { + yyerror("duplicated argument name"); + } + else if (dvar_defined(name) || local_id(name)) { + rb_warningS("shadowing outer local variable - %s", rb_id2name(name)); + vtable_add(lvtbl->vars, name); + } + } + else { + if (local_id(name)) { + yyerror("duplicated argument name"); + } + } + return name; +} + +static void 
+new_bv_gen(struct parser_params *parser, ID name) +{ + if (!name) return; + if (!is_local_id(name)) { + compile_error(PARSER_ARG "invalid local variable - %s", + rb_id2name(name)); + return; + } + shadowing_lvar(name); + dyna_var(name); } +#ifndef RIPPER static NODE * -aryset(recv, idx) - NODE *recv, *idx; +aryset_gen(struct parser_params *parser, NODE *recv, NODE *idx) { if (recv && nd_type(recv) == NODE_SELF) recv = (NODE *)1; - else - value_expr(recv); return NEW_ATTRASGN(recv, tASET, idx); } +static void +block_dup_check_gen(struct parser_params *parser, NODE *node1, NODE *node2) +{ + if (node2 && node1 && nd_type(node1) == NODE_BLOCK_PASS) { + compile_error(PARSER_ARG "both block arg and actual block given"); + } +} + ID -rb_id_attrset(id) - ID id; +rb_id_attrset(ID id) { id &= ~ID_SCOPE_MASK; id |= ID_ATTRSET; @@ -4969,77 +8337,98 @@ rb_id_attrset(id) } static NODE * -attrset(recv, id) - NODE *recv; - ID id; +attrset_gen(struct parser_params *parser, NODE *recv, ID id) { if (recv && nd_type(recv) == NODE_SELF) recv = (NODE *)1; - else - value_expr(recv); return NEW_ATTRASGN(recv, rb_id_attrset(id), 0); } static void -rb_backref_error(node) - NODE *node; +rb_backref_error_gen(struct parser_params *parser, NODE *node) { switch (nd_type(node)) { case NODE_NTH_REF: - rb_compile_error("Can't set variable $%d", node->nd_nth); + compile_error(PARSER_ARG "Can't set variable $%ld", node->nd_nth); break; case NODE_BACK_REF: - rb_compile_error("Can't set variable $%c", (int)node->nd_nth); + compile_error(PARSER_ARG "Can't set variable $%c", (int)node->nd_nth); break; } } static NODE * -arg_concat(node1, node2) - NODE *node1; - NODE *node2; +arg_concat_gen(struct parser_params *parser, NODE *node1, NODE *node2) { if (!node2) return node1; + switch (nd_type(node1)) { + case NODE_BLOCK_PASS: + if (node1->nd_head) + node1->nd_head = arg_concat(node1->nd_head, node2); + else + node1->nd_head = NEW_LIST(node2); + return node1; + case NODE_ARGSPUSH: + if (nd_type(node2) != 
NODE_ARRAY) break; + node1->nd_body = list_concat(NEW_LIST(node1->nd_body), node2); + nd_set_type(node1, NODE_ARGSCAT); + return node1; + case NODE_ARGSCAT: + if (nd_type(node2) != NODE_ARRAY || + nd_type(node1->nd_body) != NODE_ARRAY) break; + node1->nd_body = list_concat(node1->nd_body, node2); + return node1; + } return NEW_ARGSCAT(node1, node2); } static NODE * -arg_add(node1, node2) - NODE *node1; - NODE *node2; +arg_append_gen(struct parser_params *parser, NODE *node1, NODE *node2) { if (!node1) return NEW_LIST(node2); - if (nd_type(node1) == NODE_ARRAY) { + switch (nd_type(node1)) { + case NODE_ARRAY: return list_append(node1, node2); - } - else { - return NEW_ARGSPUSH(node1, node2); - } + case NODE_BLOCK_PASS: + node1->nd_head = arg_append(node1->nd_head, node2); + return node1; + case NODE_ARGSPUSH: + node1->nd_body = list_append(NEW_LIST(node1->nd_body), node2); + nd_set_type(node1, NODE_ARGSCAT); + return node1; + } + return NEW_ARGSPUSH(node1, node2); } -static NODE* -node_assign(lhs, rhs) - NODE *lhs, *rhs; +static NODE * +splat_array(NODE* node) +{ + if (nd_type(node) == NODE_SPLAT) node = node->nd_head; + if (nd_type(node) == NODE_ARRAY) return node; + return 0; +} + +static NODE * +node_assign_gen(struct parser_params *parser, NODE *lhs, NODE *rhs) { if (!lhs) return 0; - value_expr(rhs); switch (nd_type(lhs)) { case NODE_GASGN: case NODE_IASGN: + case NODE_IASGN2: case NODE_LASGN: case NODE_DASGN: case NODE_DASGN_CURR: case NODE_MASGN: case NODE_CDECL: - case NODE_CVDECL: case NODE_CVASGN: lhs->nd_value = rhs; break; case NODE_ATTRASGN: case NODE_CALL: - lhs->nd_args = arg_add(lhs->nd_args, rhs); + lhs->nd_args = arg_append(lhs->nd_args, rhs); break; default: @@ -5051,17 +8440,19 @@ node_assign(lhs, rhs) } static int -value_expr0(node) - NODE *node; +value_expr_gen(struct parser_params *parser, NODE *node) { int cond = 0; + if (!node) { + rb_warning0("empty expression"); + } while (node) { switch (nd_type(node)) { case NODE_DEFN: case NODE_DEFS: 
parser_warning(node, "void value expression"); - return Qfalse; + return FALSE; case NODE_RETURN: case NODE_BREAK: @@ -5070,7 +8461,7 @@ value_expr0(node) case NODE_RETRY: if (!cond) yyerror("void value expression"); /* or "control never reach"? */ - return Qfalse; + return FALSE; case NODE_BLOCK: while (node->nd_next) { @@ -5084,7 +8475,15 @@ value_expr0(node) break; case NODE_IF: - if (!value_expr(node->nd_body)) return Qfalse; + if (!node->nd_body) { + node = node->nd_else; + break; + } + else if (!node->nd_else) { + node = node->nd_body; + break; + } + if (!value_expr(node->nd_body)) return FALSE; node = node->nd_else; break; @@ -5094,33 +8493,23 @@ value_expr0(node) node = node->nd_2nd; break; - case NODE_NEWLINE: - node = node->nd_next; - break; - default: - return Qtrue; + return TRUE; } } - return Qtrue; + return TRUE; } static void -void_expr0(node) - NODE *node; +void_expr_gen(struct parser_params *parser, NODE *node) { - char *useless = 0; + const char *useless = 0; if (!RTEST(ruby_verbose)) return; - again: if (!node) return; switch (nd_type(node)) { - case NODE_NEWLINE: - node = node->nd_next; - goto again; - case NODE_CALL: switch (node->nd_mid) { case '+': @@ -5156,7 +8545,6 @@ void_expr0(node) useless = "a variable"; break; case NODE_CONST: - case NODE_CREF: useless = "a constant"; break; case NODE_LIT: @@ -5197,14 +8585,13 @@ void_expr0(node) int line = ruby_sourceline; ruby_sourceline = nd_line(node); - rb_warn("useless use of %s in void context", useless); + rb_warnS("useless use of %s in void context", useless); ruby_sourceline = line; } } static void -void_stmts(node) - NODE *node; +void_stmts_gen(struct parser_params *parser, NODE *node) { if (!RTEST(ruby_verbose)) return; if (!node) return; @@ -5218,27 +8605,74 @@ void_stmts(node) } static NODE * -remove_begin(node) - NODE *node; +remove_begin(NODE *node) +{ + NODE **n = &node, *n1 = node; + while (n1 && nd_type(n1) == NODE_BEGIN && n1->nd_body) { + *n = n1 = n1->nd_body; + } + return node; 
+} + +static void +reduce_nodes_gen(struct parser_params *parser, NODE **body) { - NODE **n = &node; - while (*n) { - switch (nd_type(*n)) { - case NODE_NEWLINE: - n = &(*n)->nd_next; + NODE *node = *body; + + if (!node) { + *body = NEW_NIL(); + return; + } +#define subnodes(n1, n2) \ + ((!node->n1) ? (node->n2 ? (body = &node->n2, 1) : 0) : \ + (!node->n2) ? (body = &node->n1, 1) : \ + (reduce_nodes(&node->n1), body = &node->n2, 1)) + + while (node) { + int newline = (int)(node->flags & NODE_FL_NEWLINE); + switch (nd_type(node)) { + end: + case NODE_NIL: + *body = 0; + return; + case NODE_RETURN: + *body = node = node->nd_stts; + if (newline && node) node->flags |= NODE_FL_NEWLINE; continue; case NODE_BEGIN: - *n = (*n)->nd_body; + *body = node = node->nd_body; + if (newline && node) node->flags |= NODE_FL_NEWLINE; + continue; + case NODE_BLOCK: + body = &node->nd_end->nd_head; + break; + case NODE_IF: + if (subnodes(nd_body, nd_else)) break; + return; + case NODE_CASE: + body = &node->nd_body; + break; + case NODE_WHEN: + if (!subnodes(nd_body, nd_next)) goto end; + break; + case NODE_ENSURE: + if (!subnodes(nd_head, nd_resq)) goto end; + break; + case NODE_RESCUE: + if (!subnodes(nd_head, nd_resq)) goto end; + break; default: - return node; + return; } + node = *body; + if (newline && node) node->flags |= NODE_FL_NEWLINE; } - return node; + +#undef subnodes } static int -assign_in_cond(node) - NODE *node; +assign_in_cond(struct parser_params *parser, NODE *node) { switch (nd_type(node)) { case NODE_MASGN: @@ -5247,15 +8681,16 @@ assign_in_cond(node) case NODE_LASGN: case NODE_DASGN: + case NODE_DASGN_CURR: case NODE_GASGN: case NODE_IASGN: break; - case NODE_NEWLINE: default: return 0; } + if (!node->nd_value) return 1; switch (nd_type(node->nd_value)) { case NODE_LIT: case NODE_STR: @@ -5274,66 +8709,70 @@ assign_in_cond(node) default: break; } -#if 0 - if (assign_in_cond(node->nd_value) == 0) { - parser_warning(node->nd_value, "assignment in condition"); - } 
-#endif return 1; } -static int -e_option_supplied() +static void +warn_unless_e_option(struct parser_params *parser, NODE *node, const char *str) { - if (strcmp(ruby_sourcefile, "-e") == 0) - return Qtrue; - return Qfalse; + if (!e_option_supplied(parser)) parser_warn(node, str); } static void -warn_unless_e_option(node, str) - NODE *node; - const char *str; +warning_unless_e_option(struct parser_params *parser, NODE *node, const char *str) { - if (!e_option_supplied()) parser_warn(node, str); + if (!e_option_supplied(parser)) parser_warning(node, str); } static void -warning_unless_e_option(node, str) - NODE *node; - const char *str; +fixup_nodes(NODE **rootnode) { - if (!e_option_supplied()) parser_warning(node, str); + NODE *node, *next, *head; + + for (node = *rootnode; node; node = next) { + enum node_type type; + VALUE val; + + next = node->nd_next; + head = node->nd_head; + rb_gc_force_recycle((VALUE)node); + *rootnode = next; + switch (type = nd_type(head)) { + case NODE_DOT2: + case NODE_DOT3: + val = rb_range_new(head->nd_beg->nd_lit, head->nd_end->nd_lit, + type == NODE_DOT3); + rb_gc_force_recycle((VALUE)head->nd_beg); + rb_gc_force_recycle((VALUE)head->nd_end); + nd_set_type(head, NODE_LIT); + head->nd_lit = val; + break; + default: + break; + } + } } -static NODE *cond0(); +static NODE *cond0(struct parser_params*,NODE*); static NODE* -range_op(node) - NODE *node; +range_op(struct parser_params *parser, NODE *node) { enum node_type type; - if (!e_option_supplied()) return node; if (node == 0) return 0; - value_expr(node); - node = cond0(node); type = nd_type(node); - if (type == NODE_NEWLINE) { - node = node->nd_next; - type = nd_type(node); - } + value_expr(node); if (type == NODE_LIT && FIXNUM_P(node->nd_lit)) { - warn_unless_e_option(node, "integer literal in conditional range"); - return call_op(node,tEQ,1,NEW_GVAR(rb_intern("$."))); + warn_unless_e_option(parser, node, "integer literal in conditional range"); + return NEW_CALL(node, tEQ, 
NEW_LIST(NEW_GVAR(rb_intern("$.")))); } - return node; + return cond0(parser, node); } static int -literal_node(node) - NODE *node; +literal_node(NODE *node) { if (!node) return 1; /* same as NODE_NIL */ switch (nd_type(node)) { @@ -5354,40 +8793,36 @@ literal_node(node) } static NODE* -cond0(node) - NODE *node; +cond0(struct parser_params *parser, NODE *node) { if (node == 0) return 0; - assign_in_cond(node); + assign_in_cond(parser, node); switch (nd_type(node)) { case NODE_DSTR: case NODE_EVSTR: case NODE_STR: - rb_warn("string literal in condition"); + rb_warn0("string literal in condition"); break; case NODE_DREGX: case NODE_DREGX_ONCE: - warning_unless_e_option(node, "regex literal in condition"); - local_cnt('_'); - local_cnt('~'); + warning_unless_e_option(parser, node, "regex literal in condition"); return NEW_MATCH2(node, NEW_GVAR(rb_intern("$_"))); case NODE_AND: case NODE_OR: - node->nd_1st = cond0(node->nd_1st); - node->nd_2nd = cond0(node->nd_2nd); + node->nd_1st = cond0(parser, node->nd_1st); + node->nd_2nd = cond0(parser, node->nd_2nd); break; case NODE_DOT2: case NODE_DOT3: - node->nd_beg = range_op(node->nd_beg); - node->nd_end = range_op(node->nd_end); + node->nd_beg = range_op(parser, node->nd_beg); + node->nd_end = range_op(parser, node->nd_end); if (nd_type(node) == NODE_DOT2) nd_set_type(node,NODE_FLIP2); else if (nd_type(node) == NODE_DOT3) nd_set_type(node, NODE_FLIP3); - node->nd_cnt = local_append(internal_id()); - if (!e_option_supplied()) { + if (!e_option_supplied(parser)) { int b = literal_node(node->nd_beg); int e = literal_node(node->nd_end); if ((b == 1 && e == 1) || (b + e >= 2 && RTEST(ruby_verbose))) { @@ -5402,10 +8837,8 @@ cond0(node) case NODE_LIT: if (TYPE(node->nd_lit) == T_REGEXP) { - warn_unless_e_option(node, "regex literal in condition"); + warn_unless_e_option(parser, node, "regex literal in condition"); nd_set_type(node, NODE_MATCH); - local_cnt('_'); - local_cnt('~'); } else { parser_warning(node, "literal in 
condition"); @@ -5417,27 +8850,19 @@ cond0(node) } static NODE* -cond(node) - NODE *node; +cond_gen(struct parser_params *parser, NODE *node) { if (node == 0) return 0; - value_expr(node); - if (nd_type(node) == NODE_NEWLINE){ - node->nd_next = cond0(node->nd_next); - return node; - } - return cond0(node); + return cond0(parser, node); } static NODE* -logop(type, left, right) - enum node_type type; - NODE *left, *right; +logop_gen(struct parser_params *parser, enum node_type type, NODE *left, NODE *right) { value_expr(left); - if (left && nd_type(left) == type) { + if (left && (enum node_type)nd_type(left) == type) { NODE *node = left, *second; - while ((second = node->nd_2nd) != 0 && nd_type(second) == type) { + while ((second = node->nd_2nd) != 0 && (enum node_type)nd_type(second) == type) { node = second; } node->nd_2nd = NEW_NODE(type, second, right, 0); @@ -5446,66 +8871,41 @@ logop(type, left, right) return NEW_NODE(type, left, right, 0); } -static int -cond_negative(nodep) - NODE **nodep; -{ - NODE *c = *nodep; - - if (!c) return 0; - switch (nd_type(c)) { - case NODE_NOT: - *nodep = c->nd_body; - return 1; - case NODE_NEWLINE: - if (c->nd_next && nd_type(c->nd_next) == NODE_NOT) { - c->nd_next = c->nd_next->nd_body; - return 1; - } - } - return 0; -} - static void -no_blockarg(node) - NODE *node; +no_blockarg(struct parser_params *parser, NODE *node) { if (node && nd_type(node) == NODE_BLOCK_PASS) { - rb_compile_error("block argument should not be given"); + compile_error(PARSER_ARG "block argument should not be given"); } } static NODE * -ret_args(node) - NODE *node; +ret_args_gen(struct parser_params *parser, NODE *node) { if (node) { - no_blockarg(node); - if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) { - node = node->nd_head; - } - if (node && nd_type(node) == NODE_SPLAT) { - node = NEW_SVALUE(node); + no_blockarg(parser, node); + if (nd_type(node) == NODE_ARRAY) { + if (node->nd_next == 0) { + node = node->nd_head; + } + else { + 
nd_set_type(node, NODE_VALUES); + } } } return node; } static NODE * -new_yield(node) - NODE *node; +new_yield_gen(struct parser_params *parser, NODE *node) { long state = Qtrue; if (node) { - no_blockarg(node); - if (nd_type(node) == NODE_ARRAY && node->nd_next == 0) { - node = node->nd_head; - state = Qfalse; - } - if (node && nd_type(node) == NODE_SPLAT) { - state = Qtrue; - } + no_blockarg(parser, node); + if (node && nd_type(node) == NODE_SPLAT) { + state = Qtrue; + } } else { state = Qfalse; @@ -5514,8 +8914,7 @@ new_yield(node) } static NODE* -negate_lit(node) - NODE *node; +negate_lit(NODE *node) { switch (TYPE(node->nd_lit)) { case T_FIXNUM: @@ -5525,7 +8924,7 @@ negate_lit(node) node->nd_lit = rb_funcall(node->nd_lit,tUMINUS,0,0); break; case T_FLOAT: - RFLOAT(node->nd_lit)->value = -RFLOAT(node->nd_lit)->value; + RFLOAT(node->nd_lit)->float_value = -RFLOAT_VALUE(node->nd_lit); break; default: break; @@ -5534,9 +8933,7 @@ negate_lit(node) } static NODE * -arg_blk_pass(node1, node2) - NODE *node1; - NODE *node2; +arg_blk_pass(NODE *node1, NODE *node2) { if (node2) { node2->nd_head = node1; @@ -5546,403 +8943,546 @@ arg_blk_pass(node1, node2) } static NODE* -arg_prepend(node1, node2) - NODE *node1, *node2; +new_args_gen(struct parser_params *parser, NODE *m, NODE *o, ID r, NODE *p, ID b) { - switch (nd_type(node2)) { - case NODE_ARRAY: - return list_concat(NEW_LIST(node1), node2); - - case NODE_SPLAT: - return arg_concat(node1, node2->nd_head); - - case NODE_BLOCK_PASS: - node2->nd_body = arg_prepend(node1, node2->nd_body); - return node2; + int saved_line = ruby_sourceline; + NODE *node; + NODE *i1, *i2 = 0; - default: - rb_bug("unknown nodetype(%d) for arg_prepend", nd_type(node2)); - } - return 0; /* not reached */ -} + node = NEW_ARGS(m ? m->nd_plen : 0, o); + i1 = m ? 
m->nd_next : 0; + node->nd_next = NEW_ARGS_AUX(r, b); -static NODE* -new_call(r,m,a) - NODE *r; - ID m; - NODE *a; -{ - if (a && nd_type(a) == NODE_BLOCK_PASS) { - a->nd_iter = NEW_CALL(r,m,a->nd_head); - return a; + if (p) { + i2 = p->nd_next; + node->nd_next->nd_next = NEW_ARGS_AUX(p->nd_pid, p->nd_plen); } - return NEW_CALL(r,m,a); -} - -static NODE* -new_fcall(m,a) - ID m; - NODE *a; -{ - if (a && nd_type(a) == NODE_BLOCK_PASS) { - a->nd_iter = NEW_FCALL(m,a->nd_head); - return a; + else if (i1) { + node->nd_next->nd_next = NEW_ARGS_AUX(0, 0); } - return NEW_FCALL(m,a); -} - -static NODE* -new_super(a) - NODE *a; -{ - if (a && nd_type(a) == NODE_BLOCK_PASS) { - a->nd_iter = NEW_SUPER(a->nd_head); - return a; + if (i1 || i2) { + node->nd_next->nd_next->nd_next = NEW_NODE(NODE_AND, i1, i2, 0); } - return NEW_SUPER(a); + ruby_sourceline = saved_line; + return node; } - -static struct local_vars { - ID *tbl; - int nofree; - int cnt; - int dlev; - struct RVarmap* dyna_vars; - struct local_vars *prev; -} *lvtbl; +#endif /* !RIPPER */ static void -local_push(top) - int top; +local_push_gen(struct parser_params *parser, int inherit_dvars) { struct local_vars *local; local = ALLOC(struct local_vars); local->prev = lvtbl; - local->nofree = 0; - local->cnt = 0; - local->tbl = 0; - local->dlev = 0; - local->dyna_vars = ruby_dyna_vars; + local->args = vtable_alloc(0); + local->vars = vtable_alloc(inherit_dvars ? DVARS_INHERIT : DVARS_TOPSCOPE); lvtbl = local; - if (!top) { - /* preserve reference for GC, but link should be cut. 
*/ - rb_dvar_push(0, (VALUE)ruby_dyna_vars); - ruby_dyna_vars->next = 0; - } } static void -local_pop() +local_pop_gen(struct parser_params *parser) { struct local_vars *local = lvtbl->prev; - - if (lvtbl->tbl) { - if (!lvtbl->nofree) xfree(lvtbl->tbl); - else lvtbl->tbl[0] = lvtbl->cnt; - } - ruby_dyna_vars = lvtbl->dyna_vars; + vtable_free(lvtbl->args); + vtable_free(lvtbl->vars); xfree(lvtbl); lvtbl = local; } +#ifndef RIPPER static ID* -local_tbl() +vtable_tblcpy(ID *buf, const struct vtable *src) { - lvtbl->nofree = 1; - return lvtbl->tbl; -} + int i, cnt = vtable_size(src); -static int -local_append(id) - ID id; -{ - if (lvtbl->tbl == 0) { - lvtbl->tbl = ALLOC_N(ID, 4); - lvtbl->tbl[0] = 0; - lvtbl->tbl[1] = '_'; - lvtbl->tbl[2] = '~'; - lvtbl->cnt = 2; - if (id == '_') return 0; - if (id == '~') return 1; - } - else { - REALLOC_N(lvtbl->tbl, ID, lvtbl->cnt+2); + if (cnt > 0) { + buf[0] = cnt; + for (i = 0; i < cnt; i++) { + buf[i] = src->tbl[i]; + } + return buf; } + return 0; +} - lvtbl->tbl[lvtbl->cnt+1] = id; - return lvtbl->cnt++; +static ID* +local_tbl_gen(struct parser_params *parser) +{ + int cnt = vtable_size(lvtbl->args) + vtable_size(lvtbl->vars); + ID *buf; + + if (cnt <= 0) return 0; + buf = ALLOC_N(ID, cnt + 1); + vtable_tblcpy(buf+1, lvtbl->args); + vtable_tblcpy(buf+vtable_size(lvtbl->args)+1, lvtbl->vars); + buf[0] = cnt; + return buf; } +#endif static int -local_cnt(id) - ID id; +arg_var_gen(struct parser_params *parser, ID id) { - int cnt, max; - - if (id == 0) return lvtbl->cnt; + vtable_add(lvtbl->args, id); + return vtable_size(lvtbl->args) - 1; +} - for (cnt=1, max=lvtbl->cnt+1; cnt<max;cnt++) { - if (lvtbl->tbl[cnt] == id) return cnt-1; - } - return local_append(id); +static int +local_var_gen(struct parser_params *parser, ID id) +{ + vtable_add(lvtbl->vars, id); + return vtable_size(lvtbl->vars) - 1; } static int -local_id(id) - ID id; +local_id_gen(struct parser_params *parser, ID id) { - int i, max; + struct vtable *vars, *args; - 
if (lvtbl == 0) return Qfalse; - for (i=3, max=lvtbl->cnt+1; i<max; i++) { - if (lvtbl->tbl[i] == id) return Qtrue; + vars = lvtbl->vars; + args = lvtbl->args; + + while (vars && POINTER_P(vars->prev)) { + vars = vars->prev; + args = args->prev; } - return Qfalse; -} -static void -top_local_init() -{ - local_push(1); - lvtbl->cnt = ruby_scope->local_tbl?ruby_scope->local_tbl[0]:0; - if (lvtbl->cnt > 0) { - lvtbl->tbl = ALLOC_N(ID, lvtbl->cnt+3); - MEMCPY(lvtbl->tbl, ruby_scope->local_tbl, ID, lvtbl->cnt+1); + if (vars && vars->prev == DVARS_INHERIT) { + return rb_local_defined(id); } else { - lvtbl->tbl = 0; + return (vtable_included(args, id) || + vtable_included(vars, id)); } - if (ruby_dyna_vars) - lvtbl->dlev = 1; - else - lvtbl->dlev = 0; } -static void -top_local_setup() +static const struct vtable * +dyna_push_gen(struct parser_params *parser) { - int len = lvtbl->cnt; - int i; + lvtbl->args = vtable_alloc(lvtbl->args); + lvtbl->vars = vtable_alloc(lvtbl->vars); + return lvtbl->args; +} - if (len > 0) { - i = ruby_scope->local_tbl?ruby_scope->local_tbl[0]:0; +static void +dyna_pop_1(struct parser_params *parser) +{ + struct vtable *tmp; + + tmp = lvtbl->args; + lvtbl->args = lvtbl->args->prev; + vtable_free(tmp); + tmp = lvtbl->vars; + lvtbl->vars = lvtbl->vars->prev; + vtable_free(tmp); +} - if (i < len) { - if (i == 0 || (ruby_scope->flags & SCOPE_MALLOC) == 0) { - VALUE *vars = ALLOC_N(VALUE, len+1); - if (ruby_scope->local_vars) { - *vars++ = ruby_scope->local_vars[-1]; - MEMCPY(vars, ruby_scope->local_vars, VALUE, i); - rb_mem_clear(vars+i, len-i); - } - else { - *vars++ = 0; - rb_mem_clear(vars, len); - } - ruby_scope->local_vars = vars; - ruby_scope->flags |= SCOPE_MALLOC; - } - else { - VALUE *vars = ruby_scope->local_vars-1; - REALLOC_N(vars, VALUE, len+1); - ruby_scope->local_vars = vars+1; - rb_mem_clear(ruby_scope->local_vars+i, len-i); - } - if (ruby_scope->local_tbl && ruby_scope->local_vars[-1] == 0) { - if (!(ruby_scope->flags & SCOPE_CLONE)) 
- xfree(ruby_scope->local_tbl); - } - ruby_scope->local_vars[-1] = 0; /* no reference needed */ - ruby_scope->local_tbl = local_tbl(); +static void +dyna_pop_gen(struct parser_params *parser, const struct vtable *lvargs) +{ + while (lvtbl->args != lvargs) { + dyna_pop_1(parser); + if (!lvtbl->args) { + struct local_vars *local = lvtbl->prev; + xfree(lvtbl); + lvtbl = local; } } - local_pop(); + dyna_pop_1(parser); } -#define DVAR_USED FL_USER6 +static int +dyna_in_block_gen(struct parser_params *parser) +{ + return POINTER_P(lvtbl->vars) && lvtbl->vars->prev != DVARS_TOPSCOPE; +} -static VALUE -dyna_var_lookup(id) - ID id; +static int +dvar_defined_gen(struct parser_params *parser, ID id) { - struct RVarmap *vars = ruby_dyna_vars; + struct vtable *vars, *args; + + args = lvtbl->args; + vars = lvtbl->vars; - while (vars) { - if (vars->id == id) { - FL_SET(vars, DVAR_USED); - return Qtrue; + while (POINTER_P(vars)) { + if (vtable_included(args, id)) { + return 1; + } + if (vtable_included(vars, id)) { + return 1; } - vars = vars->next; + args = args->prev; + vars = vars->prev; + } + + if (vars == DVARS_INHERIT) { + return rb_dvar_defined(id); } - return Qfalse; + + return 0; } -static struct RVarmap* -dyna_push() +static int +dvar_curr_gen(struct parser_params *parser, ID id) { - struct RVarmap* vars = ruby_dyna_vars; - - rb_dvar_push(0, 0); - lvtbl->dlev++; - return vars; + return (vtable_included(lvtbl->args, id) || + vtable_included(lvtbl->vars, id)); } +#ifndef RIPPER +VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline); +VALUE rb_reg_check_preprocess(VALUE); + static void -dyna_pop(vars) - struct RVarmap* vars; +reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) { - lvtbl->dlev--; - ruby_dyna_vars = vars; + int c = RE_OPTION_ENCODING_IDX(options); + + if (c) { + int opt, idx; + rb_char_to_option_kcode(c, &opt, &idx); + if (idx != ENCODING_GET(str) && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { 
+ goto error; + } + ENCODING_SET(str, idx); + } + else if (RE_OPTION_ENCODING_NONE(options)) { + if (!ENCODING_IS_ASCII8BIT(str) && + rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + c = 'n'; + goto error; + } + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + else if (parser->enc == rb_usascii_encoding()) { + if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { + /* raise in re.c */ + rb_enc_associate(str, rb_usascii_encoding()); + } + else { + rb_enc_associate(str, rb_ascii8bit_encoding()); + } + } + return; + + error: + compile_error(PARSER_ARG + "regexp encoding option '%c' differs from source encoding '%s'", + c, rb_enc_name(rb_enc_get(str))); } static int -dyna_in_block() +reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) { - return (lvtbl->dlev > 0); + VALUE err; + reg_fragment_setenc(str, options); + err = rb_reg_check_preprocess(str); + if (err != Qnil) { + err = rb_obj_as_string(err); + compile_error(PARSER_ARG "%s", RSTRING_PTR(err)); + RB_GC_GUARD(err); + return 0; + } + return 1; +} + +typedef struct { + struct parser_params* parser; + rb_encoding *enc; + NODE *succ_block; + NODE *fail_block; + int num; +} reg_named_capture_assign_t; + +static int +reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end, + int back_num, int *back_refs, OnigRegex regex, void *arg0) +{ + reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0; + struct parser_params* parser = arg->parser; + rb_encoding *enc = arg->enc; + long len = name_end - name; + const char *s = (const char *)name; + ID var; + + arg->num++; + + if (arg->succ_block == 0) { + arg->succ_block = NEW_BEGIN(0); + arg->fail_block = NEW_BEGIN(0); + } + + if (!len || (*name != '_' && ISASCII(*name) && !rb_enc_islower(*name, enc)) || + (len < MAX_WORD_LENGTH && rb_reserved_word(s, (int)len)) || + !rb_enc_symname2_p(s, len, enc)) { + return ST_CONTINUE; + } + var = rb_intern3(s, len, enc); + if (dvar_defined(var) || local_id(var)) { + 
rb_warningS("named capture conflicts a local variable - %s", + rb_id2name(var)); + } + arg->succ_block = block_append(arg->succ_block, + newline_node(node_assign(assignable(var,0), + NEW_CALL( + gettable(rb_intern("$~")), + idAREF, + NEW_LIST(NEW_LIT(ID2SYM(var)))) + ))); + arg->fail_block = block_append(arg->fail_block, + newline_node(node_assign(assignable(var,0), NEW_LIT(Qnil)))); + return ST_CONTINUE; } static NODE * -dyna_init(node, pre) - NODE *node; - struct RVarmap *pre; +reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match) { - struct RVarmap *post = ruby_dyna_vars; - NODE *var; + reg_named_capture_assign_t arg; + + arg.parser = parser; + arg.enc = rb_enc_get(regexp); + arg.succ_block = 0; + arg.fail_block = 0; + arg.num = 0; + onig_foreach_name(RREGEXP(regexp)->ptr, reg_named_capture_assign_iter, (void*)&arg); + + if (arg.num == 0) + return match; + + return + block_append( + newline_node(match), + NEW_IF(gettable(rb_intern("$~")), + block_append( + newline_node(arg.succ_block), + newline_node( + NEW_CALL( + gettable(rb_intern("$~")), + rb_intern("begin"), + NEW_LIST(NEW_LIT(INT2FIX(0)))))), + block_append( + newline_node(arg.fail_block), + newline_node( + NEW_LIT(Qnil))))); +} - if (!node || !post || pre == post) return node; - for (var = 0; post != pre && post->id; post = post->next) { - if (FL_TEST(post, DVAR_USED)) { - var = NEW_DASGN_CURR(post->id, var); +static VALUE +reg_compile_gen(struct parser_params* parser, VALUE str, int options) +{ + VALUE re; + VALUE err; + + reg_fragment_setenc(str, options); + err = rb_errinfo(); + re = rb_reg_compile(str, options & RE_OPTION_MASK, ruby_sourcefile, ruby_sourceline); + if (NIL_P(re)) { + ID mesg = rb_intern("mesg"); + VALUE m = rb_attr_get(rb_errinfo(), mesg); + rb_set_errinfo(err); + if (!NIL_P(err)) { + rb_str_append(rb_str_cat(rb_attr_get(err, mesg), "\n", 1), m); + } + else { + compile_error(PARSER_ARG "%s", RSTRING_PTR(m)); } + return Qnil; } - return block_append(var, 
node); + return re; } -int -ruby_parser_stack_on_heap() +void +rb_gc_mark_parser(void) { -#if defined(YYMALLOC) - return Qfalse; -#else - return Qtrue; -#endif } -void -rb_gc_mark_parser() +NODE* +rb_parser_append_print(VALUE vparser, NODE *node) { -#if defined YYMALLOC - rb_gc_mark((VALUE)parser_heap); -#elif defined yystacksize - if (yyvsp) rb_gc_mark_locations((VALUE *)yyvs, (VALUE *)yyvsp); -#endif + NODE *prelude = 0; + NODE *scope = node; + struct parser_params *parser; - if (!ruby_in_compile) return; + if (!node) return node; - rb_gc_mark_maybe((VALUE)yylval.node); - rb_gc_mark(ruby_debug_lines); - rb_gc_mark(lex_lastline); - rb_gc_mark(lex_input); - rb_gc_mark((VALUE)lex_strterm); -} + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); -void -rb_parser_append_print() -{ - ruby_eval_tree = - block_append(ruby_eval_tree, - NEW_FCALL(rb_intern("print"), - NEW_ARRAY(NEW_GVAR(rb_intern("$_"))))); + node = node->nd_body; + + if (nd_type(node) == NODE_PRELUDE) { + prelude = node; + node = node->nd_body; + } + + node = block_append(node, + NEW_FCALL(rb_intern("print"), + NEW_ARRAY(NEW_GVAR(rb_intern("$_"))))); + if (prelude) { + prelude->nd_body = node; + scope->nd_body = prelude; + } + else { + scope->nd_body = node; + } + + return scope; } -void -rb_parser_while_loop(chop, split) - int chop, split; +NODE * +rb_parser_while_loop(VALUE vparser, NODE *node, int chop, int split) { + NODE *prelude = 0; + NODE *scope = node; + struct parser_params *parser; + + if (!node) return node; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); + + node = node->nd_body; + + if (nd_type(node) == NODE_PRELUDE) { + prelude = node; + node = node->nd_body; + } if (split) { - ruby_eval_tree = - block_append(NEW_GASGN(rb_intern("$F"), - NEW_CALL(NEW_GVAR(rb_intern("$_")), - rb_intern("split"), 0)), - ruby_eval_tree); + node = block_append(NEW_GASGN(rb_intern("$F"), + NEW_CALL(NEW_GVAR(rb_intern("$_")), + rb_intern("split"), 
0)), + node); } if (chop) { - ruby_eval_tree = - block_append(NEW_CALL(NEW_GVAR(rb_intern("$_")), - rb_intern("chop!"), 0), ruby_eval_tree); + node = block_append(NEW_CALL(NEW_GVAR(rb_intern("$_")), + rb_intern("chop!"), 0), node); + } + + node = NEW_OPT_N(node); + + if (prelude) { + prelude->nd_body = node; + scope->nd_body = prelude; + } + else { + scope->nd_body = node; } - ruby_eval_tree = NEW_OPT_N(ruby_eval_tree); + + return scope; } -static struct { +static const struct { ID token; - char *name; + const char *name; } op_tbl[] = { {tDOT2, ".."}, {tDOT3, "..."}, - {'+', "+"}, - {'-', "-"}, {'+', "+(binary)"}, {'-', "-(binary)"}, - {'*', "*"}, - {'/', "/"}, - {'%', "%"}, {tPOW, "**"}, {tUPLUS, "+@"}, {tUMINUS, "-@"}, - {tUPLUS, "+(unary)"}, - {tUMINUS, "-(unary)"}, - {'|', "|"}, - {'^', "^"}, - {'&', "&"}, {tCMP, "<=>"}, - {'>', ">"}, {tGEQ, ">="}, - {'<', "<"}, {tLEQ, "<="}, {tEQ, "=="}, {tEQQ, "==="}, {tNEQ, "!="}, {tMATCH, "=~"}, {tNMATCH, "!~"}, - {'!', "!"}, - {'~', "~"}, - {'!', "!(unary)"}, - {'~', "~(unary)"}, - {'!', "!@"}, - {'~', "~@"}, {tAREF, "[]"}, {tASET, "[]="}, {tLSHFT, "<<"}, {tRSHFT, ">>"}, {tCOLON2, "::"}, - {'`', "`"}, - {0, 0} }; -static st_table *sym_tbl; -static st_table *sym_rev_tbl; +#define op_tbl_count numberof(op_tbl) + +#ifndef ENABLE_SELECTOR_NAMESPACE +#define ENABLE_SELECTOR_NAMESPACE 0 +#endif + +static struct symbols { + ID last_id; + st_table *sym_id; + st_table *id_str; +#if ENABLE_SELECTOR_NAMESPACE + st_table *ivar2_id; + st_table *id_ivar2; +#endif + VALUE op_sym[tLAST_TOKEN]; +} global_symbols = {tLAST_ID}; + +static const struct st_hash_type symhash = { + rb_str_hash_cmp, + rb_str_hash, +}; + +#if ENABLE_SELECTOR_NAMESPACE +struct ivar2_key { + ID id; + VALUE klass; +}; + +static int +ivar2_cmp(struct ivar2_key *key1, struct ivar2_key *key2) +{ + if (key1->id == key2->id && key1->klass == key2->klass) { + return 0; + } + return 1; +} + +static int +ivar2_hash(struct ivar2_key *key) +{ + return (key->id << 8) ^ 
(key->klass >> 2); +} + +static const struct st_hash_type ivar2_hash_type = { + ivar2_cmp, + ivar2_hash, +}; +#endif void -Init_sym() +Init_sym(void) { - sym_tbl = st_init_strtable_with_size(200); - sym_rev_tbl = st_init_numtable_with_size(200); + global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); + global_symbols.id_str = st_init_numtable_with_size(1000); +#if ENABLE_SELECTOR_NAMESPACE + global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000); + global_symbols.id_ivar2 = st_init_numtable_with_size(1000); +#endif + + Init_id(); } -static ID last_id = tLAST_TOKEN; +void +rb_gc_mark_symbols(void) +{ + rb_mark_tbl(global_symbols.id_str); + rb_gc_mark_locations(global_symbols.op_sym, + global_symbols.op_sym + tLAST_TOKEN); +} +#endif /* !RIPPER */ static ID -internal_id() +internal_id_gen(struct parser_params *parser) { - return ID_INTERNAL | (++last_id << ID_SCOPE_SHIFT); + ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars); + id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1; + return ID_INTERNAL | (id << ID_SCOPE_SHIFT); } +#ifndef RIPPER static int -is_special_global_name(m) - const char *m; +is_special_global_name(const char *m, const char *e, rb_encoding *enc) { + int mb = 0; + + if (m >= e) return 0; switch (*m) { case '~': case '*': case '$': case '?': case '!': case '@': case '/': case '\\': case ';': case ',': case '.': case '=': @@ -5953,29 +9493,47 @@ is_special_global_name(m) break; case '-': ++m; - if (is_identchar(*m)) m += mbclen(*m); + if (m < e && is_identchar(m, e, enc)) { + if (!ISASCII(*m)) mb = 1; + m += rb_enc_mbclen(m, e, enc); + } break; default: - if (!ISDIGIT(*m)) return 0; - do ++m; while (ISDIGIT(*m)); + if (!rb_enc_isdigit(*m, enc)) return 0; + do { + if (!ISASCII(*m)) mb = 1; + ++m; + } while (m < e && rb_enc_isdigit(*m, enc)); } - return !*m; + return m == e ? 
mb + 1 : 0; } int -rb_symname_p(name) - const char *name; +rb_symname_p(const char *name) +{ + return rb_enc_symname_p(name, rb_ascii8bit_encoding()); +} + +int +rb_enc_symname_p(const char *name, rb_encoding *enc) +{ + return rb_enc_symname2_p(name, strlen(name), enc); +} + +int +rb_enc_symname2_p(const char *name, long len, rb_encoding *enc) { const char *m = name; - int localid = Qfalse; + const char *e = m + len; + int localid = FALSE; - if (!m) return Qfalse; + if (!m) return FALSE; switch (*m) { case '\0': - return Qfalse; + return FALSE; case '$': - if (is_special_global_name(++m)) return Qtrue; + if (is_special_global_name(++m, e, enc)) return TRUE; goto id; case '@': @@ -6000,7 +9558,7 @@ rb_symname_p(name) switch (*++m) { case '~': ++m; break; case '=': if (*++m == '=') ++m; break; - default: return Qfalse; + default: return FALSE; } break; @@ -6017,15 +9575,24 @@ rb_symname_p(name) break; case '[': - if (*++m != ']') return Qfalse; + if (*++m != ']') return FALSE; if (*++m == '=') ++m; break; + case '!': + switch (*++m) { + case '\0': return TRUE; + case '=': case '~': ++m; break; + default: return FALSE; + } + break; + default: - localid = !ISUPPER(*m); + localid = !rb_enc_isupper(*m, enc); id: - if (*m != '_' && !ISALPHA(*m) && !ismbchar(*m)) return Qfalse; - while (is_identchar(*m)) m += mbclen(*m); + if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) + return FALSE; + while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); if (localid) { switch (*m) { case '!': case '?': case '=': ++m; @@ -6033,40 +9600,62 @@ rb_symname_p(name) } break; } - return *m ? 
Qfalse : Qtrue; + return m == e; } -int -rb_sym_interned_p(str) - VALUE str; +static ID +register_symid(ID id, const char *name, long len, rb_encoding *enc) { - ID id; - - if (st_lookup(sym_tbl, (st_data_t)RSTRING(str)->ptr, (st_data_t *)&id)) - return Qtrue; - return Qfalse; + VALUE str = rb_enc_str_new(name, len, enc); + OBJ_FREEZE(str); + st_add_direct(global_symbols.sym_id, (st_data_t)str, id); + st_add_direct(global_symbols.id_str, id, (st_data_t)str); + return id; } ID -rb_intern(name) - const char *name; +rb_intern3(const char *name, long len, rb_encoding *enc) { const char *m = name; + const char *e = m + len; + unsigned char c; + VALUE str; ID id; - int last; + long last; + int mb; + st_data_t data; + struct RString fake_str; + fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE; + fake_str.basic.klass = rb_cString; + fake_str.as.heap.len = len; + fake_str.as.heap.ptr = (char *)name; + fake_str.as.heap.aux.capa = len; + str = (VALUE)&fake_str; + rb_enc_associate(str, enc); - if (st_lookup(sym_tbl, (st_data_t)name, (st_data_t *)&id)) - return id; + if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "invalid encoding symbol"); + } + + if (st_lookup(global_symbols.sym_id, str, &data)) + return (ID)data; - last = strlen(name)-1; + if (rb_cString && !rb_enc_asciicompat(enc)) { + id = ID_JUNK; + goto new_id; + } + last = len-1; id = 0; - switch (*name) { + switch (*m) { case '$': id |= ID_GLOBAL; - if (is_special_global_name(++m)) goto new_id; + if ((mb = is_special_global_name(++m, e, enc)) != 0) { + if (!--mb) enc = rb_ascii8bit_encoding(); + goto new_id; + } break; case '@': - if (name[1] == '@') { + if (m[1] == '@') { m++; id |= ID_CLASS; } @@ -6076,33 +9665,35 @@ rb_intern(name) m++; break; default: - if (name[0] != '_' && ISASCII(name[0]) && !ISALNUM(name[0])) { + c = m[0]; + if (c != '_' && rb_enc_isascii(c, enc) && rb_enc_ispunct(c, enc)) { /* operators */ int i; - for (i=0; op_tbl[i].token; i++) { - if 
(*op_tbl[i].name == *name && - strcmp(op_tbl[i].name, name) == 0) { + if (len == 1) { + id = c; + goto id_register; + } + for (i = 0; i < op_tbl_count; i++) { + if (*op_tbl[i].name == *m && + strcmp(op_tbl[i].name, m) == 0) { id = op_tbl[i].token; - goto id_regist; + goto id_register; } } } - if (name[last] == '=') { + if (m[last] == '=') { /* attribute assignment */ - char *buf = ALLOCA_N(char,last+1); - - strncpy(buf, name, last); - buf[last] = '\0'; - id = rb_intern(buf); + id = rb_intern3(name, last, enc); if (id > tLAST_TOKEN && !is_attrset_id(id)) { + enc = rb_enc_get(rb_id2str(id)); id = rb_id_attrset(id); - goto id_regist; + goto id_register; } id = ID_ATTRSET; } - else if (ISUPPER(name[0])) { + else if (rb_enc_isupper(m[0], enc)) { id = ID_CONST; } else { @@ -6110,66 +9701,150 @@ rb_intern(name) } break; } - if (!ISDIGIT(*m)) { - while (m <= name + last && is_identchar(*m)) { - m += mbclen(*m); + mb = 0; + if (!rb_enc_isdigit(*m, enc)) { + while (m <= name + last && is_identchar(m, e, enc)) { + if (ISASCII(*m)) { + m++; + } + else { + mb = 1; + m += rb_enc_mbclen(m, e, enc); + } + } + } + if (m - name < len) id = ID_JUNK; + if (enc != rb_usascii_encoding()) { + /* + * this clause makes sense only when called from other than + * rb_intern_str() taking care of code-range. 
+ */ + if (!mb) { + for (; m <= name + len; ++m) { + if (!ISASCII(*m)) goto mbstr; + } + enc = rb_usascii_encoding(); } + mbstr:; } - if (*m) id = ID_JUNK; new_id: - id |= ++last_id << ID_SCOPE_SHIFT; - id_regist: - name = strdup(name); - st_add_direct(sym_tbl, (st_data_t)name, id); - st_add_direct(sym_rev_tbl, id, (st_data_t)name); - return id; + if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) { + if (len > 20) { + rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.20s...)", + name); + } + else { + rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.*s)", + (int)len, name); + } + } + id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; + id_register: + return register_symid(id, name, len, enc); +} + +ID +rb_intern2(const char *name, long len) +{ + return rb_intern3(name, len, rb_usascii_encoding()); +} + +#undef rb_intern +ID +rb_intern(const char *name) +{ + return rb_intern2(name, strlen(name)); } -char * -rb_id2name(id) +ID +rb_intern_str(VALUE str) +{ + rb_encoding *enc; ID id; + + if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { + enc = rb_usascii_encoding(); + } + else { + enc = rb_enc_get(str); + } + id = rb_intern3(RSTRING_PTR(str), RSTRING_LEN(str), enc); + RB_GC_GUARD(str); + return id; +} + +VALUE +rb_id2str(ID id) { - char *name; st_data_t data; if (id < tLAST_TOKEN) { - int i; - - for (i=0; op_tbl[i].token; i++) { - if (op_tbl[i].token == id) - return op_tbl[i].name; + int i = 0; + + if (id < INT_MAX && rb_ispunct((int)id)) { + VALUE str = global_symbols.op_sym[i = (int)id]; + if (!str) { + char name[2]; + name[0] = (char)id; + name[1] = 0; + str = rb_usascii_str_new(name, 1); + OBJ_FREEZE(str); + global_symbols.op_sym[i] = str; + } + return str; + } + for (i = 0; i < op_tbl_count; i++) { + if (op_tbl[i].token == id) { + VALUE str = global_symbols.op_sym[i]; + if (!str) { + str = rb_usascii_str_new2(op_tbl[i].name); + OBJ_FREEZE(str); + global_symbols.op_sym[i] = str; + } + return str; + } } } - if 
(st_lookup(sym_rev_tbl, id, &data)) - return (char *)data; + if (st_lookup(global_symbols.id_str, id, &data)) { + VALUE str = (VALUE)data; + if (RBASIC(str)->klass == 0) + RBASIC(str)->klass = rb_cString; + return str; + } if (is_attrset_id(id)) { ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL; + VALUE str; - again: - name = rb_id2name(id2); - if (name) { - char *buf = ALLOCA_N(char, strlen(name)+2); - - strcpy(buf, name); - strcat(buf, "="); - rb_intern(buf); - return rb_id2name(id); - } - if (is_local_id(id2)) { + while (!(str = rb_id2str(id2))) { + if (!is_local_id(id2)) return 0; id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; - goto again; } + str = rb_str_dup(str); + rb_str_cat(str, "=", 1); + rb_intern_str(str); + if (st_lookup(global_symbols.id_str, id, &data)) { + VALUE str = (VALUE)data; + if (RBASIC(str)->klass == 0) + RBASIC(str)->klass = rb_cString; + return str; + } } return 0; } +const char * +rb_id2name(ID id) +{ + VALUE str = rb_id2str(id); + + if (!str) return 0; + return RSTRING_PTR(str); +} + static int -symbols_i(key, value, ary) - char *key; - ID value; - VALUE ary; +symbols_i(VALUE sym, ID value, VALUE ary) { rb_ary_push(ary, ID2SYM(value)); return ST_CONTINUE; @@ -6178,136 +9853,286 @@ symbols_i(key, value, ary) /* * call-seq: * Symbol.all_symbols => array - * + * * Returns an array of all the symbols currently in Ruby's symbol * table. 
- * + * * Symbol.all_symbols.size #=> 903 * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink, - * :chown, :EOFError, :$;, :String, - * :LOCK_SH, :"setuid?", :$<, - * :default_proc, :compact, :extend, + * :chown, :EOFError, :$;, :String, + * :LOCK_SH, :"setuid?", :$<, + * :default_proc, :compact, :extend, * :Tms, :getwd, :$=, :ThreadGroup, * :wait2, :$>] */ VALUE -rb_sym_all_symbols() +rb_sym_all_symbols(void) { - VALUE ary = rb_ary_new2(sym_tbl->num_entries); + VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries); - st_foreach(sym_tbl, symbols_i, ary); + st_foreach(global_symbols.sym_id, symbols_i, ary); return ary; } int -rb_is_const_id(id) - ID id; +rb_is_const_id(ID id) { - if (is_const_id(id)) return Qtrue; - return Qfalse; + return is_const_id(id); } int -rb_is_class_id(id) - ID id; +rb_is_class_id(ID id) { - if (is_class_id(id)) return Qtrue; - return Qfalse; + return is_class_id(id); } int -rb_is_instance_id(id) - ID id; +rb_is_instance_id(ID id) { - if (is_instance_id(id)) return Qtrue; - return Qfalse; + return is_instance_id(id); } int -rb_is_local_id(id) - ID id; +rb_is_local_id(ID id) { - if (is_local_id(id)) return Qtrue; - return Qfalse; + return is_local_id(id); } int -rb_is_junk_id(id) - ID id; +rb_is_junk_id(ID id) { - if (is_junk_id(id)) return Qtrue; - return Qfalse; + return is_junk_id(id); } +#endif /* !RIPPER */ + static void -special_local_set(c, val) - char c; - VALUE val; +parser_initialize(struct parser_params *parser) { - int cnt; + parser->eofp = Qfalse; + + parser->parser_lex_strterm = 0; + parser->parser_cond_stack = 0; + parser->parser_cmdarg_stack = 0; + parser->parser_class_nest = 0; + parser->parser_paren_nest = 0; + parser->parser_lpar_beg = 0; + parser->parser_in_single = 0; + parser->parser_in_def = 0; + parser->parser_in_defined = 0; + parser->parser_compile_for_eval = 0; + parser->parser_cur_mid = 0; + parser->parser_tokenbuf = NULL; + parser->parser_tokidx = 0; + parser->parser_toksiz = 0; + 
parser->parser_heredoc_end = 0; + parser->parser_command_start = TRUE; + parser->parser_deferred_nodes = 0; + parser->parser_lex_pbeg = 0; + parser->parser_lex_p = 0; + parser->parser_lex_pend = 0; + parser->parser_lvtbl = 0; + parser->parser_ruby__end__seen = 0; + parser->parser_ruby_sourcefile = 0; +#ifndef RIPPER + parser->is_ripper = 0; + parser->parser_eval_tree_begin = 0; + parser->parser_eval_tree = 0; +#else + parser->is_ripper = 1; + parser->parser_ruby_sourcefile_string = Qnil; + parser->delayed = Qnil; - top_local_init(); - cnt = local_cnt(c); - top_local_setup(); - ruby_scope->local_vars[cnt] = val; + parser->result = Qnil; + parser->parsing_thread = Qnil; + parser->toplevel_p = TRUE; +#endif +#ifdef YYMALLOC + parser->heap = NULL; +#endif + parser->enc = rb_usascii_encoding(); } -VALUE -rb_backref_get() +#ifdef RIPPER +#define parser_mark ripper_parser_mark +#define parser_free ripper_parser_free +#endif + +static void +parser_mark(void *ptr) +{ + struct parser_params *p = (struct parser_params*)ptr; + + rb_gc_mark((VALUE)p->parser_lex_strterm); + rb_gc_mark((VALUE)p->parser_deferred_nodes); + rb_gc_mark(p->parser_lex_input); + rb_gc_mark(p->parser_lex_lastline); + rb_gc_mark(p->parser_lex_nextline); +#ifndef RIPPER + rb_gc_mark((VALUE)p->parser_eval_tree_begin) ; + rb_gc_mark((VALUE)p->parser_eval_tree) ; + rb_gc_mark(p->debug_lines); +#else + rb_gc_mark(p->parser_ruby_sourcefile_string); + rb_gc_mark(p->delayed); + rb_gc_mark(p->value); + rb_gc_mark(p->result); + rb_gc_mark(p->parsing_thread); +#endif +#ifdef YYMALLOC + rb_gc_mark((VALUE)p->heap); +#endif +} + +static void +parser_free(void *ptr) { - VALUE *var = rb_svar(1); - if (var) { - return *var; + struct parser_params *p = (struct parser_params*)ptr; + struct local_vars *local, *prev; + + if (p->parser_tokenbuf) { + xfree(p->parser_tokenbuf); } - return Qnil; + for (local = p->parser_lvtbl; local; local = prev) { + if (local->vars) xfree(local->vars); + prev = local->prev; + xfree(local); + } 
+#ifndef RIPPER + xfree(p->parser_ruby_sourcefile); +#endif + xfree(p); } -void -rb_backref_set(val) - VALUE val; +static size_t +parser_memsize(const void *ptr) { - VALUE *var = rb_svar(1); - if (var) { - *var = val; + struct parser_params *p = (struct parser_params*)ptr; + struct local_vars *local; + size_t size = sizeof(*p); + + if (!ptr) return 0; + size += p->parser_toksiz; + for (local = p->parser_lvtbl; local; local = local->prev) { + size += sizeof(*local); + if (local->vars) size += local->vars->capa * sizeof(ID); } - else { - special_local_set('~', val); +#ifndef RIPPER + if (p->parser_ruby_sourcefile) { + size += strlen(p->parser_ruby_sourcefile) + 1; } +#endif + return size; +} + +static const rb_data_type_t parser_data_type = { + "parser", + parser_mark, + parser_free, + parser_memsize, +}; + +VALUE rb_parser_get_yydebug(VALUE); +VALUE rb_parser_set_yydebug(VALUE, VALUE); + +#ifndef RIPPER +#undef rb_reserved_word + +const struct kwtable * +rb_reserved_word(const char *str, unsigned int len) +{ + return reserved_word(str, len); +} + +static struct parser_params * +parser_new(void) +{ + struct parser_params *p; + + p = ALLOC_N(struct parser_params, 1); + MEMZERO(p, struct parser_params, 1); + parser_initialize(p); + return p; } VALUE -rb_lastline_get() +rb_parser_new(void) { - VALUE *var = rb_svar(0); - if (var) { - return *var; - } - return Qnil; + struct parser_params *p = parser_new(); + + return TypedData_Wrap_Struct(0, &parser_data_type, p); } -void -rb_lastline_set(val) - VALUE val; +/* + * call-seq: + * ripper#end_seen? -> Boolean + * + * Return true if parsed source ended by +\_\_END\_\_+. + */ +VALUE +rb_parser_end_seen_p(VALUE vparser) { - VALUE *var = rb_svar(0); - if (var) { - *var = val; - } - else { - special_local_set('_', val); - } + struct parser_params *parser; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); + return ruby__end__seen ? 
Qtrue : Qfalse; +} + +/* + * call-seq: + * ripper#encoding -> encoding + * + * Return encoding of the source. + */ +VALUE +rb_parser_encoding(VALUE vparser) +{ + struct parser_params *parser; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); + return rb_enc_from_encoding(parser->enc); +} + +/* + * call-seq: + * ripper.yydebug -> true or false + * + * Get yydebug. + */ +VALUE +rb_parser_get_yydebug(VALUE self) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + return yydebug ? Qtrue : Qfalse; +} + +/* + * call-seq: + * ripper.yydebug = flag + * + * Set yydebug. + */ +VALUE +rb_parser_set_yydebug(VALUE self, VALUE flag) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + yydebug = RTEST(flag); + return flag; } #ifdef YYMALLOC #define HEAPCNT(n, size) ((n) * (size) / sizeof(YYSTYPE)) -#define NEWHEAP() rb_node_newnode(NODE_ALLOCA, 0, (VALUE)parser_heap, 0) -#define ADD2HEAP(n, c, p) ((parser_heap = (n))->u1.node = (p), \ +#define NEWHEAP() rb_node_newnode(NODE_ALLOCA, 0, (VALUE)parser->heap, 0) +#define ADD2HEAP(n, c, p) ((parser->heap = (n))->u1.node = (p), \ (n)->u3.cnt = (c), (p)) -static void * -rb_parser_malloc(size) - size_t size; +void * +rb_parser_malloc(struct parser_params *parser, size_t size) { size_t cnt = HEAPCNT(1, size); NODE *n = NEWHEAP(); @@ -6316,9 +10141,8 @@ rb_parser_malloc(size) return ADD2HEAP(n, cnt, ptr); } -static void * -rb_parser_calloc(nelem, size) - size_t nelem, size; +void * +rb_parser_calloc(struct parser_params *parser, size_t nelem, size_t size) { size_t cnt = HEAPCNT(nelem, size); NODE *n = NEWHEAP(); @@ -6327,15 +10151,13 @@ rb_parser_calloc(nelem, size) return ADD2HEAP(n, cnt, ptr); } -static void * -rb_parser_realloc(ptr, size) - void *ptr; - size_t size; +void * +rb_parser_realloc(struct parser_params *parser, void *ptr, size_t size) { NODE *n; size_t cnt = 
HEAPCNT(1, size); - if (ptr && (n = parser_heap) != NULL) { + if (ptr && (n = parser->heap) != NULL) { do { if (n->u1.node == ptr) { n->u1.node = ptr = xrealloc(ptr, size); @@ -6349,13 +10171,12 @@ rb_parser_realloc(ptr, size) return ADD2HEAP(n, cnt, ptr); } -static void -rb_parser_free(ptr) - void *ptr; +void +rb_parser_free(struct parser_params *parser, void *ptr) { - NODE **prev = &parser_heap, *n; + NODE **prev = &parser->heap, *n; - while ((n = *prev) != 0) { + while ((n = *prev) != NULL) { if (n->u1.node == ptr) { *prev = n->u2.node; rb_gc_force_recycle((VALUE)n); @@ -6366,3 +10187,492 @@ rb_parser_free(ptr) xfree(ptr); } #endif +#endif + +#ifdef RIPPER +#ifdef RIPPER_DEBUG +extern int rb_is_pointer_to_heap(VALUE); + +/* :nodoc: */ +static VALUE +ripper_validate_object(VALUE self, VALUE x) +{ + if (x == Qfalse) return x; + if (x == Qtrue) return x; + if (x == Qnil) return x; + if (x == Qundef) + rb_raise(rb_eArgError, "Qundef given"); + if (FIXNUM_P(x)) return x; + if (SYMBOL_P(x)) return x; + if (!rb_is_pointer_to_heap(x)) + rb_raise(rb_eArgError, "invalid pointer: %p", x); + switch (TYPE(x)) { + case T_STRING: + case T_OBJECT: + case T_ARRAY: + case T_BIGNUM: + case T_FLOAT: + return x; + case T_NODE: + if (nd_type(x) != NODE_LASGN) { + rb_raise(rb_eArgError, "NODE given: %p", x); + } + return ((NODE *)x)->nd_rval; + default: + rb_raise(rb_eArgError, "wrong type of ruby object: %p (%s)", + x, rb_obj_classname(x)); + } + return x; +} +#endif + +#define validate(x) (x = get_value(x)) + +static VALUE +ripper_dispatch0(struct parser_params *parser, ID mid) +{ + return rb_funcall(parser->value, mid, 0); +} + +static VALUE +ripper_dispatch1(struct parser_params *parser, ID mid, VALUE a) +{ + validate(a); + return rb_funcall(parser->value, mid, 1, a); +} + +static VALUE +ripper_dispatch2(struct parser_params *parser, ID mid, VALUE a, VALUE b) +{ + validate(a); + validate(b); + return rb_funcall(parser->value, mid, 2, a, b); +} + +static VALUE 
+ripper_dispatch3(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c) +{ + validate(a); + validate(b); + validate(c); + return rb_funcall(parser->value, mid, 3, a, b, c); +} + +static VALUE +ripper_dispatch4(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d) +{ + validate(a); + validate(b); + validate(c); + validate(d); + return rb_funcall(parser->value, mid, 4, a, b, c, d); +} + +static VALUE +ripper_dispatch5(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e) +{ + validate(a); + validate(b); + validate(c); + validate(d); + validate(e); + return rb_funcall(parser->value, mid, 5, a, b, c, d, e); +} + +static const struct kw_assoc { + ID id; + const char *name; +} keyword_to_name[] = { + {keyword_class, "class"}, + {keyword_module, "module"}, + {keyword_def, "def"}, + {keyword_undef, "undef"}, + {keyword_begin, "begin"}, + {keyword_rescue, "rescue"}, + {keyword_ensure, "ensure"}, + {keyword_end, "end"}, + {keyword_if, "if"}, + {keyword_unless, "unless"}, + {keyword_then, "then"}, + {keyword_elsif, "elsif"}, + {keyword_else, "else"}, + {keyword_case, "case"}, + {keyword_when, "when"}, + {keyword_while, "while"}, + {keyword_until, "until"}, + {keyword_for, "for"}, + {keyword_break, "break"}, + {keyword_next, "next"}, + {keyword_redo, "redo"}, + {keyword_retry, "retry"}, + {keyword_in, "in"}, + {keyword_do, "do"}, + {keyword_do_cond, "do"}, + {keyword_do_block, "do"}, + {keyword_return, "return"}, + {keyword_yield, "yield"}, + {keyword_super, "super"}, + {keyword_self, "self"}, + {keyword_nil, "nil"}, + {keyword_true, "true"}, + {keyword_false, "false"}, + {keyword_and, "and"}, + {keyword_or, "or"}, + {keyword_not, "not"}, + {modifier_if, "if"}, + {modifier_unless, "unless"}, + {modifier_while, "while"}, + {modifier_until, "until"}, + {modifier_rescue, "rescue"}, + {keyword_alias, "alias"}, + {keyword_defined, "defined?"}, + {keyword_BEGIN, "BEGIN"}, + {keyword_END, "END"}, + {keyword__LINE__, 
"__LINE__"}, + {keyword__FILE__, "__FILE__"}, + {keyword__ENCODING__, "__ENCODING__"}, + {0, NULL} +}; + +static const char* +keyword_id_to_str(ID id) +{ + const struct kw_assoc *a; + + for (a = keyword_to_name; a->id; a++) { + if (a->id == id) + return a->name; + } + return NULL; +} + +#undef ripper_id2sym +static VALUE +ripper_id2sym(ID id) +{ + const char *name; + char buf[8]; + + if (id <= 256) { + buf[0] = (char)id; + buf[1] = '\0'; + return ID2SYM(rb_intern2(buf, 1)); + } + if ((name = keyword_id_to_str(id))) { + return ID2SYM(rb_intern(name)); + } + switch (id) { + case tOROP: + name = "||"; + break; + case tANDOP: + name = "&&"; + break; + default: + name = rb_id2name(id); + if (!name) { + rb_bug("cannot convert ID to string: %ld", (unsigned long)id); + } + return ID2SYM(id); + } + return ID2SYM(rb_intern(name)); +} + +static ID +ripper_get_id(VALUE v) +{ + NODE *nd; + if (!RB_TYPE_P(v, T_NODE)) return 0; + nd = (NODE *)v; + if (nd_type(nd) != NODE_LASGN) return 0; + return nd->nd_vid; +} + +static VALUE +ripper_get_value(VALUE v) +{ + NODE *nd; + if (v == Qundef) return Qnil; + if (!RB_TYPE_P(v, T_NODE)) return v; + nd = (NODE *)v; + if (nd_type(nd) != NODE_LASGN) return Qnil; + return nd->nd_rval; +} + +static void +ripper_compile_error(struct parser_params *parser, const char *fmt, ...) 
+{ + VALUE str; + va_list args; + + va_start(args, fmt); + str = rb_vsprintf(fmt, args); + va_end(args); + rb_funcall(parser->value, rb_intern("compile_error"), 1, str); +} + +static void +ripper_warn0(struct parser_params *parser, const char *fmt) +{ + rb_funcall(parser->value, rb_intern("warn"), 1, STR_NEW2(fmt)); +} + +static void +ripper_warnI(struct parser_params *parser, const char *fmt, int a) +{ + rb_funcall(parser->value, rb_intern("warn"), 2, + STR_NEW2(fmt), INT2NUM(a)); +} + +#if 0 +static void +ripper_warnS(struct parser_params *parser, const char *fmt, const char *str) +{ + rb_funcall(parser->value, rb_intern("warn"), 2, + STR_NEW2(fmt), STR_NEW2(str)); +} +#endif + +static void +ripper_warning0(struct parser_params *parser, const char *fmt) +{ + rb_funcall(parser->value, rb_intern("warning"), 1, STR_NEW2(fmt)); +} + +static void +ripper_warningS(struct parser_params *parser, const char *fmt, const char *str) +{ + rb_funcall(parser->value, rb_intern("warning"), 2, + STR_NEW2(fmt), STR_NEW2(str)); +} + +static VALUE +ripper_lex_get_generic(struct parser_params *parser, VALUE src) +{ + return rb_funcall(src, ripper_id_gets, 0); +} + +static VALUE +ripper_s_allocate(VALUE klass) +{ + struct parser_params *p; + VALUE self; + + p = ALLOC_N(struct parser_params, 1); + MEMZERO(p, struct parser_params, 1); + self = TypedData_Wrap_Struct(klass, &parser_data_type, p); + p->value = self; + return self; +} + +#define ripper_initialized_p(r) ((r)->parser_lex_input != 0) + +/* + * call-seq: + * Ripper.new(src, filename="(ripper)", lineno=1) -> ripper + * + * Create a new Ripper object. + * _src_ must be a String, an IO, or an Object which has #gets method. + * + * This method does not starts parsing. + * See also Ripper#parse and Ripper.parse. 
+ */ +static VALUE +ripper_initialize(int argc, VALUE *argv, VALUE self) +{ + struct parser_params *parser; + VALUE src, fname, lineno; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + rb_scan_args(argc, argv, "12", &src, &fname, &lineno); + if (rb_obj_respond_to(src, ripper_id_gets, 0)) { + parser->parser_lex_gets = ripper_lex_get_generic; + } + else { + StringValue(src); + parser->parser_lex_gets = lex_get_str; + } + parser->parser_lex_input = src; + parser->eofp = Qfalse; + if (NIL_P(fname)) { + fname = STR_NEW2("(ripper)"); + } + else { + StringValue(fname); + } + parser_initialize(parser); + + parser->parser_ruby_sourcefile_string = fname; + parser->parser_ruby_sourcefile = RSTRING_PTR(fname); + parser->parser_ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1; + + return Qnil; +} + +extern VALUE rb_thread_pass(void); + +struct ripper_args { + struct parser_params *parser; + int argc; + VALUE *argv; +}; + +static VALUE +ripper_parse0(VALUE parser_v) +{ + struct parser_params *parser; + + TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, parser); + parser_prepare(parser); + ripper_yyparse((void*)parser); + return parser->result; +} + +static VALUE +ripper_ensure(VALUE parser_v) +{ + struct parser_params *parser; + + TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, parser); + parser->parsing_thread = Qnil; + return Qnil; +} + +/* + * call-seq: + * ripper#parse + * + * Start parsing and returns the value of the root action. 
+ */ +static VALUE +ripper_parse(VALUE self) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (!NIL_P(parser->parsing_thread)) { + if (parser->parsing_thread == rb_thread_current()) + rb_raise(rb_eArgError, "Ripper#parse is not reentrant"); + else + rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe"); + } + parser->parsing_thread = rb_thread_current(); + rb_ensure(ripper_parse0, self, ripper_ensure, self); + + return parser->result; +} + +/* + * call-seq: + * ripper#column -> Integer + * + * Return column number of current parsing line. + * This number starts from 0. + */ +static VALUE +ripper_column(VALUE self) +{ + struct parser_params *parser; + long col; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(parser->parsing_thread)) return Qnil; + col = parser->tokp - parser->parser_lex_pbeg; + return LONG2NUM(col); +} + +/* + * call-seq: + * ripper#filename -> String + * + * Return current parsing filename. + */ +static VALUE +ripper_filename(VALUE self) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + return parser->parser_ruby_sourcefile_string; +} + +/* + * call-seq: + * ripper#lineno -> Integer + * + * Return line number of current parsing line. + * This number starts from 1. 
+ */ +static VALUE +ripper_lineno(VALUE self) +{ + struct parser_params *parser; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + if (!ripper_initialized_p(parser)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(parser->parsing_thread)) return Qnil; + return INT2NUM(parser->parser_ruby_sourceline); +} + +#ifdef RIPPER_DEBUG +/* :nodoc: */ +static VALUE +ripper_assert_Qundef(VALUE self, VALUE obj, VALUE msg) +{ + StringValue(msg); + if (obj == Qundef) { + rb_raise(rb_eArgError, "%s", RSTRING_PTR(msg)); + } + return Qnil; +} + +/* :nodoc: */ +static VALUE +ripper_value(VALUE self, VALUE obj) +{ + return ULONG2NUM(obj); +} +#endif + +void +Init_ripper(void) +{ + VALUE Ripper; + + Ripper = rb_define_class("Ripper", rb_cObject); + rb_define_const(Ripper, "Version", rb_usascii_str_new2(RIPPER_VERSION)); + rb_define_alloc_func(Ripper, ripper_s_allocate); + rb_define_method(Ripper, "initialize", ripper_initialize, -1); + rb_define_method(Ripper, "parse", ripper_parse, 0); + rb_define_method(Ripper, "column", ripper_column, 0); + rb_define_method(Ripper, "filename", ripper_filename, 0); + rb_define_method(Ripper, "lineno", ripper_lineno, 0); + rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0); + rb_define_method(Ripper, "encoding", rb_parser_encoding, 0); + rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0); + rb_define_method(Ripper, "yydebug=", rb_parser_set_yydebug, 1); +#ifdef RIPPER_DEBUG + rb_define_method(rb_mKernel, "assert_Qundef", ripper_assert_Qundef, 2); + rb_define_method(rb_mKernel, "rawVALUE", ripper_value, 1); + rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); +#endif + + ripper_id_gets = rb_intern("gets"); + ripper_init_eventids1(Ripper); + ripper_init_eventids2(Ripper); + /* ensure existing in symbol table */ + rb_intern("||"); + rb_intern("&&"); +} +#endif /* RIPPER */ |
