diff options
Diffstat (limited to 'parse.y')
| -rw-r--r-- | parse.y | 15144 |
1 files changed, 8935 insertions, 6209 deletions
@@ -11,57 +11,116 @@ %{ -#ifndef PARSER_DEBUG -#define PARSER_DEBUG 0 +#if !YYPURE +# error needs pure parser #endif #define YYDEBUG 1 #define YYERROR_VERBOSE 1 #define YYSTACK_USE_ALLOCA 0 +#define YYLTYPE rb_code_location_t +#define YYLTYPE_IS_DECLARED 1 + +#include "ruby/internal/config.h" + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> + +struct lex_context; -#include "ruby/ruby.h" -#include "ruby/st.h" -#include "ruby/encoding.h" #include "internal.h" +#include "internal/compile.h" +#include "internal/compilers.h" +#include "internal/complex.h" +#include "internal/error.h" +#include "internal/hash.h" +#include "internal/imemo.h" +#include "internal/io.h" +#include "internal/numeric.h" +#include "internal/parse.h" +#include "internal/rational.h" +#include "internal/re.h" +#include "internal/symbol.h" +#include "internal/thread.h" +#include "internal/variable.h" #include "node.h" -#include "parse.h" -#include "id.h" +#include "probes.h" #include "regenc.h" -#include <stdio.h> -#include <errno.h> -#include <ctype.h> +#include "ruby/encoding.h" +#include "ruby/regex.h" +#include "ruby/ruby.h" +#include "ruby/st.h" +#include "ruby/util.h" +#include "ruby/ractor.h" +#include "symbol.h" + +enum shareability { + shareable_none, + shareable_literal, + shareable_copy, + shareable_everything, +}; -#define numberof(array) (int)(sizeof(array) / sizeof((array)[0])) +struct lex_context { + unsigned int in_defined: 1; + unsigned int in_kwarg: 1; + unsigned int in_argdef: 1; + unsigned int in_def: 1; + unsigned int in_class: 1; + BITFIELD(enum shareability, shareable_constant_value, 2); +}; -#define YYMALLOC(size) rb_parser_malloc(parser, (size)) -#define YYREALLOC(ptr, size) rb_parser_realloc(parser, (ptr), (size)) -#define YYCALLOC(nelem, size) rb_parser_calloc(parser, (nelem), (size)) -#define YYFREE(ptr) rb_parser_free(parser, (ptr)) -#define malloc YYMALLOC -#define realloc YYREALLOC -#define calloc YYCALLOC -#define free YYFREE +#include "parse.h" -#ifndef 
RIPPER -static ID register_symid(ID, const char *, long, rb_encoding *); -static ID register_symid_str(ID, VALUE); -#define REGISTER_SYMID(id, name) register_symid((id), (name), strlen(name), enc) -#include "id.c" +#define NO_LEX_CTXT (struct lex_context){0} + +#define AREF(ary, i) RARRAY_AREF(ary, i) + +#ifndef WARN_PAST_SCOPE +# define WARN_PAST_SCOPE 0 #endif -#define is_notop_id(id) ((id)>tLAST_OP_ID) -#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL) -#define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL) -#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE) -#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET) -#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST) -#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS) -#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK) -#define id_type(id) (is_notop_id(id) ? (int)((id)&ID_SCOPE_MASK) : -1) - -#define is_asgn_or_id(id) ((is_notop_id(id)) && \ - (((id)&ID_SCOPE_MASK) == ID_GLOBAL || \ - ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \ - ((id)&ID_SCOPE_MASK) == ID_CLASS)) +#define TAB_WIDTH 8 + +#define yydebug (p->debug) /* disable the global variable definition */ + +#define YYMALLOC(size) rb_parser_malloc(p, (size)) +#define YYREALLOC(ptr, size) rb_parser_realloc(p, (ptr), (size)) +#define YYCALLOC(nelem, size) rb_parser_calloc(p, (nelem), (size)) +#define YYFREE(ptr) rb_parser_free(p, (ptr)) +#define YYFPRINTF rb_parser_printf +#define YY_LOCATION_PRINT(File, loc) \ + rb_parser_printf(p, "%d.%d-%d.%d", \ + (loc).beg_pos.lineno, (loc).beg_pos.column,\ + (loc).end_pos.lineno, (loc).end_pos.column) +#define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (N) \ + { \ + (Current).beg_pos = YYRHSLOC(Rhs, 1).beg_pos; \ + (Current).end_pos = YYRHSLOC(Rhs, N).end_pos; \ + } \ + else \ + { \ + (Current).beg_pos = YYRHSLOC(Rhs, 0).end_pos; \ + (Current).end_pos = 
YYRHSLOC(Rhs, 0).end_pos; \ + } \ + while (0) +#define YY_(Msgid) \ + (((Msgid)[0] == 'm') && (strcmp((Msgid), "memory exhausted") == 0) ? \ + "nesting too deep" : (Msgid)) + +#define RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(Current) \ + rb_parser_set_location_from_strterm_heredoc(p, &p->lex.strterm->u.heredoc, &(Current)) +#define RUBY_SET_YYLLOC_OF_NONE(Current) \ + rb_parser_set_location_of_none(p, &(Current)) +#define RUBY_SET_YYLLOC(Current) \ + rb_parser_set_location(p, &(Current)) +#define RUBY_INIT_YYLLOC() \ + { \ + {p->ruby_sourceline, (int)(p->lex.ptok - p->lex.pbeg)}, \ + {p->ruby_sourceline, (int)(p->lex.pcur - p->lex.pbeg)}, \ + } enum lex_state_bits { EXPR_BEG_bit, /* ignore newline, +/- is a sign. */ @@ -74,7 +133,9 @@ enum lex_state_bits { EXPR_FNAME_bit, /* ignore newline, no reserved words. */ EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */ EXPR_CLASS_bit, /* immediate after `class', no here document. */ - EXPR_VALUE_bit, /* alike EXPR_BEG but label is disallowed. */ + EXPR_LABEL_bit, /* flag bit, label is allowed. */ + EXPR_LABELED_bit, /* flag bit, just after a label. */ + EXPR_FITEM_bit, /* symbol literal as FNAME. 
*/ EXPR_MAX_STATE }; /* examine combinations */ @@ -90,33 +151,47 @@ enum lex_state_e { DEF_EXPR(FNAME), DEF_EXPR(DOT), DEF_EXPR(CLASS), - DEF_EXPR(VALUE), - EXPR_BEG_ANY = (EXPR_BEG | EXPR_VALUE | EXPR_MID | EXPR_CLASS), + DEF_EXPR(LABEL), + DEF_EXPR(LABELED), + DEF_EXPR(FITEM), + EXPR_VALUE = EXPR_BEG, + EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS), EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG), - EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) + EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN), + EXPR_NONE = 0 }; -#define IS_lex_state(ls) (lex_state & ( ls )) +#define IS_lex_state_for(x, ls) ((x) & (ls)) +#define IS_lex_state_all_for(x, ls) (((x) & (ls)) == (ls)) +#define IS_lex_state(ls) IS_lex_state_for(p->lex.state, (ls)) +#define IS_lex_state_all(ls) IS_lex_state_all_for(p->lex.state, (ls)) -#if PARSER_DEBUG -static const char *lex_state_name(enum lex_state_e state); -#endif +# define SET_LEX_STATE(ls) \ + parser_set_lex_state(p, ls, __LINE__) +static inline enum lex_state_e parser_set_lex_state(struct parser_params *p, enum lex_state_e ls, int line); typedef VALUE stack_type; -# define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1)) -# define BITSTACK_POP(stack) ((stack) = (stack) >> 1) -# define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1)) -# define BITSTACK_SET_P(stack) ((stack)&1) +static const rb_code_location_t NULL_LOC = { {0, -1}, {0, -1} }; + +# define SHOW_BITSTACK(stack, name) (p->debug ? rb_parser_show_bitstack(p, stack, name, __LINE__) : (void)0) +# define BITSTACK_PUSH(stack, n) (((p->stack) = ((p->stack)<<1)|((n)&1)), SHOW_BITSTACK(p->stack, #stack"(push)")) +# define BITSTACK_POP(stack) (((p->stack) = (p->stack) >> 1), SHOW_BITSTACK(p->stack, #stack"(pop)")) +# define BITSTACK_SET_P(stack) (SHOW_BITSTACK(p->stack, #stack), (p->stack)&1) +# define BITSTACK_SET(stack, n) ((p->stack)=(n), SHOW_BITSTACK(p->stack, #stack"(set)")) +/* A flag to identify keyword_do_cond, "do" keyword after condition expression. 
+ Examples: `while ... do`, `until ... do`, and `for ... in ... do` */ #define COND_PUSH(n) BITSTACK_PUSH(cond_stack, (n)) #define COND_POP() BITSTACK_POP(cond_stack) -#define COND_LEXPOP() BITSTACK_LEXPOP(cond_stack) #define COND_P() BITSTACK_SET_P(cond_stack) +#define COND_SET(n) BITSTACK_SET(cond_stack, (n)) +/* A flag to identify keyword_do_block; "do" keyword after command_call. + Example: `foo 1, 2 do`. */ #define CMDARG_PUSH(n) BITSTACK_PUSH(cmdarg_stack, (n)) #define CMDARG_POP() BITSTACK_POP(cmdarg_stack) -#define CMDARG_LEXPOP() BITSTACK_LEXPOP(cmdarg_stack) #define CMDARG_P() BITSTACK_SET_P(cmdarg_stack) +#define CMDARG_SET(n) BITSTACK_SET(cmdarg_stack, (n)) struct vtable { ID *tbl; @@ -129,390 +204,508 @@ struct local_vars { struct vtable *args; struct vtable *vars; struct vtable *used; +# if WARN_PAST_SCOPE + struct vtable *past; +# endif struct local_vars *prev; +# ifndef RIPPER + struct { + NODE *outer, *inner, *current; + } numparam; +# endif }; -#define DVARS_INHERIT ((void*)1) -#define DVARS_TOPSCOPE NULL -#define DVARS_SPECIAL_P(tbl) (!POINTER_P(tbl)) -#define POINTER_P(val) ((VALUE)(val) & ~(VALUE)3) - -static int -vtable_size(const struct vtable *tbl) -{ - if (POINTER_P(tbl)) { - return tbl->pos; - } - else { - return 0; - } -} - -#define VTBL_DEBUG 0 - -static struct vtable * -vtable_alloc(struct vtable *prev) -{ - struct vtable *tbl = ALLOC(struct vtable); - tbl->pos = 0; - tbl->capa = 8; - tbl->tbl = ALLOC_N(ID, tbl->capa); - tbl->prev = prev; - if (VTBL_DEBUG) printf("vtable_alloc: %p\n", (void *)tbl); - return tbl; -} - -static void -vtable_free(struct vtable *tbl) -{ - if (VTBL_DEBUG)printf("vtable_free: %p\n", (void *)tbl); - if (POINTER_P(tbl)) { - if (tbl->tbl) { - xfree(tbl->tbl); - } - xfree(tbl); - } -} - -static void -vtable_add(struct vtable *tbl, ID id) -{ - if (!POINTER_P(tbl)) { - rb_bug("vtable_add: vtable is not allocated (%p)", (void *)tbl); - } - if (VTBL_DEBUG) printf("vtable_add: %p, %s\n", (void *)tbl, rb_id2name(id)); - 
- if (tbl->pos == tbl->capa) { - tbl->capa = tbl->capa * 2; - REALLOC_N(tbl->tbl, ID, tbl->capa); - } - tbl->tbl[tbl->pos++] = id; -} +enum { + ORDINAL_PARAM = -1, + NO_PARAM = 0, + NUMPARAM_MAX = 9, +}; +#define NUMPARAM_ID_P(id) numparam_id_p(id) +#define NUMPARAM_ID_TO_IDX(id) (unsigned int)(((id) >> ID_SCOPE_SHIFT) - tNUMPARAM_1 + 1) +#define NUMPARAM_IDX_TO_ID(idx) TOKEN2LOCALID((tNUMPARAM_1 + (idx) - 1)) static int -vtable_included(const struct vtable * tbl, ID id) +numparam_id_p(ID id) { - int i; - - if (POINTER_P(tbl)) { - for (i = 0; i < tbl->pos; i++) { - if (tbl->tbl[i] == id) { - return i+1; - } - } - } - return 0; + if (!is_local_id(id)) return 0; + unsigned int idx = NUMPARAM_ID_TO_IDX(id); + return idx > 0 && idx <= NUMPARAM_MAX; } +static void numparam_name(struct parser_params *p, ID id); +#define DVARS_INHERIT ((void*)1) +#define DVARS_TOPSCOPE NULL +#define DVARS_TERMINAL_P(tbl) ((tbl) == DVARS_INHERIT || (tbl) == DVARS_TOPSCOPE) -#ifndef RIPPER typedef struct token_info { const char *token; - int linenum; - int column; + rb_code_position_t beg; + int indent; int nonspc; struct token_info *next; } token_info; -#endif + +typedef struct rb_strterm_struct rb_strterm_t; /* Structure of Lexer Buffer: - lex_pbeg tokp lex_p lex_pend - | | | | - |-----------+--------------+------------| - |<------------>| + lex.pbeg lex.ptok lex.pcur lex.pend + | | | | + |------------+------------+------------| + |<---------->| token */ struct parser_params { - int is_ripper; - NODE *heap; - - YYSTYPE *parser_yylval; - VALUE eofp; - - NODE *parser_lex_strterm; - enum lex_state_e parser_lex_state; - stack_type parser_cond_stack; - stack_type parser_cmdarg_stack; - int parser_class_nest; - int parser_paren_nest; - int parser_lpar_beg; - int parser_in_single; - int parser_in_def; - int parser_brace_nest; - int parser_compile_for_eval; - VALUE parser_cur_mid; - int parser_in_defined; - char *parser_tokenbuf; - int parser_tokidx; - int parser_toksiz; - VALUE parser_lex_input; 
- VALUE parser_lex_lastline; - VALUE parser_lex_nextline; - const char *parser_lex_pbeg; - const char *parser_lex_p; - const char *parser_lex_pend; - int parser_heredoc_end; - int parser_command_start; - NODE *parser_deferred_nodes; - long parser_lex_gets_ptr; - VALUE (*parser_lex_gets)(struct parser_params*,VALUE); - struct local_vars *parser_lvtbl; - int parser_ruby__end__seen; + rb_imemo_tmpbuf_t *heap; + + YYSTYPE *lval; + + struct { + rb_strterm_t *strterm; + VALUE (*gets)(struct parser_params*,VALUE); + VALUE input; + VALUE prevline; + VALUE lastline; + VALUE nextline; + const char *pbeg; + const char *pcur; + const char *pend; + const char *ptok; + union { + long ptr; + VALUE (*call)(VALUE, int); + } gets_; + enum lex_state_e state; + /* track the nest level of any parens "()[]{}" */ + int paren_nest; + /* keep p->lex.paren_nest at the beginning of lambda "->" to detect tLAMBEG and keyword_do_LAMBDA */ + int lpar_beg; + /* track the nest level of only braces "{}" */ + int brace_nest; + } lex; + stack_type cond_stack; + stack_type cmdarg_stack; + int tokidx; + int toksiz; + int tokline; + int heredoc_end; + int heredoc_indent; + int heredoc_line_indent; + char *tokenbuf; + struct local_vars *lvtbl; + st_table *pvtbl; + st_table *pktbl; int line_count; - int has_shebang; - char *parser_ruby_sourcefile; /* current source file */ - int parser_ruby_sourceline; /* current line no. */ + int ruby_sourceline; /* current line no. 
*/ + const char *ruby_sourcefile; /* current source file */ + VALUE ruby_sourcefile_string; rb_encoding *enc; - rb_encoding *utf8; + token_info *token_info; + VALUE case_labels; + VALUE compile_option; + + VALUE debug_buffer; + VALUE debug_output; + + ID cur_arg; + + rb_ast_t *ast; + int node_id; + + int max_numparam; + + struct lex_context ctxt; - int parser_yydebug; + unsigned int command_start:1; + unsigned int eofp: 1; + unsigned int ruby__end__seen: 1; + unsigned int debug: 1; + unsigned int has_shebang: 1; + unsigned int token_seen: 1; + unsigned int token_info_enabled: 1; +# if WARN_PAST_SCOPE + unsigned int past_scope_enabled: 1; +# endif + unsigned int error_p: 1; + unsigned int cr_seen: 1; #ifndef RIPPER /* Ruby core only */ - NODE *parser_eval_tree_begin; - NODE *parser_eval_tree; - VALUE debug_lines; - VALUE coverage; - int nerr; - int parser_token_info_enabled; - token_info *parser_token_info; + unsigned int do_print: 1; + unsigned int do_loop: 1; + unsigned int do_chomp: 1; + unsigned int do_split: 1; + unsigned int keep_script_lines: 1; + + NODE *eval_tree_begin; + NODE *eval_tree; + VALUE error_buffer; + VALUE debug_lines; + const struct rb_iseq_struct *parent_iseq; #else /* Ripper only */ - VALUE parser_ruby_sourcefile_string; - const char *tokp; - VALUE delayed; - int delayed_line; - int delayed_col; + + struct { + VALUE token; + int line; + int col; + } delayed; VALUE value; VALUE result; VALUE parsing_thread; - int toplevel_p; #endif }; -#define UTF8_ENC() (parser->utf8 ? 
parser->utf8 : \ - (parser->utf8 = rb_utf8_encoding())) -#define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc) -#define STR_NEW0() rb_enc_str_new(0,0,parser->enc) -#define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc) -#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc) -#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT) -#define TOK_INTERN(mb) rb_intern3(tok(), toklen(), parser->enc) - -static int parser_yyerror(struct parser_params*, const char*); -#define yyerror(msg) parser_yyerror(parser, (msg)) - -#define YYLEX_PARAM parser - -#define lex_strterm (parser->parser_lex_strterm) -#define lex_state (parser->parser_lex_state) -#define cond_stack (parser->parser_cond_stack) -#define cmdarg_stack (parser->parser_cmdarg_stack) -#define class_nest (parser->parser_class_nest) -#define paren_nest (parser->parser_paren_nest) -#define lpar_beg (parser->parser_lpar_beg) -#define brace_nest (parser->parser_brace_nest) -#define in_single (parser->parser_in_single) -#define in_def (parser->parser_in_def) -#define compile_for_eval (parser->parser_compile_for_eval) -#define cur_mid (parser->parser_cur_mid) -#define in_defined (parser->parser_in_defined) -#define tokenbuf (parser->parser_tokenbuf) -#define tokidx (parser->parser_tokidx) -#define toksiz (parser->parser_toksiz) -#define lex_input (parser->parser_lex_input) -#define lex_lastline (parser->parser_lex_lastline) -#define lex_nextline (parser->parser_lex_nextline) -#define lex_pbeg (parser->parser_lex_pbeg) -#define lex_p (parser->parser_lex_p) -#define lex_pend (parser->parser_lex_pend) -#define heredoc_end (parser->parser_heredoc_end) -#define command_start (parser->parser_command_start) -#define deferred_nodes (parser->parser_deferred_nodes) -#define lex_gets_ptr (parser->parser_lex_gets_ptr) -#define lex_gets (parser->parser_lex_gets) -#define lvtbl (parser->parser_lvtbl) -#define ruby__end__seen (parser->parser_ruby__end__seen) -#define ruby_sourceline 
(parser->parser_ruby_sourceline) -#define ruby_sourcefile (parser->parser_ruby_sourcefile) -#define current_enc (parser->enc) -#define yydebug (parser->parser_yydebug) +#define intern_cstr(n,l,en) rb_intern3(n,l,en) + +#define STR_NEW(ptr,len) rb_enc_str_new((ptr),(len),p->enc) +#define STR_NEW0() rb_enc_str_new(0,0,p->enc) +#define STR_NEW2(ptr) rb_enc_str_new((ptr),strlen(ptr),p->enc) +#define STR_NEW3(ptr,len,e,func) parser_str_new((ptr),(len),(e),(func),p->enc) +#define TOK_INTERN() intern_cstr(tok(p), toklen(p), p->enc) + +static st_table * +push_pvtbl(struct parser_params *p) +{ + st_table *tbl = p->pvtbl; + p->pvtbl = st_init_numtable(); + return tbl; +} + +static void +pop_pvtbl(struct parser_params *p, st_table *tbl) +{ + st_free_table(p->pvtbl); + p->pvtbl = tbl; +} + +static st_table * +push_pktbl(struct parser_params *p) +{ + st_table *tbl = p->pktbl; + p->pktbl = 0; + return tbl; +} + +static void +pop_pktbl(struct parser_params *p, st_table *tbl) +{ + if (p->pktbl) st_free_table(p->pktbl); + p->pktbl = tbl; +} + +RBIMPL_ATTR_NONNULL((1, 2, 3)) +static int parser_yyerror(struct parser_params*, const YYLTYPE *yylloc, const char*); +RBIMPL_ATTR_NONNULL((1, 2)) +static int parser_yyerror0(struct parser_params*, const char*); +#define yyerror0(msg) parser_yyerror0(p, (msg)) +#define yyerror1(loc, msg) parser_yyerror(p, (loc), (msg)) +#define yyerror(yylloc, p, msg) parser_yyerror(p, yylloc, msg) +#define token_flush(ptr) ((ptr)->lex.ptok = (ptr)->lex.pcur) + +static void token_info_setup(token_info *ptinfo, const char *ptr, const rb_code_location_t *loc); +static void token_info_push(struct parser_params*, const char *token, const rb_code_location_t *loc); +static void token_info_pop(struct parser_params*, const char *token, const rb_code_location_t *loc); +static void token_info_warn(struct parser_params *p, const char *token, token_info *ptinfo_beg, int same, const rb_code_location_t *loc); +static void token_info_drop(struct parser_params *p, const char 
*token, rb_code_position_t beg_pos); + +#ifdef RIPPER +#define compile_for_eval (0) +#else +#define compile_for_eval (p->parent_iseq != 0) +#endif + +#define token_column ((int)(p->lex.ptok - p->lex.pbeg)) + +#define CALL_Q_P(q) ((q) == TOKEN2VAL(tANDDOT)) +#define NODE_CALL_Q(q) (CALL_Q_P(q) ? NODE_QCALL : NODE_CALL) +#define NEW_QCALL(q,r,m,a,loc) NEW_NODE(NODE_CALL_Q(q),r,m,a,loc) + +#define lambda_beginning_p() (p->lex.lpar_beg == p->lex.paren_nest) + +#define ANON_BLOCK_ID '&' + +static enum yytokentype yylex(YYSTYPE*, YYLTYPE*, struct parser_params*); + +#ifndef RIPPER +static inline void +rb_discard_node(struct parser_params *p, NODE *n) +{ + rb_ast_delete_node(p->ast, n); +} +#endif + #ifdef RIPPER +static inline VALUE +add_mark_object(struct parser_params *p, VALUE obj) +{ + if (!SPECIAL_CONST_P(obj) + && !RB_TYPE_P(obj, T_NODE) /* Ripper jumbles NODE objects and other objects... */ + ) { + rb_ast_add_mark_object(p->ast, obj); + } + return obj; +} #else -#define ruby_eval_tree (parser->parser_eval_tree) -#define ruby_eval_tree_begin (parser->parser_eval_tree_begin) -#define ruby_debug_lines (parser->debug_lines) -#define ruby_coverage (parser->coverage) +static NODE* node_newnode_with_locals(struct parser_params *, enum node_type, VALUE, VALUE, const rb_code_location_t*); #endif -static int yylex(void*, void*); +static NODE* node_newnode(struct parser_params *, enum node_type, VALUE, VALUE, VALUE, const rb_code_location_t*); +#define rb_node_newnode(type, a1, a2, a3, loc) node_newnode(p, (type), (a1), (a2), (a3), (loc)) + +static NODE *nd_set_loc(NODE *nd, const YYLTYPE *loc); + +static int +parser_get_node_id(struct parser_params *p) +{ + int node_id = p->node_id; + p->node_id++; + return node_id; +} #ifndef RIPPER -#define yyparse ruby_yyparse +static inline void +set_line_body(NODE *body, int line) +{ + if (!body) return; + switch (nd_type(body)) { + case NODE_RESCUE: + case NODE_ENSURE: + nd_set_line(body, line); + } +} -static NODE* 
node_newnode(struct parser_params *, enum node_type, VALUE, VALUE, VALUE); -#define rb_node_newnode(type, a1, a2, a3) node_newnode(parser, (type), (a1), (a2), (a3)) +#define yyparse ruby_yyparse -static NODE *cond_gen(struct parser_params*,NODE*); -#define cond(node) cond_gen(parser, (node)) -static NODE *logop_gen(struct parser_params*,enum node_type,NODE*,NODE*); -#define logop(type,node1,node2) logop_gen(parser, (type), (node1), (node2)) +static NODE* cond(struct parser_params *p, NODE *node, const YYLTYPE *loc); +static NODE* method_cond(struct parser_params *p, NODE *node, const YYLTYPE *loc); +#define new_nil(loc) NEW_NIL(loc) +static NODE *new_nil_at(struct parser_params *p, const rb_code_position_t *pos); +static NODE *new_if(struct parser_params*,NODE*,NODE*,NODE*,const YYLTYPE*); +static NODE *new_unless(struct parser_params*,NODE*,NODE*,NODE*,const YYLTYPE*); +static NODE *logop(struct parser_params*,ID,NODE*,NODE*,const YYLTYPE*,const YYLTYPE*); static NODE *newline_node(NODE*); static void fixpos(NODE*,NODE*); static int value_expr_gen(struct parser_params*,NODE*); -static void void_expr_gen(struct parser_params*,NODE*); +static void void_expr(struct parser_params*,NODE*); static NODE *remove_begin(NODE*); -#define value_expr(node) value_expr_gen(parser, (node) = remove_begin(node)) -#define void_expr0(node) void_expr_gen(parser, (node)) -#define void_expr(node) void_expr0((node) = remove_begin(node)) -static void void_stmts_gen(struct parser_params*,NODE*); -#define void_stmts(node) void_stmts_gen(parser, (node)) -static void reduce_nodes_gen(struct parser_params*,NODE**); -#define reduce_nodes(n) reduce_nodes_gen(parser,(n)) -static void block_dup_check_gen(struct parser_params*,NODE*,NODE*); -#define block_dup_check(n1,n2) block_dup_check_gen(parser,(n1),(n2)) - -static NODE *block_append_gen(struct parser_params*,NODE*,NODE*); -#define block_append(h,t) block_append_gen(parser,(h),(t)) -static NODE *list_append_gen(struct 
parser_params*,NODE*,NODE*); -#define list_append(l,i) list_append_gen(parser,(l),(i)) -static NODE *list_concat_gen(struct parser_params*,NODE*,NODE*); -#define list_concat(h,t) list_concat_gen(parser,(h),(t)) -static NODE *arg_append_gen(struct parser_params*,NODE*,NODE*); -#define arg_append(h,t) arg_append_gen(parser,(h),(t)) -static NODE *arg_concat_gen(struct parser_params*,NODE*,NODE*); -#define arg_concat(h,t) arg_concat_gen(parser,(h),(t)) -static NODE *literal_concat_gen(struct parser_params*,NODE*,NODE*); -#define literal_concat(h,t) literal_concat_gen(parser,(h),(t)) -static int literal_concat0(struct parser_params *, VALUE, VALUE); -static NODE *new_evstr_gen(struct parser_params*,NODE*); -#define new_evstr(n) new_evstr_gen(parser,(n)) -static NODE *evstr2dstr_gen(struct parser_params*,NODE*); -#define evstr2dstr(n) evstr2dstr_gen(parser,(n)) +static NODE *remove_begin_all(NODE*); +#define value_expr(node) value_expr_gen(p, (node)) +static NODE *void_stmts(struct parser_params*,NODE*); +static void reduce_nodes(struct parser_params*,NODE**); +static void block_dup_check(struct parser_params*,NODE*,NODE*); + +static NODE *block_append(struct parser_params*,NODE*,NODE*); +static NODE *list_append(struct parser_params*,NODE*,NODE*); +static NODE *list_concat(NODE*,NODE*); +static NODE *arg_append(struct parser_params*,NODE*,NODE*,const YYLTYPE*); +static NODE *last_arg_append(struct parser_params *p, NODE *args, NODE *last_arg, const YYLTYPE *loc); +static NODE *rest_arg_append(struct parser_params *p, NODE *args, NODE *rest_arg, const YYLTYPE *loc); +static NODE *literal_concat(struct parser_params*,NODE*,NODE*,const YYLTYPE*); +static NODE *new_evstr(struct parser_params*,NODE*,const YYLTYPE*); +static NODE *new_dstr(struct parser_params*,NODE*,const YYLTYPE*); +static NODE *evstr2dstr(struct parser_params*,NODE*); static NODE *splat_array(NODE*); +static void mark_lvar_used(struct parser_params *p, NODE *rhs); + +static NODE *call_bin_op(struct 
parser_params*,NODE*,ID,NODE*,const YYLTYPE*,const YYLTYPE*); +static NODE *call_uni_op(struct parser_params*,NODE*,ID,const YYLTYPE*,const YYLTYPE*); +static NODE *new_qcall(struct parser_params* p, ID atype, NODE *recv, ID mid, NODE *args, const YYLTYPE *op_loc, const YYLTYPE *loc); +static NODE *new_command_qcall(struct parser_params* p, ID atype, NODE *recv, ID mid, NODE *args, NODE *block, const YYLTYPE *op_loc, const YYLTYPE *loc); +static NODE *method_add_block(struct parser_params*p, NODE *m, NODE *b, const YYLTYPE *loc) {b->nd_iter = m; b->nd_loc = *loc; return b;} + +static bool args_info_empty_p(struct rb_args_info *args); +static NODE *new_args(struct parser_params*,NODE*,NODE*,ID,NODE*,NODE*,const YYLTYPE*); +static NODE *new_args_tail(struct parser_params*,NODE*,ID,ID,const YYLTYPE*); +static NODE *new_array_pattern(struct parser_params *p, NODE *constant, NODE *pre_arg, NODE *aryptn, const YYLTYPE *loc); +static NODE *new_array_pattern_tail(struct parser_params *p, NODE *pre_args, int has_rest, ID rest_arg, NODE *post_args, const YYLTYPE *loc); +static NODE *new_find_pattern(struct parser_params *p, NODE *constant, NODE *fndptn, const YYLTYPE *loc); +static NODE *new_find_pattern_tail(struct parser_params *p, ID pre_rest_arg, NODE *args, ID post_rest_arg, const YYLTYPE *loc); +static NODE *new_hash_pattern(struct parser_params *p, NODE *constant, NODE *hshptn, const YYLTYPE *loc); +static NODE *new_hash_pattern_tail(struct parser_params *p, NODE *kw_args, ID kw_rest_arg, const YYLTYPE *loc); + +static NODE *new_kw_arg(struct parser_params *p, NODE *k, const YYLTYPE *loc); +static NODE *args_with_numbered(struct parser_params*,NODE*,int); + +static VALUE negate_lit(struct parser_params*, VALUE); +static NODE *ret_args(struct parser_params*,NODE*); +static NODE *arg_blk_pass(NODE*,NODE*); +static NODE *new_yield(struct parser_params*,NODE*,const YYLTYPE*); +static NODE *dsym_node(struct parser_params*,NODE*,const YYLTYPE*); -static NODE 
*call_bin_op_gen(struct parser_params*,NODE*,ID,NODE*); -#define call_bin_op(recv,id,arg1) call_bin_op_gen(parser, (recv),(id),(arg1)) -static NODE *call_uni_op_gen(struct parser_params*,NODE*,ID); -#define call_uni_op(recv,id) call_uni_op_gen(parser, (recv),(id)) +static NODE *gettable(struct parser_params*,ID,const YYLTYPE*); +static NODE *assignable(struct parser_params*,ID,NODE*,const YYLTYPE*); -static NODE *new_args_gen(struct parser_params*,NODE*,NODE*,ID,NODE*,NODE*); -#define new_args(f,o,r,p,t) new_args_gen(parser, (f),(o),(r),(p),(t)) -static NODE *new_args_tail_gen(struct parser_params*,NODE*,ID,ID); -#define new_args_tail(k,kr,b) new_args_tail_gen(parser, (k),(kr),(b)) +static NODE *aryset(struct parser_params*,NODE*,NODE*,const YYLTYPE*); +static NODE *attrset(struct parser_params*,NODE*,ID,ID,const YYLTYPE*); -static NODE *negate_lit(NODE*); -static NODE *ret_args_gen(struct parser_params*,NODE*); -#define ret_args(node) ret_args_gen(parser, (node)) -static NODE *arg_blk_pass(NODE*,NODE*); -static NODE *new_yield_gen(struct parser_params*,NODE*); -#define new_yield(node) new_yield_gen(parser, (node)) -static NODE *dsym_node_gen(struct parser_params*,NODE*); -#define dsym_node(node) dsym_node_gen(parser, (node)) - -static NODE *gettable_gen(struct parser_params*,ID); -#define gettable(id) gettable_gen(parser,(id)) -static NODE *assignable_gen(struct parser_params*,ID,NODE*); -#define assignable(id,node) assignable_gen(parser, (id), (node)) - -static NODE *aryset_gen(struct parser_params*,NODE*,NODE*); -#define aryset(node1,node2) aryset_gen(parser, (node1), (node2)) -static NODE *attrset_gen(struct parser_params*,NODE*,ID); -#define attrset(node,id) attrset_gen(parser, (node), (id)) - -static void rb_backref_error_gen(struct parser_params*,NODE*); -#define rb_backref_error(n) rb_backref_error_gen(parser,(n)) -static NODE *node_assign_gen(struct parser_params*,NODE*,NODE*); -#define node_assign(node1, node2) node_assign_gen(parser, (node1), (node2)) - 
-static NODE *new_op_assign_gen(struct parser_params *parser, NODE *lhs, ID op, NODE *rhs); -static NODE *new_attr_op_assign_gen(struct parser_params *parser, NODE *lhs, ID attr, ID op, NODE *rhs); -#define new_attr_op_assign(lhs, type, attr, op, rhs) new_attr_op_assign_gen(parser, (lhs), (attr), (op), (rhs)) - -static NODE *match_op_gen(struct parser_params*,NODE*,NODE*); -#define match_op(node1,node2) match_op_gen(parser, (node1), (node2)) - -static ID *local_tbl_gen(struct parser_params*); -#define local_tbl() local_tbl_gen(parser) - -static void fixup_nodes(NODE **); - -static VALUE reg_compile_gen(struct parser_params*, VALUE, int); -#define reg_compile(str,options) reg_compile_gen(parser, (str), (options)) -static void reg_fragment_setenc_gen(struct parser_params*, VALUE, int); -#define reg_fragment_setenc(str,options) reg_fragment_setenc_gen(parser, (str), (options)) -static int reg_fragment_check_gen(struct parser_params*, VALUE, int); -#define reg_fragment_check(str,options) reg_fragment_check_gen(parser, (str), (options)) -static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); -#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match)) +static void rb_backref_error(struct parser_params*,NODE*); +static NODE *node_assign(struct parser_params*,NODE*,NODE*,struct lex_context,const YYLTYPE*); + +static NODE *new_op_assign(struct parser_params *p, NODE *lhs, ID op, NODE *rhs, struct lex_context, const YYLTYPE *loc); +static NODE *new_ary_op_assign(struct parser_params *p, NODE *ary, NODE *args, ID op, NODE *rhs, const YYLTYPE *args_loc, const YYLTYPE *loc); +static NODE *new_attr_op_assign(struct parser_params *p, NODE *lhs, ID atype, ID attr, ID op, NODE *rhs, const YYLTYPE *loc); +static NODE *new_const_op_assign(struct parser_params *p, NODE *lhs, ID op, NODE *rhs, struct lex_context, const YYLTYPE *loc); +static NODE *new_bodystmt(struct parser_params *p, NODE *head, 
NODE *rescue, NODE *rescue_else, NODE *ensure, const YYLTYPE *loc); + +static NODE *const_decl(struct parser_params *p, NODE* path, const YYLTYPE *loc); + +static NODE *opt_arg_append(NODE*, NODE*); +static NODE *kwd_append(NODE*, NODE*); + +static NODE *new_hash(struct parser_params *p, NODE *hash, const YYLTYPE *loc); +static NODE *new_unique_key_hash(struct parser_params *p, NODE *hash, const YYLTYPE *loc); + +static NODE *new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc); + +static NODE *new_regexp(struct parser_params *, NODE *, int, const YYLTYPE *); + +#define make_list(list, loc) ((list) ? (nd_set_loc(list, loc), list) : NEW_ZLIST(loc)) + +static NODE *new_xstring(struct parser_params *, NODE *, const YYLTYPE *loc); + +static NODE *symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol); + +static NODE *match_op(struct parser_params*,NODE*,NODE*,const YYLTYPE*,const YYLTYPE*); + +static rb_ast_id_table_t *local_tbl(struct parser_params*); + +static VALUE reg_compile(struct parser_params*, VALUE, int); +static void reg_fragment_setenc(struct parser_params*, VALUE, int); +static int reg_fragment_check(struct parser_params*, VALUE, int); +static NODE *reg_named_capture_assign(struct parser_params* p, VALUE regexp, const YYLTYPE *loc); + +static int literal_concat0(struct parser_params *p, VALUE head, VALUE tail); +static NODE *heredoc_dedent(struct parser_params*,NODE*); + +static void check_literal_when(struct parser_params *p, NODE *args, const YYLTYPE *loc); #define get_id(id) (id) #define get_value(val) (val) -#else +#define get_num(num) (num) +#else /* RIPPER */ +#define NODE_RIPPER NODE_CDECL +#define NEW_RIPPER(a,b,c,loc) (VALUE)NEW_CDECL(a,b,c,loc) + +static inline int ripper_is_node_yylval(VALUE n); + +static inline VALUE +ripper_new_yylval(struct parser_params *p, ID a, VALUE b, VALUE c) +{ + if (ripper_is_node_yylval(c)) c = RNODE(c)->nd_cval; + add_mark_object(p, b); + add_mark_object(p, c); + return NEW_RIPPER(a, b, 
c, &NULL_LOC); +} + +static inline int +ripper_is_node_yylval(VALUE n) +{ + return RB_TYPE_P(n, T_NODE) && nd_type_p(RNODE(n), NODE_RIPPER); +} + #define value_expr(node) ((void)(node)) #define remove_begin(node) (node) -#define rb_dvar_defined(id) 0 -#define rb_local_defined(id) 0 +#define void_stmts(p,x) (x) +#define rb_dvar_defined(id, base) 0 +#define rb_local_defined(id, base) 0 static ID ripper_get_id(VALUE); #define get_id(id) ripper_get_id(id) static VALUE ripper_get_value(VALUE); #define get_value(val) ripper_get_value(val) -static VALUE assignable_gen(struct parser_params*,VALUE); -#define assignable(lhs,node) assignable_gen(parser, (lhs)) -static int id_is_var_gen(struct parser_params *parser, ID id); -#define id_is_var(id) id_is_var_gen(parser, (id)) +#define get_num(num) (int)get_id(num) +static VALUE assignable(struct parser_params*,VALUE); +static int id_is_var(struct parser_params *p, ID id); + +#define method_cond(p,node,loc) (node) +#define call_bin_op(p, recv,id,arg1,op_loc,loc) dispatch3(binary, (recv), STATIC_ID2SYM(id), (arg1)) +#define match_op(p,node1,node2,op_loc,loc) call_bin_op(0, (node1), idEqTilde, (node2), op_loc, loc) +#define call_uni_op(p, recv,id,op_loc,loc) dispatch2(unary, STATIC_ID2SYM(id), (recv)) +#define logop(p,id,node1,node2,op_loc,loc) call_bin_op(0, (node1), (id), (node2), op_loc, loc) + +#define new_nil(loc) Qnil -#define node_assign(node1, node2) dispatch2(assign, (node1), (node2)) +static VALUE new_regexp(struct parser_params *, VALUE, VALUE, const YYLTYPE *); -static VALUE new_op_assign_gen(struct parser_params *parser, VALUE lhs, VALUE op, VALUE rhs); -static VALUE new_attr_op_assign_gen(struct parser_params *parser, VALUE lhs, VALUE type, VALUE attr, VALUE op, VALUE rhs); -#define new_attr_op_assign(lhs, type, attr, op, rhs) new_attr_op_assign_gen(parser, (lhs), (type), (attr), (op), (rhs)) +static VALUE const_decl(struct parser_params *p, VALUE path); +static VALUE var_field(struct parser_params *p, VALUE a); 
+static VALUE assign_error(struct parser_params *p, const char *mesg, VALUE a); + +static VALUE parser_reg_compile(struct parser_params*, VALUE, int, VALUE *); + +static VALUE backref_error(struct parser_params*, NODE *, VALUE); #endif /* !RIPPER */ -#define new_op_assign(lhs, op, rhs) new_op_assign_gen(parser, (lhs), (op), (rhs)) - -static ID formal_argument_gen(struct parser_params*, ID); -#define formal_argument(id) formal_argument_gen(parser, (id)) -static ID shadowing_lvar_gen(struct parser_params*,ID); -#define shadowing_lvar(name) shadowing_lvar_gen(parser, (name)) -static void new_bv_gen(struct parser_params*,ID); -#define new_bv(id) new_bv_gen(parser, (id)) - -static void local_push_gen(struct parser_params*,int); -#define local_push(top) local_push_gen(parser,(top)) -static void local_pop_gen(struct parser_params*); -#define local_pop() local_pop_gen(parser) -static int local_var_gen(struct parser_params*, ID); -#define local_var(id) local_var_gen(parser, (id)); -static int arg_var_gen(struct parser_params*, ID); -#define arg_var(id) arg_var_gen(parser, (id)) -static int local_id_gen(struct parser_params*, ID); -#define local_id(id) local_id_gen(parser, (id)) -static ID internal_id_gen(struct parser_params*); -#define internal_id() internal_id_gen(parser) - -static const struct vtable *dyna_push_gen(struct parser_params *); -#define dyna_push() dyna_push_gen(parser) -static void dyna_pop_gen(struct parser_params*, const struct vtable *); -#define dyna_pop(node) dyna_pop_gen(parser, (node)) -static int dyna_in_block_gen(struct parser_params*); -#define dyna_in_block() dyna_in_block_gen(parser) -#define dyna_var(id) local_var(id) -static int dvar_defined_gen(struct parser_params*,ID,int); -#define dvar_defined(id) dvar_defined_gen(parser, (id), 0) -#define dvar_defined_get(id) dvar_defined_gen(parser, (id), 1) -static int dvar_curr_gen(struct parser_params*,ID); -#define dvar_curr(id) dvar_curr_gen(parser, (id)) - -static int lvar_defined_gen(struct 
parser_params*, ID); -#define lvar_defined(id) lvar_defined_gen(parser, (id)) +/* forward declaration */ +typedef struct rb_strterm_heredoc_struct rb_strterm_heredoc_t; + +RUBY_SYMBOL_EXPORT_BEGIN +VALUE rb_parser_reg_compile(struct parser_params* p, VALUE str, int options); +int rb_reg_fragment_setenc(struct parser_params*, VALUE, int); +enum lex_state_e rb_parser_trace_lex_state(struct parser_params *, enum lex_state_e, enum lex_state_e, int); +VALUE rb_parser_lex_state_name(enum lex_state_e state); +void rb_parser_show_bitstack(struct parser_params *, stack_type, const char *, int); +PRINTF_ARGS(void rb_parser_fatal(struct parser_params *p, const char *fmt, ...), 2, 3); +YYLTYPE *rb_parser_set_location_from_strterm_heredoc(struct parser_params *p, rb_strterm_heredoc_t *here, YYLTYPE *yylloc); +YYLTYPE *rb_parser_set_location_of_none(struct parser_params *p, YYLTYPE *yylloc); +YYLTYPE *rb_parser_set_location(struct parser_params *p, YYLTYPE *yylloc); +RUBY_SYMBOL_EXPORT_END + +static void error_duplicate_pattern_variable(struct parser_params *p, ID id, const YYLTYPE *loc); +static void error_duplicate_pattern_key(struct parser_params *p, ID id, const YYLTYPE *loc); +#ifndef RIPPER +static ID formal_argument(struct parser_params*, ID); +#else +static ID formal_argument(struct parser_params*, VALUE); +#endif +static ID shadowing_lvar(struct parser_params*,ID); +static void new_bv(struct parser_params*,ID); + +static void local_push(struct parser_params*,int); +static void local_pop(struct parser_params*); +static void local_var(struct parser_params*, ID); +static void arg_var(struct parser_params*, ID); +static int local_id(struct parser_params *p, ID id); +static int local_id_ref(struct parser_params*, ID, ID **); +#ifndef RIPPER +static ID internal_id(struct parser_params*); +static NODE *new_args_forward_call(struct parser_params*, NODE*, const YYLTYPE*, const YYLTYPE*); +#endif +static int check_forwarding_args(struct parser_params*); +static void 
add_forwarding_args(struct parser_params *p); + +static const struct vtable *dyna_push(struct parser_params *); +static void dyna_pop(struct parser_params*, const struct vtable *); +static int dyna_in_block(struct parser_params*); +#define dyna_var(p, id) local_var(p, id) +static int dvar_defined(struct parser_params*, ID); +static int dvar_defined_ref(struct parser_params*, ID, ID**); +static int dvar_curr(struct parser_params*,ID); + +static int lvar_defined(struct parser_params*, ID); + +static NODE *numparam_push(struct parser_params *p); +static void numparam_pop(struct parser_params *p, NODE *prev_inner); + +#ifdef RIPPER +# define METHOD_NOT idNOT +#else +# define METHOD_NOT '!' +#endif + +#define idFWD_REST '*' +#ifdef RUBY3_KEYWORDS +#define idFWD_KWREST idPow /* Use simple "**", as tDSTAR is "**arg" */ +#else +#define idFWD_KWREST 0 +#endif +#define idFWD_BLOCK '&' #define RE_OPTION_ONCE (1<<16) #define RE_OPTION_ENCODING_SHIFT 8 @@ -522,23 +715,91 @@ static int lvar_defined_gen(struct parser_params*, ID); #define RE_OPTION_MASK 0xff #define RE_OPTION_ARG_ENCODING_NONE 32 -#define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ -#define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ -#define SIGN_EXTEND(x,n) (((1<<(n)-1)^((x)&~(~0<<(n))))-(1<<(n)-1)) -#define nd_func u1.id -#if SIZEOF_SHORT == 2 -#define nd_term(node) ((signed short)(node)->u2.id) +/* structs for managing the terminator of a string literal and of a heredoc */ +typedef struct rb_strterm_literal_struct { + union { + VALUE dummy; + long nest; + } u0; + union { + VALUE dummy; + long func; /* STR_FUNC_* (e.g., STR_FUNC_ESCAPE and STR_FUNC_EXPAND) */ + } u1; + union { + VALUE dummy; + long paren; /* '(' of `%q(...)` */ + } u2; + union { + VALUE dummy; + long term; /* ')' of `%q(...)` */ + } u3; +} rb_strterm_literal_t; + +#define HERETERM_LENGTH_BITS ((SIZEOF_VALUE - 1) * CHAR_BIT - 1) + +struct rb_strterm_heredoc_struct { + VALUE lastline; /* the string of the line that contains `<<"END"` */ + long offset; /* the 
column of END in `<<"END"` */ + int sourceline; /* lineno of the line that contains `<<"END"` */ + unsigned length /* the length of END in `<<"END"` */ +#if HERETERM_LENGTH_BITS < SIZEOF_INT * CHAR_BIT + : HERETERM_LENGTH_BITS +# define HERETERM_LENGTH_MAX ((1U << HERETERM_LENGTH_BITS) - 1) +#else +# define HERETERM_LENGTH_MAX UINT_MAX +#endif + ; +#if HERETERM_LENGTH_BITS < SIZEOF_INT * CHAR_BIT + unsigned quote: 1; + unsigned func: 8; #else -#define nd_term(node) SIGN_EXTEND((node)->u2.id, CHAR_BIT*2) + uint8_t quote; + uint8_t func; +#endif +}; +STATIC_ASSERT(rb_strterm_heredoc_t, sizeof(rb_strterm_heredoc_t) <= 4 * SIZEOF_VALUE); + +#define STRTERM_HEREDOC IMEMO_FL_USER0 + +struct rb_strterm_struct { + VALUE flags; + union { + rb_strterm_literal_t literal; + rb_strterm_heredoc_t heredoc; + } u; +}; + +#ifndef RIPPER +void +rb_strterm_mark(VALUE obj) +{ + rb_strterm_t *strterm = (rb_strterm_t*)obj; + if (RBASIC(obj)->flags & STRTERM_HEREDOC) { + rb_strterm_heredoc_t *heredoc = &strterm->u.heredoc; + rb_gc_mark(heredoc->lastline); + } +} #endif -#define nd_paren(node) (char)((node)->u2.id >> CHAR_BIT*2) -#define nd_nest u3.cnt + +#define yytnamerr(yyres, yystr) (YYSIZE_T)rb_yytnamerr(p, yyres, yystr) +size_t rb_yytnamerr(struct parser_params *p, char *yyres, const char *yystr); + +#define TOKEN2ID(tok) ( \ + tTOKEN_LOCAL_BEGIN<(tok)&&(tok)<tTOKEN_LOCAL_END ? TOKEN2LOCALID(tok) : \ + tTOKEN_INSTANCE_BEGIN<(tok)&&(tok)<tTOKEN_INSTANCE_END ? TOKEN2INSTANCEID(tok) : \ + tTOKEN_GLOBAL_BEGIN<(tok)&&(tok)<tTOKEN_GLOBAL_END ? TOKEN2GLOBALID(tok) : \ + tTOKEN_CONST_BEGIN<(tok)&&(tok)<tTOKEN_CONST_END ? TOKEN2CONSTID(tok) : \ + tTOKEN_CLASS_BEGIN<(tok)&&(tok)<tTOKEN_CLASS_END ? TOKEN2CLASSID(tok) : \ + tTOKEN_ATTRSET_BEGIN<(tok)&&(tok)<tTOKEN_ATTRSET_END ? 
TOKEN2ATTRSETID(tok) : \ + ((tok) / ((tok)<tPRESERVED_ID_END && ((tok)>=128 || rb_ispunct(tok))))) /****** Ripper *******/ #ifdef RIPPER #define RIPPER_VERSION "0.1.0" +static inline VALUE intern_sym(const char *name); + #include "eventids1.c" #include "eventids2.c" @@ -549,234 +810,441 @@ static VALUE ripper_dispatch3(struct parser_params*,ID,VALUE,VALUE,VALUE); static VALUE ripper_dispatch4(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE); static VALUE ripper_dispatch5(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE,VALUE); static VALUE ripper_dispatch7(struct parser_params*,ID,VALUE,VALUE,VALUE,VALUE,VALUE,VALUE,VALUE); +static void ripper_error(struct parser_params *p); -#define dispatch0(n) ripper_dispatch0(parser, TOKEN_PASTE(ripper_id_, n)) -#define dispatch1(n,a) ripper_dispatch1(parser, TOKEN_PASTE(ripper_id_, n), (a)) -#define dispatch2(n,a,b) ripper_dispatch2(parser, TOKEN_PASTE(ripper_id_, n), (a), (b)) -#define dispatch3(n,a,b,c) ripper_dispatch3(parser, TOKEN_PASTE(ripper_id_, n), (a), (b), (c)) -#define dispatch4(n,a,b,c,d) ripper_dispatch4(parser, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d)) -#define dispatch5(n,a,b,c,d,e) ripper_dispatch5(parser, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d), (e)) -#define dispatch7(n,a,b,c,d,e,f,g) ripper_dispatch7(parser, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d), (e), (f), (g)) +#define dispatch0(n) ripper_dispatch0(p, TOKEN_PASTE(ripper_id_, n)) +#define dispatch1(n,a) ripper_dispatch1(p, TOKEN_PASTE(ripper_id_, n), (a)) +#define dispatch2(n,a,b) ripper_dispatch2(p, TOKEN_PASTE(ripper_id_, n), (a), (b)) +#define dispatch3(n,a,b,c) ripper_dispatch3(p, TOKEN_PASTE(ripper_id_, n), (a), (b), (c)) +#define dispatch4(n,a,b,c,d) ripper_dispatch4(p, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d)) +#define dispatch5(n,a,b,c,d,e) ripper_dispatch5(p, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d), (e)) +#define dispatch7(n,a,b,c,d,e,f,g) ripper_dispatch7(p, TOKEN_PASTE(ripper_id_, n), (a), (b), (c), (d), (e), 
(f), (g)) #define yyparse ripper_yyparse -#define ripper_intern(s) ID2SYM(rb_intern(s)) -static VALUE ripper_id2sym(ID); -#ifdef __GNUC__ -#define ripper_id2sym(id) ((id) < 256 && rb_ispunct(id) ? \ - ID2SYM(id) : ripper_id2sym(id)) -#endif - -#define arg_new() dispatch0(args_new) -#define arg_add(l,a) dispatch2(args_add, (l), (a)) -#define arg_add_star(l,a) dispatch2(args_add_star, (l), (a)) -#define arg_add_block(l,b) dispatch2(args_add_block, (l), (b)) -#define arg_add_optblock(l,b) ((b)==Qundef? (l) : dispatch2(args_add_block, (l), (b))) -#define bare_assoc(v) dispatch1(bare_assoc_hash, (v)) -#define arg_add_assocs(l,b) arg_add((l), bare_assoc(b)) - -#define args2mrhs(a) dispatch1(mrhs_new_from_args, (a)) -#define mrhs_new() dispatch0(mrhs_new) -#define mrhs_add(l,a) dispatch2(mrhs_add, (l), (a)) -#define mrhs_add_star(l,a) dispatch2(mrhs_add_star, (l), (a)) - -#define mlhs_new() dispatch0(mlhs_new) -#define mlhs_add(l,a) dispatch2(mlhs_add, (l), (a)) -#define mlhs_add_star(l,a) dispatch2(mlhs_add_star, (l), (a)) +#define ID2VAL(id) STATIC_ID2SYM(id) +#define TOKEN2VAL(t) ID2VAL(TOKEN2ID(t)) +#define KWD2EID(t, v) ripper_new_yylval(p, keyword_##t, get_value(v), 0) #define params_new(pars, opts, rest, pars2, kws, kwrest, blk) \ dispatch7(params, (pars), (opts), (rest), (pars2), (kws), (kwrest), (blk)) -#define blockvar_new(p,v) dispatch2(block_var, (p), (v)) -#define blockvar_add_star(l,a) dispatch2(block_var_add_star, (l), (a)) -#define blockvar_add_block(l,a) dispatch2(block_var_add_block, (l), (a)) - -#define method_optarg(m,a) ((a)==Qundef ? (m) : dispatch2(method_add_arg,(m),(a))) -#define method_arg(m,a) dispatch2(method_add_arg,(m),(a)) -#define method_add_block(m,b) dispatch2(method_add_block, (m), (b)) - #define escape_Qundef(x) ((x)==Qundef ? 
Qnil : (x)) static inline VALUE -new_args_gen(struct parser_params *parser, VALUE f, VALUE o, VALUE r, VALUE p, VALUE tail) +new_args(struct parser_params *p, VALUE pre_args, VALUE opt_args, VALUE rest_arg, VALUE post_args, VALUE tail, YYLTYPE *loc) { NODE *t = (NODE *)tail; - VALUE k = t->u1.value, kr = t->u2.value, b = t->u3.value; - return params_new(f, o, r, p, k, kr, escape_Qundef(b)); + VALUE kw_args = t->u1.value, kw_rest_arg = t->u2.value, block = t->u3.value; + return params_new(pre_args, opt_args, rest_arg, post_args, kw_args, kw_rest_arg, escape_Qundef(block)); } -#define new_args(f,o,r,p,t) new_args_gen(parser, (f),(o),(r),(p),(t)) static inline VALUE -new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b) +new_args_tail(struct parser_params *p, VALUE kw_args, VALUE kw_rest_arg, VALUE block, YYLTYPE *loc) +{ + NODE *t = rb_node_newnode(NODE_ARGS_AUX, kw_args, kw_rest_arg, block, &NULL_LOC); + add_mark_object(p, kw_args); + add_mark_object(p, kw_rest_arg); + add_mark_object(p, block); + return (VALUE)t; +} + +static inline VALUE +args_with_numbered(struct parser_params *p, VALUE args, int max_numparam) +{ + return args; +} + +static VALUE +new_array_pattern(struct parser_params *p, VALUE constant, VALUE pre_arg, VALUE aryptn, const YYLTYPE *loc) +{ + NODE *t = (NODE *)aryptn; + VALUE pre_args = t->u1.value, rest_arg = t->u2.value, post_args = t->u3.value; + + if (!NIL_P(pre_arg)) { + if (!NIL_P(pre_args)) { + rb_ary_unshift(pre_args, pre_arg); + } + else { + pre_args = rb_ary_new_from_args(1, pre_arg); + } + } + return dispatch4(aryptn, constant, pre_args, rest_arg, post_args); +} + +static VALUE +new_array_pattern_tail(struct parser_params *p, VALUE pre_args, VALUE has_rest, VALUE rest_arg, VALUE post_args, const YYLTYPE *loc) +{ + NODE *t; + + if (has_rest) { + rest_arg = dispatch1(var_field, rest_arg ? 
rest_arg : Qnil); + } + else { + rest_arg = Qnil; + } + + t = rb_node_newnode(NODE_ARYPTN, pre_args, rest_arg, post_args, &NULL_LOC); + add_mark_object(p, pre_args); + add_mark_object(p, rest_arg); + add_mark_object(p, post_args); + return (VALUE)t; +} + +static VALUE +new_find_pattern(struct parser_params *p, VALUE constant, VALUE fndptn, const YYLTYPE *loc) +{ + NODE *t = (NODE *)fndptn; + VALUE pre_rest_arg = t->u1.value, args = t->u2.value, post_rest_arg = t->u3.value; + + return dispatch4(fndptn, constant, pre_rest_arg, args, post_rest_arg); +} + +static VALUE +new_find_pattern_tail(struct parser_params *p, VALUE pre_rest_arg, VALUE args, VALUE post_rest_arg, const YYLTYPE *loc) +{ + NODE *t; + + pre_rest_arg = dispatch1(var_field, pre_rest_arg ? pre_rest_arg : Qnil); + post_rest_arg = dispatch1(var_field, post_rest_arg ? post_rest_arg : Qnil); + + t = rb_node_newnode(NODE_FNDPTN, pre_rest_arg, args, post_rest_arg, &NULL_LOC); + add_mark_object(p, pre_rest_arg); + add_mark_object(p, args); + add_mark_object(p, post_rest_arg); + return (VALUE)t; +} + +#define new_hash(p,h,l) rb_ary_new_from_args(0) + +static VALUE +new_unique_key_hash(struct parser_params *p, VALUE ary, const YYLTYPE *loc) +{ + return ary; +} + +static VALUE +new_hash_pattern(struct parser_params *p, VALUE constant, VALUE hshptn, const YYLTYPE *loc) +{ + NODE *t = (NODE *)hshptn; + VALUE kw_args = t->u1.value, kw_rest_arg = t->u2.value; + return dispatch3(hshptn, constant, kw_args, kw_rest_arg); +} + +static VALUE +new_hash_pattern_tail(struct parser_params *p, VALUE kw_args, VALUE kw_rest_arg, const YYLTYPE *loc) { - return (VALUE)rb_node_newnode(NODE_MEMO, k, kr, b); + NODE *t; + if (kw_rest_arg) { + kw_rest_arg = dispatch1(var_field, kw_rest_arg); + } + else { + kw_rest_arg = Qnil; + } + t = rb_node_newnode(NODE_HSHPTN, kw_args, kw_rest_arg, 0, &NULL_LOC); + + add_mark_object(p, kw_args); + add_mark_object(p, kw_rest_arg); + return (VALUE)t; } -#define new_args_tail(k,kr,b) 
new_args_tail_gen(parser, (k),(kr),(b)) -#define FIXME 0 +#define new_defined(p,expr,loc) dispatch1(defined, (expr)) + +static VALUE heredoc_dedent(struct parser_params*,VALUE); + +#else +#define ID2VAL(id) (id) +#define TOKEN2VAL(t) ID2VAL(t) +#define KWD2EID(t, v) keyword_##t + +static NODE * +set_defun_body(struct parser_params *p, NODE *n, NODE *args, NODE *body, const YYLTYPE *loc) +{ + body = remove_begin(body); + reduce_nodes(p, &body); + n->nd_defn = NEW_SCOPE(args, body, loc); + n->nd_loc = *loc; + nd_set_line(n->nd_defn, loc->end_pos.lineno); + set_line_body(body, loc->beg_pos.lineno); + return n; +} + +static NODE * +rescued_expr(struct parser_params *p, NODE *arg, NODE *rescue, + const YYLTYPE *arg_loc, const YYLTYPE *mod_loc, const YYLTYPE *res_loc) +{ + YYLTYPE loc = code_loc_gen(mod_loc, res_loc); + rescue = NEW_RESBODY(0, remove_begin(rescue), 0, &loc); + loc.beg_pos = arg_loc->beg_pos; + return NEW_RESCUE(arg, rescue, 0, &loc); +} #endif /* RIPPER */ +static void +restore_defun(struct parser_params *p, NODE *name) +{ + YYSTYPE c = {.val = name->nd_cval}; + p->cur_arg = name->nd_vid; + p->ctxt.in_def = c.ctxt.in_def; + p->ctxt.shareable_constant_value = c.ctxt.shareable_constant_value; +} + +static void +endless_method_name(struct parser_params *p, NODE *defn, const YYLTYPE *loc) +{ +#ifdef RIPPER + defn = defn->nd_defn; +#endif + ID mid = defn->nd_mid; + if (is_attrset_id(mid)) { + yyerror1(loc, "setter method cannot be defined in an endless method definition"); + } + token_info_drop(p, "def", loc->beg_pos); +} + #ifndef RIPPER # define Qnone 0 +# define Qnull 0 # define ifndef_ripper(x) (x) #else # define Qnone Qnil +# define Qnull Qundef # define ifndef_ripper(x) #endif -#ifndef RIPPER -# define rb_warn0(fmt) rb_compile_warn(ruby_sourcefile, ruby_sourceline, (fmt)) -# define rb_warnI(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, (fmt), (a)) -# define rb_warnS(fmt,a) rb_compile_warn(ruby_sourcefile, ruby_sourceline, (fmt), (a)) -# 
define rb_warn4S(file,line,fmt,a) rb_compile_warn((file), (line), (fmt), (a)) -# define rb_warning0(fmt) rb_compile_warning(ruby_sourcefile, ruby_sourceline, (fmt)) -# define rb_warningS(fmt,a) rb_compile_warning(ruby_sourcefile, ruby_sourceline, (fmt), (a)) -#else -# define rb_warn0(fmt) ripper_warn0(parser, (fmt)) -# define rb_warnI(fmt,a) ripper_warnI(parser, (fmt), (a)) -# define rb_warnS(fmt,a) ripper_warnS(parser, (fmt), (a)) -# define rb_warn4S(file,line,fmt,a) ripper_warnS(parser, (fmt), (a)) -# define rb_warning0(fmt) ripper_warning0(parser, (fmt)) -# define rb_warningS(fmt,a) ripper_warningS(parser, (fmt), (a)) -static void ripper_warn0(struct parser_params*, const char*); -static void ripper_warnI(struct parser_params*, const char*, int); -static void ripper_warnS(struct parser_params*, const char*, const char*); -static void ripper_warning0(struct parser_params*, const char*); -static void ripper_warningS(struct parser_params*, const char*, const char*); -#endif - +# define rb_warn0(fmt) WARN_CALL(WARN_ARGS(fmt, 1)) +# define rb_warn1(fmt,a) WARN_CALL(WARN_ARGS(fmt, 2), (a)) +# define rb_warn2(fmt,a,b) WARN_CALL(WARN_ARGS(fmt, 3), (a), (b)) +# define rb_warn3(fmt,a,b,c) WARN_CALL(WARN_ARGS(fmt, 4), (a), (b), (c)) +# define rb_warn4(fmt,a,b,c,d) WARN_CALL(WARN_ARGS(fmt, 5), (a), (b), (c), (d)) +# define rb_warning0(fmt) WARNING_CALL(WARNING_ARGS(fmt, 1)) +# define rb_warning1(fmt,a) WARNING_CALL(WARNING_ARGS(fmt, 2), (a)) +# define rb_warning2(fmt,a,b) WARNING_CALL(WARNING_ARGS(fmt, 3), (a), (b)) +# define rb_warning3(fmt,a,b,c) WARNING_CALL(WARNING_ARGS(fmt, 4), (a), (b), (c)) +# define rb_warning4(fmt,a,b,c,d) WARNING_CALL(WARNING_ARGS(fmt, 5), (a), (b), (c), (d)) +# define rb_warn0L(l,fmt) WARN_CALL(WARN_ARGS_L(l, fmt, 1)) +# define rb_warn1L(l,fmt,a) WARN_CALL(WARN_ARGS_L(l, fmt, 2), (a)) +# define rb_warn2L(l,fmt,a,b) WARN_CALL(WARN_ARGS_L(l, fmt, 3), (a), (b)) +# define rb_warn3L(l,fmt,a,b,c) WARN_CALL(WARN_ARGS_L(l, fmt, 4), (a), (b), (c)) +# 
define rb_warn4L(l,fmt,a,b,c,d) WARN_CALL(WARN_ARGS_L(l, fmt, 5), (a), (b), (c), (d)) +# define rb_warning0L(l,fmt) WARNING_CALL(WARNING_ARGS_L(l, fmt, 1)) +# define rb_warning1L(l,fmt,a) WARNING_CALL(WARNING_ARGS_L(l, fmt, 2), (a)) +# define rb_warning2L(l,fmt,a,b) WARNING_CALL(WARNING_ARGS_L(l, fmt, 3), (a), (b)) +# define rb_warning3L(l,fmt,a,b,c) WARNING_CALL(WARNING_ARGS_L(l, fmt, 4), (a), (b), (c)) +# define rb_warning4L(l,fmt,a,b,c,d) WARNING_CALL(WARNING_ARGS_L(l, fmt, 5), (a), (b), (c), (d)) #ifdef RIPPER -static void ripper_compile_error(struct parser_params*, const char *fmt, ...); -# define rb_compile_error ripper_compile_error +static ID id_warn, id_warning, id_gets, id_assoc; +# define ERR_MESG() STR_NEW2(mesg) /* to bypass Ripper DSL */ +# define WARN_S_L(s,l) STR_NEW(s,l) +# define WARN_S(s) STR_NEW2(s) +# define WARN_I(i) INT2NUM(i) +# define WARN_ID(i) rb_id2str(i) +# define WARN_IVAL(i) i +# define PRIsWARN "s" +# define rb_warn0L_experimental(l,fmt) WARN_CALL(WARN_ARGS_L(l, fmt, 1)) +# define WARN_ARGS(fmt,n) p->value, id_warn, n, rb_usascii_str_new_lit(fmt) +# define WARN_ARGS_L(l,fmt,n) WARN_ARGS(fmt,n) +# ifdef HAVE_VA_ARGS_MACRO +# define WARN_CALL(...) rb_funcall(__VA_ARGS__) +# else +# define WARN_CALL rb_funcall +# endif +# define WARNING_ARGS(fmt,n) p->value, id_warning, n, rb_usascii_str_new_lit(fmt) +# define WARNING_ARGS_L(l, fmt,n) WARNING_ARGS(fmt,n) +# ifdef HAVE_VA_ARGS_MACRO +# define WARNING_CALL(...) 
rb_funcall(__VA_ARGS__) +# else +# define WARNING_CALL rb_funcall +# endif +PRINTF_ARGS(static void ripper_compile_error(struct parser_params*, const char *fmt, ...), 2, 3); # define compile_error ripper_compile_error -# define PARSER_ARG parser, #else -# define rb_compile_error rb_compile_error_with_enc -# define compile_error parser->nerr++,rb_compile_error_with_enc -# define PARSER_ARG ruby_sourcefile, ruby_sourceline, current_enc, +# define WARN_S_L(s,l) s +# define WARN_S(s) s +# define WARN_I(i) i +# define WARN_ID(i) rb_id2name(i) +# define WARN_IVAL(i) NUM2INT(i) +# define PRIsWARN PRIsVALUE +# define WARN_ARGS(fmt,n) WARN_ARGS_L(p->ruby_sourceline,fmt,n) +# define WARN_ARGS_L(l,fmt,n) p->ruby_sourcefile, (l), (fmt) +# define WARN_CALL rb_compile_warn +# define rb_warn0L_experimental(l,fmt) rb_category_compile_warn(RB_WARN_CATEGORY_EXPERIMENTAL, WARN_ARGS_L(l, fmt, 1)) +# define WARNING_ARGS(fmt,n) WARN_ARGS(fmt,n) +# define WARNING_ARGS_L(l,fmt,n) WARN_ARGS_L(l,fmt,n) +# define WARNING_CALL rb_compile_warning +PRINTF_ARGS(static void parser_compile_error(struct parser_params*, const char *fmt, ...), 2, 3); +# define compile_error parser_compile_error #endif -/* Older versions of Yacc set YYMAXDEPTH to a very low value by default (150, - for instance). This is too low for Ruby to parse some files, such as - date/format.rb, therefore bump the value up to at least Bison's default. */ -#ifdef OLD_YACC -#ifndef YYMAXDEPTH -#define YYMAXDEPTH 10000 +#define WARN_EOL(tok) \ + (looking_at_eol_p(p) ? 
\ + (void)rb_warning0("`" tok "' at the end of line without an expression") : \ + (void)0) +static int looking_at_eol_p(struct parser_params *p); +%} + +%expect 0 +%define api.pure +%define parse.error verbose +%printer { +#ifndef RIPPER + rb_parser_printf(p, "%"PRIsVALUE, rb_id2str($$)); +#else + rb_parser_printf(p, "%"PRIsVALUE, RNODE($$)->nd_rval); #endif +} tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL tOP_ASGN +%printer { +#ifndef RIPPER + rb_parser_printf(p, "%+"PRIsVALUE, $$->nd_lit); +#else + rb_parser_printf(p, "%+"PRIsVALUE, get_value($$)); #endif - +} tINTEGER tFLOAT tRATIONAL tIMAGINARY tSTRING_CONTENT tCHAR +%printer { #ifndef RIPPER -static void token_info_push(struct parser_params*, const char *token); -static void token_info_pop(struct parser_params*, const char *token); -#define token_info_push(token) (RTEST(ruby_verbose) ? token_info_push(parser, (token)) : (void)0) -#define token_info_pop(token) (RTEST(ruby_verbose) ? token_info_pop(parser, (token)) : (void)0) + rb_parser_printf(p, "$%ld", $$->nd_nth); #else -#define token_info_push(token) /* nothing */ -#define token_info_pop(token) /* nothing */ + rb_parser_printf(p, "%"PRIsVALUE, $$); #endif -%} +} tNTH_REF +%printer { +#ifndef RIPPER + rb_parser_printf(p, "$%c", (int)$$->nd_nth); +#else + rb_parser_printf(p, "%"PRIsVALUE, $$); +#endif +} tBACK_REF -%pure_parser -%parse-param {struct parser_params *parser} +%lex-param {struct parser_params *p} +%parse-param {struct parser_params *p} +%initial-action +{ + RUBY_SET_YYLLOC_OF_NONE(@$); +}; %union { VALUE val; NODE *node; ID id; int num; + st_table *tbl; const struct vtable *vars; -} - -/*%%%*/ -%token -/*% -%token <val> -%*/ - keyword_class - keyword_module - keyword_def - keyword_undef - keyword_begin - keyword_rescue - keyword_ensure - keyword_end - keyword_if - keyword_unless - keyword_then - keyword_elsif - keyword_else - keyword_case - keyword_when - keyword_while - keyword_until - keyword_for - keyword_break - keyword_next - 
keyword_redo - keyword_retry - keyword_in - keyword_do - keyword_do_cond - keyword_do_block - keyword_do_LAMBDA - keyword_return - keyword_yield - keyword_super - keyword_self - keyword_nil - keyword_true - keyword_false - keyword_and - keyword_or - keyword_not - modifier_if - modifier_unless - modifier_while - modifier_until - modifier_rescue - keyword_alias - keyword_defined - keyword_BEGIN - keyword_END - keyword__LINE__ - keyword__FILE__ - keyword__ENCODING__ - -%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL -%token <node> tINTEGER tFLOAT tSTRING_CONTENT tCHAR -%token <node> tNTH_REF tBACK_REF + struct rb_strterm_struct *strterm; + struct lex_context ctxt; +} + +%token <id> + keyword_class "`class'" + keyword_module "`module'" + keyword_def "`def'" + keyword_undef "`undef'" + keyword_begin "`begin'" + keyword_rescue "`rescue'" + keyword_ensure "`ensure'" + keyword_end "`end'" + keyword_if "`if'" + keyword_unless "`unless'" + keyword_then "`then'" + keyword_elsif "`elsif'" + keyword_else "`else'" + keyword_case "`case'" + keyword_when "`when'" + keyword_while "`while'" + keyword_until "`until'" + keyword_for "`for'" + keyword_break "`break'" + keyword_next "`next'" + keyword_redo "`redo'" + keyword_retry "`retry'" + keyword_in "`in'" + keyword_do "`do'" + keyword_do_cond "`do' for condition" + keyword_do_block "`do' for block" + keyword_do_LAMBDA "`do' for lambda" + keyword_return "`return'" + keyword_yield "`yield'" + keyword_super "`super'" + keyword_self "`self'" + keyword_nil "`nil'" + keyword_true "`true'" + keyword_false "`false'" + keyword_and "`and'" + keyword_or "`or'" + keyword_not "`not'" + modifier_if "`if' modifier" + modifier_unless "`unless' modifier" + modifier_while "`while' modifier" + modifier_until "`until' modifier" + modifier_rescue "`rescue' modifier" + keyword_alias "`alias'" + keyword_defined "`defined?'" + keyword_BEGIN "`BEGIN'" + keyword_END "`END'" + keyword__LINE__ "`__LINE__'" + keyword__FILE__ "`__FILE__'" + 
keyword__ENCODING__ "`__ENCODING__'" + +%token <id> tIDENTIFIER "local variable or method" +%token <id> tFID "method" +%token <id> tGVAR "global variable" +%token <id> tIVAR "instance variable" +%token <id> tCONSTANT "constant" +%token <id> tCVAR "class variable" +%token <id> tLABEL "label" +%token <node> tINTEGER "integer literal" +%token <node> tFLOAT "float literal" +%token <node> tRATIONAL "rational literal" +%token <node> tIMAGINARY "imaginary literal" +%token <node> tCHAR "char literal" +%token <node> tNTH_REF "numbered reference" +%token <node> tBACK_REF "back reference" +%token <node> tSTRING_CONTENT "literal content" %token <num> tREGEXP_END %type <node> singleton strings string string1 xstring regexp %type <node> string_contents xstring_contents regexp_contents string_content %type <node> words symbols symbol_list qwords qsymbols word_list qword_list qsym_list word -%type <node> literal numeric dsym cpath -%type <node> top_compstmt top_stmts top_stmt +%type <node> literal numeric simple_numeric ssym dsym symbol cpath def_name defn_head defs_head +%type <node> top_compstmt top_stmts top_stmt begin_block %type <node> bodystmt compstmt stmts stmt_or_begin stmt expr arg primary command command_call method_call -%type <node> expr_value arg_value primary_value -%type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure +%type <node> expr_value expr_value_do arg_value primary_value fcall rel_expr +%type <node> if_tail opt_else case_body case_args cases opt_rescue exc_list exc_var opt_ensure %type <node> args call_args opt_call_args %type <node> paren_args opt_paren_args args_tail opt_args_tail block_args_tail opt_block_args_tail %type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs -%type <node> command_asgn mrhs superclass block_call block_command +%type <node> command_rhs arg_rhs +%type <node> command_asgn mrhs mrhs_arg superclass block_call block_command %type <node> f_block_optarg f_block_opt -%type <node> 
f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list f_margs +%type <node> f_arglist f_opt_paren_args f_paren_args f_args f_arg f_arg_item +%type <node> f_optarg f_marg f_marg_list f_margs f_rest_marg %type <node> assoc_list assocs assoc undef_list backref string_dvar for_var %type <node> block_param opt_block_param block_param_def f_opt %type <node> f_kwarg f_kw f_block_kwarg f_block_kw %type <node> bv_decls opt_bv_decl bvar -%type <node> lambda f_larglist lambda_body +%type <node> lambda f_larglist lambda_body brace_body do_body %type <node> brace_block cmd_brace_block do_block lhs none fitem %type <node> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner -%type <id> fsym keyword_variable user_variable sym symbol operation operation2 operation3 +%type <node> p_case_body p_cases p_top_expr p_top_expr_body +%type <node> p_expr p_as p_alt p_expr_basic p_find +%type <node> p_args p_args_head p_args_tail p_args_post p_arg +%type <node> p_value p_primitive p_variable p_var_ref p_expr_ref p_const +%type <node> p_kwargs p_kwarg p_kw +%type <id> keyword_variable user_variable sym operation operation2 operation3 %type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg -%type <id> f_kwrest -/*%%%*/ -/*% -%type <val> program reswords then do dot_or_colon -%*/ +%type <id> f_kwrest f_label f_arg_asgn call_op call_op2 reswords relop dot_or_colon +%type <id> p_rest p_kwrest p_kwnorest p_any_kwrest p_kw_label +%type <id> f_no_kwarg f_any_kwrest args_forward excessed_comma nonlocal_var + %type <ctxt> lex_ctxt /* keep <ctxt> in ripper */ %token END_OF_INPUT 0 "end-of-input" +%token <id> '.' 
+/* escaped chars, should be ignored otherwise */ +%token <id> '\\' "backslash" +%token tSP "escaped space" +%token <id> '\t' "escaped horizontal tab" +%token <id> '\f' "escaped form feed" +%token <id> '\r' "escaped carriage return" +%token <id> '\13' "escaped vertical tab" %token tUPLUS RUBY_TOKEN(UPLUS) "unary+" %token tUMINUS RUBY_TOKEN(UMINUS) "unary-" %token tPOW RUBY_TOKEN(POW) "**" @@ -786,19 +1254,22 @@ static void token_info_pop(struct parser_params*, const char *token); %token tNEQ RUBY_TOKEN(NEQ) "!=" %token tGEQ RUBY_TOKEN(GEQ) ">=" %token tLEQ RUBY_TOKEN(LEQ) "<=" -%token tANDOP "&&" -%token tOROP "||" +%token tANDOP RUBY_TOKEN(ANDOP) "&&" +%token tOROP RUBY_TOKEN(OROP) "||" %token tMATCH RUBY_TOKEN(MATCH) "=~" %token tNMATCH RUBY_TOKEN(NMATCH) "!~" %token tDOT2 RUBY_TOKEN(DOT2) ".." %token tDOT3 RUBY_TOKEN(DOT3) "..." +%token tBDOT2 RUBY_TOKEN(BDOT2) "(.." +%token tBDOT3 RUBY_TOKEN(BDOT3) "(..." %token tAREF RUBY_TOKEN(AREF) "[]" %token tASET RUBY_TOKEN(ASET) "[]=" %token tLSHFT RUBY_TOKEN(LSHFT) "<<" %token tRSHFT RUBY_TOKEN(RSHFT) ">>" -%token tCOLON2 "::" +%token <id> tANDDOT RUBY_TOKEN(ANDDOT) "&." +%token <id> tCOLON2 RUBY_TOKEN(COLON2) "::" %token tCOLON3 ":: at EXPR_BEG" -%token <id> tOP_ASGN /* +=, -= etc. */ +%token <id> tOP_ASGN "operator-assignment" /* +=, -= etc. 
*/ %token tASSOC "=>" %token tLPAREN "(" %token tLPAREN_ARG "( arg" @@ -810,8 +1281,17 @@ static void token_info_pop(struct parser_params*, const char *token); %token tDSTAR "**arg" %token tAMPER "&" %token tLAMBDA "->" -%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG tSYMBOLS_BEG tQSYMBOLS_BEG -%token tSTRING_DBEG tSTRING_DEND tSTRING_DVAR tSTRING_END tLAMBEG +%token tSYMBEG "symbol literal" +%token tSTRING_BEG "string literal" +%token tXSTRING_BEG "backtick literal" +%token tREGEXP_BEG "regexp literal" +%token tWORDS_BEG "word list" +%token tQWORDS_BEG "verbatim word list" +%token tSYMBOLS_BEG "symbol list" +%token tQSYMBOLS_BEG "verbatim symbol list" +%token tSTRING_END "terminator" +%token tSTRING_DEND "'}'" +%token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END /* * precedence table @@ -820,14 +1300,14 @@ static void token_info_pop(struct parser_params*, const char *token); %nonassoc tLOWEST %nonassoc tLBRACE_ARG -%nonassoc modifier_if modifier_unless modifier_while modifier_until +%nonassoc modifier_if modifier_unless modifier_while modifier_until keyword_in %left keyword_or keyword_and %right keyword_not %nonassoc keyword_defined %right '=' tOP_ASGN %left modifier_rescue %right '?' 
':' -%nonassoc tDOT2 tDOT3 +%nonassoc tDOT2 tDOT3 tBDOT2 tBDOT3 %left tOROP %left tANDOP %nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH @@ -845,71 +1325,57 @@ static void token_info_pop(struct parser_params*, const char *token); %% program : { - lex_state = EXPR_BEG; - /*%%%*/ - local_push(compile_for_eval || rb_parse_in_main()); - /*% - local_push(0); - %*/ + SET_LEX_STATE(EXPR_BEG); + local_push(p, ifndef_ripper(1)+0); } top_compstmt { /*%%%*/ if ($2 && !compile_for_eval) { + NODE *node = $2; /* last expression should not be void */ - if (nd_type($2) != NODE_BLOCK) void_expr($2); - else { - NODE *node = $2; + if (nd_type_p(node, NODE_BLOCK)) { while (node->nd_next) { node = node->nd_next; } - void_expr(node->nd_head); + node = node->nd_head; } + node = remove_begin(node); + void_expr(p, node); } - ruby_eval_tree = NEW_SCOPE(0, block_append(ruby_eval_tree, $2)); - /*% - $$ = $2; - parser->result = dispatch1(program, $$); - %*/ - local_pop(); + p->eval_tree = NEW_SCOPE(0, block_append(p, p->eval_tree, $2), &@$); + /*% %*/ + /*% ripper[final]: program!($2) %*/ + local_pop(p); } ; top_compstmt : top_stmts opt_terms { - /*%%%*/ - void_stmts($1); - fixup_nodes(&deferred_nodes); - /*% - %*/ - $$ = $1; + $$ = void_stmts(p, $1); } ; top_stmts : none { /*%%%*/ - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch2(stmts_add, dispatch0(stmts_new), - dispatch0(void_stmt)); - %*/ + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: stmts_add!(stmts_new!, void_stmt!) 
%*/ } | top_stmt { /*%%%*/ $$ = newline_node($1); - /*% - $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); - %*/ + /*% %*/ + /*% ripper: stmts_add!(stmts_new!, $1) %*/ } | top_stmts terms top_stmt { /*%%%*/ - $$ = block_append($1, newline_node($3)); - /*% - $$ = dispatch2(stmts_add, $1, $3); - %*/ + $$ = block_append(p, $1, newline_node($3)); + /*% %*/ + /*% ripper: stmts_add!($1, $3) %*/ } | error top_stmt { @@ -918,95 +1384,71 @@ top_stmts : none ; top_stmt : stmt - | keyword_BEGIN + | keyword_BEGIN begin_block { - /*%%%*/ - /* local_push(0); */ - /*% - %*/ + $$ = $2; } - '{' top_compstmt '}' + ; + +begin_block : '{' top_compstmt '}' { /*%%%*/ - ruby_eval_tree_begin = block_append(ruby_eval_tree_begin, - $4); - /* NEW_PREEXE($4)); */ - /* local_pop(); */ - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(BEGIN, $4); - %*/ + p->eval_tree_begin = block_append(p, p->eval_tree_begin, + NEW_BEGIN($2, &@$)); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: BEGIN!($2) %*/ } ; bodystmt : compstmt opt_rescue - opt_else + k_else {if (!$2) {yyerror1(&@3, "else without rescue is useless");}} + compstmt opt_ensure { /*%%%*/ - $$ = $1; - if ($2) { - $$ = NEW_RESCUE($1, $2, $3); - } - else if ($3) { - rb_warn0("else without rescue is useless"); - $$ = block_append($$, $3); - } - if ($4) { - if ($$) { - $$ = NEW_ENSURE($$, $4); - } - else { - $$ = block_append($4, NEW_NIL()); - } - } - fixpos($$, $1); - /*% - $$ = dispatch4(bodystmt, - escape_Qundef($1), - escape_Qundef($2), - escape_Qundef($3), - escape_Qundef($4)); - %*/ + $$ = new_bodystmt(p, $1, $2, $5, $6, &@$); + /*% %*/ + /*% ripper: bodystmt!(escape_Qundef($1), escape_Qundef($2), escape_Qundef($5), escape_Qundef($6)) %*/ + } + | compstmt + opt_rescue + opt_ensure + { + /*%%%*/ + $$ = new_bodystmt(p, $1, $2, 0, $3, &@$); + /*% %*/ + /*% ripper: bodystmt!(escape_Qundef($1), escape_Qundef($2), Qnil, escape_Qundef($3)) %*/ } ; compstmt : stmts opt_terms { - /*%%%*/ - void_stmts($1); - fixup_nodes(&deferred_nodes); - /*% - %*/ 
- $$ = $1; + $$ = void_stmts(p, $1); } ; stmts : none { /*%%%*/ - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch2(stmts_add, dispatch0(stmts_new), - dispatch0(void_stmt)); - %*/ + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: stmts_add!(stmts_new!, void_stmt!) %*/ } | stmt_or_begin { /*%%%*/ $$ = newline_node($1); - /*% - $$ = dispatch2(stmts_add, dispatch0(stmts_new), $1); - %*/ + /*% %*/ + /*% ripper: stmts_add!(stmts_new!, $1) %*/ } | stmts terms stmt_or_begin { /*%%%*/ - $$ = block_append($1, newline_node($3)); - /*% - $$ = dispatch2(stmts_add, $1, $3); - %*/ + $$ = block_append(p, $1, newline_node($3)); + /*% %*/ + /*% ripper: stmts_add!($1, $3) %*/ } | error stmt { @@ -1020,40 +1462,27 @@ stmt_or_begin : stmt } | keyword_BEGIN { - yyerror("BEGIN is permitted only at toplevel"); - /*%%%*/ - /* local_push(0); */ - /*% - %*/ + yyerror1(&@1, "BEGIN is permitted only at toplevel"); } - '{' top_compstmt '}' + begin_block { - /*%%%*/ - ruby_eval_tree_begin = block_append(ruby_eval_tree_begin, - $4); - /* NEW_PREEXE($4)); */ - /* local_pop(); */ - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(BEGIN, $4); - %*/ + $$ = $3; } + ; -stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem +stmt : keyword_alias fitem {SET_LEX_STATE(EXPR_FNAME|EXPR_FITEM);} fitem { /*%%%*/ - $$ = NEW_ALIAS($2, $4); - /*% - $$ = dispatch2(alias, $2, $4); - %*/ + $$ = NEW_ALIAS($2, $4, &@$); + /*% %*/ + /*% ripper: alias!($2, $4) %*/ } | keyword_alias tGVAR tGVAR { /*%%%*/ - $$ = NEW_VALIAS($2, $3); - /*% - $$ = dispatch2(var_alias, $2, $3); - %*/ + $$ = NEW_VALIAS($2, $3, &@$); + /*% %*/ + /*% ripper: var_alias!($2, $3) %*/ } | keyword_alias tGVAR tBACK_REF { @@ -1061,263 +1490,371 @@ stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem char buf[2]; buf[0] = '$'; buf[1] = (char)$3->nd_nth; - $$ = NEW_VALIAS($2, rb_intern2(buf, 2)); - /*% - $$ = dispatch2(var_alias, $2, $3); - %*/ + $$ = NEW_VALIAS($2, rb_intern2(buf, 2), &@$); + /*% %*/ + /*% ripper: var_alias!($2, $3) %*/ } | keyword_alias 
tGVAR tNTH_REF { + static const char mesg[] = "can't make alias for the number variables"; /*%%%*/ - yyerror("can't make alias for the number variables"); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch2(var_alias, $2, $3); - $$ = dispatch1(alias_error, $$); - %*/ + yyerror1(&@3, mesg); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper[error]: alias_error!(ERR_MESG(), $3) %*/ } | keyword_undef undef_list { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(undef, $2); - %*/ + /*% %*/ + /*% ripper: undef!($2) %*/ } | stmt modifier_if expr_value { /*%%%*/ - $$ = NEW_IF(cond($3), remove_begin($1), 0); + $$ = new_if(p, $3, remove_begin($1), 0, &@$); fixpos($$, $3); - /*% - $$ = dispatch2(if_mod, $3, $1); - %*/ + /*% %*/ + /*% ripper: if_mod!($3, $1) %*/ } | stmt modifier_unless expr_value { /*%%%*/ - $$ = NEW_UNLESS(cond($3), remove_begin($1), 0); + $$ = new_unless(p, $3, remove_begin($1), 0, &@$); fixpos($$, $3); - /*% - $$ = dispatch2(unless_mod, $3, $1); - %*/ + /*% %*/ + /*% ripper: unless_mod!($3, $1) %*/ } | stmt modifier_while expr_value { /*%%%*/ - if ($1 && nd_type($1) == NODE_BEGIN) { - $$ = NEW_WHILE(cond($3), $1->nd_body, 0); + if ($1 && nd_type_p($1, NODE_BEGIN)) { + $$ = NEW_WHILE(cond(p, $3, &@3), $1->nd_body, 0, &@$); } else { - $$ = NEW_WHILE(cond($3), $1, 1); + $$ = NEW_WHILE(cond(p, $3, &@3), $1, 1, &@$); } - /*% - $$ = dispatch2(while_mod, $3, $1); - %*/ + /*% %*/ + /*% ripper: while_mod!($3, $1) %*/ } | stmt modifier_until expr_value { /*%%%*/ - if ($1 && nd_type($1) == NODE_BEGIN) { - $$ = NEW_UNTIL(cond($3), $1->nd_body, 0); + if ($1 && nd_type_p($1, NODE_BEGIN)) { + $$ = NEW_UNTIL(cond(p, $3, &@3), $1->nd_body, 0, &@$); } else { - $$ = NEW_UNTIL(cond($3), $1, 1); + $$ = NEW_UNTIL(cond(p, $3, &@3), $1, 1, &@$); } - /*% - $$ = dispatch2(until_mod, $3, $1); - %*/ + /*% %*/ + /*% ripper: until_mod!($3, $1) %*/ } | stmt modifier_rescue stmt { /*%%%*/ - NODE *resq = NEW_RESBODY(0, remove_begin($3), 0); - $$ = NEW_RESCUE(remove_begin($1), resq, 0); - /*% - $$ = 
dispatch2(rescue_mod, $1, $3); - %*/ + NODE *resq; + YYLTYPE loc = code_loc_gen(&@2, &@3); + resq = NEW_RESBODY(0, remove_begin($3), 0, &loc); + $$ = NEW_RESCUE(remove_begin($1), resq, 0, &@$); + /*% %*/ + /*% ripper: rescue_mod!($1, $3) %*/ } | keyword_END '{' compstmt '}' { - if (in_def || in_single) { + if (p->ctxt.in_def) { rb_warn0("END in method; use at_exit"); } /*%%%*/ - $$ = NEW_POSTEXE(NEW_NODE( - NODE_SCOPE, 0 /* tbl */, $3 /* body */, 0 /* args */)); - /*% - $$ = dispatch1(END, $3); - %*/ + { + NODE *scope = NEW_NODE( + NODE_SCOPE, 0 /* tbl */, $3 /* body */, 0 /* args */, &@$); + $$ = NEW_POSTEXE(scope, &@$); + } + /*% %*/ + /*% ripper: END!($3) %*/ } | command_asgn - | mlhs '=' command_call + | mlhs '=' lex_ctxt command_call { /*%%%*/ - value_expr($3); - $1->nd_value = $3; - $$ = $1; - /*% - $$ = dispatch2(massign, $1, $3); - %*/ + value_expr($4); + $$ = node_assign(p, $1, $4, $3, &@$); + /*% %*/ + /*% ripper: massign!($1, $4) %*/ } - | var_lhs tOP_ASGN command_call + | lhs '=' lex_ctxt mrhs { - value_expr($3); - $$ = new_op_assign($1, $2, $3); + /*%%%*/ + $$ = node_assign(p, $1, $4, $3, &@$); + /*% %*/ + /*% ripper: assign!($1, $4) %*/ } - | primary_value '[' opt_call_args rbracket tOP_ASGN command_call + | mlhs '=' lex_ctxt mrhs_arg modifier_rescue stmt + { + /*%%%*/ + YYLTYPE loc = code_loc_gen(&@5, &@6); + $$ = node_assign(p, $1, NEW_RESCUE($4, NEW_RESBODY(0, remove_begin($6), 0, &loc), 0, &@$), $3, &@$); + /*% %*/ + /*% ripper: massign!($1, rescue_mod!($4, $6)) %*/ + } + | mlhs '=' lex_ctxt mrhs_arg { /*%%%*/ - NODE *args; + $$ = node_assign(p, $1, $4, $3, &@$); + /*% %*/ + /*% ripper: massign!($1, $4) %*/ + } + | expr + ; - value_expr($6); - if (!$3) $3 = NEW_ZARRAY(); - args = arg_concat($3, $6); - if ($5 == tOROP) { - $5 = 0; - } - else if ($5 == tANDOP) { - $5 = 1; - } - $$ = NEW_OP_ASGN1($1, $5, args); - fixpos($$, $1); - /*% - $$ = dispatch2(aref_field, $1, escape_Qundef($3)); - $$ = dispatch3(opassign, $$, $5, $6); - %*/ +command_asgn : 
lhs '=' lex_ctxt command_rhs + { + /*%%%*/ + $$ = node_assign(p, $1, $4, $3, &@$); + /*% %*/ + /*% ripper: assign!($1, $4) %*/ } - | primary_value '.' tIDENTIFIER tOP_ASGN command_call + | var_lhs tOP_ASGN lex_ctxt command_rhs { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_id2sym('.'), $3, $4, $5); + /*%%%*/ + $$ = new_op_assign(p, $1, $2, $4, $3, &@$); + /*% %*/ + /*% ripper: opassign!($1, $2, $4) %*/ } - | primary_value '.' tCONSTANT tOP_ASGN command_call + | primary_value '[' opt_call_args rbracket tOP_ASGN lex_ctxt command_rhs { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_id2sym('.'), $3, $4, $5); + /*%%%*/ + $$ = new_ary_op_assign(p, $1, $3, $5, $7, &@3, &@$); + /*% %*/ + /*% ripper: opassign!(aref_field!($1, escape_Qundef($3)), $5, $7) %*/ + } - | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call + | primary_value call_op tIDENTIFIER tOP_ASGN lex_ctxt command_rhs { /*%%%*/ - yyerror("constant re-assignment"); - $$ = 0; - /*% - $$ = dispatch2(const_path_field, $1, $3); - $$ = dispatch3(opassign, $$, $4, $5); - $$ = dispatch1(assign_error, $$); - %*/ + $$ = new_attr_op_assign(p, $1, $2, $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call + | primary_value call_op tCONSTANT tOP_ASGN lex_ctxt command_rhs { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_intern("::"), $3, $4, $5); + /*%%%*/ + $$ = new_attr_op_assign(p, $1, $2, $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | backref tOP_ASGN command_call + | primary_value tCOLON2 tCONSTANT tOP_ASGN lex_ctxt command_rhs { /*%%%*/ - rb_backref_error($1); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch2(assign, dispatch1(var_field, $1), $3); - $$ = dispatch1(assign_error, $$); - %*/ + YYLTYPE loc = code_loc_gen(&@1, &@3); + $$ = new_const_op_assign(p, NEW_COLON2($1, $3, &loc), $4, $6, $5, &@$); + /*% %*/ + /*% ripper: opassign!(const_path_field!($1, $3), $4, 
$6) %*/ } - | lhs '=' mrhs + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN lex_ctxt command_rhs { /*%%%*/ - value_expr($3); - $$ = node_assign($1, $3); - /*% - $$ = dispatch2(assign, $1, $3); - %*/ + $$ = new_attr_op_assign(p, $1, ID2VAL(idCOLON2), $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | mlhs '=' arg_value + | defn_head f_opt_paren_args '=' command { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - $1->nd_value = $3; - $$ = $1; - /*% - $$ = dispatch2(massign, $1, $3); - %*/ + $$ = set_defun_body(p, $1, $2, $4, &@$); + /*% %*/ + /*% ripper[$4]: bodystmt!($4, Qnil, Qnil, Qnil) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ + local_pop(p); } - | mlhs '=' mrhs + | defn_head f_opt_paren_args '=' command modifier_rescue arg { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - $1->nd_value = $3; - $$ = $1; - /*% - $$ = dispatch2(massign, $1, $3); - %*/ + $4 = rescued_expr(p, $4, $6, &@4, &@5, &@6); + $$ = set_defun_body(p, $1, $2, $4, &@$); + /*% %*/ + /*% ripper[$4]: bodystmt!(rescue_mod!($4, $6), Qnil, Qnil, Qnil) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ + local_pop(p); } - | expr - ; - -command_asgn : lhs '=' command_call + | defs_head f_opt_paren_args '=' command { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - value_expr($3); - $$ = node_assign($1, $3); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% - $$ = dispatch2(assign, $1, $3); + $1 = get_value($1); %*/ + /*% ripper[$4]: bodystmt!($4, Qnil, Qnil, Qnil) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ + local_pop(p); } - | lhs '=' command_asgn + | defs_head f_opt_paren_args '=' command modifier_rescue arg { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - value_expr($3); - $$ = node_assign($1, $3); + $4 = rescued_expr(p, $4, $6, &@4, &@5, &@6); + $$ = 
set_defun_body(p, $1, $2, $4, &@$); /*% - $$ = dispatch2(assign, $1, $3); + $1 = get_value($1); %*/ + /*% ripper[$4]: bodystmt!(rescue_mod!($4, $6), Qnil, Qnil, Qnil) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ + local_pop(p); + } + | backref tOP_ASGN lex_ctxt command_rhs + { + /*%%%*/ + rb_backref_error(p, $1); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper[error]: backref_error(p, RNODE($1), assign!(var_field(p, $1), $4)) %*/ } ; +command_rhs : command_call %prec tOP_ASGN + { + value_expr($1); + $$ = $1; + } + | command_call modifier_rescue stmt + { + /*%%%*/ + YYLTYPE loc = code_loc_gen(&@2, &@3); + value_expr($1); + $$ = NEW_RESCUE($1, NEW_RESBODY(0, remove_begin($3), 0, &loc), 0, &@$); + /*% %*/ + /*% ripper: rescue_mod!($1, $3) %*/ + } + | command_asgn + ; expr : command_call | expr keyword_and expr { - /*%%%*/ - $$ = logop(NODE_AND, $1, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("and"), $3); - %*/ + $$ = logop(p, idAND, $1, $3, &@2, &@$); } | expr keyword_or expr { - /*%%%*/ - $$ = logop(NODE_OR, $1, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("or"), $3); - %*/ + $$ = logop(p, idOR, $1, $3, &@2, &@$); } | keyword_not opt_nl expr { + $$ = call_uni_op(p, method_cond(p, $3, &@3), METHOD_NOT, &@1, &@$); + } + | '!' 
command_call + { + $$ = call_uni_op(p, method_cond(p, $2, &@2), '!', &@1, &@$); + } + | arg tASSOC + { + value_expr($1); + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); + p->command_start = FALSE; + $<ctxt>2 = p->ctxt; + p->ctxt.in_kwarg = 1; + $<tbl>$ = push_pvtbl(p); + } + { + $<tbl>$ = push_pktbl(p); + } + p_top_expr_body + { + pop_pktbl(p, $<tbl>4); + pop_pvtbl(p, $<tbl>3); + p->ctxt.in_kwarg = $<ctxt>2.in_kwarg; + /*%%%*/ + $$ = NEW_CASE3($1, NEW_IN($5, 0, 0, &@5), &@$); + /*% %*/ + /*% ripper: case!($1, in!($5, Qnil, Qnil)) %*/ + } + | arg keyword_in + { + value_expr($1); + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); + p->command_start = FALSE; + $<ctxt>2 = p->ctxt; + p->ctxt.in_kwarg = 1; + $<tbl>$ = push_pvtbl(p); + } + { + $<tbl>$ = push_pktbl(p); + } + p_top_expr_body + { + pop_pktbl(p, $<tbl>4); + pop_pvtbl(p, $<tbl>3); + p->ctxt.in_kwarg = $<ctxt>2.in_kwarg; + /*%%%*/ + $$ = NEW_CASE3($1, NEW_IN($5, NEW_TRUE(&@5), NEW_FALSE(&@5), &@5), &@$); + /*% %*/ + /*% ripper: case!($1, in!($5, Qnil, Qnil)) %*/ + } + | arg %prec tLBRACE_ARG + ; + +def_name : fname + { + ID fname = get_id($1); + ID cur_arg = p->cur_arg; + YYSTYPE c = {.ctxt = p->ctxt}; + numparam_name(p, fname); + local_push(p, 0); + p->cur_arg = 0; + p->ctxt.in_def = 1; + $<node>$ = NEW_NODE(NODE_SELF, /*vid*/cur_arg, /*mid*/fname, /*cval*/c.val, &@$); /*%%%*/ - $$ = call_uni_op(cond($3), '!'); /*% - $$ = dispatch2(unary, ripper_intern("not"), $3); + $$ = NEW_RIPPER(fname, get_value($1), $$, &NULL_LOC); %*/ } - | '!' 
command_call + ; + +defn_head : k_def def_name + { + $$ = $2; + /*%%%*/ + $$ = NEW_NODE(NODE_DEFN, 0, $$->nd_mid, $$, &@$); + /*% %*/ + } + ; + +defs_head : k_def singleton dot_or_colon { + SET_LEX_STATE(EXPR_FNAME); + p->ctxt.in_argdef = 1; + } + def_name + { + SET_LEX_STATE(EXPR_ENDFN|EXPR_LABEL); /* force for args */ + $$ = $5; /*%%%*/ - $$ = call_uni_op(cond($2), '!'); + $$ = NEW_NODE(NODE_DEFS, $2, $$->nd_mid, $$, &@$); /*% - $$ = dispatch2(unary, ripper_id2sym('!'), $2); + VALUE ary = rb_ary_new_from_args(3, $2, $3, get_value($$)); + add_mark_object(p, ary); + $<node>$->nd_rval = ary; %*/ } - | arg ; expr_value : expr { - /*%%%*/ value_expr($1); $$ = $1; - if (!$$) $$ = NEW_NIL(); - /*% - $$ = $1; - %*/ + } + ; + +expr_value_do : {COND_PUSH(1);} expr_value do {COND_POP();} + { + $$ = $2; } ; @@ -1326,143 +1863,119 @@ command_call : command ; block_command : block_call - | block_call dot_or_colon operation2 command_args + | block_call call_op2 operation2 command_args { /*%%%*/ - $$ = NEW_CALL($1, $3, $4); - /*% - $$ = dispatch3(call, $1, $2, $3); - $$ = method_arg($$, $4); - %*/ + $$ = new_qcall(p, $2, $1, $3, $4, &@3, &@$); + /*% %*/ + /*% ripper: method_add_arg!(call!($1, $2, $3), $4) %*/ } ; -cmd_brace_block : tLBRACE_ARG +cmd_brace_block : tLBRACE_ARG brace_body '}' { - $<vars>1 = dyna_push(); + $$ = $2; /*%%%*/ - $<num>$ = ruby_sourceline; - /*% - %*/ + $$->nd_body->nd_loc = code_loc_gen(&@1, &@3); + nd_set_line($$, @1.end_pos.lineno); + /*% %*/ } - opt_block_param - compstmt - '}' + ; + +fcall : operation { /*%%%*/ - $$ = NEW_ITER($3,$4); - nd_set_line($$, $<num>2); - /*% - $$ = dispatch2(brace_block, escape_Qundef($3), $4); - %*/ - dyna_pop($<vars>1); + $$ = NEW_FCALL($1, 0, &@$); + nd_set_line($$, p->tokline); + /*% %*/ + /*% ripper: $1 %*/ } ; -command : operation command_args %prec tLOWEST +command : fcall command_args %prec tLOWEST { /*%%%*/ - $$ = NEW_FCALL($1, $2); - fixpos($$, $2); - /*% - $$ = dispatch2(command, $1, $2); - %*/ + $1->nd_args = 
$2; + nd_set_last_loc($1, @2.end_pos); + $$ = $1; + /*% %*/ + /*% ripper: command!($1, $2) %*/ } - | operation command_args cmd_brace_block + | fcall command_args cmd_brace_block { /*%%%*/ - block_dup_check($2,$3); - $3->nd_iter = NEW_FCALL($1, $2); - $$ = $3; - fixpos($$, $2); - /*% - $$ = dispatch2(command, $1, $2); - $$ = method_add_block($$, $3); - %*/ + block_dup_check(p, $2, $3); + $1->nd_args = $2; + $$ = method_add_block(p, $1, $3, &@$); + fixpos($$, $1); + nd_set_last_loc($1, @2.end_pos); + /*% %*/ + /*% ripper: method_add_block!(command!($1, $2), $3) %*/ } - | primary_value '.' operation2 command_args %prec tLOWEST + | primary_value call_op operation2 command_args %prec tLOWEST { /*%%%*/ - $$ = NEW_CALL($1, $3, $4); - fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); - %*/ + $$ = new_command_qcall(p, $2, $1, $3, $4, Qnull, &@3, &@$); + /*% %*/ + /*% ripper: command_call!($1, $2, $3, $4) %*/ } - | primary_value '.' operation2 command_args cmd_brace_block + | primary_value call_op operation2 command_args cmd_brace_block { /*%%%*/ - block_dup_check($4,$5); - $5->nd_iter = NEW_CALL($1, $3, $4); - $$ = $5; - fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, ripper_id2sym('.'), $3, $4); - $$ = method_add_block($$, $5); - %*/ - } + $$ = new_command_qcall(p, $2, $1, $3, $4, $5, &@3, &@$); + /*% %*/ + /*% ripper: method_add_block!(command_call!($1, $2, $3, $4), $5) %*/ + } | primary_value tCOLON2 operation2 command_args %prec tLOWEST { /*%%%*/ - $$ = NEW_CALL($1, $3, $4); - fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, ripper_intern("::"), $3, $4); - %*/ + $$ = new_command_qcall(p, ID2VAL(idCOLON2), $1, $3, $4, Qnull, &@3, &@$); + /*% %*/ + /*% ripper: command_call!($1, $2, $3, $4) %*/ } | primary_value tCOLON2 operation2 command_args cmd_brace_block { /*%%%*/ - block_dup_check($4,$5); - $5->nd_iter = NEW_CALL($1, $3, $4); - $$ = $5; - fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, 
ripper_intern("::"), $3, $4); - $$ = method_add_block($$, $5); - %*/ + $$ = new_command_qcall(p, ID2VAL(idCOLON2), $1, $3, $4, $5, &@3, &@$); + /*% %*/ + /*% ripper: method_add_block!(command_call!($1, $2, $3, $4), $5) %*/ } | keyword_super command_args { /*%%%*/ - $$ = NEW_SUPER($2); + $$ = NEW_SUPER($2, &@$); fixpos($$, $2); - /*% - $$ = dispatch1(super, $2); - %*/ + /*% %*/ + /*% ripper: super!($2) %*/ } | keyword_yield command_args { /*%%%*/ - $$ = new_yield($2); + $$ = new_yield(p, $2, &@$); fixpos($$, $2); - /*% - $$ = dispatch1(yield, $2); - %*/ + /*% %*/ + /*% ripper: yield!($2) %*/ } - | keyword_return call_args + | k_return call_args { /*%%%*/ - $$ = NEW_RETURN(ret_args($2)); - /*% - $$ = dispatch1(return, $2); - %*/ + $$ = NEW_RETURN(ret_args(p, $2), &@$); + /*% %*/ + /*% ripper: return!($2) %*/ } | keyword_break call_args { /*%%%*/ - $$ = NEW_BREAK(ret_args($2)); - /*% - $$ = dispatch1(break, $2); - %*/ + $$ = NEW_BREAK(ret_args(p, $2), &@$); + /*% %*/ + /*% ripper: break!($2) %*/ } | keyword_next call_args { /*%%%*/ - $$ = NEW_NEXT(ret_args($2)); - /*% - $$ = dispatch1(next, $2); - %*/ + $$ = NEW_NEXT(ret_args(p, $2), &@$); + /*% %*/ + /*% ripper: next!($2) %*/ } ; @@ -1471,9 +1984,8 @@ mlhs : mlhs_basic { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(mlhs_paren, $2); - %*/ + /*% %*/ + /*% ripper: mlhs_paren!($2) %*/ } ; @@ -1481,96 +1993,81 @@ mlhs_inner : mlhs_basic | tLPAREN mlhs_inner rparen { /*%%%*/ - $$ = NEW_MASGN(NEW_LIST($2), 0); - /*% - $$ = dispatch1(mlhs_paren, $2); - %*/ + $$ = NEW_MASGN(NEW_LIST($2, &@$), 0, &@$); + /*% %*/ + /*% ripper: mlhs_paren!($2) %*/ } ; mlhs_basic : mlhs_head { /*%%%*/ - $$ = NEW_MASGN($1, 0); - /*% - $$ = $1; - %*/ + $$ = NEW_MASGN($1, 0, &@$); + /*% %*/ + /*% ripper: $1 %*/ } | mlhs_head mlhs_item { /*%%%*/ - $$ = NEW_MASGN(list_append($1,$2), 0); - /*% - $$ = mlhs_add($1, $2); - %*/ + $$ = NEW_MASGN(list_append(p, $1,$2), 0, &@$); + /*% %*/ + /*% ripper: mlhs_add!($1, $2) %*/ } | mlhs_head tSTAR mlhs_node { /*%%%*/ 
- $$ = NEW_MASGN($1, $3); - /*% - $$ = mlhs_add_star($1, $3); - %*/ + $$ = NEW_MASGN($1, $3, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!($1, $3) %*/ } | mlhs_head tSTAR mlhs_node ',' mlhs_post { /*%%%*/ - $$ = NEW_MASGN($1, NEW_POSTARG($3,$5)); - /*% - $1 = mlhs_add_star($1, $3); - $$ = mlhs_add($1, $5); - %*/ + $$ = NEW_MASGN($1, NEW_POSTARG($3,$5,&@$), &@$); + /*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!($1, $3), $5) %*/ } | mlhs_head tSTAR { /*%%%*/ - $$ = NEW_MASGN($1, -1); - /*% - $$ = mlhs_add_star($1, Qnil); - %*/ + $$ = NEW_MASGN($1, NODE_SPECIAL_NO_NAME_REST, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!($1, Qnil) %*/ } | mlhs_head tSTAR ',' mlhs_post { /*%%%*/ - $$ = NEW_MASGN($1, NEW_POSTARG(-1, $4)); - /*% - $1 = mlhs_add_star($1, Qnil); - $$ = mlhs_add($1, $4); - %*/ + $$ = NEW_MASGN($1, NEW_POSTARG(NODE_SPECIAL_NO_NAME_REST, $4, &@$), &@$); + /*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!($1, Qnil), $4) %*/ } | tSTAR mlhs_node { /*%%%*/ - $$ = NEW_MASGN(0, $2); - /*% - $$ = mlhs_add_star(mlhs_new(), $2); - %*/ + $$ = NEW_MASGN(0, $2, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!(mlhs_new!, $2) %*/ } | tSTAR mlhs_node ',' mlhs_post { /*%%%*/ - $$ = NEW_MASGN(0, NEW_POSTARG($2,$4)); - /*% - $2 = mlhs_add_star(mlhs_new(), $2); - $$ = mlhs_add($2, $4); - %*/ + $$ = NEW_MASGN(0, NEW_POSTARG($2,$4,&@$), &@$); + /*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!(mlhs_new!, $2), $4) %*/ } | tSTAR { /*%%%*/ - $$ = NEW_MASGN(0, -1); - /*% - $$ = mlhs_add_star(mlhs_new(), Qnil); - %*/ + $$ = NEW_MASGN(0, NODE_SPECIAL_NO_NAME_REST, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!(mlhs_new!, Qnil) %*/ } | tSTAR ',' mlhs_post { /*%%%*/ - $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); - /*% - $$ = mlhs_add_star(mlhs_new(), Qnil); - $$ = mlhs_add($$, $3); - %*/ + $$ = NEW_MASGN(0, NEW_POSTARG(NODE_SPECIAL_NO_NAME_REST, $3, &@$), &@$); + /*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!(mlhs_new!, Qnil), $3) %*/ } ; @@ -1579,216 +2076,188 @@ mlhs_item : mlhs_node { 
/*%%%*/ $$ = $2; - /*% - $$ = dispatch1(mlhs_paren, $2); - %*/ + /*% %*/ + /*% ripper: mlhs_paren!($2) %*/ } ; mlhs_head : mlhs_item ',' { /*%%%*/ - $$ = NEW_LIST($1); - /*% - $$ = mlhs_add(mlhs_new(), $1); - %*/ + $$ = NEW_LIST($1, &@1); + /*% %*/ + /*% ripper: mlhs_add!(mlhs_new!, $1) %*/ } | mlhs_head mlhs_item ',' { /*%%%*/ - $$ = list_append($1, $2); - /*% - $$ = mlhs_add($1, $2); - %*/ + $$ = list_append(p, $1, $2); + /*% %*/ + /*% ripper: mlhs_add!($1, $2) %*/ } ; mlhs_post : mlhs_item { /*%%%*/ - $$ = NEW_LIST($1); - /*% - $$ = mlhs_add(mlhs_new(), $1); - %*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: mlhs_add!(mlhs_new!, $1) %*/ } | mlhs_post ',' mlhs_item { /*%%%*/ - $$ = list_append($1, $3); - /*% - $$ = mlhs_add($1, $3); - %*/ + $$ = list_append(p, $1, $3); + /*% %*/ + /*% ripper: mlhs_add!($1, $3) %*/ } ; mlhs_node : user_variable { - $$ = assignable($1, 0); + /*%%%*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } | keyword_variable { - $$ = assignable($1, 0); + /*%%%*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } | primary_value '[' opt_call_args rbracket { /*%%%*/ - $$ = aryset($1, $3); - /*% - $$ = dispatch2(aref_field, $1, escape_Qundef($3)); - %*/ + $$ = aryset(p, $1, $3, &@$); + /*% %*/ + /*% ripper: aref_field!($1, escape_Qundef($3)) %*/ } - | primary_value '.' tIDENTIFIER + | primary_value call_op tIDENTIFIER { + if ($2 == tANDDOT) { + yyerror1(&@2, "&. inside multiple assignment destination"); + } /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); - %*/ + $$ = attrset(p, $1, $2, $3, &@$); + /*% %*/ + /*% ripper: field!($1, $2, $3) %*/ } | primary_value tCOLON2 tIDENTIFIER { /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch2(const_path_field, $1, $3); - %*/ + $$ = attrset(p, $1, idCOLON2, $3, &@$); + /*% %*/ + /*% ripper: const_path_field!($1, $3) %*/ } - | primary_value '.' 
tCONSTANT + | primary_value call_op tCONSTANT { + if ($2 == tANDDOT) { + yyerror1(&@2, "&. inside multiple assignment destination"); + } /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); - %*/ + $$ = attrset(p, $1, $2, $3, &@$); + /*% %*/ + /*% ripper: field!($1, $2, $3) %*/ } | primary_value tCOLON2 tCONSTANT { /*%%%*/ - if (in_def || in_single) - yyerror("dynamic constant assignment"); - $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); - /*% - if (in_def || in_single) - yyerror("dynamic constant assignment"); - $$ = dispatch2(const_path_field, $1, $3); - %*/ + $$ = const_decl(p, NEW_COLON2($1, $3, &@$), &@$); + /*% %*/ + /*% ripper: const_decl(p, const_path_field!($1, $3)) %*/ } | tCOLON3 tCONSTANT { /*%%%*/ - if (in_def || in_single) - yyerror("dynamic constant assignment"); - $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); - /*% - $$ = dispatch1(top_const_field, $2); - %*/ + $$ = const_decl(p, NEW_COLON3($2, &@$), &@$); + /*% %*/ + /*% ripper: const_decl(p, top_const_field!($2)) %*/ } | backref { /*%%%*/ - rb_backref_error($1); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(var_field, $1); - $$ = dispatch1(assign_error, $$); - %*/ + rb_backref_error(p, $1); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper[error]: backref_error(p, RNODE($1), var_field(p, $1)) %*/ } ; lhs : user_variable { - $$ = assignable($1, 0); /*%%%*/ - if (!$$) $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(var_field, $$); - %*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } | keyword_variable { - $$ = assignable($1, 0); /*%%%*/ - if (!$$) $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(var_field, $$); - %*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } | primary_value '[' opt_call_args rbracket { /*%%%*/ - $$ = aryset($1, $3); - /*% - $$ = dispatch2(aref_field, $1, escape_Qundef($3)); - %*/ + $$ = aryset(p, $1, $3, &@$); + /*% %*/ + /*% ripper: aref_field!($1, escape_Qundef($3)) %*/ 
} - | primary_value '.' tIDENTIFIER + | primary_value call_op tIDENTIFIER { /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); - %*/ + $$ = attrset(p, $1, $2, $3, &@$); + /*% %*/ + /*% ripper: field!($1, $2, $3) %*/ } | primary_value tCOLON2 tIDENTIFIER { /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch3(field, $1, ripper_intern("::"), $3); - %*/ + $$ = attrset(p, $1, idCOLON2, $3, &@$); + /*% %*/ + /*% ripper: field!($1, idCOLON2, $3) %*/ } - | primary_value '.' tCONSTANT + | primary_value call_op tCONSTANT { /*%%%*/ - $$ = attrset($1, $3); - /*% - $$ = dispatch3(field, $1, ripper_id2sym('.'), $3); - %*/ + $$ = attrset(p, $1, $2, $3, &@$); + /*% %*/ + /*% ripper: field!($1, $2, $3) %*/ } | primary_value tCOLON2 tCONSTANT { /*%%%*/ - if (in_def || in_single) - yyerror("dynamic constant assignment"); - $$ = NEW_CDECL(0, 0, NEW_COLON2($1, $3)); - /*% - $$ = dispatch2(const_path_field, $1, $3); - if (in_def || in_single) { - $$ = dispatch1(assign_error, $$); - } - %*/ + $$ = const_decl(p, NEW_COLON2($1, $3, &@$), &@$); + /*% %*/ + /*% ripper: const_decl(p, const_path_field!($1, $3)) %*/ } | tCOLON3 tCONSTANT { /*%%%*/ - if (in_def || in_single) - yyerror("dynamic constant assignment"); - $$ = NEW_CDECL(0, 0, NEW_COLON3($2)); - /*% - $$ = dispatch1(top_const_field, $2); - if (in_def || in_single) { - $$ = dispatch1(assign_error, $$); - } - %*/ + $$ = const_decl(p, NEW_COLON3($2, &@$), &@$); + /*% %*/ + /*% ripper: const_decl(p, top_const_field!($2)) %*/ } | backref { /*%%%*/ - rb_backref_error($1); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(assign_error, $1); - %*/ + rb_backref_error(p, $1); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper[error]: backref_error(p, RNODE($1), var_field(p, $1)) %*/ } ; cname : tIDENTIFIER { + static const char mesg[] = "class/module name must be CONSTANT"; /*%%%*/ - yyerror("class/module name must be CONSTANT"); - /*% - $$ = dispatch1(class_name_error, $1); - %*/ + yyerror1(&@1, mesg); + /*% 
%*/ + /*% ripper[error]: class_name_error!(ERR_MESG(), $1) %*/ } | tCONSTANT ; @@ -1796,26 +2265,23 @@ cname : tIDENTIFIER cpath : tCOLON3 cname { /*%%%*/ - $$ = NEW_COLON3($2); - /*% - $$ = dispatch1(top_const_ref, $2); - %*/ + $$ = NEW_COLON3($2, &@$); + /*% %*/ + /*% ripper: top_const_ref!($2) %*/ } | cname { /*%%%*/ - $$ = NEW_COLON2(0, $$); - /*% - $$ = dispatch1(const_ref, $1); - %*/ + $$ = NEW_COLON2(0, $$, &@$); + /*% %*/ + /*% ripper: const_ref!($1) %*/ } | primary_value tCOLON2 cname { /*%%%*/ - $$ = NEW_COLON2($1, $3); - /*% - $$ = dispatch2(const_path_ref, $1, $3); - %*/ + $$ = NEW_COLON2($1, $3, &@$); + /*% %*/ + /*% ripper: const_path_ref!($1, $3) %*/ } ; @@ -1824,50 +2290,36 @@ fname : tIDENTIFIER | tFID | op { - lex_state = EXPR_ENDFN; + SET_LEX_STATE(EXPR_ENDFN); $$ = $1; } | reswords - { - lex_state = EXPR_ENDFN; - /*%%%*/ - $$ = $<id>1; - /*% - $$ = $1; - %*/ - } - ; - -fsym : fname - | symbol ; -fitem : fsym +fitem : fname { /*%%%*/ - $$ = NEW_LIT(ID2SYM($1)); - /*% - $$ = dispatch1(symbol_literal, $1); - %*/ + $$ = NEW_LIT(ID2SYM($1), &@$); + /*% %*/ + /*% ripper: symbol_literal!($1) %*/ } - | dsym + | symbol ; undef_list : fitem { /*%%%*/ - $$ = NEW_UNDEF($1); - /*% - $$ = rb_ary_new3(1, $1); - %*/ + $$ = NEW_UNDEF($1, &@$); + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } - | undef_list ',' {lex_state = EXPR_FNAME;} fitem + | undef_list ',' {SET_LEX_STATE(EXPR_FNAME|EXPR_FITEM);} fitem { /*%%%*/ - $$ = block_append($1, NEW_UNDEF($4)); - /*% - rb_ary_push($1, $4); - %*/ + NODE *undef = NEW_UNDEF($4, &@4); + $$ = block_append(p, $1, undef); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($4)) %*/ } ; @@ -1917,398 +2369,282 @@ reswords : keyword__LINE__ | keyword__FILE__ | keyword__ENCODING__ | keyword_while | keyword_until ; -arg : lhs '=' arg +arg : lhs '=' lex_ctxt arg_rhs { /*%%%*/ - value_expr($3); - $$ = node_assign($1, $3); - /*% - $$ = dispatch2(assign, $1, $3); - %*/ + $$ = node_assign(p, $1, $4, $3, &@$); + /*% %*/ + /*% 
ripper: assign!($1, $4) %*/ } - | lhs '=' arg modifier_rescue arg + | var_lhs tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - value_expr($3); - $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); - $$ = node_assign($1, $3); - /*% - $$ = dispatch2(assign, $1, dispatch2(rescue_mod, $3, $5)); - %*/ - } - | var_lhs tOP_ASGN arg - { - value_expr($3); - $$ = new_op_assign($1, $2, $3); + $$ = new_op_assign(p, $1, $2, $4, $3, &@$); + /*% %*/ + /*% ripper: opassign!($1, $2, $4) %*/ } - | var_lhs tOP_ASGN arg modifier_rescue arg + | primary_value '[' opt_call_args rbracket tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - value_expr($3); - $3 = NEW_RESCUE($3, NEW_RESBODY(0,$5,0), 0); - /*% - $3 = dispatch2(rescue_mod, $3, $5); - %*/ - $$ = new_op_assign($1, $2, $3); + $$ = new_ary_op_assign(p, $1, $3, $5, $7, &@3, &@$); + /*% %*/ + /*% ripper: opassign!(aref_field!($1, escape_Qundef($3)), $5, $7) %*/ } - | primary_value '[' opt_call_args rbracket tOP_ASGN arg + | primary_value call_op tIDENTIFIER tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - NODE *args; - - value_expr($6); - if (!$3) $3 = NEW_ZARRAY(); - if (nd_type($3) == NODE_BLOCK_PASS) { - args = NEW_ARGSCAT($3, $6); - } - else { - args = arg_concat($3, $6); - } - if ($5 == tOROP) { - $5 = 0; - } - else if ($5 == tANDOP) { - $5 = 1; - } - $$ = NEW_OP_ASGN1($1, $5, args); - fixpos($$, $1); - /*% - $1 = dispatch2(aref_field, $1, escape_Qundef($3)); - $$ = dispatch3(opassign, $1, $5, $6); - %*/ - } - | primary_value '.' tIDENTIFIER tOP_ASGN arg - { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_id2sym('.'), $3, $4, $5); + $$ = new_attr_op_assign(p, $1, $2, $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | primary_value '.' 
tCONSTANT tOP_ASGN arg + | primary_value call_op tCONSTANT tOP_ASGN lex_ctxt arg_rhs { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_id2sym('.'), $3, $4, $5); + /*%%%*/ + $$ = new_attr_op_assign(p, $1, $2, $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN lex_ctxt arg_rhs { - value_expr($5); - $$ = new_attr_op_assign($1, ripper_intern("::"), $3, $4, $5); + /*%%%*/ + $$ = new_attr_op_assign(p, $1, ID2VAL(idCOLON2), $3, $4, $6, &@$); + /*% %*/ + /*% ripper: opassign!(field!($1, $2, $3), $4, $6) %*/ } - | primary_value tCOLON2 tCONSTANT tOP_ASGN arg + | primary_value tCOLON2 tCONSTANT tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - yyerror("constant re-assignment"); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch2(const_path_field, $1, $3); - $$ = dispatch3(opassign, $$, $4, $5); - $$ = dispatch1(assign_error, $$); - %*/ + YYLTYPE loc = code_loc_gen(&@1, &@3); + $$ = new_const_op_assign(p, NEW_COLON2($1, $3, &loc), $4, $6, $5, &@$); + /*% %*/ + /*% ripper: opassign!(const_path_field!($1, $3), $4, $6) %*/ } - | tCOLON3 tCONSTANT tOP_ASGN arg + | tCOLON3 tCONSTANT tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - yyerror("constant re-assignment"); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(top_const_field, $2); - $$ = dispatch3(opassign, $$, $3, $4); - $$ = dispatch1(assign_error, $$); - %*/ + YYLTYPE loc = code_loc_gen(&@1, &@2); + $$ = new_const_op_assign(p, NEW_COLON3($2, &loc), $3, $5, $4, &@$); + /*% %*/ + /*% ripper: opassign!(top_const_field!($2), $3, $5) %*/ } - | backref tOP_ASGN arg + | backref tOP_ASGN lex_ctxt arg_rhs { /*%%%*/ - rb_backref_error($1); - $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(var_field, $1); - $$ = dispatch3(opassign, $$, $2, $3); - $$ = dispatch1(assign_error, $$); - %*/ + rb_backref_error(p, $1); + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper[error]: backref_error(p, RNODE($1), opassign!(var_field(p, $1), $2, $4)) %*/ } 
| arg tDOT2 arg { /*%%%*/ value_expr($1); value_expr($3); - $$ = NEW_DOT2($1, $3); - if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && - nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { - deferred_nodes = list_append(deferred_nodes, $$); - } - /*% - $$ = dispatch2(dot2, $1, $3); - %*/ + $$ = NEW_DOT2($1, $3, &@$); + /*% %*/ + /*% ripper: dot2!($1, $3) %*/ } | arg tDOT3 arg { /*%%%*/ value_expr($1); value_expr($3); - $$ = NEW_DOT3($1, $3); - if (nd_type($1) == NODE_LIT && FIXNUM_P($1->nd_lit) && - nd_type($3) == NODE_LIT && FIXNUM_P($3->nd_lit)) { - deferred_nodes = list_append(deferred_nodes, $$); - } - /*% - $$ = dispatch2(dot3, $1, $3); - %*/ - } - | arg '+' arg - { - /*%%%*/ - $$ = call_bin_op($1, '+', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('+'), $3); - %*/ + $$ = NEW_DOT3($1, $3, &@$); + /*% %*/ + /*% ripper: dot3!($1, $3) %*/ } - | arg '-' arg + | arg tDOT2 { /*%%%*/ - $$ = call_bin_op($1, '-', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('-'), $3); - %*/ + value_expr($1); + $$ = NEW_DOT2($1, new_nil_at(p, &@2.end_pos), &@$); + /*% %*/ + /*% ripper: dot2!($1, Qnil) %*/ } - | arg '*' arg + | arg tDOT3 { /*%%%*/ - $$ = call_bin_op($1, '*', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('*'), $3); - %*/ + value_expr($1); + $$ = NEW_DOT3($1, new_nil_at(p, &@2.end_pos), &@$); + /*% %*/ + /*% ripper: dot3!($1, Qnil) %*/ } - | arg '/' arg + | tBDOT2 arg { /*%%%*/ - $$ = call_bin_op($1, '/', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('/'), $3); - %*/ + value_expr($2); + $$ = NEW_DOT2(new_nil_at(p, &@1.beg_pos), $2, &@$); + /*% %*/ + /*% ripper: dot2!(Qnil, $2) %*/ } - | arg '%' arg + | tBDOT3 arg { /*%%%*/ - $$ = call_bin_op($1, '%', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('%'), $3); - %*/ + value_expr($2); + $$ = NEW_DOT3(new_nil_at(p, &@1.beg_pos), $2, &@$); + /*% %*/ + /*% ripper: dot3!(Qnil, $2) %*/ } - | arg tPOW arg + | arg '+' arg { - /*%%%*/ - $$ = call_bin_op($1, tPOW, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("**"), 
$3); - %*/ + $$ = call_bin_op(p, $1, '+', $3, &@2, &@$); } - | tUMINUS_NUM tINTEGER tPOW arg + | arg '-' arg { - /*%%%*/ - $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); - /*% - $$ = dispatch3(binary, $2, ripper_intern("**"), $4); - $$ = dispatch2(unary, ripper_intern("-@"), $$); - %*/ + $$ = call_bin_op(p, $1, '-', $3, &@2, &@$); } - | tUMINUS_NUM tFLOAT tPOW arg + | arg '*' arg { - /*%%%*/ - $$ = NEW_CALL(call_bin_op($2, tPOW, $4), tUMINUS, 0); - /*% - $$ = dispatch3(binary, $2, ripper_intern("**"), $4); - $$ = dispatch2(unary, ripper_intern("-@"), $$); - %*/ + $$ = call_bin_op(p, $1, '*', $3, &@2, &@$); } - | tUPLUS arg + | arg '/' arg { - /*%%%*/ - $$ = call_uni_op($2, tUPLUS); - /*% - $$ = dispatch2(unary, ripper_intern("+@"), $2); - %*/ + $$ = call_bin_op(p, $1, '/', $3, &@2, &@$); } - | tUMINUS arg + | arg '%' arg { - /*%%%*/ - $$ = call_uni_op($2, tUMINUS); - /*% - $$ = dispatch2(unary, ripper_intern("-@"), $2); - %*/ + $$ = call_bin_op(p, $1, '%', $3, &@2, &@$); } - | arg '|' arg + | arg tPOW arg { - /*%%%*/ - $$ = call_bin_op($1, '|', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('|'), $3); - %*/ + $$ = call_bin_op(p, $1, idPow, $3, &@2, &@$); } - | arg '^' arg + | tUMINUS_NUM simple_numeric tPOW arg { - /*%%%*/ - $$ = call_bin_op($1, '^', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('^'), $3); - %*/ + $$ = call_uni_op(p, call_bin_op(p, $2, idPow, $4, &@2, &@$), idUMinus, &@1, &@$); } - | arg '&' arg + | tUPLUS arg { - /*%%%*/ - $$ = call_bin_op($1, '&', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('&'), $3); - %*/ + $$ = call_uni_op(p, $2, idUPlus, &@1, &@$); } - | arg tCMP arg + | tUMINUS arg { - /*%%%*/ - $$ = call_bin_op($1, tCMP, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("<=>"), $3); - %*/ + $$ = call_uni_op(p, $2, idUMinus, &@1, &@$); } - | arg '>' arg + | arg '|' arg { - /*%%%*/ - $$ = call_bin_op($1, '>', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('>'), $3); - %*/ + $$ = call_bin_op(p, $1, '|', $3, &@2, &@$); } - | arg 
tGEQ arg + | arg '^' arg { - /*%%%*/ - $$ = call_bin_op($1, tGEQ, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern(">="), $3); - %*/ + $$ = call_bin_op(p, $1, '^', $3, &@2, &@$); } - | arg '<' arg + | arg '&' arg { - /*%%%*/ - $$ = call_bin_op($1, '<', $3); - /*% - $$ = dispatch3(binary, $1, ID2SYM('<'), $3); - %*/ + $$ = call_bin_op(p, $1, '&', $3, &@2, &@$); } - | arg tLEQ arg + | arg tCMP arg { - /*%%%*/ - $$ = call_bin_op($1, tLEQ, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("<="), $3); - %*/ + $$ = call_bin_op(p, $1, idCmp, $3, &@2, &@$); } + | rel_expr %prec tCMP | arg tEQ arg { - /*%%%*/ - $$ = call_bin_op($1, tEQ, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("=="), $3); - %*/ + $$ = call_bin_op(p, $1, idEq, $3, &@2, &@$); } | arg tEQQ arg { - /*%%%*/ - $$ = call_bin_op($1, tEQQ, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("==="), $3); - %*/ + $$ = call_bin_op(p, $1, idEqq, $3, &@2, &@$); } | arg tNEQ arg { - /*%%%*/ - $$ = call_bin_op($1, tNEQ, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("!="), $3); - %*/ + $$ = call_bin_op(p, $1, idNeq, $3, &@2, &@$); } | arg tMATCH arg { - /*%%%*/ - $$ = match_op($1, $3); - if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { - $$ = reg_named_capture_assign($1->nd_lit, $$); - } - /*% - $$ = dispatch3(binary, $1, ripper_intern("=~"), $3); - %*/ + $$ = match_op(p, $1, $3, &@2, &@$); } | arg tNMATCH arg { - /*%%%*/ - $$ = call_bin_op($1, tNMATCH, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("!~"), $3); - %*/ + $$ = call_bin_op(p, $1, idNeqTilde, $3, &@2, &@$); } | '!' 
arg { - /*%%%*/ - $$ = call_uni_op(cond($2), '!'); - /*% - $$ = dispatch2(unary, ID2SYM('!'), $2); - %*/ + $$ = call_uni_op(p, method_cond(p, $2, &@2), '!', &@1, &@$); } | '~' arg { - /*%%%*/ - $$ = call_uni_op($2, '~'); - /*% - $$ = dispatch2(unary, ID2SYM('~'), $2); - %*/ + $$ = call_uni_op(p, $2, '~', &@1, &@$); } | arg tLSHFT arg { - /*%%%*/ - $$ = call_bin_op($1, tLSHFT, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("<<"), $3); - %*/ + $$ = call_bin_op(p, $1, idLTLT, $3, &@2, &@$); } | arg tRSHFT arg { - /*%%%*/ - $$ = call_bin_op($1, tRSHFT, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern(">>"), $3); - %*/ + $$ = call_bin_op(p, $1, idGTGT, $3, &@2, &@$); } | arg tANDOP arg { - /*%%%*/ - $$ = logop(NODE_AND, $1, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("&&"), $3); - %*/ + $$ = logop(p, idANDOP, $1, $3, &@2, &@$); } | arg tOROP arg { + $$ = logop(p, idOROP, $1, $3, &@2, &@$); + } + | keyword_defined opt_nl {p->ctxt.in_defined = 1;} arg + { + p->ctxt.in_defined = 0; + $$ = new_defined(p, $4, &@$); + } + | arg '?' 
arg opt_nl ':' arg + { /*%%%*/ - $$ = logop(NODE_OR, $1, $3); - /*% - $$ = dispatch3(binary, $1, ripper_intern("||"), $3); - %*/ + value_expr($1); + $$ = new_if(p, $1, $3, $6, &@$); + fixpos($$, $1); + /*% %*/ + /*% ripper: ifop!($1, $3, $6) %*/ + } + | defn_head f_opt_paren_args '=' arg + { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); + /*%%%*/ + $$ = set_defun_body(p, $1, $2, $4, &@$); + /*% %*/ + /*% ripper[$4]: bodystmt!($4, Qnil, Qnil, Qnil) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ + local_pop(p); + } + | defn_head f_opt_paren_args '=' arg modifier_rescue arg + { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); + /*%%%*/ + $4 = rescued_expr(p, $4, $6, &@4, &@5, &@6); + $$ = set_defun_body(p, $1, $2, $4, &@$); + /*% %*/ + /*% ripper[$4]: bodystmt!(rescue_mod!($4, $6), Qnil, Qnil, Qnil) %*/ + /*% ripper: def!(get_value($1), $2, $4) %*/ + local_pop(p); } - | keyword_defined opt_nl {in_defined = 1;} arg + | defs_head f_opt_paren_args '=' arg { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - in_defined = 0; - $$ = NEW_DEFINED($4); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% - in_defined = 0; - $$ = dispatch1(defined, $4); + $1 = get_value($1); %*/ + /*% ripper[$4]: bodystmt!($4, Qnil, Qnil, Qnil) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ + local_pop(p); } - | arg '?' 
arg opt_nl ':' arg + | defs_head f_opt_paren_args '=' arg modifier_rescue arg { + endless_method_name(p, $<node>1, &@1); + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - value_expr($1); - $$ = NEW_IF(cond($1), $3, $6); - fixpos($$, $1); + $4 = rescued_expr(p, $4, $6, &@4, &@5, &@6); + $$ = set_defun_body(p, $1, $2, $4, &@$); /*% - $$ = dispatch3(ifop, $1, $3, $6); + $1 = get_value($1); %*/ + /*% ripper[$4]: bodystmt!(rescue_mod!($4, $6), Qnil, Qnil, Qnil) %*/ + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $4) %*/ + local_pop(p); } | primary { @@ -2316,15 +2652,37 @@ arg : lhs '=' arg } ; +relop : '>' {$$ = '>';} + | '<' {$$ = '<';} + | tGEQ {$$ = idGE;} + | tLEQ {$$ = idLE;} + ; + +rel_expr : arg relop arg %prec '>' + { + $$ = call_bin_op(p, $1, $2, $3, &@2, &@$); + } + | rel_expr relop arg %prec '>' + { + rb_warning1("comparison '%s' after comparison", WARN_ID($2)); + $$ = call_bin_op(p, $1, $2, $3, &@2, &@$); + } + ; + +lex_ctxt : tSP + { + $$ = p->ctxt; + } + | none + { + $$ = p->ctxt; + } + ; + arg_value : arg { - /*%%%*/ value_expr($1); $$ = $1; - if (!$$) $$ = NEW_NIL(); - /*% - $$ = $1; - %*/ } ; @@ -2336,18 +2694,31 @@ aref_args : none | args ',' assocs trailer { /*%%%*/ - $$ = arg_append($1, NEW_HASH($3)); - /*% - $$ = arg_add_assocs($1, $3); - %*/ + $$ = $3 ? arg_append(p, $1, new_hash(p, $3, &@3), &@$) : $1; + /*% %*/ + /*% ripper: args_add!($1, bare_assoc_hash!($3)) %*/ } | assocs trailer { /*%%%*/ - $$ = NEW_LIST(NEW_HASH($1)); - /*% - $$ = arg_add_assocs(arg_new(), $1); - %*/ + $$ = $1 ? 
NEW_LIST(new_hash(p, $1, &@1), &@$) : 0; + /*% %*/ + /*% ripper: args_add!(args_new!, bare_assoc_hash!($1)) %*/ + } + ; + +arg_rhs : arg %prec tOP_ASGN + { + value_expr($1); + $$ = $1; + } + | arg modifier_rescue arg + { + /*%%%*/ + value_expr($1); + $$ = rescued_expr(p, $1, $3, &@1, &@2, &@3); + /*% %*/ + /*% ripper: rescue_mod!($1, $3) %*/ } ; @@ -2355,9 +2726,32 @@ paren_args : '(' opt_call_args rparen { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(arg_paren, escape_Qundef($2)); - %*/ + /*% %*/ + /*% ripper: arg_paren!(escape_Qundef($2)) %*/ + } + | '(' args ',' args_forward rparen + { + if (!check_forwarding_args(p)) { + $$ = Qnone; + } + else { + /*%%%*/ + $$ = new_args_forward_call(p, $2, &@4, &@$); + /*% %*/ + /*% ripper: arg_paren!(args_add!($2, $4)) %*/ + } + } + | '(' args_forward rparen + { + if (!check_forwarding_args(p)) { + $$ = Qnone; + } + else { + /*%%%*/ + $$ = new_args_forward_call(p, 0, &@2, &@$); + /*% %*/ + /*% ripper: arg_paren!($2) %*/ + } } ; @@ -2374,18 +2768,16 @@ opt_call_args : none | args ',' assocs ',' { /*%%%*/ - $$ = arg_append($1, NEW_HASH($3)); - /*% - $$ = arg_add_assocs($1, $3); - %*/ + $$ = $3 ? arg_append(p, $1, new_hash(p, $3, &@3), &@$) : $1; + /*% %*/ + /*% ripper: args_add!($1, bare_assoc_hash!($3)) %*/ } | assocs ',' { /*%%%*/ - $$ = NEW_LIST(NEW_HASH($1)); - /*% - $$ = arg_add_assocs(arg_new(), $1); - %*/ + $$ = $1 ? NEW_LIST(new_hash(p, $1, &@1), &@1) : 0; + /*% %*/ + /*% ripper: args_add!(args_new!, bare_assoc_hash!($1)) %*/ } ; @@ -2393,55 +2785,70 @@ call_args : command { /*%%%*/ value_expr($1); - $$ = NEW_LIST($1); - /*% - $$ = arg_add(arg_new(), $1); - %*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: args_add!(args_new!, $1) %*/ } | args opt_block_arg { /*%%%*/ $$ = arg_blk_pass($1, $2); - /*% - $$ = arg_add_optblock($1, $2); - %*/ + /*% %*/ + /*% ripper: args_add_block!($1, $2) %*/ } | assocs opt_block_arg { /*%%%*/ - $$ = NEW_LIST(NEW_HASH($1)); + $$ = $1 ? 
NEW_LIST(new_hash(p, $1, &@1), &@1) : 0; $$ = arg_blk_pass($$, $2); - /*% - $$ = arg_add_assocs(arg_new(), $1); - $$ = arg_add_optblock($$, $2); - %*/ + /*% %*/ + /*% ripper: args_add_block!(args_add!(args_new!, bare_assoc_hash!($1)), $2) %*/ } | args ',' assocs opt_block_arg { /*%%%*/ - $$ = arg_append($1, NEW_HASH($3)); + $$ = $3 ? arg_append(p, $1, new_hash(p, $3, &@3), &@$) : $1; $$ = arg_blk_pass($$, $4); - /*% - $$ = arg_add_optblock(arg_add_assocs($1, $3), $4); - %*/ + /*% %*/ + /*% ripper: args_add_block!(args_add!($1, bare_assoc_hash!($3)), $4) %*/ } | block_arg - /*%c%*/ - /*%c - { - $$ = arg_add_block(arg_new(), $1); - } - %*/ + /*% ripper[brace]: args_add_block!(args_new!, $1) %*/ ; -command_args : { - $<val>$ = cmdarg_stack; +command_args : { + /* If call_args starts with a open paren '(' or '[', + * look-ahead reading of the letters calls CMDARG_PUSH(0), + * but the push must be done after CMDARG_PUSH(1). + * So this code makes them consistent by first cancelling + * the premature CMDARG_PUSH(0), doing CMDARG_PUSH(1), + * and finally redoing CMDARG_PUSH(0). + */ + int lookahead = 0; + switch (yychar) { + case '(': case tLPAREN: case tLPAREN_ARG: case '[': case tLBRACK: + lookahead = 1; + } + if (lookahead) CMDARG_POP(); CMDARG_PUSH(1); + if (lookahead) CMDARG_PUSH(0); } call_args { - /* CMDARG_POP() */ - cmdarg_stack = $<val>1; + /* call_args can be followed by tLBRACE_ARG (that does CMDARG_PUSH(0) in the lexer) + * but the push must be done after CMDARG_POP() in the parser. + * So this code does CMDARG_POP() to pop 0 pushed by tLBRACE_ARG, + * CMDARG_POP() to pop 1 pushed by command_args, + * and CMDARG_PUSH(0) to restore back the flag set by tLBRACE_ARG. 
+ */ + int lookahead = 0; + switch (yychar) { + case tLBRACE_ARG: + lookahead = 1; + } + if (lookahead) CMDARG_POP(); + CMDARG_POP(); + if (lookahead) CMDARG_PUSH(0); $$ = $2; } ; @@ -2449,11 +2856,21 @@ command_args : { block_arg : tAMPER arg_value { /*%%%*/ - $$ = NEW_BLOCK_PASS($2); - /*% - $$ = $2; - %*/ + $$ = NEW_BLOCK_PASS($2, &@$); + /*% %*/ + /*% ripper: $2 %*/ } + | tAMPER + { + /*%%%*/ + if (!local_id(p, ANON_BLOCK_ID)) { + compile_error(p, "no anonymous block parameter"); + } + $$ = NEW_BLOCK_PASS(NEW_LVAR(ANON_BLOCK_ID, &@1), &@$); + /*% + $$ = Qnil; + %*/ + } ; opt_block_arg : ',' block_arg @@ -2466,88 +2883,63 @@ opt_block_arg : ',' block_arg } ; +/* value */ args : arg_value { /*%%%*/ - $$ = NEW_LIST($1); - /*% - $$ = arg_add(arg_new(), $1); - %*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: args_add!(args_new!, $1) %*/ } | tSTAR arg_value { /*%%%*/ - $$ = NEW_SPLAT($2); - /*% - $$ = arg_add_star(arg_new(), $2); - %*/ + $$ = NEW_SPLAT($2, &@$); + /*% %*/ + /*% ripper: args_add_star!(args_new!, $2) %*/ } | args ',' arg_value { /*%%%*/ - NODE *n1; - if ((n1 = splat_array($1)) != 0) { - $$ = list_append(n1, $3); - } - else { - $$ = arg_append($1, $3); - } - /*% - $$ = arg_add($1, $3); - %*/ + $$ = last_arg_append(p, $1, $3, &@$); + /*% %*/ + /*% ripper: args_add!($1, $3) %*/ } | args ',' tSTAR arg_value { /*%%%*/ - NODE *n1; - if ((nd_type($4) == NODE_ARRAY) && (n1 = splat_array($1)) != 0) { - $$ = list_concat(n1, $4); - } - else { - $$ = arg_concat($1, $4); - } - /*% - $$ = arg_add_star($1, $4); - %*/ + $$ = rest_arg_append(p, $1, $4, &@$); + /*% %*/ + /*% ripper: args_add_star!($1, $4) %*/ } ; +/* value */ +mrhs_arg : mrhs + | arg_value + ; + +/* value */ mrhs : args ',' arg_value { /*%%%*/ - NODE *n1; - if ((n1 = splat_array($1)) != 0) { - $$ = list_append(n1, $3); - } - else { - $$ = arg_append($1, $3); - } - /*% - $$ = mrhs_add(args2mrhs($1), $3); - %*/ + $$ = last_arg_append(p, $1, $3, &@$); + /*% %*/ + /*% ripper: 
mrhs_add!(mrhs_new_from_args!($1), $3) %*/ } | args ',' tSTAR arg_value { /*%%%*/ - NODE *n1; - if (nd_type($4) == NODE_ARRAY && - (n1 = splat_array($1)) != 0) { - $$ = list_concat(n1, $4); - } - else { - $$ = arg_concat($1, $4); - } - /*% - $$ = mrhs_add_star(args2mrhs($1), $4); - %*/ + $$ = rest_arg_append(p, $1, $4, &@$); + /*% %*/ + /*% ripper: mrhs_add_star!(mrhs_new_from_args!($1), $4) %*/ } | tSTAR arg_value { /*%%%*/ - $$ = NEW_SPLAT($2); - /*% - $$ = mrhs_add_star(mrhs_new(), $2); - %*/ + $$ = NEW_SPLAT($2, &@$); + /*% %*/ + /*% ripper: mrhs_add_star!(mrhs_new!, $2) %*/ } ; @@ -2564,194 +2956,145 @@ primary : literal | tFID { /*%%%*/ - $$ = NEW_FCALL($1, 0); - /*% - $$ = method_arg(dispatch1(fcall, $1), arg_new()); - %*/ + $$ = NEW_FCALL($1, 0, &@$); + /*% %*/ + /*% ripper: method_add_arg!(fcall!($1), args_new!) %*/ } | k_begin { - $<val>1 = cmdarg_stack; - cmdarg_stack = 0; - /*%%%*/ - $<num>$ = ruby_sourceline; - /*% - %*/ + CMDARG_PUSH(0); } bodystmt k_end { - cmdarg_stack = $<val>1; + CMDARG_POP(); /*%%%*/ - if ($3 == NULL) { - $$ = NEW_NIL(); - } - else { - if (nd_type($3) == NODE_RESCUE || - nd_type($3) == NODE_ENSURE) - nd_set_line($3, $<num>2); - $$ = NEW_BEGIN($3); - } - nd_set_line($$, $<num>2); - /*% - $$ = dispatch1(begin, $3); - %*/ + set_line_body($3, @1.end_pos.lineno); + $$ = NEW_BEGIN($3, &@$); + nd_set_line($$, @1.end_pos.lineno); + /*% %*/ + /*% ripper: begin!($3) %*/ } - | tLPAREN_ARG {lex_state = EXPR_ENDARG;} rparen + | tLPAREN_ARG {SET_LEX_STATE(EXPR_ENDARG);} rparen { /*%%%*/ - $$ = 0; - /*% - $$ = dispatch1(paren, 0); - %*/ + $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: paren!(0) %*/ } - | tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} rparen + | tLPAREN_ARG stmt {SET_LEX_STATE(EXPR_ENDARG);} rparen { /*%%%*/ + if (nd_type_p($2, NODE_SELF)) $2->nd_state = 0; $$ = $2; - /*% - $$ = dispatch1(paren, $2); - %*/ + /*% %*/ + /*% ripper: paren!($2) %*/ } | tLPAREN compstmt ')' { /*%%%*/ + if (nd_type_p($2, NODE_SELF)) $2->nd_state = 0; $$ 
= $2; - /*% - $$ = dispatch1(paren, $2); - %*/ + /*% %*/ + /*% ripper: paren!($2) %*/ } | primary_value tCOLON2 tCONSTANT { /*%%%*/ - $$ = NEW_COLON2($1, $3); - /*% - $$ = dispatch2(const_path_ref, $1, $3); - %*/ + $$ = NEW_COLON2($1, $3, &@$); + /*% %*/ + /*% ripper: const_path_ref!($1, $3) %*/ } | tCOLON3 tCONSTANT { /*%%%*/ - $$ = NEW_COLON3($2); - /*% - $$ = dispatch1(top_const_ref, $2); - %*/ + $$ = NEW_COLON3($2, &@$); + /*% %*/ + /*% ripper: top_const_ref!($2) %*/ } | tLBRACK aref_args ']' { /*%%%*/ - if ($2 == 0) { - $$ = NEW_ZARRAY(); /* zero length array*/ - } - else { - $$ = $2; - } - /*% - $$ = dispatch1(array, escape_Qundef($2)); - %*/ + $$ = make_list($2, &@$); + /*% %*/ + /*% ripper: array!(escape_Qundef($2)) %*/ } | tLBRACE assoc_list '}' { /*%%%*/ - $$ = NEW_HASH($2); - /*% - $$ = dispatch1(hash, escape_Qundef($2)); - %*/ + $$ = new_hash(p, $2, &@$); + $$->nd_brace = TRUE; + /*% %*/ + /*% ripper: hash!(escape_Qundef($2)) %*/ } - | keyword_return + | k_return { /*%%%*/ - $$ = NEW_RETURN(0); - /*% - $$ = dispatch0(return0); - %*/ + $$ = NEW_RETURN(0, &@$); + /*% %*/ + /*% ripper: return0! %*/ } | keyword_yield '(' call_args rparen { /*%%%*/ - $$ = new_yield($3); - /*% - $$ = dispatch1(yield, dispatch1(paren, $3)); - %*/ + $$ = new_yield(p, $3, &@$); + /*% %*/ + /*% ripper: yield!(paren!($3)) %*/ } | keyword_yield '(' rparen { /*%%%*/ - $$ = NEW_YIELD(0); - /*% - $$ = dispatch1(yield, dispatch1(paren, arg_new())); - %*/ + $$ = NEW_YIELD(0, &@$); + /*% %*/ + /*% ripper: yield!(paren!(args_new!)) %*/ } | keyword_yield { /*%%%*/ - $$ = NEW_YIELD(0); - /*% - $$ = dispatch0(yield0); - %*/ + $$ = NEW_YIELD(0, &@$); + /*% %*/ + /*% ripper: yield0! 
%*/ } - | keyword_defined opt_nl '(' {in_defined = 1;} expr rparen + | keyword_defined opt_nl '(' {p->ctxt.in_defined = 1;} expr rparen { - /*%%%*/ - in_defined = 0; - $$ = NEW_DEFINED($5); - /*% - in_defined = 0; - $$ = dispatch1(defined, $5); - %*/ + p->ctxt.in_defined = 0; + $$ = new_defined(p, $5, &@$); } | keyword_not '(' expr rparen { - /*%%%*/ - $$ = call_uni_op(cond($3), '!'); - /*% - $$ = dispatch2(unary, ripper_intern("not"), $3); - %*/ + $$ = call_uni_op(p, method_cond(p, $3, &@3), METHOD_NOT, &@1, &@$); } | keyword_not '(' rparen { - /*%%%*/ - $$ = call_uni_op(cond(NEW_NIL()), '!'); - /*% - $$ = dispatch2(unary, ripper_intern("not"), Qnil); - %*/ + $$ = call_uni_op(p, method_cond(p, new_nil(&@2), &@2), METHOD_NOT, &@1, &@$); } - | operation brace_block + | fcall brace_block { /*%%%*/ - $2->nd_iter = NEW_FCALL($1, 0); - $$ = $2; - /*% - $$ = method_arg(dispatch1(fcall, $1), arg_new()); - $$ = method_add_block($$, $2); - %*/ + $$ = method_add_block(p, $1, $2, &@$); + /*% %*/ + /*% ripper: method_add_block!(method_add_arg!(fcall!($1), args_new!), $2) %*/ } | method_call | method_call brace_block { /*%%%*/ - block_dup_check($1->nd_args, $2); - $2->nd_iter = $1; - $$ = $2; - /*% - $$ = method_add_block($1, $2); - %*/ - } - | tLAMBDA lambda - { - $$ = $2; + block_dup_check(p, $1->nd_args, $2); + $$ = method_add_block(p, $1, $2, &@$); + /*% %*/ + /*% ripper: method_add_block!($1, $2) %*/ } + | lambda | k_if expr_value then compstmt if_tail k_end { /*%%%*/ - $$ = NEW_IF(cond($2), $4, $5); + $$ = new_if(p, $2, $4, $5, &@$); fixpos($$, $2); - /*% - $$ = dispatch3(if, $2, $4, escape_Qundef($5)); - %*/ + /*% %*/ + /*% ripper: if!($2, $4, escape_Qundef($5)) %*/ } | k_unless expr_value then compstmt @@ -2759,57 +3102,72 @@ primary : literal k_end { /*%%%*/ - $$ = NEW_UNLESS(cond($2), $4, $5); + $$ = new_unless(p, $2, $4, $5, &@$); fixpos($$, $2); - /*% - $$ = dispatch3(unless, $2, $4, escape_Qundef($5)); - %*/ + /*% %*/ + /*% ripper: unless!($2, $4, 
escape_Qundef($5)) %*/ } - | k_while {COND_PUSH(1);} expr_value do {COND_POP();} + | k_while expr_value_do compstmt k_end { /*%%%*/ - $$ = NEW_WHILE(cond($3), $6, 1); - fixpos($$, $3); - /*% - $$ = dispatch2(while, $3, $6); - %*/ + $$ = NEW_WHILE(cond(p, $2, &@2), $3, 1, &@$); + fixpos($$, $2); + /*% %*/ + /*% ripper: while!($2, $3) %*/ } - | k_until {COND_PUSH(1);} expr_value do {COND_POP();} + | k_until expr_value_do compstmt k_end { /*%%%*/ - $$ = NEW_UNTIL(cond($3), $6, 1); - fixpos($$, $3); - /*% - $$ = dispatch2(until, $3, $6); - %*/ + $$ = NEW_UNTIL(cond(p, $2, &@2), $3, 1, &@$); + fixpos($$, $2); + /*% %*/ + /*% ripper: until!($2, $3) %*/ } | k_case expr_value opt_terms + { + $<val>$ = p->case_labels; + p->case_labels = Qnil; + } case_body k_end { + if (RTEST(p->case_labels)) rb_hash_clear(p->case_labels); + p->case_labels = $<val>4; /*%%%*/ - $$ = NEW_CASE($2, $4); + $$ = NEW_CASE($2, $5, &@$); fixpos($$, $2); - /*% - $$ = dispatch2(case, $2, $4); - %*/ + /*% %*/ + /*% ripper: case!($2, $5) %*/ } - | k_case opt_terms case_body k_end + | k_case opt_terms { + $<val>$ = p->case_labels; + p->case_labels = 0; + } + case_body + k_end + { + if (RTEST(p->case_labels)) rb_hash_clear(p->case_labels); + p->case_labels = $<val>3; /*%%%*/ - $$ = NEW_CASE(0, $3); - /*% - $$ = dispatch2(case, Qnil, $3); - %*/ + $$ = NEW_CASE2($4, &@$); + /*% %*/ + /*% ripper: case!(Qnil, $4) %*/ } - | k_for for_var keyword_in - {COND_PUSH(1);} - expr_value do - {COND_POP();} + | k_case expr_value opt_terms + p_case_body + k_end + { + /*%%%*/ + $$ = NEW_CASE3($2, $4, &@$); + /*% %*/ + /*% ripper: case!($2, $4) %*/ + } + | k_for for_var keyword_in expr_value_do compstmt k_end { @@ -2817,330 +3175,339 @@ primary : literal /* * for a, b, c in e * #=> - * e.each{|*x| a, b, c = x + * e.each{|*x| a, b, c = x} * * for a in e * #=> * e.each{|x| a, = x} */ - ID id = internal_id(); - ID *tbl = ALLOC_N(ID, 2); - NODE *m = NEW_ARGS_AUX(0, 0); - NODE *args, *scope; - - if (nd_type($2) == NODE_MASGN) { 
- /* if args.length == 1 && args[0].kind_of?(Array) - * args = args[0] - * end - */ - NODE *one = NEW_LIST(NEW_LIT(INT2FIX(1))); - NODE *zero = NEW_LIST(NEW_LIT(INT2FIX(0))); - m->nd_next = block_append( - NEW_IF( - NEW_NODE(NODE_AND, - NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("length"), 0), - rb_intern("=="), one), - NEW_CALL(NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero), - rb_intern("kind_of?"), NEW_LIST(NEW_LIT(rb_cArray))), - 0), - NEW_DASGN_CURR(id, - NEW_CALL(NEW_DVAR(id), rb_intern("[]"), zero)), - 0), - node_assign($2, NEW_DVAR(id))); - - args = new_args(m, 0, id, 0, new_args_tail(0, 0, 0)); - } - else { - if (nd_type($2) == NODE_LASGN || - nd_type($2) == NODE_DASGN || - nd_type($2) == NODE_DASGN_CURR) { - $2->nd_value = NEW_DVAR(id); - m->nd_plen = 1; - m->nd_next = $2; - args = new_args(m, 0, 0, 0, new_args_tail(0, 0, 0)); - } - else { - m->nd_next = node_assign(NEW_MASGN(NEW_LIST($2), 0), NEW_DVAR(id)); - args = new_args(m, 0, id, 0, new_args_tail(0, 0, 0)); - } + ID id = internal_id(p); + NODE *m = NEW_ARGS_AUX(0, 0, &NULL_LOC); + NODE *args, *scope, *internal_var = NEW_DVAR(id, &@2); + rb_ast_id_table_t *tbl = rb_ast_new_local_table(p->ast, 1); + tbl->ids[0] = id; /* internal id */ + + switch (nd_type($2)) { + case NODE_LASGN: + case NODE_DASGN: /* e.each {|internal_var| a = internal_var; ... } */ + $2->nd_value = internal_var; + id = 0; + m->nd_plen = 1; + m->nd_next = $2; + break; + case NODE_MASGN: /* e.each {|*internal_var| a, b, c = (internal_var.length == 1 && Array === (tmp = internal_var[0]) ? tmp : internal_var); ... } */ + m->nd_next = node_assign(p, $2, NEW_FOR_MASGN(internal_var, &@2), NO_LEX_CTXT, &@2); + break; + default: /* e.each {|*internal_var| @a, B, c[1], d.attr = internal_val; ... 
} */ + m->nd_next = node_assign(p, NEW_MASGN(NEW_LIST($2, &@2), 0, &@2), internal_var, NO_LEX_CTXT, &@2); } - scope = NEW_NODE(NODE_SCOPE, tbl, $8, args); - tbl[0] = 1; tbl[1] = id; - $$ = NEW_FOR(0, $5, scope); + /* {|*internal_id| <m> = internal_id; ... } */ + args = new_args(p, m, 0, id, 0, new_args_tail(p, 0, 0, 0, &@2), &@2); + scope = NEW_NODE(NODE_SCOPE, tbl, $5, args, &@$); + $$ = NEW_FOR($4, scope, &@$); fixpos($$, $2); - /*% - $$ = dispatch3(for, $2, $5, $8); - %*/ + /*% %*/ + /*% ripper: for!($2, $4, $5) %*/ } | k_class cpath superclass { - if (in_def || in_single) - yyerror("class definition in method body"); - local_push(0); - /*%%%*/ - $<num>$ = ruby_sourceline; - /*% - %*/ + if (p->ctxt.in_def) { + YYLTYPE loc = code_loc_gen(&@1, &@2); + yyerror1(&loc, "class definition in method body"); + } + p->ctxt.in_class = 1; + local_push(p, 0); } bodystmt k_end { /*%%%*/ - $$ = NEW_CLASS($2, $5, $3); - nd_set_line($$, $<num>4); - /*% - $$ = dispatch3(class, $2, $3, $5); - %*/ - local_pop(); + $$ = NEW_CLASS($2, $5, $3, &@$); + nd_set_line($$->nd_body, @6.end_pos.lineno); + set_line_body($5, @3.end_pos.lineno); + nd_set_line($$, @3.end_pos.lineno); + /*% %*/ + /*% ripper: class!($2, $3, $5) %*/ + local_pop(p); + p->ctxt.in_class = $<ctxt>1.in_class; + p->ctxt.shareable_constant_value = $<ctxt>1.shareable_constant_value; } | k_class tLSHFT expr { - $<num>$ = in_def; - in_def = 0; + p->ctxt.in_def = 0; + p->ctxt.in_class = 0; + local_push(p, 0); } term - { - $<num>$ = in_single; - in_single = 0; - local_push(0); - } bodystmt k_end { /*%%%*/ - $$ = NEW_SCLASS($3, $7); + $$ = NEW_SCLASS($3, $6, &@$); + nd_set_line($$->nd_body, @7.end_pos.lineno); + set_line_body($6, nd_line($3)); fixpos($$, $3); - /*% - $$ = dispatch2(sclass, $3, $7); - %*/ - local_pop(); - in_def = $<num>4; - in_single = $<num>6; + /*% %*/ + /*% ripper: sclass!($3, $6) %*/ + local_pop(p); + p->ctxt.in_def = $<ctxt>1.in_def; + p->ctxt.in_class = $<ctxt>1.in_class; + p->ctxt.shareable_constant_value 
= $<ctxt>1.shareable_constant_value; } | k_module cpath { - if (in_def || in_single) - yyerror("module definition in method body"); - local_push(0); - /*%%%*/ - $<num>$ = ruby_sourceline; - /*% - %*/ + if (p->ctxt.in_def) { + YYLTYPE loc = code_loc_gen(&@1, &@2); + yyerror1(&loc, "module definition in method body"); + } + p->ctxt.in_class = 1; + local_push(p, 0); } bodystmt k_end { /*%%%*/ - $$ = NEW_MODULE($2, $4); - nd_set_line($$, $<num>3); - /*% - $$ = dispatch2(module, $2, $4); - %*/ - local_pop(); - } - | k_def fname - { - $<id>$ = cur_mid; - cur_mid = $2; - in_def++; - local_push(0); + $$ = NEW_MODULE($2, $4, &@$); + nd_set_line($$->nd_body, @5.end_pos.lineno); + set_line_body($4, @2.end_pos.lineno); + nd_set_line($$, @2.end_pos.lineno); + /*% %*/ + /*% ripper: module!($2, $4) %*/ + local_pop(p); + p->ctxt.in_class = $<ctxt>1.in_class; + p->ctxt.shareable_constant_value = $<ctxt>1.shareable_constant_value; } + | defn_head f_arglist bodystmt k_end { + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - NODE *body = remove_begin($5); - reduce_nodes(&body); - $$ = NEW_DEFN($2, $4, body, NOEX_PRIVATE); - nd_set_line($$, $<num>1); - /*% - $$ = dispatch3(def, $2, $4, $5); - %*/ - local_pop(); - in_def--; - cur_mid = $<id>3; - } - | k_def singleton dot_or_colon {lex_state = EXPR_FNAME;} fname - { - in_single++; - lex_state = EXPR_ENDFN; /* force for args */ - local_push(0); + $$ = set_defun_body(p, $1, $2, $3, &@$); + /*% %*/ + /*% ripper: def!(get_value($1), $2, $3) %*/ + local_pop(p); } + | defs_head f_arglist bodystmt k_end { + restore_defun(p, $<node>1->nd_defn); /*%%%*/ - NODE *body = remove_begin($8); - reduce_nodes(&body); - $$ = NEW_DEFS($2, $5, $7, body); - nd_set_line($$, $<num>1); + $$ = set_defun_body(p, $1, $2, $3, &@$); /*% - $$ = dispatch5(defs, $2, $3, $5, $7, $8); + $1 = get_value($1); %*/ - local_pop(); - in_single--; + /*% ripper: defs!(AREF($1, 0), AREF($1, 1), AREF($1, 2), $2, $3) %*/ + local_pop(p); } | keyword_break { /*%%%*/ - $$ = NEW_BREAK(0); 
- /*% - $$ = dispatch1(break, arg_new()); - %*/ + $$ = NEW_BREAK(0, &@$); + /*% %*/ + /*% ripper: break!(args_new!) %*/ } | keyword_next { /*%%%*/ - $$ = NEW_NEXT(0); - /*% - $$ = dispatch1(next, arg_new()); - %*/ + $$ = NEW_NEXT(0, &@$); + /*% %*/ + /*% ripper: next!(args_new!) %*/ } | keyword_redo { /*%%%*/ - $$ = NEW_REDO(); - /*% - $$ = dispatch0(redo); - %*/ + $$ = NEW_REDO(&@$); + /*% %*/ + /*% ripper: redo! %*/ } | keyword_retry { /*%%%*/ - $$ = NEW_RETRY(); - /*% - $$ = dispatch0(retry); - %*/ + $$ = NEW_RETRY(&@$); + /*% %*/ + /*% ripper: retry! %*/ } ; primary_value : primary { - /*%%%*/ value_expr($1); $$ = $1; - if (!$$) $$ = NEW_NIL(); - /*% - $$ = $1; - %*/ } ; k_begin : keyword_begin { - token_info_push("begin"); + token_info_push(p, "begin", &@$); } ; k_if : keyword_if { - token_info_push("if"); + WARN_EOL("if"); + token_info_push(p, "if", &@$); + if (p->token_info && p->token_info->nonspc && + p->token_info->next && !strcmp(p->token_info->next->token, "else")) { + const char *tok = p->lex.ptok; + const char *beg = p->lex.pbeg + p->token_info->next->beg.column; + beg += rb_strlen_lit("else"); + while (beg < tok && ISSPACE(*beg)) beg++; + if (beg == tok) { + p->token_info->nonspc = 0; + } + } } ; k_unless : keyword_unless { - token_info_push("unless"); + token_info_push(p, "unless", &@$); } ; k_while : keyword_while { - token_info_push("while"); + token_info_push(p, "while", &@$); } ; k_until : keyword_until { - token_info_push("until"); + token_info_push(p, "until", &@$); } ; k_case : keyword_case { - token_info_push("case"); + token_info_push(p, "case", &@$); } ; k_for : keyword_for { - token_info_push("for"); + token_info_push(p, "for", &@$); } ; k_class : keyword_class { - token_info_push("class"); + token_info_push(p, "class", &@$); + $<ctxt>$ = p->ctxt; } ; k_module : keyword_module { - token_info_push("module"); + token_info_push(p, "module", &@$); + $<ctxt>$ = p->ctxt; } ; k_def : keyword_def { - token_info_push("def"); - /*%%%*/ - $<num>$ = 
ruby_sourceline; - /*% - %*/ + token_info_push(p, "def", &@$); + p->ctxt.in_argdef = 1; + } + ; + +k_do : keyword_do + { + token_info_push(p, "do", &@$); + } + ; + +k_do_block : keyword_do_block + { + token_info_push(p, "do", &@$); + } + ; + +k_rescue : keyword_rescue + { + token_info_warn(p, "rescue", p->token_info, 1, &@$); + } + ; + +k_ensure : keyword_ensure + { + token_info_warn(p, "ensure", p->token_info, 1, &@$); + } + ; + +k_when : keyword_when + { + token_info_warn(p, "when", p->token_info, 0, &@$); + } + ; + +k_else : keyword_else + { + token_info *ptinfo_beg = p->token_info; + int same = ptinfo_beg && strcmp(ptinfo_beg->token, "case") != 0; + token_info_warn(p, "else", p->token_info, same, &@$); + if (same) { + token_info e; + e.next = ptinfo_beg->next; + e.token = "else"; + token_info_setup(&e, p->lex.pbeg, &@$); + if (!e.nonspc) *ptinfo_beg = e; + } + } + ; + +k_elsif : keyword_elsif + { + WARN_EOL("elsif"); + token_info_warn(p, "elsif", p->token_info, 1, &@$); } ; k_end : keyword_end { - token_info_pop("end"); + token_info_pop(p, "end", &@$); + } + ; + +k_return : keyword_return + { + if (p->ctxt.in_class && !p->ctxt.in_def && !dyna_in_block(p)) + yyerror1(&@1, "Invalid return in class/module body"); } ; then : term - /*%c%*/ - /*%c - { $$ = Qnil; } - %*/ | keyword_then | term keyword_then - /*%c%*/ - /*%c - { $$ = $2; } - %*/ ; do : term - /*%c%*/ - /*%c - { $$ = Qnil; } - %*/ | keyword_do_cond ; if_tail : opt_else - | keyword_elsif expr_value then + | k_elsif expr_value then compstmt if_tail { /*%%%*/ - $$ = NEW_IF(cond($2), $4, $5); + $$ = new_if(p, $2, $4, $5, &@$); fixpos($$, $2); - /*% - $$ = dispatch3(elsif, $2, $4, escape_Qundef($5)); - %*/ + /*% %*/ + /*% ripper: elsif!($2, $4, escape_Qundef($5)) %*/ } ; opt_else : none - | keyword_else compstmt + | k_else compstmt { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(else, $2); - %*/ + /*% %*/ + /*% ripper: else!($2) %*/ } ; @@ -3150,137 +3517,112 @@ for_var : lhs f_marg : f_norm_arg { - $$ = 
assignable($1, 0); /*%%%*/ - /*% - $$ = dispatch1(mlhs_paren, $$); - %*/ + $$ = assignable(p, $1, 0, &@$); + mark_lvar_used(p, $$); + /*% %*/ + /*% ripper: assignable(p, $1) %*/ } | tLPAREN f_margs rparen { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(mlhs_paren, $2); - %*/ + /*% %*/ + /*% ripper: mlhs_paren!($2) %*/ } ; f_marg_list : f_marg { /*%%%*/ - $$ = NEW_LIST($1); - /*% - $$ = mlhs_add(mlhs_new(), $1); - %*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: mlhs_add!(mlhs_new!, $1) %*/ } | f_marg_list ',' f_marg { /*%%%*/ - $$ = list_append($1, $3); - /*% - $$ = mlhs_add($1, $3); - %*/ + $$ = list_append(p, $1, $3); + /*% %*/ + /*% ripper: mlhs_add!($1, $3) %*/ } ; f_margs : f_marg_list { /*%%%*/ - $$ = NEW_MASGN($1, 0); - /*% - $$ = $1; - %*/ - } - | f_marg_list ',' tSTAR f_norm_arg - { - $$ = assignable($4, 0); - /*%%%*/ - $$ = NEW_MASGN($1, $$); - /*% - $$ = mlhs_add_star($1, $$); - %*/ + $$ = NEW_MASGN($1, 0, &@$); + /*% %*/ + /*% ripper: $1 %*/ } - | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list + | f_marg_list ',' f_rest_marg { - $$ = assignable($4, 0); /*%%%*/ - $$ = NEW_MASGN($1, NEW_POSTARG($$, $6)); - /*% - $$ = mlhs_add_star($1, $$); - %*/ + $$ = NEW_MASGN($1, $3, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!($1, $3) %*/ } - | f_marg_list ',' tSTAR + | f_marg_list ',' f_rest_marg ',' f_marg_list { /*%%%*/ - $$ = NEW_MASGN($1, -1); - /*% - $$ = mlhs_add_star($1, Qnil); - %*/ + $$ = NEW_MASGN($1, NEW_POSTARG($3, $5, &@$), &@$); + /*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!($1, $3), $5) %*/ } - | f_marg_list ',' tSTAR ',' f_marg_list + | f_rest_marg { /*%%%*/ - $$ = NEW_MASGN($1, NEW_POSTARG(-1, $5)); - /*% - $$ = mlhs_add_star($1, $5); - %*/ + $$ = NEW_MASGN(0, $1, &@$); + /*% %*/ + /*% ripper: mlhs_add_star!(mlhs_new!, $1) %*/ } - | tSTAR f_norm_arg + | f_rest_marg ',' f_marg_list { - $$ = assignable($2, 0); /*%%%*/ - $$ = NEW_MASGN(0, $$); - /*% - $$ = mlhs_add_star(mlhs_new(), $$); - %*/ + $$ = NEW_MASGN(0, NEW_POSTARG($1, $3, &@$), &@$); + 
/*% %*/ + /*% ripper: mlhs_add_post!(mlhs_add_star!(mlhs_new!, $1), $3) %*/ } - | tSTAR f_norm_arg ',' f_marg_list + ; + +f_rest_marg : tSTAR f_norm_arg { - $$ = assignable($2, 0); /*%%%*/ - $$ = NEW_MASGN(0, NEW_POSTARG($$, $4)); - /*% - #if 0 - TODO: Check me - #endif - $$ = mlhs_add_star($$, $4); - %*/ + $$ = assignable(p, $2, 0, &@$); + mark_lvar_used(p, $$); + /*% %*/ + /*% ripper: assignable(p, $2) %*/ } | tSTAR { /*%%%*/ - $$ = NEW_MASGN(0, -1); - /*% - $$ = mlhs_add_star(mlhs_new(), Qnil); - %*/ - } - | tSTAR ',' f_marg_list - { - /*%%%*/ - $$ = NEW_MASGN(0, NEW_POSTARG(-1, $3)); - /*% - $$ = mlhs_add_star(mlhs_new(), Qnil); - %*/ + $$ = NODE_SPECIAL_NO_NAME_REST; + /*% %*/ + /*% ripper: Qnil %*/ } ; +f_any_kwrest : f_kwrest + | f_no_kwarg {$$ = ID2VAL(idNil);} + ; + +f_eq : {p->ctxt.in_argdef = 0;} '='; block_args_tail : f_block_kwarg ',' f_kwrest opt_f_block_arg { - $$ = new_args_tail($1, $3, $4); + $$ = new_args_tail(p, $1, $3, $4, &@3); } | f_block_kwarg opt_f_block_arg { - $$ = new_args_tail($1, Qnone, $2); + $$ = new_args_tail(p, $1, Qnone, $2, &@1); } - | f_kwrest opt_f_block_arg + | f_any_kwrest opt_f_block_arg { - $$ = new_args_tail(Qnone, $1, $2); + $$ = new_args_tail(p, Qnone, $1, $2, &@1); } | f_block_arg { - $$ = new_args_tail(Qnone, Qnone, $1); + $$ = new_args_tail(p, Qnone, Qnone, $1, &@1); } ; @@ -3290,149 +3632,136 @@ opt_block_args_tail : ',' block_args_tail } | /* none */ { - $$ = new_args_tail(Qnone, Qnone, Qnone); + $$ = new_args_tail(p, Qnone, Qnone, Qnone, &@0); + } + ; + +excessed_comma : ',' + { + /* magic number for rest_id in iseq_set_arguments() */ + /*%%%*/ + $$ = NODE_SPECIAL_EXCESSIVE_COMMA; + /*% %*/ + /*% ripper: excessed_comma! 
%*/ } ; block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail { - $$ = new_args($1, $3, $5, Qnone, $6); + $$ = new_args(p, $1, $3, $5, Qnone, $6, &@$); } | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail { - $$ = new_args($1, $3, $5, $7, $8); + $$ = new_args(p, $1, $3, $5, $7, $8, &@$); } | f_arg ',' f_block_optarg opt_block_args_tail { - $$ = new_args($1, $3, Qnone, Qnone, $4); + $$ = new_args(p, $1, $3, Qnone, Qnone, $4, &@$); } | f_arg ',' f_block_optarg ',' f_arg opt_block_args_tail { - $$ = new_args($1, $3, Qnone, $5, $6); + $$ = new_args(p, $1, $3, Qnone, $5, $6, &@$); } | f_arg ',' f_rest_arg opt_block_args_tail { - $$ = new_args($1, Qnone, $3, Qnone, $4); + $$ = new_args(p, $1, Qnone, $3, Qnone, $4, &@$); } - | f_arg ',' + | f_arg excessed_comma { - $$ = new_args($1, Qnone, 1, Qnone, new_args_tail(Qnone, Qnone, Qnone)); - /*%%%*/ - /*% - dispatch1(excessed_comma, $$); - %*/ + $$ = new_args_tail(p, Qnone, Qnone, Qnone, &@2); + $$ = new_args(p, $1, Qnone, $2, Qnone, $$, &@$); } | f_arg ',' f_rest_arg ',' f_arg opt_block_args_tail { - $$ = new_args($1, Qnone, $3, $5, $6); + $$ = new_args(p, $1, Qnone, $3, $5, $6, &@$); } | f_arg opt_block_args_tail { - $$ = new_args($1, Qnone, Qnone, Qnone, $2); + $$ = new_args(p, $1, Qnone, Qnone, Qnone, $2, &@$); } | f_block_optarg ',' f_rest_arg opt_block_args_tail { - $$ = new_args(Qnone, $1, $3, Qnone, $4); + $$ = new_args(p, Qnone, $1, $3, Qnone, $4, &@$); } | f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail { - $$ = new_args(Qnone, $1, $3, $5, $6); + $$ = new_args(p, Qnone, $1, $3, $5, $6, &@$); } | f_block_optarg opt_block_args_tail { - $$ = new_args(Qnone, $1, Qnone, Qnone, $2); + $$ = new_args(p, Qnone, $1, Qnone, Qnone, $2, &@$); } | f_block_optarg ',' f_arg opt_block_args_tail { - $$ = new_args(Qnone, $1, Qnone, $3, $4); + $$ = new_args(p, Qnone, $1, Qnone, $3, $4, &@$); } | f_rest_arg opt_block_args_tail { - $$ = new_args(Qnone, Qnone, $1, Qnone, $2); + $$ = 
new_args(p, Qnone, Qnone, $1, Qnone, $2, &@$); } | f_rest_arg ',' f_arg opt_block_args_tail { - $$ = new_args(Qnone, Qnone, $1, $3, $4); + $$ = new_args(p, Qnone, Qnone, $1, $3, $4, &@$); } | block_args_tail { - $$ = new_args(Qnone, Qnone, Qnone, Qnone, $1); + $$ = new_args(p, Qnone, Qnone, Qnone, Qnone, $1, &@$); } ; opt_block_param : none | block_param_def { - command_start = TRUE; + p->command_start = TRUE; } ; block_param_def : '|' opt_bv_decl '|' { + p->cur_arg = 0; + p->max_numparam = ORDINAL_PARAM; + p->ctxt.in_argdef = 0; /*%%%*/ $$ = 0; - /*% - $$ = blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil,Qnil,Qnil), - escape_Qundef($2)); - %*/ - } - | tOROP - { - /*%%%*/ - $$ = 0; - /*% - $$ = blockvar_new(params_new(Qnil,Qnil,Qnil,Qnil,Qnil,Qnil,Qnil), - Qnil); - %*/ + /*% %*/ + /*% ripper: block_var!(params!(Qnil,Qnil,Qnil,Qnil,Qnil,Qnil,Qnil), escape_Qundef($2)) %*/ } | '|' block_param opt_bv_decl '|' { + p->cur_arg = 0; + p->max_numparam = ORDINAL_PARAM; + p->ctxt.in_argdef = 0; /*%%%*/ $$ = $2; - /*% - $$ = blockvar_new(escape_Qundef($2), escape_Qundef($3)); - %*/ + /*% %*/ + /*% ripper: block_var!(escape_Qundef($2), escape_Qundef($3)) %*/ } ; opt_bv_decl : opt_nl { - $$ = 0; + $$ = 0; } | opt_nl ';' bv_decls opt_nl { /*%%%*/ $$ = 0; - /*% - $$ = $3; - %*/ + /*% %*/ + /*% ripper: $3 %*/ } ; bv_decls : bvar - /*%c%*/ - /*%c - { - $$ = rb_ary_new3(1, $1); - } - %*/ + /*% ripper[brace]: rb_ary_new3(1, get_value($1)) %*/ | bv_decls ',' bvar - /*%c%*/ - /*%c - { - rb_ary_push($1, $3); - } - %*/ + /*% ripper[brace]: rb_ary_push($1, get_value($3)) %*/ ; bvar : tIDENTIFIER { - new_bv(get_id($1)); - /*%%%*/ - /*% - $$ = get_value($1); - %*/ + new_bv(p, get_id($1)); + /*% ripper: get_value($1) %*/ } | f_bad_arg { @@ -3440,319 +3769,865 @@ bvar : tIDENTIFIER } ; -lambda : { - $<vars>$ = dyna_push(); +lambda : tLAMBDA + { + token_info_push(p, "->", &@1); + $<vars>1 = dyna_push(p); + $<num>$ = p->lex.lpar_beg; + p->lex.lpar_beg = p->lex.paren_nest; } { - $<num>$ = 
lpar_beg; - lpar_beg = ++paren_nest; + $<num>$ = p->max_numparam; + p->max_numparam = 0; + } + { + $<node>$ = numparam_push(p); } f_larglist + { + CMDARG_PUSH(0); + } lambda_body { - lpar_beg = $<num>2; - /*%%%*/ - $$ = NEW_LAMBDA($3, $4); - /*% - $$ = dispatch2(lambda, $3, $4); - %*/ - dyna_pop($<vars>1); + int max_numparam = p->max_numparam; + p->lex.lpar_beg = $<num>2; + p->max_numparam = $<num>3; + CMDARG_POP(); + $5 = args_with_numbered(p, $5, max_numparam); + /*%%%*/ + { + YYLTYPE loc = code_loc_gen(&@5, &@7); + $$ = NEW_LAMBDA($5, $7, &loc); + nd_set_line($$->nd_body, @7.end_pos.lineno); + nd_set_line($$, @5.end_pos.lineno); + nd_set_first_loc($$, @1.beg_pos); + } + /*% %*/ + /*% ripper: lambda!($5, $7) %*/ + numparam_pop(p, $<node>4); + dyna_pop(p, $<vars>1); } ; f_larglist : '(' f_args opt_bv_decl ')' { + p->ctxt.in_argdef = 0; /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(paren, $2); - %*/ + p->max_numparam = ORDINAL_PARAM; + /*% %*/ + /*% ripper: paren!($2) %*/ } | f_args { + p->ctxt.in_argdef = 0; /*%%%*/ + if (!args_info_empty_p($1->nd_ainfo)) + p->max_numparam = ORDINAL_PARAM; + /*% %*/ $$ = $1; - /*% - $$ = $1; - %*/ } ; lambda_body : tLAMBEG compstmt '}' { + token_info_pop(p, "}", &@3); $$ = $2; } - | keyword_do_LAMBDA compstmt keyword_end + | keyword_do_LAMBDA bodystmt k_end { $$ = $2; } ; -do_block : keyword_do_block +do_block : k_do_block do_body k_end { - $<vars>1 = dyna_push(); + $$ = $2; /*%%%*/ - $<num>$ = ruby_sourceline; + $$->nd_body->nd_loc = code_loc_gen(&@1, &@3); + nd_set_line($$, @1.end_pos.lineno); /*% %*/ } - opt_block_param - compstmt - keyword_end - { - /*%%%*/ - $$ = NEW_ITER($3,$4); - nd_set_line($$, $<num>2); - /*% - $$ = dispatch2(do_block, escape_Qundef($3), $4); - %*/ - dyna_pop($<vars>1); - } ; block_call : command do_block { /*%%%*/ - if (nd_type($1) == NODE_YIELD) { - compile_error(PARSER_ARG "block given to yield"); + if (nd_type_p($1, NODE_YIELD)) { + compile_error(p, "block given to yield"); } else { - 
block_dup_check($1->nd_args, $2); + block_dup_check(p, $1->nd_args, $2); } - $2->nd_iter = $1; - $$ = $2; + $$ = method_add_block(p, $1, $2, &@$); fixpos($$, $1); - /*% - $$ = method_add_block($1, $2); - %*/ + /*% %*/ + /*% ripper: method_add_block!($1, $2) %*/ } - | block_call dot_or_colon operation2 opt_paren_args + | block_call call_op2 operation2 opt_paren_args { /*%%%*/ - $$ = NEW_CALL($1, $3, $4); - /*% - $$ = dispatch3(call, $1, $2, $3); - $$ = method_optarg($$, $4); - %*/ + $$ = new_qcall(p, $2, $1, $3, $4, &@3, &@$); + /*% %*/ + /*% ripper: opt_event(:method_add_arg!, call!($1, $2, $3), $4) %*/ } - | block_call dot_or_colon operation2 opt_paren_args brace_block + | block_call call_op2 operation2 opt_paren_args brace_block { /*%%%*/ - block_dup_check($4, $5); - $5->nd_iter = NEW_CALL($1, $3, $4); - $$ = $5; - fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, $2, $3, $4); - $$ = method_add_block($$, $5); - %*/ + $$ = new_command_qcall(p, $2, $1, $3, $4, $5, &@3, &@$); + /*% %*/ + /*% ripper: opt_event(:method_add_block!, command_call!($1, $2, $3, $4), $5) %*/ } - | block_call dot_or_colon operation2 command_args do_block + | block_call call_op2 operation2 command_args do_block { /*%%%*/ - block_dup_check($4, $5); - $5->nd_iter = NEW_CALL($1, $3, $4); - $$ = $5; + $$ = new_command_qcall(p, $2, $1, $3, $4, $5, &@3, &@$); + /*% %*/ + /*% ripper: method_add_block!(command_call!($1, $2, $3, $4), $5) %*/ + } + ; + +method_call : fcall paren_args + { + /*%%%*/ + $$ = $1; + $$->nd_args = $2; + nd_set_last_loc($1, @2.end_pos); + /*% %*/ + /*% ripper: method_add_arg!(fcall!($1), $2) %*/ + } + | primary_value call_op operation2 opt_paren_args + { + /*%%%*/ + $$ = new_qcall(p, $2, $1, $3, $4, &@3, &@$); + nd_set_line($$, @3.end_pos.lineno); + /*% %*/ + /*% ripper: opt_event(:method_add_arg!, call!($1, $2, $3), $4) %*/ + } + | primary_value tCOLON2 operation2 paren_args + { + /*%%%*/ + $$ = new_qcall(p, ID2VAL(idCOLON2), $1, $3, $4, &@3, &@$); + nd_set_line($$, 
@3.end_pos.lineno); + /*% %*/ + /*% ripper: method_add_arg!(call!($1, $2, $3), $4) %*/ + } + | primary_value tCOLON2 operation3 + { + /*%%%*/ + $$ = new_qcall(p, ID2VAL(idCOLON2), $1, $3, Qnull, &@3, &@$); + /*% %*/ + /*% ripper: call!($1, $2, $3) %*/ + } + | primary_value call_op paren_args + { + /*%%%*/ + $$ = new_qcall(p, $2, $1, ID2VAL(idCall), $3, &@2, &@$); + nd_set_line($$, @2.end_pos.lineno); + /*% %*/ + /*% ripper: method_add_arg!(call!($1, $2, ID2VAL(idCall)), $3) %*/ + } + | primary_value tCOLON2 paren_args + { + /*%%%*/ + $$ = new_qcall(p, ID2VAL(idCOLON2), $1, ID2VAL(idCall), $3, &@2, &@$); + nd_set_line($$, @2.end_pos.lineno); + /*% %*/ + /*% ripper: method_add_arg!(call!($1, $2, ID2VAL(idCall)), $3) %*/ + } + | keyword_super paren_args + { + /*%%%*/ + $$ = NEW_SUPER($2, &@$); + /*% %*/ + /*% ripper: super!($2) %*/ + } + | keyword_super + { + /*%%%*/ + $$ = NEW_ZSUPER(&@$); + /*% %*/ + /*% ripper: zsuper! %*/ + } + | primary_value '[' opt_call_args rbracket + { + /*%%%*/ + if ($1 && nd_type_p($1, NODE_SELF)) + $$ = NEW_FCALL(tAREF, $3, &@$); + else + $$ = NEW_CALL($1, tAREF, $3, &@$); fixpos($$, $1); - /*% - $$ = dispatch4(command_call, $1, $2, $3, $4); - $$ = method_add_block($$, $5); - %*/ + /*% %*/ + /*% ripper: aref!($1, escape_Qundef($3)) %*/ } ; -method_call : operation +brace_block : '{' brace_body '}' { + $$ = $2; /*%%%*/ - $<num>$ = ruby_sourceline; + $$->nd_body->nd_loc = code_loc_gen(&@1, &@3); + nd_set_line($$, @1.end_pos.lineno); /*% %*/ } - paren_args + | k_do do_body k_end { + $$ = $2; /*%%%*/ - $$ = NEW_FCALL($1, $3); - nd_set_line($$, $<num>2); - /*% - $$ = method_arg(dispatch1(fcall, $1), $3); - %*/ + $$->nd_body->nd_loc = code_loc_gen(&@1, &@3); + nd_set_line($$, @1.end_pos.lineno); + /*% %*/ } - | primary_value '.' 
operation2 + ; + +brace_body : {$<vars>$ = dyna_push(p);} { + $<num>$ = p->max_numparam; + p->max_numparam = 0; + } + { + $<node>$ = numparam_push(p); + } + opt_block_param compstmt + { + int max_numparam = p->max_numparam; + p->max_numparam = $<num>2; + $4 = args_with_numbered(p, $4, max_numparam); /*%%%*/ - $<num>$ = ruby_sourceline; + $$ = NEW_ITER($4, $5, &@$); /*% %*/ + /*% ripper: brace_block!(escape_Qundef($4), $5) %*/ + numparam_pop(p, $<node>3); + dyna_pop(p, $<vars>1); + } + ; + +do_body : {$<vars>$ = dyna_push(p);} + { + $<num>$ = p->max_numparam; + p->max_numparam = 0; + } + { + $<node>$ = numparam_push(p); + CMDARG_PUSH(0); } - opt_paren_args + opt_block_param bodystmt { + int max_numparam = p->max_numparam; + p->max_numparam = $<num>2; + $4 = args_with_numbered(p, $4, max_numparam); /*%%%*/ - $$ = NEW_CALL($1, $3, $5); - nd_set_line($$, $<num>4); - /*% - $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); - $$ = method_optarg($$, $5); - %*/ + $$ = NEW_ITER($4, $5, &@$); + /*% %*/ + /*% ripper: do_block!(escape_Qundef($4), $5) %*/ + CMDARG_POP(); + numparam_pop(p, $<node>3); + dyna_pop(p, $<vars>1); + } + ; + +case_args : arg_value + { + /*%%%*/ + check_literal_when(p, $1, &@1); + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: args_add!(args_new!, $1) %*/ } - | primary_value tCOLON2 operation2 + | tSTAR arg_value { /*%%%*/ - $<num>$ = ruby_sourceline; + $$ = NEW_SPLAT($2, &@$); /*% %*/ + /*% ripper: args_add_star!(args_new!, $2) %*/ } - paren_args + | case_args ',' arg_value { /*%%%*/ - $$ = NEW_CALL($1, $3, $5); - nd_set_line($$, $<num>4); - /*% - $$ = dispatch3(call, $1, ripper_id2sym('.'), $3); - $$ = method_optarg($$, $5); - %*/ + check_literal_when(p, $3, &@3); + $$ = last_arg_append(p, $1, $3, &@$); + /*% %*/ + /*% ripper: args_add!($1, $3) %*/ } - | primary_value tCOLON2 operation3 + | case_args ',' tSTAR arg_value { /*%%%*/ - $$ = NEW_CALL($1, $3, 0); - /*% - $$ = dispatch3(call, $1, ripper_intern("::"), $3); - %*/ + $$ = rest_arg_append(p, $1, 
$4, &@$); + /*% %*/ + /*% ripper: args_add_star!($1, $4) %*/ + } + ; + +case_body : k_when case_args then + compstmt + cases + { + /*%%%*/ + $$ = NEW_WHEN($2, $4, $5, &@$); + fixpos($$, $2); + /*% %*/ + /*% ripper: when!($2, $4, escape_Qundef($5)) %*/ + } + ; + +cases : opt_else + | case_body + ; + +p_case_body : keyword_in + { + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); + p->command_start = FALSE; + $<ctxt>1 = p->ctxt; + p->ctxt.in_kwarg = 1; + $<tbl>$ = push_pvtbl(p); + } + { + $<tbl>$ = push_pktbl(p); + } + p_top_expr then + { + pop_pktbl(p, $<tbl>3); + pop_pvtbl(p, $<tbl>2); + p->ctxt.in_kwarg = $<ctxt>1.in_kwarg; + } + compstmt + p_cases + { + /*%%%*/ + $$ = NEW_IN($4, $7, $8, &@$); + /*% %*/ + /*% ripper: in!($4, $7, escape_Qundef($8)) %*/ + } + ; + +p_cases : opt_else + | p_case_body + ; + +p_top_expr : p_top_expr_body + | p_top_expr_body modifier_if expr_value + { + /*%%%*/ + $$ = new_if(p, $3, $1, 0, &@$); + fixpos($$, $3); + /*% %*/ + /*% ripper: if_mod!($3, $1) %*/ } - | primary_value '.' 
+ | p_top_expr_body modifier_unless expr_value { /*%%%*/ - $<num>$ = ruby_sourceline; + $$ = new_unless(p, $3, $1, 0, &@$); + fixpos($$, $3); /*% %*/ + /*% ripper: unless_mod!($3, $1) %*/ } - paren_args + ; + +p_top_expr_body : p_expr + | p_expr ',' { + $$ = new_array_pattern_tail(p, Qnone, 1, 0, Qnone, &@$); + $$ = new_array_pattern(p, Qnone, get_value($1), $$, &@$); + } + | p_expr ',' p_args + { + $$ = new_array_pattern(p, Qnone, get_value($1), $3, &@$); /*%%%*/ - $$ = NEW_CALL($1, rb_intern("call"), $4); - nd_set_line($$, $<num>3); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch3(call, $1, ripper_id2sym('.'), - ripper_intern("call")); - $$ = method_optarg($$, $4); %*/ } - | primary_value tCOLON2 + | p_find + { + $$ = new_find_pattern(p, Qnone, $1, &@$); + } + | p_args_tail + { + $$ = new_array_pattern(p, Qnone, Qnone, $1, &@$); + } + | p_kwargs + { + $$ = new_hash_pattern(p, Qnone, $1, &@$); + } + ; + +p_expr : p_as + ; + +p_as : p_expr tASSOC p_variable + { + /*%%%*/ + NODE *n = NEW_LIST($1, &@$); + n = list_append(p, n, $3); + $$ = new_hash(p, n, &@$); + /*% %*/ + /*% ripper: binary!($1, STATIC_ID2SYM((id_assoc)), $3) %*/ + } + | p_alt + ; + +p_alt : p_alt '|' p_expr_basic { /*%%%*/ - $<num>$ = ruby_sourceline; + $$ = NEW_NODE(NODE_OR, $1, $3, 0, &@$); /*% %*/ + /*% ripper: binary!($1, STATIC_ID2SYM(idOr), $3) %*/ } - paren_args + | p_expr_basic + ; + +p_lparen : '(' {$<tbl>$ = push_pktbl(p);}; +p_lbracket : '[' {$<tbl>$ = push_pktbl(p);}; + +p_expr_basic : p_value + | p_variable + | p_const p_lparen p_args rparen { + pop_pktbl(p, $<tbl>2); + $$ = new_array_pattern(p, $1, Qnone, $3, &@$); /*%%%*/ - $$ = NEW_CALL($1, rb_intern("call"), $4); - nd_set_line($$, $<num>3); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch3(call, $1, ripper_intern("::"), - ripper_intern("call")); - $$ = method_optarg($$, $4); %*/ } - | keyword_super paren_args + | p_const p_lparen p_find rparen { + pop_pktbl(p, $<tbl>2); + $$ = new_find_pattern(p, $1, $3, &@$); /*%%%*/ 
- $$ = NEW_SUPER($2); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch1(super, $2); %*/ } - | keyword_super + | p_const p_lparen p_kwargs rparen { + pop_pktbl(p, $<tbl>2); + $$ = new_hash_pattern(p, $1, $3, &@$); /*%%%*/ - $$ = NEW_ZSUPER(); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch0(zsuper); %*/ } - | primary_value '[' opt_call_args rbracket + | p_const '(' rparen + { + $$ = new_array_pattern_tail(p, Qnone, 0, 0, Qnone, &@$); + $$ = new_array_pattern(p, $1, Qnone, $$, &@$); + } + | p_const p_lbracket p_args rbracket { + pop_pktbl(p, $<tbl>2); + $$ = new_array_pattern(p, $1, Qnone, $3, &@$); /*%%%*/ - if ($1 && nd_type($1) == NODE_SELF) - $$ = NEW_FCALL(tAREF, $3); - else - $$ = NEW_CALL($1, tAREF, $3); - fixpos($$, $1); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch2(aref, $1, escape_Qundef($3)); %*/ } - ; - -brace_block : '{' + | p_const p_lbracket p_find rbracket { - $<vars>1 = dyna_push(); + pop_pktbl(p, $<tbl>2); + $$ = new_find_pattern(p, $1, $3, &@$); /*%%%*/ - $<num>$ = ruby_sourceline; + nd_set_first_loc($$, @1.beg_pos); /*% - %*/ + %*/ } - opt_block_param - compstmt '}' + | p_const p_lbracket p_kwargs rbracket { + pop_pktbl(p, $<tbl>2); + $$ = new_hash_pattern(p, $1, $3, &@$); /*%%%*/ - $$ = NEW_ITER($3,$4); - nd_set_line($$, $<num>2); + nd_set_first_loc($$, @1.beg_pos); /*% - $$ = dispatch2(brace_block, escape_Qundef($3), $4); %*/ - dyna_pop($<vars>1); } - | keyword_do + | p_const '[' rbracket + { + $$ = new_array_pattern_tail(p, Qnone, 0, 0, Qnone, &@$); + $$ = new_array_pattern(p, $1, Qnone, $$, &@$); + } + | tLBRACK p_args rbracket + { + $$ = new_array_pattern(p, Qnone, Qnone, $2, &@$); + } + | tLBRACK p_find rbracket + { + $$ = new_find_pattern(p, Qnone, $2, &@$); + } + | tLBRACK rbracket + { + $$ = new_array_pattern_tail(p, Qnone, 0, 0, Qnone, &@$); + $$ = new_array_pattern(p, Qnone, Qnone, $$, &@$); + } + | tLBRACE + { + $<tbl>$ = push_pktbl(p); + $<ctxt>1 = p->ctxt; + p->ctxt.in_kwarg = 0; + } + p_kwargs rbrace + { + 
pop_pktbl(p, $<tbl>2); + p->ctxt.in_kwarg = $<ctxt>1.in_kwarg; + $$ = new_hash_pattern(p, Qnone, $3, &@$); + } + | tLBRACE rbrace + { + $$ = new_hash_pattern_tail(p, Qnone, 0, &@$); + $$ = new_hash_pattern(p, Qnone, $$, &@$); + } + | tLPAREN {$<tbl>$ = push_pktbl(p);} p_expr rparen + { + pop_pktbl(p, $<tbl>2); + $$ = $3; + } + ; + +p_args : p_expr { - $<vars>1 = dyna_push(); /*%%%*/ - $<num>$ = ruby_sourceline; + NODE *pre_args = NEW_LIST($1, &@$); + $$ = new_array_pattern_tail(p, pre_args, 0, 0, Qnone, &@$); /*% - %*/ + $$ = new_array_pattern_tail(p, rb_ary_new_from_args(1, get_value($1)), 0, 0, Qnone, &@$); + %*/ + } + | p_args_head + { + $$ = new_array_pattern_tail(p, $1, 1, 0, Qnone, &@$); } - opt_block_param - compstmt keyword_end + | p_args_head p_arg { /*%%%*/ - $$ = NEW_ITER($3,$4); - nd_set_line($$, $<num>2); + $$ = new_array_pattern_tail(p, list_concat($1, $2), 0, 0, Qnone, &@$); /*% - $$ = dispatch2(do_block, escape_Qundef($3), $4); + VALUE pre_args = rb_ary_concat($1, get_value($2)); + $$ = new_array_pattern_tail(p, pre_args, 0, 0, Qnone, &@$); %*/ - dyna_pop($<vars>1); } + | p_args_head tSTAR tIDENTIFIER + { + $$ = new_array_pattern_tail(p, $1, 1, $3, Qnone, &@$); + } + | p_args_head tSTAR tIDENTIFIER ',' p_args_post + { + $$ = new_array_pattern_tail(p, $1, 1, $3, $5, &@$); + } + | p_args_head tSTAR + { + $$ = new_array_pattern_tail(p, $1, 1, 0, Qnone, &@$); + } + | p_args_head tSTAR ',' p_args_post + { + $$ = new_array_pattern_tail(p, $1, 1, 0, $4, &@$); + } + | p_args_tail ; -case_body : keyword_when args then - compstmt - cases +p_args_head : p_arg ',' + { + $$ = $1; + } + | p_args_head p_arg ',' + { + /*%%%*/ + $$ = list_concat($1, $2); + /*% %*/ + /*% ripper: rb_ary_concat($1, get_value($2)) %*/ + } + ; + +p_args_tail : p_rest + { + $$ = new_array_pattern_tail(p, Qnone, 1, $1, Qnone, &@$); + } + | p_rest ',' p_args_post + { + $$ = new_array_pattern_tail(p, Qnone, 1, $1, $3, &@$); + } + ; + +p_find : p_rest ',' p_args_post ',' p_rest + { + $$ = 
new_find_pattern_tail(p, $1, $3, $5, &@$); + + if (rb_warning_category_enabled_p(RB_WARN_CATEGORY_EXPERIMENTAL)) + rb_warn0L_experimental(nd_line($$), "Find pattern is experimental, and the behavior may change in future versions of Ruby!"); + } + ; + + +p_rest : tSTAR tIDENTIFIER + { + $$ = $2; + } + | tSTAR + { + $$ = 0; + } + ; + +p_args_post : p_arg + | p_args_post ',' p_arg + { + /*%%%*/ + $$ = list_concat($1, $3); + /*% %*/ + /*% ripper: rb_ary_concat($1, get_value($3)) %*/ + } + ; + +p_arg : p_expr + { + /*%%%*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: rb_ary_new_from_args(1, get_value($1)) %*/ + } + ; + +p_kwargs : p_kwarg ',' p_any_kwrest + { + $$ = new_hash_pattern_tail(p, new_unique_key_hash(p, $1, &@$), $3, &@$); + } + | p_kwarg + { + $$ = new_hash_pattern_tail(p, new_unique_key_hash(p, $1, &@$), 0, &@$); + } + | p_kwarg ',' + { + $$ = new_hash_pattern_tail(p, new_unique_key_hash(p, $1, &@$), 0, &@$); + } + | p_any_kwrest + { + $$ = new_hash_pattern_tail(p, new_hash(p, Qnone, &@$), $1, &@$); + } + ; + +p_kwarg : p_kw + /*% ripper[brace]: rb_ary_new_from_args(1, $1) %*/ + | p_kwarg ',' p_kw + { + /*%%%*/ + $$ = list_concat($1, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, $3) %*/ + } + ; + +p_kw : p_kw_label p_expr + { + error_duplicate_pattern_key(p, get_id($1), &@1); + /*%%%*/ + $$ = list_append(p, NEW_LIST(NEW_LIT(ID2SYM($1), &@$), &@$), $2); + /*% %*/ + /*% ripper: rb_ary_new_from_args(2, get_value($1), get_value($2)) %*/ + } + | p_kw_label + { + error_duplicate_pattern_key(p, get_id($1), &@1); + if ($1 && !is_local_id(get_id($1))) { + yyerror1(&@1, "key must be valid as local variables"); + } + error_duplicate_pattern_variable(p, get_id($1), &@1); + /*%%%*/ + $$ = list_append(p, NEW_LIST(NEW_LIT(ID2SYM($1), &@$), &@$), assignable(p, $1, 0, &@$)); + /*% %*/ + /*% ripper: rb_ary_new_from_args(2, get_value($1), Qnil) %*/ + } + ; + +p_kw_label : tLABEL + | tSTRING_BEG string_contents tLABEL_END { + YYLTYPE loc = code_loc_gen(&@1, &@3); /*%%%*/ - 
$$ = NEW_WHEN($2, $4, $5); + if (!$2 || nd_type_p($2, NODE_STR)) { + NODE *node = dsym_node(p, $2, &loc); + $$ = SYM2ID(node->nd_lit); + } /*% - $$ = dispatch3(when, $2, $4, escape_Qundef($5)); + if (ripper_is_node_yylval($2) && RNODE($2)->nd_cval) { + VALUE label = RNODE($2)->nd_cval; + VALUE rval = RNODE($2)->nd_rval; + $$ = ripper_new_yylval(p, rb_intern_str(label), rval, label); + RNODE($$)->nd_loc = loc; + } %*/ + else { + yyerror1(&loc, "symbol literal with interpolation is not allowed"); + $$ = 0; + } } ; -cases : opt_else - | case_body +p_kwrest : kwrest_mark tIDENTIFIER + { + $$ = $2; + } + | kwrest_mark + { + $$ = 0; + } + ; + +p_kwnorest : kwrest_mark keyword_nil + { + $$ = 0; + } + ; + +p_any_kwrest : p_kwrest + | p_kwnorest {$$ = ID2VAL(idNil);} + ; + +p_value : p_primitive + | p_primitive tDOT2 p_primitive + { + /*%%%*/ + value_expr($1); + value_expr($3); + $$ = NEW_DOT2($1, $3, &@$); + /*% %*/ + /*% ripper: dot2!($1, $3) %*/ + } + | p_primitive tDOT3 p_primitive + { + /*%%%*/ + value_expr($1); + value_expr($3); + $$ = NEW_DOT3($1, $3, &@$); + /*% %*/ + /*% ripper: dot3!($1, $3) %*/ + } + | p_primitive tDOT2 + { + /*%%%*/ + value_expr($1); + $$ = NEW_DOT2($1, new_nil_at(p, &@2.end_pos), &@$); + /*% %*/ + /*% ripper: dot2!($1, Qnil) %*/ + } + | p_primitive tDOT3 + { + /*%%%*/ + value_expr($1); + $$ = NEW_DOT3($1, new_nil_at(p, &@2.end_pos), &@$); + /*% %*/ + /*% ripper: dot3!($1, Qnil) %*/ + } + | p_var_ref + | p_expr_ref + | p_const + | tBDOT2 p_primitive + { + /*%%%*/ + value_expr($2); + $$ = NEW_DOT2(new_nil_at(p, &@1.beg_pos), $2, &@$); + /*% %*/ + /*% ripper: dot2!(Qnil, $2) %*/ + } + | tBDOT3 p_primitive + { + /*%%%*/ + value_expr($2); + $$ = NEW_DOT3(new_nil_at(p, &@1.beg_pos), $2, &@$); + /*% %*/ + /*% ripper: dot3!(Qnil, $2) %*/ + } + ; + +p_primitive : literal + | strings + | xstring + | regexp + | words + | qwords + | symbols + | qsymbols + | keyword_variable + { + /*%%%*/ + if (!($$ = gettable(p, $1, &@$))) $$ = NEW_BEGIN(0, &@$); + /*% %*/ 
+ /*% ripper: var_ref!($1) %*/ + } + | lambda + ; + +p_variable : tIDENTIFIER + { + /*%%%*/ + error_duplicate_pattern_variable(p, $1, &@1); + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ + } + ; + +p_var_ref : '^' tIDENTIFIER + { + /*%%%*/ + NODE *n = gettable(p, $2, &@$); + if (!(nd_type_p(n, NODE_LVAR) || nd_type_p(n, NODE_DVAR))) { + compile_error(p, "%"PRIsVALUE": no such local variable", rb_id2str($2)); + } + $$ = n; + /*% %*/ + /*% ripper: var_ref!($2) %*/ + } + | '^' nonlocal_var + { + /*%%%*/ + if (!($$ = gettable(p, $2, &@$))) $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: var_ref!($2) %*/ + } + ; + +p_expr_ref : '^' tLPAREN expr_value ')' + { + /*%%%*/ + $$ = NEW_BEGIN($3, &@$); + /*% %*/ + /*% ripper: begin!($3) %*/ + } ; -opt_rescue : keyword_rescue exc_list exc_var then +p_const : tCOLON3 cname + { + /*%%%*/ + $$ = NEW_COLON3($2, &@$); + /*% %*/ + /*% ripper: top_const_ref!($2) %*/ + } + | p_const tCOLON2 cname + { + /*%%%*/ + $$ = NEW_COLON2($1, $3, &@$); + /*% %*/ + /*% ripper: const_path_ref!($1, $3) %*/ + } + | tCONSTANT + { + /*%%%*/ + $$ = gettable(p, $1, &@$); + /*% %*/ + /*% ripper: var_ref!($1) %*/ + } + ; + +opt_rescue : k_rescue exc_list exc_var then compstmt opt_rescue { /*%%%*/ - if ($3) { - $3 = node_assign($3, NEW_ERRINFO()); - $5 = block_append($3, $5); - } - $$ = NEW_RESBODY($2, $5, $6); + $$ = NEW_RESBODY($2, + $3 ? 
block_append(p, node_assign(p, $3, NEW_ERRINFO(&@3), NO_LEX_CTXT, &@3), $5) : $5, + $6, &@$); fixpos($$, $2?$2:$5); - /*% - $$ = dispatch4(rescue, - escape_Qundef($2), - escape_Qundef($3), - escape_Qundef($5), - escape_Qundef($6)); - %*/ + /*% %*/ + /*% ripper: rescue!(escape_Qundef($2), escape_Qundef($3), escape_Qundef($5), escape_Qundef($6)) %*/ } | none ; @@ -3760,18 +4635,16 @@ opt_rescue : keyword_rescue exc_list exc_var then exc_list : arg_value { /*%%%*/ - $$ = NEW_LIST($1); - /*% - $$ = rb_ary_new3(1, $1); - %*/ + $$ = NEW_LIST($1, &@$); + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } | mrhs { /*%%%*/ if (!($$ = splat_array($1))) $$ = $1; - /*% - $$ = $1; - %*/ + /*% %*/ + /*% ripper: $1 %*/ } | none ; @@ -3783,27 +4656,18 @@ exc_var : tASSOC lhs | none ; -opt_ensure : keyword_ensure compstmt +opt_ensure : k_ensure compstmt { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(ensure, $2); - %*/ + /*% %*/ + /*% ripper: ensure!($2) %*/ } | none ; literal : numeric | symbol - { - /*%%%*/ - $$ = NEW_LIT(ID2SYM($1)); - /*% - $$ = dispatch1(symbol_literal, $1); - %*/ - } - | dsym ; strings : string @@ -3811,15 +4675,15 @@ strings : string /*%%%*/ NODE *node = $1; if (!node) { - node = NEW_STR(STR_NEW0()); + node = NEW_STR(STR_NEW0(), &@$); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit); } else { - node = evstr2dstr(node); + node = evstr2dstr(p, node); } $$ = node; - /*% - $$ = $1; - %*/ + /*% %*/ + /*% ripper: $1 %*/ } ; @@ -3828,131 +4692,43 @@ string : tCHAR | string string1 { /*%%%*/ - $$ = literal_concat($1, $2); - /*% - $$ = dispatch2(string_concat, $1, $2); - %*/ + $$ = literal_concat(p, $1, $2, &@$); + /*% %*/ + /*% ripper: string_concat!($1, $2) %*/ } ; string1 : tSTRING_BEG string_contents tSTRING_END { /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(string_literal, $2); - %*/ + $$ = heredoc_dedent(p, $2); + if ($$) nd_set_loc($$, &@$); + /*% %*/ + /*% ripper: string_literal!(heredoc_dedent(p, $2)) %*/ } ; xstring : tXSTRING_BEG xstring_contents tSTRING_END { 
/*%%%*/ - NODE *node = $2; - if (!node) { - node = NEW_XSTR(STR_NEW0()); - } - else { - switch (nd_type(node)) { - case NODE_STR: - nd_set_type(node, NODE_XSTR); - break; - case NODE_DSTR: - nd_set_type(node, NODE_DXSTR); - break; - default: - node = NEW_NODE(NODE_DXSTR, Qnil, 1, NEW_LIST(node)); - break; - } - } - $$ = node; - /*% - $$ = dispatch1(xstring_literal, $2); - %*/ + $$ = new_xstring(p, heredoc_dedent(p, $2), &@$); + /*% %*/ + /*% ripper: xstring_literal!(heredoc_dedent(p, $2)) %*/ } ; regexp : tREGEXP_BEG regexp_contents tREGEXP_END { - /*%%%*/ - int options = $3; - NODE *node = $2; - NODE *list, *prev; - if (!node) { - node = NEW_LIT(reg_compile(STR_NEW0(), options)); - } - else switch (nd_type(node)) { - case NODE_STR: - { - VALUE src = node->nd_lit; - nd_set_type(node, NODE_LIT); - node->nd_lit = reg_compile(src, options); - } - break; - default: - node = NEW_NODE(NODE_DSTR, STR_NEW0(), 1, NEW_LIST(node)); - case NODE_DSTR: - if (options & RE_OPTION_ONCE) { - nd_set_type(node, NODE_DREGX_ONCE); - } - else { - nd_set_type(node, NODE_DREGX); - } - node->nd_cflag = options & RE_OPTION_MASK; - if (!NIL_P(node->nd_lit)) reg_fragment_check(node->nd_lit, options); - for (list = (prev = node)->nd_next; list; list = list->nd_next) { - if (nd_type(list->nd_head) == NODE_STR) { - VALUE tail = list->nd_head->nd_lit; - if (reg_fragment_check(tail, options) && prev && !NIL_P(prev->nd_lit)) { - VALUE lit = prev == node ? 
prev->nd_lit : prev->nd_head->nd_lit; - if (!literal_concat0(parser, lit, tail)) { - node = 0; - break; - } - rb_str_resize(tail, 0); - prev->nd_next = list->nd_next; - rb_gc_force_recycle((VALUE)list->nd_head); - rb_gc_force_recycle((VALUE)list); - list = prev; - } - else { - prev = list; - } - } - else { - prev = 0; - } - } - if (!node->nd_next) { - VALUE src = node->nd_lit; - nd_set_type(node, NODE_LIT); - node->nd_lit = reg_compile(src, options); - } - break; - } - $$ = node; - /*% - $$ = dispatch2(regexp_literal, $2, $3); - %*/ + $$ = new_regexp(p, $2, $3, &@$); } ; -words : tWORDS_BEG ' ' tSTRING_END - { - /*%%%*/ - $$ = NEW_ZARRAY(); - /*% - $$ = dispatch0(words_new); - $$ = dispatch1(array, $$); - %*/ - } - | tWORDS_BEG word_list tSTRING_END +words : tWORDS_BEG ' ' word_list tSTRING_END { /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(array, $2); - %*/ + $$ = make_list($3, &@$); + /*% %*/ + /*% ripper: array!($3) %*/ } ; @@ -3960,54 +4736,35 @@ word_list : /* none */ { /*%%%*/ $$ = 0; - /*% - $$ = dispatch0(words_new); - %*/ + /*% %*/ + /*% ripper: words_new! 
%*/ } | word_list word ' ' { /*%%%*/ - $$ = list_append($1, evstr2dstr($2)); - /*% - $$ = dispatch2(words_add, $1, $2); - %*/ + $$ = list_append(p, $1, evstr2dstr(p, $2)); + /*% %*/ + /*% ripper: words_add!($1, $2) %*/ } ; word : string_content - /*%c%*/ - /*%c - { - $$ = dispatch0(word_new); - $$ = dispatch2(word_add, $$, $1); - } - %*/ + /*% ripper[brace]: word_add!(word_new!, $1) %*/ | word string_content { /*%%%*/ - $$ = literal_concat($1, $2); - /*% - $$ = dispatch2(word_add, $1, $2); - %*/ + $$ = literal_concat(p, $1, $2, &@$); + /*% %*/ + /*% ripper: word_add!($1, $2) %*/ } ; -symbols : tSYMBOLS_BEG ' ' tSTRING_END +symbols : tSYMBOLS_BEG ' ' symbol_list tSTRING_END { /*%%%*/ - $$ = NEW_ZARRAY(); - /*% - $$ = dispatch0(symbols_new); - $$ = dispatch1(array, $$); - %*/ - } - | tSYMBOLS_BEG symbol_list tSTRING_END - { - /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(array, $2); - %*/ + $$ = make_list($3, &@$); + /*% %*/ + /*% ripper: array!($3) %*/ } ; @@ -4015,57 +4772,33 @@ symbol_list : /* none */ { /*%%%*/ $$ = 0; - /*% - $$ = dispatch0(symbols_new); - %*/ + /*% %*/ + /*% ripper: symbols_new! 
%*/ } | symbol_list word ' ' { /*%%%*/ - $2 = evstr2dstr($2); - nd_set_type($2, NODE_DSYM); - $$ = list_append($1, $2); - /*% - $$ = dispatch2(symbols_add, $1, $2); - %*/ + $$ = symbol_append(p, $1, evstr2dstr(p, $2)); + /*% %*/ + /*% ripper: symbols_add!($1, $2) %*/ } ; -qwords : tQWORDS_BEG ' ' tSTRING_END - { - /*%%%*/ - $$ = NEW_ZARRAY(); - /*% - $$ = dispatch0(qwords_new); - $$ = dispatch1(array, $$); - %*/ - } - | tQWORDS_BEG qword_list tSTRING_END +qwords : tQWORDS_BEG ' ' qword_list tSTRING_END { /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(array, $2); - %*/ + $$ = make_list($3, &@$); + /*% %*/ + /*% ripper: array!($3) %*/ } ; -qsymbols : tQSYMBOLS_BEG ' ' tSTRING_END - { - /*%%%*/ - $$ = NEW_ZARRAY(); - /*% - $$ = dispatch0(qsymbols_new); - $$ = dispatch1(array, $$); - %*/ - } - | tQSYMBOLS_BEG qsym_list tSTRING_END +qsymbols : tQSYMBOLS_BEG ' ' qsym_list tSTRING_END { /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(array, $2); - %*/ + $$ = make_list($3, &@$); + /*% %*/ + /*% ripper: array!($3) %*/ } ; @@ -4073,17 +4806,15 @@ qword_list : /* none */ { /*%%%*/ $$ = 0; - /*% - $$ = dispatch0(qwords_new); - %*/ + /*% %*/ + /*% ripper: qwords_new! %*/ } | qword_list tSTRING_CONTENT ' ' { /*%%%*/ - $$ = list_append($1, $2); - /*% - $$ = dispatch2(qwords_add, $1, $2); - %*/ + $$ = list_append(p, $1, $2); + /*% %*/ + /*% ripper: qwords_add!($1, $2) %*/ } ; @@ -4091,21 +4822,15 @@ qsym_list : /* none */ { /*%%%*/ $$ = 0; - /*% - $$ = dispatch0(qsymbols_new); - %*/ + /*% %*/ + /*% ripper: qsymbols_new! %*/ } | qsym_list tSTRING_CONTENT ' ' { /*%%%*/ - VALUE lit; - lit = $2->nd_lit; - $2->nd_lit = ID2SYM(rb_intern_str(lit)); - nd_set_type($2, NODE_LIT); - $$ = list_append($1, $2); - /*% - $$ = dispatch2(qsymbols_add, $1, $2); - %*/ + $$ = symbol_append(p, $1, $2); + /*% %*/ + /*% ripper: qsymbols_add!($1, $2) %*/ } ; @@ -4113,16 +4838,27 @@ string_contents : /* none */ { /*%%%*/ $$ = 0; + /*% %*/ + /*% ripper: string_content! 
%*/ + /*%%%*/ /*% - $$ = dispatch0(string_content); + $$ = ripper_new_yylval(p, 0, $$, 0); %*/ } | string_contents string_content { /*%%%*/ - $$ = literal_concat($1, $2); + $$ = literal_concat(p, $1, $2, &@$); + /*% %*/ + /*% ripper: string_add!($1, $2) %*/ + /*%%%*/ /*% - $$ = dispatch2(string_add, $1, $2); + if (ripper_is_node_yylval($1) && ripper_is_node_yylval($2) && + !RNODE($1)->nd_cval) { + RNODE($1)->nd_cval = RNODE($2)->nd_cval; + RNODE($1)->nd_rval = add_mark_object(p, $$); + $$ = $1; + } %*/ } ; @@ -4131,17 +4867,15 @@ xstring_contents: /* none */ { /*%%%*/ $$ = 0; - /*% - $$ = dispatch0(xstring_new); - %*/ + /*% %*/ + /*% ripper: xstring_new! %*/ } | xstring_contents string_content { /*%%%*/ - $$ = literal_concat($1, $2); - /*% - $$ = dispatch2(xstring_add, $1, $2); - %*/ + $$ = literal_concat(p, $1, $2, &@$); + /*% %*/ + /*% ripper: xstring_add!($1, $2) %*/ } ; @@ -4149,8 +4883,11 @@ regexp_contents: /* none */ { /*%%%*/ $$ = 0; + /*% %*/ + /*% ripper: regexp_new! %*/ + /*%%%*/ /*% - $$ = dispatch0(regexp_new); + $$ = ripper_new_yylval(p, 0, $$, 0); %*/ } | regexp_contents string_content @@ -4171,100 +4908,121 @@ regexp_contents: /* none */ case NODE_DSTR: break; default: - head = list_append(NEW_DSTR(Qnil), head); + head = list_append(p, NEW_DSTR(Qnil, &@$), head); break; } - $$ = list_append(head, tail); + $$ = list_append(p, head, tail); } /*% - $$ = dispatch2(regexp_add, $1, $2); + VALUE s1 = 1, s2 = 0, n1 = $1, n2 = $2; + if (ripper_is_node_yylval(n1)) { + s1 = RNODE(n1)->nd_cval; + n1 = RNODE(n1)->nd_rval; + } + if (ripper_is_node_yylval(n2)) { + s2 = RNODE(n2)->nd_cval; + n2 = RNODE(n2)->nd_rval; + } + $$ = dispatch2(regexp_add, n1, n2); + if (!s1 && s2) { + $$ = ripper_new_yylval(p, 0, $$, s2); + } %*/ } ; string_content : tSTRING_CONTENT + /*% ripper[brace]: ripper_new_yylval(p, 0, get_value($1), $1) %*/ | tSTRING_DVAR { - $<node>$ = lex_strterm; - lex_strterm = 0; - lex_state = EXPR_BEG; + /* need to backup p->lex.strterm so that a string 
literal `%&foo,#$&,bar&` can be parsed */ + $<strterm>$ = p->lex.strterm; + p->lex.strterm = 0; + SET_LEX_STATE(EXPR_BEG); } string_dvar { + p->lex.strterm = $<strterm>2; /*%%%*/ - lex_strterm = $<node>2; - $$ = NEW_EVSTR($3); - /*% - lex_strterm = $<node>2; - $$ = dispatch1(string_dvar, $3); - %*/ + $$ = NEW_EVSTR($3, &@$); + nd_set_line($$, @3.end_pos.lineno); + /*% %*/ + /*% ripper: string_dvar!($3) %*/ } | tSTRING_DBEG { - $<val>1 = cond_stack; - $<val>$ = cmdarg_stack; - cond_stack = 0; - cmdarg_stack = 0; + CMDARG_PUSH(0); + COND_PUSH(0); + } + { + /* need to backup p->lex.strterm so that a string literal `%!foo,#{ !0 },bar!` can be parsed */ + $<strterm>$ = p->lex.strterm; + p->lex.strterm = 0; + } + { + $<num>$ = p->lex.state; + SET_LEX_STATE(EXPR_BEG); } { - $<node>$ = lex_strterm; - lex_strterm = 0; - lex_state = EXPR_BEG; + $<num>$ = p->lex.brace_nest; + p->lex.brace_nest = 0; } { - $<num>$ = brace_nest; - brace_nest = 0; + $<num>$ = p->heredoc_indent; + p->heredoc_indent = 0; } compstmt tSTRING_DEND { - cond_stack = $<val>1; - cmdarg_stack = $<val>2; - lex_strterm = $<node>3; - brace_nest = $<num>4; + COND_POP(); + CMDARG_POP(); + p->lex.strterm = $<strterm>3; + SET_LEX_STATE($<num>4); + p->lex.brace_nest = $<num>5; + p->heredoc_indent = $<num>6; + p->heredoc_line_indent = -1; /*%%%*/ - if ($5) $5->flags &= ~NODE_FL_NEWLINE; - $$ = new_evstr($5); - /*% - $$ = dispatch1(string_embexpr, $5); - %*/ + if ($7) $7->flags &= ~NODE_FL_NEWLINE; + $$ = new_evstr(p, $7, &@$); + /*% %*/ + /*% ripper: string_embexpr!($7) %*/ } ; string_dvar : tGVAR { /*%%%*/ - $$ = NEW_GVAR($1); - /*% - $$ = dispatch1(var_ref, $1); - %*/ + $$ = NEW_GVAR($1, &@$); + /*% %*/ + /*% ripper: var_ref!($1) %*/ } | tIVAR { /*%%%*/ - $$ = NEW_IVAR($1); - /*% - $$ = dispatch1(var_ref, $1); - %*/ + $$ = NEW_IVAR($1, &@$); + /*% %*/ + /*% ripper: var_ref!($1) %*/ } | tCVAR { /*%%%*/ - $$ = NEW_CVAR($1); - /*% - $$ = dispatch1(var_ref, $1); - %*/ + $$ = NEW_CVAR($1, &@$); + /*% %*/ + /*% ripper: 
var_ref!($1) %*/ } | backref ; -symbol : tSYMBEG sym +symbol : ssym + | dsym + ; + +ssym : tSYMBEG sym { - lex_state = EXPR_END; + SET_LEX_STATE(EXPR_END); /*%%%*/ - $$ = $2; - /*% - $$ = dispatch1(symbol, $2); - %*/ + $$ = NEW_LIT(ID2SYM($2), &@$); + /*% %*/ + /*% ripper: symbol_literal!(symbol!($2)) %*/ } ; @@ -4274,37 +5032,38 @@ sym : fname | tCVAR ; -dsym : tSYMBEG xstring_contents tSTRING_END +dsym : tSYMBEG string_contents tSTRING_END { - lex_state = EXPR_END; + SET_LEX_STATE(EXPR_END); /*%%%*/ - $$ = dsym_node($2); - /*% - $$ = dispatch1(dyna_symbol, $2); - %*/ + $$ = dsym_node(p, $2, &@$); + /*% %*/ + /*% ripper: dyna_symbol!($2) %*/ } ; -numeric : tINTEGER - | tFLOAT - | tUMINUS_NUM tINTEGER %prec tLOWEST +numeric : simple_numeric + | tUMINUS_NUM simple_numeric %prec tLOWEST { /*%%%*/ - $$ = negate_lit($2); - /*% - $$ = dispatch2(unary, ripper_intern("-@"), $2); - %*/ - } - | tUMINUS_NUM tFLOAT %prec tLOWEST - { - /*%%%*/ - $$ = negate_lit($2); - /*% - $$ = dispatch2(unary, ripper_intern("-@"), $2); - %*/ + $$ = $2; + RB_OBJ_WRITE(p->ast, &$$->nd_lit, negate_lit(p, $$->nd_lit)); + /*% %*/ + /*% ripper: unary!(ID2VAL(idUMinus), $2) %*/ } ; +simple_numeric : tINTEGER + | tFLOAT + | tRATIONAL + | tIMAGINARY + ; + +nonlocal_var : tIVAR + | tGVAR + | tCVAR + ; + user_variable : tIDENTIFIER | tIVAR | tGVAR @@ -4312,21 +5071,21 @@ user_variable : tIDENTIFIER | tCVAR ; -keyword_variable: keyword_nil {ifndef_ripper($$ = keyword_nil);} - | keyword_self {ifndef_ripper($$ = keyword_self);} - | keyword_true {ifndef_ripper($$ = keyword_true);} - | keyword_false {ifndef_ripper($$ = keyword_false);} - | keyword__FILE__ {ifndef_ripper($$ = keyword__FILE__);} - | keyword__LINE__ {ifndef_ripper($$ = keyword__LINE__);} - | keyword__ENCODING__ {ifndef_ripper($$ = keyword__ENCODING__);} +keyword_variable: keyword_nil {$$ = KWD2EID(nil, $1);} + | keyword_self {$$ = KWD2EID(self, $1);} + | keyword_true {$$ = KWD2EID(true, $1);} + | keyword_false {$$ = KWD2EID(false, $1);} + | 
keyword__FILE__ {$$ = KWD2EID(_FILE__, $1);} + | keyword__LINE__ {$$ = KWD2EID(_LINE__, $1);} + | keyword__ENCODING__ {$$ = KWD2EID(_ENCODING__, $1);} ; var_ref : user_variable { /*%%%*/ - if (!($$ = gettable($1))) $$ = NEW_BEGIN(0); + if (!($$ = gettable(p, $1, &@$))) $$ = NEW_BEGIN(0, &@$); /*% - if (id_is_var(get_id($1))) { + if (id_is_var(p, get_id($1))) { $$ = dispatch1(var_ref, $1); } else { @@ -4337,28 +5096,25 @@ var_ref : user_variable | keyword_variable { /*%%%*/ - if (!($$ = gettable($1))) $$ = NEW_BEGIN(0); - /*% - $$ = dispatch1(var_ref, $1); - %*/ + if (!($$ = gettable(p, $1, &@$))) $$ = NEW_BEGIN(0, &@$); + /*% %*/ + /*% ripper: var_ref!($1) %*/ } ; var_lhs : user_variable { - $$ = assignable($1, 0); /*%%%*/ - /*% - $$ = dispatch1(var_field, $$); - %*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } | keyword_variable { - $$ = assignable($1, 0); /*%%%*/ - /*% - $$ = dispatch1(var_field, $$); - %*/ + $$ = assignable(p, $1, 0, &@$); + /*% %*/ + /*% ripper: assignable(p, var_field(p, $1)) %*/ } ; @@ -4366,68 +5122,82 @@ backref : tNTH_REF | tBACK_REF ; -superclass : term - { - /*%%%*/ - $$ = 0; - /*% - $$ = Qnil; - %*/ - } - | '<' +superclass : '<' { - lex_state = EXPR_BEG; - command_start = TRUE; + SET_LEX_STATE(EXPR_BEG); + p->command_start = TRUE; } expr_value term { $$ = $3; } - | error term + | /* none */ { /*%%%*/ - yyerrok; $$ = 0; - /*% - yyerrok; - $$ = Qnil; - %*/ + /*% %*/ + /*% ripper: Qnil %*/ } ; -f_arglist : '(' f_args rparen +f_opt_paren_args: f_paren_args + | none + { + p->ctxt.in_argdef = 0; + $$ = new_args_tail(p, Qnone, Qnone, Qnone, &@0); + $$ = new_args(p, Qnone, Qnone, Qnone, Qnone, $$, &@0); + } + ; + +f_paren_args : '(' f_args rparen { /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(paren, $2); - %*/ - lex_state = EXPR_BEG; - command_start = TRUE; + /*% %*/ + /*% ripper: paren!($2) %*/ + SET_LEX_STATE(EXPR_BEG); + p->command_start = TRUE; + p->ctxt.in_argdef = 0; + } + ; + +f_arglist : 
f_paren_args + | { + $<ctxt>$ = p->ctxt; + p->ctxt.in_kwarg = 1; + p->ctxt.in_argdef = 1; + SET_LEX_STATE(p->lex.state|EXPR_LABEL); /* force for args */ } - | f_args term + f_args term { - $$ = $1; - lex_state = EXPR_BEG; - command_start = TRUE; + p->ctxt.in_kwarg = $<ctxt>1.in_kwarg; + p->ctxt.in_argdef = 0; + $$ = $2; + SET_LEX_STATE(EXPR_BEG); + p->command_start = TRUE; } ; args_tail : f_kwarg ',' f_kwrest opt_f_block_arg { - $$ = new_args_tail($1, $3, $4); + $$ = new_args_tail(p, $1, $3, $4, &@3); } | f_kwarg opt_f_block_arg { - $$ = new_args_tail($1, Qnone, $2); + $$ = new_args_tail(p, $1, Qnone, $2, &@1); } - | f_kwrest opt_f_block_arg + | f_any_kwrest opt_f_block_arg { - $$ = new_args_tail(Qnone, $1, $2); + $$ = new_args_tail(p, Qnone, $1, $2, &@1); } | f_block_arg { - $$ = new_args_tail(Qnone, Qnone, $1); + $$ = new_args_tail(p, Qnone, Qnone, $1, &@1); + } + | args_forward + { + add_forwarding_args(p); + $$ = new_args_tail(p, Qnone, $1, ID2VAL(idFWD_BLOCK), &@1); } ; @@ -4437,188 +5207,227 @@ opt_args_tail : ',' args_tail } | /* none */ { - $$ = new_args_tail(Qnone, Qnone, Qnone); + $$ = new_args_tail(p, Qnone, Qnone, Qnone, &@0); } ; f_args : f_arg ',' f_optarg ',' f_rest_arg opt_args_tail { - $$ = new_args($1, $3, $5, Qnone, $6); + $$ = new_args(p, $1, $3, $5, Qnone, $6, &@$); } | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_args_tail { - $$ = new_args($1, $3, $5, $7, $8); + $$ = new_args(p, $1, $3, $5, $7, $8, &@$); } | f_arg ',' f_optarg opt_args_tail { - $$ = new_args($1, $3, Qnone, Qnone, $4); + $$ = new_args(p, $1, $3, Qnone, Qnone, $4, &@$); } | f_arg ',' f_optarg ',' f_arg opt_args_tail { - $$ = new_args($1, $3, Qnone, $5, $6); + $$ = new_args(p, $1, $3, Qnone, $5, $6, &@$); } | f_arg ',' f_rest_arg opt_args_tail { - $$ = new_args($1, Qnone, $3, Qnone, $4); + $$ = new_args(p, $1, Qnone, $3, Qnone, $4, &@$); } | f_arg ',' f_rest_arg ',' f_arg opt_args_tail { - $$ = new_args($1, Qnone, $3, $5, $6); + $$ = new_args(p, $1, Qnone, $3, $5, $6, &@$); } 
| f_arg opt_args_tail { - $$ = new_args($1, Qnone, Qnone, Qnone, $2); + $$ = new_args(p, $1, Qnone, Qnone, Qnone, $2, &@$); } | f_optarg ',' f_rest_arg opt_args_tail { - $$ = new_args(Qnone, $1, $3, Qnone, $4); + $$ = new_args(p, Qnone, $1, $3, Qnone, $4, &@$); } | f_optarg ',' f_rest_arg ',' f_arg opt_args_tail { - $$ = new_args(Qnone, $1, $3, $5, $6); + $$ = new_args(p, Qnone, $1, $3, $5, $6, &@$); } | f_optarg opt_args_tail { - $$ = new_args(Qnone, $1, Qnone, Qnone, $2); + $$ = new_args(p, Qnone, $1, Qnone, Qnone, $2, &@$); } | f_optarg ',' f_arg opt_args_tail { - $$ = new_args(Qnone, $1, Qnone, $3, $4); + $$ = new_args(p, Qnone, $1, Qnone, $3, $4, &@$); } | f_rest_arg opt_args_tail { - $$ = new_args(Qnone, Qnone, $1, Qnone, $2); + $$ = new_args(p, Qnone, Qnone, $1, Qnone, $2, &@$); } | f_rest_arg ',' f_arg opt_args_tail { - $$ = new_args(Qnone, Qnone, $1, $3, $4); + $$ = new_args(p, Qnone, Qnone, $1, $3, $4, &@$); } | args_tail { - $$ = new_args(Qnone, Qnone, Qnone, Qnone, $1); + $$ = new_args(p, Qnone, Qnone, Qnone, Qnone, $1, &@$); } | /* none */ { - $$ = new_args_tail(Qnone, Qnone, Qnone); - $$ = new_args(Qnone, Qnone, Qnone, Qnone, $$); + $$ = new_args_tail(p, Qnone, Qnone, Qnone, &@0); + $$ = new_args(p, Qnone, Qnone, Qnone, Qnone, $$, &@0); + } + ; + +args_forward : tBDOT3 + { + /*%%%*/ + $$ = idFWD_KWREST; + /*% %*/ + /*% ripper: args_forward! 
%*/ } ; f_bad_arg : tCONSTANT { + static const char mesg[] = "formal argument cannot be a constant"; /*%%%*/ - yyerror("formal argument cannot be a constant"); + yyerror1(&@1, mesg); $$ = 0; - /*% - $$ = dispatch1(param_error, $1); - %*/ + /*% %*/ + /*% ripper[error]: param_error!(ERR_MESG(), $1) %*/ } | tIVAR { + static const char mesg[] = "formal argument cannot be an instance variable"; /*%%%*/ - yyerror("formal argument cannot be an instance variable"); + yyerror1(&@1, mesg); $$ = 0; - /*% - $$ = dispatch1(param_error, $1); - %*/ + /*% %*/ + /*% ripper[error]: param_error!(ERR_MESG(), $1) %*/ } | tGVAR { + static const char mesg[] = "formal argument cannot be a global variable"; /*%%%*/ - yyerror("formal argument cannot be a global variable"); + yyerror1(&@1, mesg); $$ = 0; - /*% - $$ = dispatch1(param_error, $1); - %*/ + /*% %*/ + /*% ripper[error]: param_error!(ERR_MESG(), $1) %*/ } | tCVAR { + static const char mesg[] = "formal argument cannot be a class variable"; /*%%%*/ - yyerror("formal argument cannot be a class variable"); + yyerror1(&@1, mesg); $$ = 0; - /*% - $$ = dispatch1(param_error, $1); - %*/ + /*% %*/ + /*% ripper[error]: param_error!(ERR_MESG(), $1) %*/ } ; f_norm_arg : f_bad_arg | tIDENTIFIER { - formal_argument(get_id($1)); + formal_argument(p, $1); + p->max_numparam = ORDINAL_PARAM; + $$ = $1; + } + ; + +f_arg_asgn : f_norm_arg + { + ID id = get_id($1); + arg_var(p, id); + p->cur_arg = id; $$ = $1; } ; -f_arg_item : f_norm_arg +f_arg_item : f_arg_asgn { - arg_var(get_id($1)); + p->cur_arg = 0; /*%%%*/ - $$ = NEW_ARGS_AUX($1, 1); - /*% - $$ = get_value($1); - %*/ + $$ = NEW_ARGS_AUX($1, 1, &NULL_LOC); + /*% %*/ + /*% ripper: get_value($1) %*/ } | tLPAREN f_margs rparen { - ID tid = internal_id(); - arg_var(tid); /*%%%*/ - if (dyna_in_block()) { - $2->nd_value = NEW_DVAR(tid); + ID tid = internal_id(p); + YYLTYPE loc; + loc.beg_pos = @2.beg_pos; + loc.end_pos = @2.beg_pos; + arg_var(p, tid); + if (dyna_in_block(p)) { + $2->nd_value = 
NEW_DVAR(tid, &loc); } else { - $2->nd_value = NEW_LVAR(tid); + $2->nd_value = NEW_LVAR(tid, &loc); } - $$ = NEW_ARGS_AUX(tid, 1); + $$ = NEW_ARGS_AUX(tid, 1, &NULL_LOC); $$->nd_next = $2; - /*% - $$ = dispatch1(mlhs_paren, $2); - %*/ + /*% %*/ + /*% ripper: mlhs_paren!($2) %*/ } ; f_arg : f_arg_item - /*%c%*/ - /*%c - { - $$ = rb_ary_new3(1, $1); - } - c%*/ + /*% ripper[brace]: rb_ary_new3(1, get_value($1)) %*/ | f_arg ',' f_arg_item { /*%%%*/ $$ = $1; $$->nd_plen++; - $$->nd_next = block_append($$->nd_next, $3->nd_next); - rb_gc_force_recycle((VALUE)$3); - /*% - $$ = rb_ary_push($1, $3); - %*/ + $$->nd_next = block_append(p, $$->nd_next, $3->nd_next); + rb_discard_node(p, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ + } + ; + + +f_label : tLABEL + { + arg_var(p, formal_argument(p, $1)); + p->cur_arg = get_id($1); + p->max_numparam = ORDINAL_PARAM; + p->ctxt.in_argdef = 0; + $$ = $1; } ; -f_kw : tLABEL arg_value +f_kw : f_label arg_value { - arg_var(formal_argument(get_id($1))); - $$ = assignable($1, $2); + p->cur_arg = 0; + p->ctxt.in_argdef = 1; /*%%%*/ - $$ = NEW_KW_ARG(0, $$); - /*% - $$ = rb_assoc_new($$, $2); - %*/ + $$ = new_kw_arg(p, assignable(p, $1, $2, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), get_value($2)) %*/ + } + | f_label + { + p->cur_arg = 0; + p->ctxt.in_argdef = 1; + /*%%%*/ + $$ = new_kw_arg(p, assignable(p, $1, NODE_SPECIAL_REQUIRED_KEYWORD, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), 0) %*/ } ; -f_block_kw : tLABEL primary_value +f_block_kw : f_label primary_value { - arg_var(formal_argument(get_id($1))); - $$ = assignable($1, $2); + p->ctxt.in_argdef = 1; /*%%%*/ - $$ = NEW_KW_ARG(0, $$); - /*% - $$ = rb_assoc_new($$, $2); - %*/ + $$ = new_kw_arg(p, assignable(p, $1, $2, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), get_value($2)) %*/ + } + | f_label + { + p->ctxt.in_argdef = 1; + /*%%%*/ + $$ = new_kw_arg(p, 
assignable(p, $1, NODE_SPECIAL_REQUIRED_KEYWORD, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), 0) %*/ } ; @@ -4626,23 +5435,15 @@ f_block_kwarg : f_block_kw { /*%%%*/ $$ = $1; - /*% - $$ = rb_ary_new3(1, $1); - %*/ + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } | f_block_kwarg ',' f_block_kw { /*%%%*/ - NODE *kws = $1; - - while (kws->nd_next) { - kws = kws->nd_next; - } - kws->nd_next = $3; - $$ = $1; - /*% - $$ = rb_ary_push($1, $3); - %*/ + $$ = kwd_append($1, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ } ; @@ -4651,23 +5452,15 @@ f_kwarg : f_kw { /*%%%*/ $$ = $1; - /*% - $$ = rb_ary_new3(1, $1); - %*/ + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } | f_kwarg ',' f_kw { /*%%%*/ - NODE *kws = $1; - - while (kws->nd_next) { - kws = kws->nd_next; - } - kws->nd_next = $3; - $$ = $1; - /*% - $$ = rb_ary_push($1, $3); - %*/ + $$ = kwd_append($1, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ } ; @@ -4675,33 +5468,51 @@ kwrest_mark : tPOW | tDSTAR ; +f_no_kwarg : kwrest_mark keyword_nil + { + /*%%%*/ + /*% %*/ + /*% ripper: nokw_param!(Qnil) %*/ + } + ; + f_kwrest : kwrest_mark tIDENTIFIER { + arg_var(p, shadowing_lvar(p, get_id($2))); + /*%%%*/ $$ = $2; + /*% %*/ + /*% ripper: kwrest_param!($2) %*/ + } + | kwrest_mark + { + /*%%%*/ + $$ = internal_id(p); + arg_var(p, $$); + /*% %*/ + /*% ripper: kwrest_param!(Qnil) %*/ } ; -f_opt : tIDENTIFIER '=' arg_value +f_opt : f_arg_asgn f_eq arg_value { - arg_var(formal_argument(get_id($1))); - $$ = assignable($1, $3); + p->cur_arg = 0; + p->ctxt.in_argdef = 1; /*%%%*/ - $$ = NEW_OPT_ARG(0, $$); - /*% - $$ = rb_assoc_new($$, $3); - %*/ + $$ = NEW_OPT_ARG(0, assignable(p, $1, $3, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), get_value($3)) %*/ } ; -f_block_opt : tIDENTIFIER '=' primary_value +f_block_opt : f_arg_asgn f_eq primary_value { - arg_var(formal_argument(get_id($1))); - $$ = assignable($1, $3); 
+ p->cur_arg = 0; + p->ctxt.in_argdef = 1; /*%%%*/ - $$ = NEW_OPT_ARG(0, $$); - /*% - $$ = rb_assoc_new($$, $3); - %*/ + $$ = NEW_OPT_ARG(0, assignable(p, $1, $3, &@$), &@$); + /*% %*/ + /*% ripper: rb_assoc_new(get_value(assignable(p, $1)), get_value($3)) %*/ } ; @@ -4709,23 +5520,15 @@ f_block_optarg : f_block_opt { /*%%%*/ $$ = $1; - /*% - $$ = rb_ary_new3(1, $1); - %*/ + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } | f_block_optarg ',' f_block_opt { /*%%%*/ - NODE *opts = $1; - - while (opts->nd_next) { - opts = opts->nd_next; - } - opts->nd_next = $3; - $$ = $1; - /*% - $$ = rb_ary_push($1, $3); - %*/ + $$ = opt_arg_append($1, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ } ; @@ -4733,23 +5536,15 @@ f_optarg : f_opt { /*%%%*/ $$ = $1; - /*% - $$ = rb_ary_new3(1, $1); - %*/ + /*% %*/ + /*% ripper: rb_ary_new3(1, get_value($1)) %*/ } | f_optarg ',' f_opt { /*%%%*/ - NODE *opts = $1; - - while (opts->nd_next) { - opts = opts->nd_next; - } - opts->nd_next = $3; - $$ = $1; - /*% - $$ = rb_ary_push($1, $3); - %*/ + $$ = opt_arg_append($1, $3); + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ } ; @@ -4759,25 +5554,19 @@ restarg_mark : '*' f_rest_arg : restarg_mark tIDENTIFIER { - /*%%%*/ - if (!is_local_id($2)) - yyerror("rest argument must be local variable"); - /*% %*/ - arg_var(shadowing_lvar(get_id($2))); + arg_var(p, shadowing_lvar(p, get_id($2))); /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(rest_param, $2); - %*/ + /*% %*/ + /*% ripper: rest_param!($2) %*/ } | restarg_mark { /*%%%*/ - $$ = internal_id(); - arg_var($$); - /*% - $$ = dispatch1(rest_param, Qnil); - %*/ + $$ = internal_id(p); + arg_var(p, $$); + /*% %*/ + /*% ripper: rest_param!(Qnil) %*/ } ; @@ -4787,19 +5576,20 @@ blkarg_mark : '&' f_block_arg : blkarg_mark tIDENTIFIER { - /*%%%*/ - if (!is_local_id($2)) - yyerror("block argument must be local variable"); - else if (!dyna_in_block() && local_id($2)) - yyerror("duplicated block argument name"); - /*% %*/ - 
arg_var(shadowing_lvar(get_id($2))); + arg_var(p, shadowing_lvar(p, get_id($2))); /*%%%*/ $$ = $2; - /*% - $$ = dispatch1(blockarg, $2); - %*/ + /*% %*/ + /*% ripper: blockarg!($2) %*/ } + | blkarg_mark + { + /*%%%*/ + arg_var(p, shadowing_lvar(p, get_id(ANON_BLOCK_ID))); + /*% + $$ = dispatch1(blockarg, Qnil); + %*/ + } ; opt_f_block_arg : ',' f_block_arg @@ -4808,50 +5598,36 @@ opt_f_block_arg : ',' f_block_arg } | none { - /*%%%*/ - $$ = 0; - /*% - $$ = Qundef; - %*/ + $$ = Qnull; } ; singleton : var_ref { - /*%%%*/ value_expr($1); $$ = $1; - if (!$$) $$ = NEW_NIL(); - /*% - $$ = $1; - %*/ } - | '(' {lex_state = EXPR_BEG;} expr rparen + | '(' {SET_LEX_STATE(EXPR_BEG);} expr rparen { /*%%%*/ - if ($3 == 0) { - yyerror("can't define singleton method for ()."); - } - else { - switch (nd_type($3)) { - case NODE_STR: - case NODE_DSTR: - case NODE_XSTR: - case NODE_DXSTR: - case NODE_DREGX: - case NODE_LIT: - case NODE_ARRAY: - case NODE_ZARRAY: - yyerror("can't define singleton method for literals"); - default: - value_expr($3); - break; - } + switch (nd_type($3)) { + case NODE_STR: + case NODE_DSTR: + case NODE_XSTR: + case NODE_DXSTR: + case NODE_DREGX: + case NODE_LIT: + case NODE_LIST: + case NODE_ZLIST: + yyerror1(&@3, "can't define singleton method for literals"); + break; + default: + value_expr($3); + break; } $$ = $3; - /*% - $$ = dispatch1(paren, $3); - %*/ + /*% %*/ + /*% ripper: paren!($3) %*/ } ; @@ -4860,57 +5636,90 @@ assoc_list : none { /*%%%*/ $$ = $1; - /*% - $$ = dispatch1(assoclist_from_args, $1); - %*/ + /*% %*/ + /*% ripper: assoclist_from_args!($1) %*/ } ; assocs : assoc - /*%c%*/ - /*%c - { - $$ = rb_ary_new3(1, $1); - } - %*/ + /*% ripper[brace]: rb_ary_new3(1, get_value($1)) %*/ | assocs ',' assoc { /*%%%*/ - $$ = list_concat($1, $3); - /*% - $$ = rb_ary_push($1, $3); - %*/ + NODE *assocs = $1; + NODE *tail = $3; + if (!assocs) { + assocs = tail; + } + else if (tail) { + if (assocs->nd_head && + !tail->nd_head && nd_type_p(tail->nd_next, 
NODE_LIST) && + nd_type_p(tail->nd_next->nd_head, NODE_HASH)) { + /* DSTAR */ + tail = tail->nd_next->nd_head->nd_head; + } + assocs = list_concat(assocs, tail); + } + $$ = assocs; + /*% %*/ + /*% ripper: rb_ary_push($1, get_value($3)) %*/ } ; assoc : arg_value tASSOC arg_value { /*%%%*/ - $$ = list_append(NEW_LIST($1), $3); - /*% - $$ = dispatch2(assoc_new, $1, $3); - %*/ + if (nd_type_p($1, NODE_STR)) { + nd_set_type($1, NODE_LIT); + RB_OBJ_WRITE(p->ast, &$1->nd_lit, rb_fstring($1->nd_lit)); + } + $$ = list_append(p, NEW_LIST($1, &@$), $3); + /*% %*/ + /*% ripper: assoc_new!($1, $3) %*/ } | tLABEL arg_value { /*%%%*/ - $$ = list_append(NEW_LIST(NEW_LIT(ID2SYM($1))), $2); - /*% - $$ = dispatch2(assoc_new, $1, $2); - %*/ + $$ = list_append(p, NEW_LIST(NEW_LIT(ID2SYM($1), &@1), &@$), $2); + /*% %*/ + /*% ripper: assoc_new!($1, $2) %*/ + } + | tLABEL + { + /*%%%*/ + NODE *val = gettable(p, $1, &@$); + if (!val) val = NEW_BEGIN(0, &@$); + $$ = list_append(p, NEW_LIST(NEW_LIT(ID2SYM($1), &@1), &@$), val); + /*% %*/ + /*% ripper: assoc_new!($1, Qnil) %*/ + } + | tSTRING_BEG string_contents tLABEL_END arg_value + { + /*%%%*/ + YYLTYPE loc = code_loc_gen(&@1, &@3); + $$ = list_append(p, NEW_LIST(dsym_node(p, $2, &loc), &loc), $4); + /*% %*/ + /*% ripper: assoc_new!(dyna_symbol!($2), $4) %*/ } | tDSTAR arg_value { /*%%%*/ - $$ = list_append(NEW_LIST(0), $2); - /*% - $$ = dispatch1(assoc_splat, $2); - %*/ + if (nd_type_p($2, NODE_HASH) && + !($2->nd_head && $2->nd_head->nd_alen)) { + static VALUE empty_hash; + if (!empty_hash) { + empty_hash = rb_obj_freeze(rb_hash_new()); + rb_gc_register_mark_object(empty_hash); + } + $$ = list_append(p, NEW_LIST(0, &@$), NEW_LIT(empty_hash, &@$)); + } + else + $$ = list_append(p, NEW_LIST(0, &@$), $2); + /*% %*/ + /*% ripper: assoc_splat!($2) %*/ } ; - ; - operation : tIDENTIFIER | tCONSTANT | tFID @@ -4928,15 +5737,15 @@ operation3 : tIDENTIFIER ; dot_or_colon : '.' 
- /*%c%*/ - /*%c - { $$ = $<val>1; } - %*/ | tCOLON2 - /*%c%*/ - /*%c - { $$ = $<val>1; } - %*/ + ; + +call_op : '.' + | tANDDOT + ; + +call_op2 : call_op + | tCOLON2 ; opt_terms : /* none */ @@ -4953,13 +5762,16 @@ rparen : opt_nl ')' rbracket : opt_nl ']' ; +rbrace : opt_nl '}' + ; + trailer : /* none */ | '\n' | ',' ; -term : ';' {yyerrok;} - | '\n' +term : ';' {yyerrok;token_flush(p);} + | '\n' {token_flush(p);} ; terms : term @@ -4968,290 +5780,529 @@ terms : term none : /* none */ { - /*%%%*/ - $$ = 0; - /*% - $$ = Qundef; - %*/ + $$ = Qnull; } ; %% -# undef parser +# undef p # undef yylex # undef yylval -# define yylval (*((YYSTYPE*)(parser->parser_yylval))) - -static int parser_regx_options(struct parser_params*); -static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); -static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); -static int parser_parse_string(struct parser_params*,NODE*); -static int parser_here_document(struct parser_params*,NODE*); - - -# define nextc() parser_nextc(parser) -# define pushback(c) parser_pushback(parser, (c)) -# define newtok() parser_newtok(parser) -# define tokspace(n) parser_tokspace(parser, (n)) -# define tokadd(c) parser_tokadd(parser, (c)) -# define tok_hex(numlen) parser_tok_hex(parser, (numlen)) -# define read_escape(flags,e) parser_read_escape(parser, (flags), (e)) -# define tokadd_escape(e) parser_tokadd_escape(parser, (e)) -# define regx_options() parser_regx_options(parser) -# define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,(f),(t),(p),(n),(e)) -# define parse_string(n) parser_parse_string(parser,(n)) -# define tokaddmbc(c, enc) parser_tokaddmbc(parser, (c), (enc)) -# define here_document(n) parser_here_document(parser,(n)) -# define heredoc_identifier() parser_heredoc_identifier(parser) -# define heredoc_restore(n) parser_heredoc_restore(parser,(n)) -# define whole_match_p(e,l,i) parser_whole_match_p(parser,(e),(l),(i)) +# define yylval 
(*p->lval) + +static int regx_options(struct parser_params*); +static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**,rb_encoding**); +static void tokaddmbc(struct parser_params *p, int c, rb_encoding *enc); +static enum yytokentype parse_string(struct parser_params*,rb_strterm_literal_t*); +static enum yytokentype here_document(struct parser_params*,rb_strterm_heredoc_t*); #ifndef RIPPER -# define set_yylval_str(x) (yylval.node = NEW_STR(x)) +# define set_yylval_node(x) { \ + YYLTYPE _cur_loc; \ + rb_parser_set_location(p, &_cur_loc); \ + yylval.node = (x); \ +} +# define set_yylval_str(x) \ +do { \ + set_yylval_node(NEW_STR(x, &_cur_loc)); \ + RB_OBJ_WRITTEN(p->ast, Qnil, x); \ +} while(0) +# define set_yylval_literal(x) \ +do { \ + set_yylval_node(NEW_LIT(x, &_cur_loc)); \ + RB_OBJ_WRITTEN(p->ast, Qnil, x); \ +} while(0) # define set_yylval_num(x) (yylval.num = (x)) # define set_yylval_id(x) (yylval.id = (x)) # define set_yylval_name(x) (yylval.id = (x)) -# define set_yylval_literal(x) (yylval.node = NEW_LIT(x)) -# define set_yylval_node(x) (yylval.node = (x)) # define yylval_id() (yylval.id) #else static inline VALUE -ripper_yylval_id(ID x) +ripper_yylval_id(struct parser_params *p, ID x) { - return (VALUE)NEW_LASGN(x, ID2SYM(x)); + return ripper_new_yylval(p, x, ID2SYM(x), 0); } -# define set_yylval_str(x) (void)(x) -# define set_yylval_num(x) (void)(x) +# define set_yylval_str(x) (yylval.val = add_mark_object(p, (x))) +# define set_yylval_num(x) (yylval.val = ripper_new_yylval(p, (x), 0, 0)) # define set_yylval_id(x) (void)(x) -# define set_yylval_name(x) (void)(yylval.val = ripper_yylval_id(x)) -# define set_yylval_literal(x) (void)(x) -# define set_yylval_node(x) (void)(x) +# define set_yylval_name(x) (void)(yylval.val = ripper_yylval_id(p, x)) +# define set_yylval_literal(x) add_mark_object(p, (x)) +# define set_yylval_node(x) (yylval.val = ripper_new_yylval(p, 0, 0, STR_NEW(p->lex.ptok, p->lex.pcur-p->lex.ptok))) # define 
yylval_id() yylval.id +# define _cur_loc NULL_LOC /* dummy */ #endif +#define set_yylval_noname() set_yylval_id(keyword_nil) + #ifndef RIPPER -#define ripper_flush(p) (void)(p) +#define literal_flush(p, ptr) ((p)->lex.ptok = (ptr)) +#define dispatch_scan_event(p, t) ((void)0) +#define dispatch_delayed_token(p, t) ((void)0) +#define has_delayed_token(p) (0) #else -#define ripper_flush(p) ((p)->tokp = (p)->parser_lex_p) +#define literal_flush(p, ptr) ((void)(ptr)) #define yylval_rval (*(RB_TYPE_P(yylval.val, T_NODE) ? &yylval.node->nd_rval : &yylval.val)) -static int -ripper_has_scan_event(struct parser_params *parser) +static inline VALUE +intern_sym(const char *name) { - - if (lex_p < parser->tokp) rb_raise(rb_eRuntimeError, "lex_p < tokp"); - return lex_p > parser->tokp; + ID id = rb_intern_const(name); + return ID2SYM(id); } -static VALUE -ripper_scan_event_val(struct parser_params *parser, int t) +static int +ripper_has_scan_event(struct parser_params *p) { - VALUE str = STR_NEW(parser->tokp, lex_p - parser->tokp); - VALUE rval = ripper_dispatch1(parser, ripper_token2eventid(t), str); - ripper_flush(parser); - return rval; + if (p->lex.pcur < p->lex.ptok) rb_raise(rb_eRuntimeError, "lex.pcur < lex.ptok"); + return p->lex.pcur > p->lex.ptok; } -static void -ripper_dispatch_scan_event(struct parser_params *parser, int t) +static VALUE +ripper_scan_event_val(struct parser_params *p, enum yytokentype t) { - if (!ripper_has_scan_event(parser)) return; - yylval_rval = ripper_scan_event_val(parser, t); + VALUE str = STR_NEW(p->lex.ptok, p->lex.pcur - p->lex.ptok); + VALUE rval = ripper_dispatch1(p, ripper_token2eventid(t), str); + token_flush(p); + return rval; } static void -ripper_dispatch_ignored_scan_event(struct parser_params *parser, int t) +ripper_dispatch_scan_event(struct parser_params *p, enum yytokentype t) { - if (!ripper_has_scan_event(parser)) return; - (void)ripper_scan_event_val(parser, t); + if (!ripper_has_scan_event(p)) return; + add_mark_object(p, 
yylval_rval = ripper_scan_event_val(p, t)); } +#define dispatch_scan_event(p, t) ripper_dispatch_scan_event(p, t) static void -ripper_dispatch_delayed_token(struct parser_params *parser, int t) +ripper_dispatch_delayed_token(struct parser_params *p, enum yytokentype t) { - int saved_line = ruby_sourceline; - const char *saved_tokp = parser->tokp; + int saved_line = p->ruby_sourceline; + const char *saved_tokp = p->lex.ptok; - ruby_sourceline = parser->delayed_line; - parser->tokp = lex_pbeg + parser->delayed_col; - yylval_rval = ripper_dispatch1(parser, ripper_token2eventid(t), parser->delayed); - parser->delayed = Qnil; - ruby_sourceline = saved_line; - parser->tokp = saved_tokp; + if (NIL_P(p->delayed.token)) return; + p->ruby_sourceline = p->delayed.line; + p->lex.ptok = p->lex.pbeg + p->delayed.col; + add_mark_object(p, yylval_rval = ripper_dispatch1(p, ripper_token2eventid(t), p->delayed.token)); + p->delayed.token = Qnil; + p->ruby_sourceline = saved_line; + p->lex.ptok = saved_tokp; } +#define dispatch_delayed_token(p, t) ripper_dispatch_delayed_token(p, t) +#define has_delayed_token(p) (!NIL_P(p->delayed.token)) #endif /* RIPPER */ -#include "ruby/regex.h" -#include "ruby/util.h" - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. 
*/ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) -#endif - -#define parser_encoding_name() (parser->enc->name) -#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) -#define parser_precise_mbclen() rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc) -#define is_identchar(p,e,enc) (rb_enc_isalnum(*(p),(enc)) || (*(p)) == '_' || !ISASCII(*(p))) -#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) +static inline int +is_identchar(const char *ptr, const char *MAYBE_UNUSED(ptr_end), rb_encoding *enc) +{ + return rb_enc_isalnum((unsigned char)*ptr, enc) || *ptr == '_' || !ISASCII(*ptr); +} -#define parser_isascii() ISASCII(*(lex_p-1)) +static inline int +parser_is_identchar(struct parser_params *p) +{ + return !(p)->eofp && is_identchar(p->lex.pcur-1, p->lex.pend, p->enc); +} -#ifndef RIPPER -static int -token_info_get_column(struct parser_params *parser, const char *token) +static inline int +parser_isascii(struct parser_params *p) { - int column = 1; - const char *p, *pend = lex_p - strlen(token); - for (p = lex_pbeg; p < pend; p++) { - if (*p == '\t') { - column = (((column - 1) / 8) + 1) * 8; - } - column++; - } - return column; + return ISASCII(*(p->lex.pcur-1)); } -static int -token_info_has_nonspaces(struct parser_params *parser, const char *token) +static void +token_info_setup(token_info *ptinfo, const char *ptr, const rb_code_location_t *loc) { - const char *p, *pend = lex_p - strlen(token); - for (p = lex_pbeg; p < pend; p++) { - if (*p != ' ' && *p != '\t') { - return 1; + int column = 1, nonspc = 0, i; + for (i = 0; i < loc->beg_pos.column; i++, ptr++) { + if (*ptr == '\t') { + column = (((column - 1) / TAB_WIDTH) + 1) * TAB_WIDTH; + } + column++; + if (*ptr != ' ' && *ptr != '\t') { + nonspc = 1; } } - return 0; + + ptinfo->beg = loc->beg_pos; + ptinfo->indent = column; + ptinfo->nonspc = nonspc; } -#undef token_info_push static void -token_info_push(struct parser_params *parser, 
const char *token) +token_info_push(struct parser_params *p, const char *token, const rb_code_location_t *loc) { token_info *ptinfo; - if (!parser->parser_token_info_enabled) return; + if (!p->token_info_enabled) return; ptinfo = ALLOC(token_info); ptinfo->token = token; - ptinfo->linenum = ruby_sourceline; - ptinfo->column = token_info_get_column(parser, token); - ptinfo->nonspc = token_info_has_nonspaces(parser, token); - ptinfo->next = parser->parser_token_info; + ptinfo->next = p->token_info; + token_info_setup(ptinfo, p->lex.pbeg, loc); - parser->parser_token_info = ptinfo; + p->token_info = ptinfo; } -#undef token_info_pop static void -token_info_pop(struct parser_params *parser, const char *token) +token_info_pop(struct parser_params *p, const char *token, const rb_code_location_t *loc) { - int linenum; - token_info *ptinfo = parser->parser_token_info; + token_info *ptinfo_beg = p->token_info; + + if (!ptinfo_beg) return; + p->token_info = ptinfo_beg->next; + + /* indentation check of matched keywords (begin..end, if..end, etc.) 
*/ + token_info_warn(p, token, ptinfo_beg, 1, loc); + ruby_sized_xfree(ptinfo_beg, sizeof(*ptinfo_beg)); +} + +static void +token_info_drop(struct parser_params *p, const char *token, rb_code_position_t beg_pos) +{ + token_info *ptinfo_beg = p->token_info; + + if (!ptinfo_beg) return; + p->token_info = ptinfo_beg->next; - if (!ptinfo) return; - parser->parser_token_info = ptinfo->next; - if (token_info_get_column(parser, token) == ptinfo->column) { /* OK */ - goto finish; + if (ptinfo_beg->beg.lineno != beg_pos.lineno || + ptinfo_beg->beg.column != beg_pos.column || + strcmp(ptinfo_beg->token, token)) { + compile_error(p, "token position mismatch: %d:%d:%s expected but %d:%d:%s", + beg_pos.lineno, beg_pos.column, token, + ptinfo_beg->beg.lineno, ptinfo_beg->beg.column, + ptinfo_beg->token); } - linenum = ruby_sourceline; - if (linenum == ptinfo->linenum) { /* SKIP */ - goto finish; + + ruby_sized_xfree(ptinfo_beg, sizeof(*ptinfo_beg)); +} + +static void +token_info_warn(struct parser_params *p, const char *token, token_info *ptinfo_beg, int same, const rb_code_location_t *loc) +{ + token_info ptinfo_end_body, *ptinfo_end = &ptinfo_end_body; + if (!p->token_info_enabled) return; + if (!ptinfo_beg) return; + token_info_setup(ptinfo_end, p->lex.pbeg, loc); + if (ptinfo_beg->beg.lineno == ptinfo_end->beg.lineno) return; /* ignore one-line block */ + if (ptinfo_beg->nonspc || ptinfo_end->nonspc) return; /* ignore keyword in the middle of a line */ + if (ptinfo_beg->indent == ptinfo_end->indent) return; /* the indents are matched */ + if (!same && ptinfo_beg->indent < ptinfo_end->indent) return; + rb_warn3L(ptinfo_end->beg.lineno, + "mismatched indentations at '%s' with '%s' at %d", + WARN_S(token), WARN_S(ptinfo_beg->token), WARN_I(ptinfo_beg->beg.lineno)); +} + +static int +parser_precise_mbclen(struct parser_params *p, const char *ptr) +{ + int len = rb_enc_precise_mbclen(ptr, p->lex.pend, p->enc); + if (!MBCLEN_CHARFOUND_P(len)) { + compile_error(p, "invalid 
multibyte char (%s)", rb_enc_name(p->enc)); + return -1; + } + return len; +} + +#ifndef RIPPER +static void ruby_show_error_line(VALUE errbuf, const YYLTYPE *yylloc, int lineno, VALUE str); + +static inline void +parser_show_error_line(struct parser_params *p, const YYLTYPE *yylloc) +{ + VALUE str; + int lineno = p->ruby_sourceline; + if (!yylloc) { + return; } - if (token_info_has_nonspaces(parser, token) || ptinfo->nonspc) { /* SKIP */ - goto finish; + else if (yylloc->beg_pos.lineno == lineno) { + str = p->lex.lastline; } - if (parser->parser_token_info_enabled) { - rb_compile_warn(ruby_sourcefile, linenum, - "mismatched indentations at '%s' with '%s' at %d", - token, ptinfo->token, ptinfo->linenum); + else { + return; } + ruby_show_error_line(p->error_buffer, yylloc, lineno, str); +} - finish: - xfree(ptinfo); +static int +parser_yyerror(struct parser_params *p, const YYLTYPE *yylloc, const char *msg) +{ +#if 0 + YYLTYPE current; + + if (!yylloc) { + yylloc = RUBY_SET_YYLLOC(current); + } + else if ((p->ruby_sourceline != yylloc->beg_pos.lineno && + p->ruby_sourceline != yylloc->end_pos.lineno)) { + yylloc = 0; + } +#endif + compile_error(p, "%s", msg); + parser_show_error_line(p, yylloc); + return 0; } -#endif /* RIPPER */ static int -parser_yyerror(struct parser_params *parser, const char *msg) +parser_yyerror0(struct parser_params *p, const char *msg) { -#ifndef RIPPER + YYLTYPE current; + return parser_yyerror(p, RUBY_SET_YYLLOC(current), msg); +} + +static void +ruby_show_error_line(VALUE errbuf, const YYLTYPE *yylloc, int lineno, VALUE str) +{ + VALUE mesg; const int max_line_margin = 30; - const char *p, *pe; + const char *ptr, *ptr_end, *pt, *pb; + const char *pre = "", *post = "", *pend; + const char *code = "", *caret = ""; + const char *lim; + const char *const pbeg = RSTRING_PTR(str); char *buf; long len; int i; - compile_error(PARSER_ARG "%s", msg); - p = lex_p; - while (lex_pbeg <= p) { - if (*p == '\n') break; - p--; + if (!yylloc) return; + 
pend = RSTRING_END(str); + if (pend > pbeg && pend[-1] == '\n') { + if (--pend > pbeg && pend[-1] == '\r') --pend; } - p++; - pe = lex_p; - while (pe < lex_pend) { - if (*pe == '\n') break; - pe++; + pt = pend; + if (lineno == yylloc->end_pos.lineno && + (pend - pbeg) > yylloc->end_pos.column) { + pt = pbeg + yylloc->end_pos.column; } - len = pe - p; + ptr = ptr_end = pt; + lim = ptr - pbeg > max_line_margin ? ptr - max_line_margin : pbeg; + while ((lim < ptr) && (*(ptr-1) != '\n')) ptr--; + + lim = pend - ptr_end > max_line_margin ? ptr_end + max_line_margin : pend; + while ((ptr_end < lim) && (*ptr_end != '\n') && (*ptr_end != '\r')) ptr_end++; + + len = ptr_end - ptr; if (len > 4) { + if (ptr > pbeg) { + ptr = rb_enc_prev_char(pbeg, ptr, pt, rb_enc_get(str)); + if (ptr > pbeg) pre = "..."; + } + if (ptr_end < pend) { + ptr_end = rb_enc_prev_char(pt, ptr_end, pend, rb_enc_get(str)); + if (ptr_end < pend) post = "..."; + } + } + pb = pbeg; + if (lineno == yylloc->beg_pos.lineno) { + pb += yylloc->beg_pos.column; + if (pb > pt) pb = pt; + } + if (pb < ptr) pb = ptr; + if (len <= 4 && yylloc->beg_pos.lineno == yylloc->end_pos.lineno) { + return; + } + if (RTEST(errbuf)) { + mesg = rb_attr_get(errbuf, idMesg); + if (RSTRING_LEN(mesg) > 0 && *(RSTRING_END(mesg)-1) != '\n') + rb_str_cat_cstr(mesg, "\n"); + } + else { + mesg = rb_enc_str_new(0, 0, rb_enc_get(str)); + } + if (!errbuf && rb_stderr_tty_p()) { +#define CSI_BEGIN "\033[" +#define CSI_SGR "m" + rb_str_catf(mesg, + CSI_BEGIN""CSI_SGR"%s" /* pre */ + CSI_BEGIN"1"CSI_SGR"%.*s" + CSI_BEGIN"1;4"CSI_SGR"%.*s" + CSI_BEGIN";1"CSI_SGR"%.*s" + CSI_BEGIN""CSI_SGR"%s" /* post */ + "\n", + pre, + (int)(pb - ptr), ptr, + (int)(pt - pb), pb, + (int)(ptr_end - pt), pt, + post); + } + else { char *p2; - const char *pre = "", *post = ""; - if (len > max_line_margin * 2 + 10) { - if (lex_p - p > max_line_margin) { - p = rb_enc_prev_char(p, lex_p - max_line_margin, pe, rb_enc_get(lex_lastline)); - pre = "..."; - } - if (pe - 
lex_p > max_line_margin) { - pe = rb_enc_prev_char(lex_p, lex_p + max_line_margin, pe, rb_enc_get(lex_lastline)); - post = "..."; + len = ptr_end - ptr; + lim = pt < pend ? pt : pend; + i = (int)(lim - ptr); + buf = ALLOCA_N(char, i+2); + code = ptr; + caret = p2 = buf; + if (ptr <= pb) { + while (ptr < pb) { + *p2++ = *ptr++ == '\t' ? '\t' : ' '; } - len = pe - p; + *p2++ = '^'; + ptr++; } - buf = ALLOCA_N(char, len+2); - MEMCPY(buf, p, char, len); - buf[len] = '\0'; - rb_compile_error_append("%s%s%s", pre, buf, post); - - i = (int)(lex_p - p); - p2 = buf; pe = buf + len; - - while (p2 < pe) { - if (*p2 != '\t') *p2 = ' '; - p2++; + if (lim > ptr) { + memset(p2, '~', (lim - ptr)); + p2 += (lim - ptr); } - buf[i] = '^'; - buf[i+1] = '\0'; - rb_compile_error_append("%s%s", pre, buf); + *p2 = '\0'; + rb_str_catf(mesg, "%s%.*s%s\n""%s%s\n", + pre, (int)len, code, post, + pre, caret); } + if (!errbuf) rb_write_error_str(mesg); +} #else +static int +parser_yyerror(struct parser_params *p, const YYLTYPE *yylloc, const char *msg) +{ + const char *pcur = 0, *ptok = 0; + if (p->ruby_sourceline == yylloc->beg_pos.lineno && + p->ruby_sourceline == yylloc->end_pos.lineno) { + pcur = p->lex.pcur; + ptok = p->lex.ptok; + p->lex.ptok = p->lex.pbeg + yylloc->beg_pos.column; + p->lex.pcur = p->lex.pbeg + yylloc->end_pos.column; + } + parser_yyerror0(p, msg); + if (pcur) { + p->lex.ptok = ptok; + p->lex.pcur = pcur; + } + return 0; +} + +static int +parser_yyerror0(struct parser_params *p, const char *msg) +{ dispatch1(parse_error, STR_NEW2(msg)); + ripper_error(p); + return 0; +} + +static inline void +parser_show_error_line(struct parser_params *p, const YYLTYPE *yylloc) +{ +} #endif /* !RIPPER */ + +#ifndef RIPPER +static int +vtable_size(const struct vtable *tbl) +{ + if (!DVARS_TERMINAL_P(tbl)) { + return tbl->pos; + } + else { + return 0; + } +} +#endif + +static struct vtable * +vtable_alloc_gen(struct parser_params *p, int line, struct vtable *prev) +{ + struct vtable *tbl = 
ALLOC(struct vtable); + tbl->pos = 0; + tbl->capa = 8; + tbl->tbl = ALLOC_N(ID, tbl->capa); + tbl->prev = prev; +#ifndef RIPPER + if (p->debug) { + rb_parser_printf(p, "vtable_alloc:%d: %p\n", line, (void *)tbl); + } +#endif + return tbl; +} +#define vtable_alloc(prev) vtable_alloc_gen(p, __LINE__, prev) + +static void +vtable_free_gen(struct parser_params *p, int line, const char *name, + struct vtable *tbl) +{ +#ifndef RIPPER + if (p->debug) { + rb_parser_printf(p, "vtable_free:%d: %s(%p)\n", line, name, (void *)tbl); + } +#endif + if (!DVARS_TERMINAL_P(tbl)) { + if (tbl->tbl) { + ruby_sized_xfree(tbl->tbl, tbl->capa * sizeof(ID)); + } + ruby_sized_xfree(tbl, sizeof(*tbl)); + } +} +#define vtable_free(tbl) vtable_free_gen(p, __LINE__, #tbl, tbl) + +static void +vtable_add_gen(struct parser_params *p, int line, const char *name, + struct vtable *tbl, ID id) +{ +#ifndef RIPPER + if (p->debug) { + rb_parser_printf(p, "vtable_add:%d: %s(%p), %s\n", + line, name, (void *)tbl, rb_id2name(id)); + } +#endif + if (DVARS_TERMINAL_P(tbl)) { + rb_parser_fatal(p, "vtable_add: vtable is not allocated (%p)", (void *)tbl); + return; + } + if (tbl->pos == tbl->capa) { + tbl->capa = tbl->capa * 2; + SIZED_REALLOC_N(tbl->tbl, ID, tbl->capa, tbl->pos); + } + tbl->tbl[tbl->pos++] = id; +} +#define vtable_add(tbl, id) vtable_add_gen(p, __LINE__, #tbl, tbl, id) + +#ifndef RIPPER +static void +vtable_pop_gen(struct parser_params *p, int line, const char *name, + struct vtable *tbl, int n) +{ + if (p->debug) { + rb_parser_printf(p, "vtable_pop:%d: %s(%p), %d\n", + line, name, (void *)tbl, n); + } + if (tbl->pos < n) { + rb_parser_fatal(p, "vtable_pop: unreachable (%d < %d)", tbl->pos, n); + return; + } + tbl->pos -= n; +} +#define vtable_pop(tbl, n) vtable_pop_gen(p, __LINE__, #tbl, tbl, n) +#endif + +static int +vtable_included(const struct vtable * tbl, ID id) +{ + int i; + + if (!DVARS_TERMINAL_P(tbl)) { + for (i = 0; i < tbl->pos; i++) { + if (tbl->tbl[i] == id) { + return i+1; + } + 
} + } return 0; } -static void parser_prepare(struct parser_params *parser); +static void parser_prepare(struct parser_params *p); #ifndef RIPPER +static NODE *parser_append_options(struct parser_params *p, NODE *node); + static VALUE -debug_lines(const char *f) +debug_lines(VALUE fname) { ID script_lines; CONST_ID(script_lines, "SCRIPT_LINES__"); if (rb_const_defined_at(rb_cObject, script_lines)) { VALUE hash = rb_const_get_at(rb_cObject, script_lines); if (RB_TYPE_P(hash, T_HASH)) { - VALUE fname = rb_external_str_new_with_enc(f, strlen(f), rb_filesystem_encoding()); VALUE lines = rb_ary_new(); rb_hash_aset(hash, fname, lines); return lines; @@ -5260,27 +6311,10 @@ debug_lines(const char *f) return 0; } -static VALUE -coverage(const char *f, int n) -{ - VALUE coverages = rb_get_coverages(); - if (RTEST(coverages) && RBASIC(coverages)->klass == 0) { - VALUE fname = rb_external_str_new_with_enc(f, strlen(f), rb_filesystem_encoding()); - VALUE lines = rb_ary_new2(n); - int i; - RBASIC(lines)->klass = 0; - for (i = 0; i < n; i++) RARRAY_PTR(lines)[i] = Qnil; - RARRAY(lines)->as.heap.len = n; - rb_hash_aset(coverages, fname, lines); - return lines; - } - return 0; -} - static int -e_option_supplied(struct parser_params *parser) +e_option_supplied(struct parser_params *p) { - return strcmp(ruby_sourcefile, "-e") == 0; + return strcmp(p->ruby_sourcefile, "-e") == 0; } static VALUE @@ -5288,55 +6322,98 @@ yycompile0(VALUE arg) { int n; NODE *tree; - struct parser_params *parser = (struct parser_params *)arg; - - if (!compile_for_eval && rb_safe_level() == 0) { - ruby_debug_lines = debug_lines(ruby_sourcefile); - if (ruby_debug_lines && ruby_sourceline > 0) { - VALUE str = STR_NEW0(); - n = ruby_sourceline; + struct parser_params *p = (struct parser_params *)arg; + VALUE cov = Qfalse; + + if (!compile_for_eval && !NIL_P(p->ruby_sourcefile_string)) { + p->debug_lines = debug_lines(p->ruby_sourcefile_string); + if (p->debug_lines && p->ruby_sourceline > 0) { + VALUE str = 
rb_default_rs; + n = p->ruby_sourceline; do { - rb_ary_push(ruby_debug_lines, str); + rb_ary_push(p->debug_lines, str); } while (--n); } - if (!e_option_supplied(parser)) { - ruby_coverage = coverage(ruby_sourcefile, ruby_sourceline); + if (!e_option_supplied(p)) { + cov = Qtrue; } } - parser_prepare(parser); - deferred_nodes = 0; -#ifndef RIPPER - parser->parser_token_info_enabled = !compile_for_eval && RTEST(ruby_verbose); -#endif - n = yyparse((void*)parser); - ruby_debug_lines = 0; - ruby_coverage = 0; - compile_for_eval = 0; - - lex_strterm = 0; - lex_p = lex_pbeg = lex_pend = 0; - lex_lastline = lex_nextline = 0; - if (parser->nerr) { - return 0; + if (p->keep_script_lines || ruby_vm_keep_script_lines) { + if (!p->debug_lines) { + p->debug_lines = rb_ary_new(); + } + + RB_OBJ_WRITE(p->ast, &p->ast->body.script_lines, p->debug_lines); } - tree = ruby_eval_tree; - if (!tree) { - tree = NEW_NIL(); + + parser_prepare(p); +#define RUBY_DTRACE_PARSE_HOOK(name) \ + if (RUBY_DTRACE_PARSE_##name##_ENABLED()) { \ + RUBY_DTRACE_PARSE_##name(p->ruby_sourcefile, p->ruby_sourceline); \ } - else if (ruby_eval_tree_begin) { - tree->nd_body = NEW_PRELUDE(ruby_eval_tree_begin, tree->nd_body); + RUBY_DTRACE_PARSE_HOOK(BEGIN); + n = yyparse(p); + RUBY_DTRACE_PARSE_HOOK(END); + p->debug_lines = 0; + + p->lex.strterm = 0; + p->lex.pcur = p->lex.pbeg = p->lex.pend = 0; + p->lex.prevline = p->lex.lastline = p->lex.nextline = 0; + if (n || p->error_p) { + VALUE mesg = p->error_buffer; + if (!mesg) { + mesg = rb_class_new_instance(0, 0, rb_eSyntaxError); + } + rb_set_errinfo(mesg); + return FALSE; + } + tree = p->eval_tree; + if (!tree) { + tree = NEW_NIL(&NULL_LOC); } - return (VALUE)tree; + else { + VALUE opt = p->compile_option; + NODE *prelude; + NODE *body = parser_append_options(p, tree->nd_body); + if (!opt) opt = rb_obj_hide(rb_ident_hash_new()); + rb_hash_aset(opt, rb_sym_intern_ascii_cstr("coverage_enabled"), cov); + prelude = block_append(p, p->eval_tree_begin, body); + 
tree->nd_body = prelude; + RB_OBJ_WRITE(p->ast, &p->ast->body.compile_option, opt); + } + p->ast->body.root = tree; + if (!p->ast->body.script_lines) p->ast->body.script_lines = INT2FIX(p->line_count); + return TRUE; } -static NODE* -yycompile(struct parser_params *parser, const char *f, int line) +static rb_ast_t * +yycompile(VALUE vparser, struct parser_params *p, VALUE fname, int line) { - ruby_sourcefile = ruby_strdup(f); - ruby_sourceline = line - 1; - return (NODE *)rb_suppress_tracing(yycompile0, (VALUE)parser); + rb_ast_t *ast; + if (NIL_P(fname)) { + p->ruby_sourcefile_string = Qnil; + p->ruby_sourcefile = "(none)"; + } + else { + p->ruby_sourcefile_string = rb_fstring(fname); + p->ruby_sourcefile = StringValueCStr(fname); + } + p->ruby_sourceline = line - 1; + + p->lvtbl = NULL; + + p->ast = ast = rb_ast_new(); + rb_suppress_tracing(yycompile0, (VALUE)p); + p->ast = 0; + RB_GC_GUARD(vparser); /* prohibit tail call optimization */ + + while (p->lvtbl) { + local_pop(p); + } + + return ast; } #endif /* !RIPPER */ @@ -5351,125 +6428,108 @@ must_be_ascii_compatible(VALUE s) } static VALUE -lex_get_str(struct parser_params *parser, VALUE s) +lex_get_str(struct parser_params *p, VALUE s) { - char *beg, *end, *pend; - rb_encoding *enc = must_be_ascii_compatible(s); + char *beg, *end, *start; + long len; beg = RSTRING_PTR(s); - if (lex_gets_ptr) { - if (RSTRING_LEN(s) == lex_gets_ptr) return Qnil; - beg += lex_gets_ptr; - } - pend = RSTRING_PTR(s) + RSTRING_LEN(s); - end = beg; - while (end < pend) { - if (*end++ == '\n') break; + len = RSTRING_LEN(s); + start = beg; + if (p->lex.gets_.ptr) { + if (len == p->lex.gets_.ptr) return Qnil; + beg += p->lex.gets_.ptr; + len -= p->lex.gets_.ptr; } - lex_gets_ptr = end - RSTRING_PTR(s); - return rb_enc_str_new(beg, end - beg, enc); + end = memchr(beg, '\n', len); + if (end) len = ++end - beg; + p->lex.gets_.ptr += len; + return rb_str_subseq(s, beg - start, len); } static VALUE -lex_getline(struct parser_params *parser) 
+lex_getline(struct parser_params *p) { - VALUE line = (*parser->parser_lex_gets)(parser, parser->parser_lex_input); + VALUE line = (*p->lex.gets)(p, p->lex.input); if (NIL_P(line)) return line; must_be_ascii_compatible(line); -#ifndef RIPPER - if (ruby_debug_lines) { - rb_enc_associate(line, parser->enc); - rb_ary_push(ruby_debug_lines, line); - } - if (ruby_coverage) { - rb_ary_push(ruby_coverage, Qnil); - } -#endif + if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen + p->line_count++; return line; } -#ifdef RIPPER -static rb_data_type_t parser_data_type; -#else static const rb_data_type_t parser_data_type; -static NODE* -parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) +#ifndef RIPPER +static rb_ast_t* +parser_compile_string(VALUE vparser, VALUE fname, VALUE s, int line) { - struct parser_params *parser; - NODE *node; + struct parser_params *p; - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - lex_gets = lex_get_str; - lex_gets_ptr = 0; - lex_input = s; - lex_pbeg = lex_p = lex_pend = 0; - compile_for_eval = rb_parse_in_eval(); + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); - node = yycompile(parser, f, line); - RB_GC_GUARD(vparser); /* prohibit tail call optimization */ + p->lex.gets = lex_get_str; + p->lex.gets_.ptr = 0; + p->lex.input = rb_str_new_frozen(s); + p->lex.pbeg = p->lex.pcur = p->lex.pend = 0; - return node; + return yycompile(vparser, p, fname, line); } -NODE* -rb_compile_string(const char *f, VALUE s, int line) +rb_ast_t* +rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line) { - must_be_ascii_compatible(s); - return parser_compile_string(rb_parser_new(), f, s, line); + return rb_parser_compile_string_path(vparser, rb_filesystem_str_new_cstr(f), s, line); } -NODE* -rb_parser_compile_string(volatile VALUE vparser, const char *f, VALUE s, int line) +rb_ast_t* 
+rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line) { must_be_ascii_compatible(s); return parser_compile_string(vparser, f, s, line); } -NODE* -rb_compile_cstr(const char *f, const char *s, int len, int line) -{ - VALUE str = rb_str_new(s, len); - return parser_compile_string(rb_parser_new(), f, str, line); -} +VALUE rb_io_gets_internal(VALUE io); -NODE* -rb_parser_compile_cstr(volatile VALUE vparser, const char *f, const char *s, int len, int line) +static VALUE +lex_io_gets(struct parser_params *p, VALUE io) { - VALUE str = rb_str_new(s, len); - return parser_compile_string(vparser, f, str, line); + return rb_io_gets_internal(io); } -static VALUE -lex_io_gets(struct parser_params *parser, VALUE io) +rb_ast_t* +rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start) { - return rb_io_gets(io); + struct parser_params *p; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + + p->lex.gets = lex_io_gets; + p->lex.input = file; + p->lex.pbeg = p->lex.pcur = p->lex.pend = 0; + + return yycompile(vparser, p, fname, start); } -NODE* -rb_compile_file(const char *f, VALUE file, int start) +static VALUE +lex_generic_gets(struct parser_params *p, VALUE input) { - VALUE volatile vparser = rb_parser_new(); - - return rb_parser_compile_file(vparser, f, file, start); + return (*p->lex.gets_.call)(input, p->line_count); } -NODE* -rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int start) +rb_ast_t* +rb_parser_compile_generic(VALUE vparser, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start) { - struct parser_params *parser; - NODE *node; + struct parser_params *p; - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - lex_gets = lex_io_gets; - lex_input = file; - lex_pbeg = lex_p = lex_pend = 0; - compile_for_eval = rb_parse_in_eval(); + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); - node = yycompile(parser, f, 
start); - RB_GC_GUARD(vparser); /* prohibit tail call optimization */ + p->lex.gets = lex_generic_gets; + p->lex.gets_.call = lex_gets; + p->lex.input = input; + p->lex.pbeg = p->lex.pcur = p->lex.pend = 0; - return node; + return yycompile(vparser, p, fname, start); } #endif /* !RIPPER */ @@ -5479,24 +6539,28 @@ rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int st #define STR_FUNC_QWORDS 0x08 #define STR_FUNC_SYMBOL 0x10 #define STR_FUNC_INDENT 0x20 +#define STR_FUNC_LABEL 0x40 +#define STR_FUNC_LIST 0x4000 +#define STR_FUNC_TERM 0x8000 enum string_type { + str_label = STR_FUNC_LABEL, str_squote = (0), str_dquote = (STR_FUNC_EXPAND), str_xquote = (STR_FUNC_EXPAND), str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), - str_sword = (STR_FUNC_QWORDS), - str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), + str_sword = (STR_FUNC_QWORDS|STR_FUNC_LIST), + str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND|STR_FUNC_LIST), str_ssym = (STR_FUNC_SYMBOL), str_dsym = (STR_FUNC_SYMBOL|STR_FUNC_EXPAND) }; static VALUE -parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0) +parser_str_new(const char *ptr, long len, rb_encoding *enc, int func, rb_encoding *enc0) { VALUE str; - str = rb_enc_str_new(p, n, enc); + str = rb_enc_str_new(ptr, len, enc); if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) { if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) { } @@ -5508,220 +6572,334 @@ parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *e return str; } -#define lex_goto_eol(parser) ((parser)->parser_lex_p = (parser)->parser_lex_pend) -#define lex_eol_p() (lex_p >= lex_pend) -#define peek(c) peek_n((c), 0) -#define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n]) +#define lex_goto_eol(p) ((p)->lex.pcur = (p)->lex.pend) +#define lex_eol_p(p) ((p)->lex.pcur >= (p)->lex.pend) +#define lex_eol_n_p(p,n) ((p)->lex.pcur+(n) >= (p)->lex.pend) +#define peek(p,c) peek_n(p, (c), 0) 
+#define peek_n(p,c,n) (!lex_eol_n_p(p, n) && (c) == (unsigned char)(p)->lex.pcur[n]) +#define peekc(p) peekc_n(p, 0) +#define peekc_n(p,n) (lex_eol_n_p(p, n) ? -1 : (unsigned char)(p)->lex.pcur[n]) -static inline int -parser_nextc(struct parser_params *parser) +#ifdef RIPPER +static void +add_delayed_token(struct parser_params *p, const char *tok, const char *end) { - int c; + if (tok < end) { + if (!has_delayed_token(p)) { + p->delayed.token = rb_str_buf_new(end - tok); + rb_enc_associate(p->delayed.token, p->enc); + p->delayed.line = p->ruby_sourceline; + p->delayed.col = rb_long2int(tok - p->lex.pbeg); + } + rb_str_buf_cat(p->delayed.token, tok, end - tok); + p->lex.ptok = end; + } +} +#else +#define add_delayed_token(p, tok, end) ((void)(tok), (void)(end)) +#endif - if (lex_p == lex_pend) { - VALUE v = lex_nextline; - lex_nextline = 0; - if (!v) { - if (parser->eofp) - return -1; +static int +nextline(struct parser_params *p, int set_encoding) +{ + VALUE v = p->lex.nextline; + p->lex.nextline = 0; + if (!v) { + if (p->eofp) + return -1; - if (!lex_input || NIL_P(v = lex_getline(parser))) { - parser->eofp = Qtrue; - lex_goto_eol(parser); - return -1; - } + if (p->lex.pend > p->lex.pbeg && *(p->lex.pend-1) != '\n') { + goto end_of_input; } - { -#ifdef RIPPER - if (parser->tokp < lex_pend) { - if (NIL_P(parser->delayed)) { - parser->delayed = rb_str_buf_new(1024); - rb_enc_associate(parser->delayed, parser->enc); - rb_str_buf_cat(parser->delayed, - parser->tokp, lex_pend - parser->tokp); - parser->delayed_line = ruby_sourceline; - parser->delayed_col = (int)(parser->tokp - lex_pbeg); - } - else { - rb_str_buf_cat(parser->delayed, - parser->tokp, lex_pend - parser->tokp); - } - } -#endif - if (heredoc_end > 0) { - ruby_sourceline = heredoc_end; - heredoc_end = 0; - } - ruby_sourceline++; - parser->line_count++; - lex_pbeg = lex_p = RSTRING_PTR(v); - lex_pend = lex_p + RSTRING_LEN(v); - ripper_flush(parser); - lex_lastline = v; + + if (!p->lex.input || NIL_P(v = 
lex_getline(p))) { + end_of_input: + p->eofp = 1; + lex_goto_eol(p); + return -1; } - } - c = (unsigned char)*lex_p++; - if (c == '\r' && peek('\n')) { - lex_p++; +#ifndef RIPPER + if (p->debug_lines) { + if (set_encoding) rb_enc_associate(v, p->enc); + rb_ary_push(p->debug_lines, v); + } +#endif + p->cr_seen = FALSE; + } + else if (NIL_P(v)) { + /* after here-document without terminator */ + goto end_of_input; + } + add_delayed_token(p, p->lex.ptok, p->lex.pend); + if (p->heredoc_end > 0) { + p->ruby_sourceline = p->heredoc_end; + p->heredoc_end = 0; + } + p->ruby_sourceline++; + p->lex.pbeg = p->lex.pcur = RSTRING_PTR(v); + p->lex.pend = p->lex.pcur + RSTRING_LEN(v); + token_flush(p); + p->lex.prevline = p->lex.lastline; + p->lex.lastline = v; + return 0; +} + +static int +parser_cr(struct parser_params *p, int c) +{ + if (peek(p, '\n')) { + p->lex.pcur++; c = '\n'; } + return c; +} + +static inline int +nextc0(struct parser_params *p, int set_encoding) +{ + int c; + + if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) { + if (nextline(p, set_encoding)) return -1; + } + c = (unsigned char)*p->lex.pcur++; + if (UNLIKELY(c == '\r')) { + c = parser_cr(p, c); + } return c; } +#define nextc(p) nextc0(p, TRUE) static void -parser_pushback(struct parser_params *parser, int c) +pushback(struct parser_params *p, int c) { if (c == -1) return; - lex_p--; - if (lex_p > lex_pbeg && lex_p[0] == '\n' && lex_p[-1] == '\r') { - lex_p--; + p->lex.pcur--; + if (p->lex.pcur > p->lex.pbeg && p->lex.pcur[0] == '\n' && p->lex.pcur[-1] == '\r') { + p->lex.pcur--; } } -#define was_bol() (lex_p == lex_pbeg + 1) +#define was_bol(p) ((p)->lex.pcur == (p)->lex.pbeg + 1) -#define tokfix() (tokenbuf[tokidx]='\0') -#define tok() tokenbuf -#define toklen() tokidx -#define toklast() (tokidx>0?tokenbuf[tokidx-1]:0) +#define tokfix(p) ((p)->tokenbuf[(p)->tokidx]='\0') +#define tok(p) (p)->tokenbuf +#define toklen(p) (p)->tokidx + +static int +looking_at_eol_p(struct 
parser_params *p) +{ + const char *ptr = p->lex.pcur; + while (ptr < p->lex.pend) { + int c = (unsigned char)*ptr++; + int eol = (c == '\n' || c == '#'); + if (eol || !ISSPACE(c)) { + return eol; + } + } + return TRUE; +} static char* -parser_newtok(struct parser_params *parser) +newtok(struct parser_params *p) { - tokidx = 0; - if (!tokenbuf) { - toksiz = 60; - tokenbuf = ALLOC_N(char, 60); + p->tokidx = 0; + p->tokline = p->ruby_sourceline; + if (!p->tokenbuf) { + p->toksiz = 60; + p->tokenbuf = ALLOC_N(char, 60); } - if (toksiz > 4096) { - toksiz = 60; - REALLOC_N(tokenbuf, char, 60); + if (p->toksiz > 4096) { + p->toksiz = 60; + REALLOC_N(p->tokenbuf, char, 60); } - return tokenbuf; + return p->tokenbuf; } static char * -parser_tokspace(struct parser_params *parser, int n) +tokspace(struct parser_params *p, int n) { - tokidx += n; + p->tokidx += n; - if (tokidx >= toksiz) { - do {toksiz *= 2;} while (toksiz < tokidx); - REALLOC_N(tokenbuf, char, toksiz); + if (p->tokidx >= p->toksiz) { + do {p->toksiz *= 2;} while (p->toksiz < p->tokidx); + REALLOC_N(p->tokenbuf, char, p->toksiz); } - return &tokenbuf[tokidx-n]; + return &p->tokenbuf[p->tokidx-n]; } static void -parser_tokadd(struct parser_params *parser, int c) +tokadd(struct parser_params *p, int c) { - tokenbuf[tokidx++] = (char)c; - if (tokidx >= toksiz) { - toksiz *= 2; - REALLOC_N(tokenbuf, char, toksiz); + p->tokenbuf[p->tokidx++] = (char)c; + if (p->tokidx >= p->toksiz) { + p->toksiz *= 2; + REALLOC_N(p->tokenbuf, char, p->toksiz); } } static int -parser_tok_hex(struct parser_params *parser, size_t *numlen) +tok_hex(struct parser_params *p, size_t *numlen) { int c; - c = scan_hex(lex_p, 2, numlen); + c = scan_hex(p->lex.pcur, 2, numlen); if (!*numlen) { - yyerror("invalid hex escape"); + yyerror0("invalid hex escape"); + token_flush(p); return 0; } - lex_p += *numlen; + p->lex.pcur += *numlen; return c; } -#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n)) +#define tokcopy(p, n) memcpy(tokspace(p, 
n), (p)->lex.pcur - (n), (n)) static int -parser_tokadd_utf8(struct parser_params *parser, rb_encoding **encp, - int string_literal, int symbol_literal, int regexp_literal) +escaped_control_code(int c) { - /* - * If string_literal is true, then we allow multiple codepoints - * in \u{}, and add the codepoints to the current token. - * Otherwise we're parsing a character literal and return a single - * codepoint without adding it - */ + int c2 = 0; + switch (c) { + case ' ': + c2 = 's'; + break; + case '\n': + c2 = 'n'; + break; + case '\t': + c2 = 't'; + break; + case '\v': + c2 = 'v'; + break; + case '\r': + c2 = 'r'; + break; + case '\f': + c2 = 'f'; + break; + } + return c2; +} - int codepoint; - size_t numlen; +#define WARN_SPACE_CHAR(c, prefix) \ + rb_warn1("invalid character syntax; use "prefix"\\%c", WARN_I(c2)) - if (regexp_literal) { tokadd('\\'); tokadd('u'); } +static int +tokadd_codepoint(struct parser_params *p, rb_encoding **encp, + int regexp_literal, int wide) +{ + size_t numlen; + int codepoint = scan_hex(p->lex.pcur, wide ? p->lex.pend - p->lex.pcur : 4, &numlen); + literal_flush(p, p->lex.pcur); + p->lex.pcur += numlen; + if (wide ? 
(numlen == 0 || numlen > 6) : (numlen < 4)) { + yyerror0("invalid Unicode escape"); + return wide && numlen > 0; + } + if (codepoint > 0x10ffff) { + yyerror0("invalid Unicode codepoint (too large)"); + return wide; + } + if ((codepoint & 0xfffff800) == 0xd800) { + yyerror0("invalid Unicode codepoint"); + return wide; + } + if (regexp_literal) { + tokcopy(p, (int)numlen); + } + else if (codepoint >= 0x80) { + rb_encoding *utf8 = rb_utf8_encoding(); + if (*encp && utf8 != *encp) { + YYLTYPE loc = RUBY_INIT_YYLLOC(); + compile_error(p, "UTF-8 mixed within %s source", rb_enc_name(*encp)); + parser_show_error_line(p, &loc); + return wide; + } + *encp = utf8; + tokaddmbc(p, codepoint, *encp); + } + else { + tokadd(p, codepoint); + } + return TRUE; +} - if (peek('{')) { /* handle \u{...} form */ - do { - if (regexp_literal) { tokadd(*lex_p); } - nextc(); - codepoint = scan_hex(lex_p, 6, &numlen); - if (numlen == 0) { - yyerror("invalid Unicode escape"); - return 0; - } - if (codepoint > 0x10ffff) { - yyerror("invalid Unicode codepoint (too large)"); - return 0; - } - lex_p += numlen; - if (regexp_literal) { - tokcopy((int)numlen); - } - else if (codepoint >= 0x80) { - *encp = UTF8_ENC(); - if (string_literal) tokaddmbc(codepoint, *encp); +/* return value is for ?\u3042 */ +static void +tokadd_utf8(struct parser_params *p, rb_encoding **encp, + int term, int symbol_literal, int regexp_literal) +{ + /* + * If `term` is not -1, then we allow multiple codepoints in \u{} + * upto `term` byte, otherwise we're parsing a character literal. + * And then add the codepoints to the current token. 
+ */ + static const char multiple_codepoints[] = "Multiple codepoints at single character literal"; + + const int open_brace = '{', close_brace = '}'; + + if (regexp_literal) { tokadd(p, '\\'); tokadd(p, 'u'); } + + if (peek(p, open_brace)) { /* handle \u{...} form */ + const char *second = NULL; + int c, last = nextc(p); + if (p->lex.pcur >= p->lex.pend) goto unterminated; + while (ISSPACE(c = *p->lex.pcur) && ++p->lex.pcur < p->lex.pend); + while (c != close_brace) { + if (c == term) goto unterminated; + if (second == multiple_codepoints) + second = p->lex.pcur; + if (regexp_literal) tokadd(p, last); + if (!tokadd_codepoint(p, encp, regexp_literal, TRUE)) { + break; } - else if (string_literal) { - tokadd(codepoint); + while (ISSPACE(c = *p->lex.pcur)) { + if (++p->lex.pcur >= p->lex.pend) goto unterminated; + last = c; } - } while (string_literal && (peek(' ') || peek('\t'))); + if (term == -1 && !second) + second = multiple_codepoints; + } - if (!peek('}')) { - yyerror("unterminated Unicode escape"); - return 0; + if (c != close_brace) { + unterminated: + token_flush(p); + yyerror0("unterminated Unicode escape"); + return; + } + if (second && second != multiple_codepoints) { + const char *pcur = p->lex.pcur; + p->lex.pcur = second; + dispatch_scan_event(p, tSTRING_CONTENT); + token_flush(p); + p->lex.pcur = pcur; + yyerror0(multiple_codepoints); + token_flush(p); } - if (regexp_literal) { tokadd('}'); } - nextc(); + if (regexp_literal) tokadd(p, close_brace); + nextc(p); } else { /* handle \uxxxx form */ - codepoint = scan_hex(lex_p, 4, &numlen); - if (numlen < 4) { - yyerror("invalid Unicode escape"); - return 0; - } - lex_p += 4; - if (regexp_literal) { - tokcopy(4); - } - else if (codepoint >= 0x80) { - *encp = UTF8_ENC(); - if (string_literal) tokaddmbc(codepoint, *encp); - } - else if (string_literal) { - tokadd(codepoint); + if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) { + token_flush(p); + return; } } - - return codepoint; } #define 
ESCAPE_CONTROL 1 #define ESCAPE_META 2 static int -parser_read_escape(struct parser_params *parser, int flags, - rb_encoding **encp) +read_escape(struct parser_params *p, int flags, rb_encoding **encp) { int c; size_t numlen; - switch (c = nextc()) { + switch (c = nextc(p)) { case '\\': /* Backslash */ return c; @@ -5748,13 +6926,13 @@ parser_read_escape(struct parser_params *parser, int flags, case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': - pushback(c); - c = scan_oct(lex_p, 3, &numlen); - lex_p += numlen; + pushback(p, c); + c = scan_oct(p->lex.pcur, 3, &numlen); + p->lex.pcur += numlen; return c; case 'x': /* hex constant */ - c = tok_hex(&numlen); + c = tok_hex(p, &numlen); if (numlen == 0) return 0; return c; @@ -5766,38 +6944,77 @@ parser_read_escape(struct parser_params *parser, int flags, case 'M': if (flags & ESCAPE_META) goto eof; - if ((c = nextc()) != '-') { - pushback(c); + if ((c = nextc(p)) != '-') { goto eof; } - if ((c = nextc()) == '\\') { - if (peek('u')) goto eof; - return read_escape(flags|ESCAPE_META, encp) | 0x80; + if ((c = nextc(p)) == '\\') { + switch (peekc(p)) { + case 'u': case 'U': + nextc(p); + goto eof; + } + return read_escape(p, flags|ESCAPE_META, encp) | 0x80; } else if (c == -1 || !ISASCII(c)) goto eof; else { + int c2 = escaped_control_code(c); + if (c2) { + if (ISCNTRL(c) || !(flags & ESCAPE_CONTROL)) { + WARN_SPACE_CHAR(c2, "\\M-"); + } + else { + WARN_SPACE_CHAR(c2, "\\C-\\M-"); + } + } + else if (ISCNTRL(c)) goto eof; return ((c & 0xff) | 0x80); } case 'C': - if ((c = nextc()) != '-') { - pushback(c); + if ((c = nextc(p)) != '-') { goto eof; } case 'c': if (flags & ESCAPE_CONTROL) goto eof; - if ((c = nextc())== '\\') { - if (peek('u')) goto eof; - c = read_escape(flags|ESCAPE_CONTROL, encp); + if ((c = nextc(p))== '\\') { + switch (peekc(p)) { + case 'u': case 'U': + nextc(p); + goto eof; + } + c = read_escape(p, flags|ESCAPE_CONTROL, encp); } else if (c == '?') return 
0177; else if (c == -1 || !ISASCII(c)) goto eof; + else { + int c2 = escaped_control_code(c); + if (c2) { + if (ISCNTRL(c)) { + if (flags & ESCAPE_META) { + WARN_SPACE_CHAR(c2, "\\M-"); + } + else { + WARN_SPACE_CHAR(c2, ""); + } + } + else { + if (flags & ESCAPE_META) { + WARN_SPACE_CHAR(c2, "\\M-\\C-"); + } + else { + WARN_SPACE_CHAR(c2, "\\C-"); + } + } + } + else if (ISCNTRL(c)) goto eof; + } return c & 0x9f; eof: case -1: - yyerror("Invalid escape character syntax"); + yyerror0("Invalid escape character syntax"); + token_flush(p); return '\0'; default: @@ -5806,95 +7023,63 @@ parser_read_escape(struct parser_params *parser, int flags, } static void -parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc) +tokaddmbc(struct parser_params *p, int c, rb_encoding *enc) { int len = rb_enc_codelen(c, enc); - rb_enc_mbcput(c, tokspace(len), enc); + rb_enc_mbcput(c, tokspace(p, len), enc); } static int -parser_tokadd_escape(struct parser_params *parser, rb_encoding **encp) +tokadd_escape(struct parser_params *p, rb_encoding **encp) { int c; - int flags = 0; size_t numlen; - first: - switch (c = nextc()) { + switch (c = nextc(p)) { case '\n': return 0; /* just ignore */ case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': { - ruby_scan_oct(--lex_p, 3, &numlen); + ruby_scan_oct(--p->lex.pcur, 3, &numlen); if (numlen == 0) goto eof; - lex_p += numlen; - tokcopy((int)numlen + 1); + p->lex.pcur += numlen; + tokcopy(p, (int)numlen + 1); } return 0; case 'x': /* hex constant */ { - tok_hex(&numlen); + tok_hex(p, &numlen); if (numlen == 0) return -1; - tokcopy((int)numlen + 2); - } - return 0; - - case 'M': - if (flags & ESCAPE_META) goto eof; - if ((c = nextc()) != '-') { - pushback(c); - goto eof; + tokcopy(p, (int)numlen + 2); } - tokcopy(3); - flags |= ESCAPE_META; - goto escaped; - - case 'C': - if (flags & ESCAPE_CONTROL) goto eof; - if ((c = nextc()) != '-') { - pushback(c); - goto eof; - } - tokcopy(3); - 
goto escaped; - - case 'c': - if (flags & ESCAPE_CONTROL) goto eof; - tokcopy(2); - flags |= ESCAPE_CONTROL; - escaped: - if ((c = nextc()) == '\\') { - goto first; - } - else if (c == -1) goto eof; - tokadd(c); return 0; eof: case -1: - yyerror("Invalid escape character syntax"); + yyerror0("Invalid escape character syntax"); + token_flush(p); return -1; default: - tokadd('\\'); - tokadd(c); + tokadd(p, '\\'); + tokadd(p, c); } return 0; } static int -parser_regx_options(struct parser_params *parser) +regx_options(struct parser_params *p) { int kcode = 0; int kopt = 0; int options = 0; int c, opt, kc; - newtok(); - while (c = nextc(), ISALPHA(c)) { + newtok(p); + while (c = nextc(p), ISALPHA(c)) { if (c == 'o') { options |= RE_OPTION_ONCE; } @@ -5908,50 +7093,39 @@ parser_regx_options(struct parser_params *parser) } } else { - tokadd(c); + tokadd(p, c); } } options |= kopt; - pushback(c); - if (toklen()) { - tokfix(); - compile_error(PARSER_ARG "unknown regexp option%s - %s", - toklen() > 1 ? "s" : "", tok()); + pushback(p, c); + if (toklen(p)) { + YYLTYPE loc = RUBY_INIT_YYLLOC(); + tokfix(p); + compile_error(p, "unknown regexp option%s - %*s", + toklen(p) > 1 ? "s" : "", toklen(p), tok(p)); + parser_show_error_line(p, &loc); } return options | RE_OPTION_ENCODING(kcode); } -static void -dispose_string(VALUE str) -{ - /* TODO: should use another API? 
*/ - if (RBASIC(str)->flags & RSTRING_NOEMBED) - xfree(RSTRING_PTR(str)); - rb_gc_force_recycle(str); -} - static int -parser_tokadd_mbchar(struct parser_params *parser, int c) +tokadd_mbchar(struct parser_params *p, int c) { - int len = parser_precise_mbclen(); - if (!MBCLEN_CHARFOUND_P(len)) { - compile_error(PARSER_ARG "invalid multibyte char (%s)", parser_encoding_name()); - return -1; - } - tokadd(c); - lex_p += --len; - if (len > 0) tokcopy(len); + int len = parser_precise_mbclen(p, p->lex.pcur-1); + if (len < 0) return -1; + tokadd(p, c); + p->lex.pcur += --len; + if (len > 0) tokcopy(p, len); return c; } -#define tokadd_mbchar(c) parser_tokadd_mbchar(parser, (c)) - static inline int simple_re_meta(int c) { switch (c) { case '$': case '*': case '+': case '.': case '?': case '^': case '|': + case ')': case ']': case '}': case '>': return TRUE; default: return FALSE; @@ -5959,225 +7133,422 @@ simple_re_meta(int c) } static int -parser_tokadd_string(struct parser_params *parser, - int func, int term, int paren, long *nest, - rb_encoding **encp) +parser_update_heredoc_indent(struct parser_params *p, int c) +{ + if (p->heredoc_line_indent == -1) { + if (c == '\n') p->heredoc_line_indent = 0; + } + else { + if (c == ' ') { + p->heredoc_line_indent++; + return TRUE; + } + else if (c == '\t') { + int w = (p->heredoc_line_indent / TAB_WIDTH) + 1; + p->heredoc_line_indent = w * TAB_WIDTH; + return TRUE; + } + else if (c != '\n') { + if (p->heredoc_indent > p->heredoc_line_indent) { + p->heredoc_indent = p->heredoc_line_indent; + } + p->heredoc_line_indent = -1; + } + } + return FALSE; +} + +static void +parser_mixed_error(struct parser_params *p, rb_encoding *enc1, rb_encoding *enc2) +{ + YYLTYPE loc = RUBY_INIT_YYLLOC(); + const char *n1 = rb_enc_name(enc1), *n2 = rb_enc_name(enc2); + compile_error(p, "%s mixed within %s source", n1, n2); + parser_show_error_line(p, &loc); +} + +static void +parser_mixed_escape(struct parser_params *p, const char *beg, rb_encoding 
*enc1, rb_encoding *enc2) +{ + const char *pos = p->lex.pcur; + p->lex.pcur = beg; + parser_mixed_error(p, enc1, enc2); + p->lex.pcur = pos; +} + +static int +tokadd_string(struct parser_params *p, + int func, int term, int paren, long *nest, + rb_encoding **encp, rb_encoding **enc) { int c; - int has_nonascii = 0; - rb_encoding *enc = *encp; - char *errbuf = 0; - static const char mixed_msg[] = "%s mixed within %s source"; - -#define mixed_error(enc1, enc2) if (!errbuf) { \ - size_t len = sizeof(mixed_msg) - 4; \ - len += strlen(rb_enc_name(enc1)); \ - len += strlen(rb_enc_name(enc2)); \ - errbuf = ALLOCA_N(char, len); \ - snprintf(errbuf, len, mixed_msg, \ - rb_enc_name(enc1), \ - rb_enc_name(enc2)); \ - yyerror(errbuf); \ - } -#define mixed_escape(beg, enc1, enc2) do { \ - const char *pos = lex_p; \ - lex_p = (beg); \ - mixed_error((enc1), (enc2)); \ - lex_p = pos; \ - } while (0) - - while ((c = nextc()) != -1) { + bool erred = false; + +#define mixed_error(enc1, enc2) \ + (void)(erred || (parser_mixed_error(p, enc1, enc2), erred = true)) +#define mixed_escape(beg, enc1, enc2) \ + (void)(erred || (parser_mixed_escape(p, beg, enc1, enc2), erred = true)) + + while ((c = nextc(p)) != -1) { + if (p->heredoc_indent > 0) { + parser_update_heredoc_indent(p, c); + } + if (paren && c == paren) { ++*nest; } else if (c == term) { if (!nest || !*nest) { - pushback(c); + pushback(p, c); break; } --*nest; } - else if ((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) { - int c2 = *lex_p; + else if ((func & STR_FUNC_EXPAND) && c == '#' && p->lex.pcur < p->lex.pend) { + int c2 = *p->lex.pcur; if (c2 == '$' || c2 == '@' || c2 == '{') { - pushback(c); + pushback(p, c); break; } } else if (c == '\\') { - const char *beg = lex_p - 1; - c = nextc(); + literal_flush(p, p->lex.pcur - 1); + c = nextc(p); switch (c) { case '\n': if (func & STR_FUNC_QWORDS) break; - if (func & STR_FUNC_EXPAND) continue; - tokadd('\\'); + if (func & STR_FUNC_EXPAND) { + if (!(func & 
STR_FUNC_INDENT) || (p->heredoc_indent < 0)) + continue; + if (c == term) { + c = '\\'; + goto terminate; + } + } + tokadd(p, '\\'); break; case '\\': - if (func & STR_FUNC_ESCAPE) tokadd(c); + if (func & STR_FUNC_ESCAPE) tokadd(p, c); break; case 'u': if ((func & STR_FUNC_EXPAND) == 0) { - tokadd('\\'); + tokadd(p, '\\'); break; } - parser_tokadd_utf8(parser, &enc, 1, - func & STR_FUNC_SYMBOL, - func & STR_FUNC_REGEXP); - if (has_nonascii && enc != *encp) { - mixed_escape(beg, enc, *encp); - } + tokadd_utf8(p, enc, term, + func & STR_FUNC_SYMBOL, + func & STR_FUNC_REGEXP); continue; default: if (c == -1) return -1; if (!ISASCII(c)) { - if ((func & STR_FUNC_EXPAND) == 0) tokadd('\\'); + if ((func & STR_FUNC_EXPAND) == 0) tokadd(p, '\\'); goto non_ascii; } if (func & STR_FUNC_REGEXP) { + switch (c) { + case 'c': + case 'C': + case 'M': { + pushback(p, c); + c = read_escape(p, 0, enc); + + int i; + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", c); + for (i = 0; i < 4; i++) { + tokadd(p, escbuf[i]); + } + continue; + } + } + if (c == term && !simple_re_meta(c)) { - tokadd(c); + tokadd(p, c); continue; } - pushback(c); - if ((c = tokadd_escape(&enc)) < 0) + pushback(p, c); + if ((c = tokadd_escape(p, enc)) < 0) return -1; - if (has_nonascii && enc != *encp) { - mixed_escape(beg, enc, *encp); + if (*enc && *enc != *encp) { + mixed_escape(p->lex.ptok+2, *enc, *encp); } continue; } else if (func & STR_FUNC_EXPAND) { - pushback(c); - if (func & STR_FUNC_ESCAPE) tokadd('\\'); - c = read_escape(0, &enc); + pushback(p, c); + if (func & STR_FUNC_ESCAPE) tokadd(p, '\\'); + c = read_escape(p, 0, enc); } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { /* ignore backslashed spaces in %w */ } else if (c != term && !(paren && c == paren)) { - tokadd('\\'); - pushback(c); + tokadd(p, '\\'); + pushback(p, c); continue; } } } - else if (!parser_isascii()) { + else if (!parser_isascii(p)) { non_ascii: - has_nonascii = 1; - if (enc != *encp) { - mixed_error(enc, 
*encp); + if (!*enc) { + *enc = *encp; + } + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } - if (tokadd_mbchar(c) == -1) return -1; + if (tokadd_mbchar(p, c) == -1) return -1; continue; } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { - pushback(c); + pushback(p, c); break; } if (c & 0x80) { - has_nonascii = 1; - if (enc != *encp) { - mixed_error(enc, *encp); + if (!*enc) { + *enc = *encp; + } + else if (*enc != *encp) { + mixed_error(*enc, *encp); continue; } } - tokadd(c); + tokadd(p, c); } - *encp = enc; + terminate: + if (*enc) *encp = *enc; return c; } +static inline rb_strterm_t * +new_strterm(VALUE v1, VALUE v2, VALUE v3, VALUE v0) +{ + return (rb_strterm_t*)rb_imemo_new(imemo_parser_strterm, v1, v2, v3, v0); +} + +/* imemo_parser_strterm for literal */ #define NEW_STRTERM(func, term, paren) \ - rb_node_newnode(NODE_STRTERM, (func), (term) | ((paren) << (CHAR_BIT * 2)), 0) + new_strterm((VALUE)(func), (VALUE)(paren), (VALUE)(term), 0) -static int -parser_parse_string(struct parser_params *parser, NODE *quote) +#ifdef RIPPER +static void +flush_string_content(struct parser_params *p, rb_encoding *enc) +{ + VALUE content = yylval.val; + if (!ripper_is_node_yylval(content)) + content = ripper_new_yylval(p, 0, 0, content); + if (has_delayed_token(p)) { + ptrdiff_t len = p->lex.pcur - p->lex.ptok; + if (len > 0) { + rb_enc_str_buf_cat(p->delayed.token, p->lex.ptok, len, enc); + } + dispatch_delayed_token(p, tSTRING_CONTENT); + p->lex.ptok = p->lex.pcur; + RNODE(content)->nd_rval = yylval.val; + } + dispatch_scan_event(p, tSTRING_CONTENT); + if (yylval.val != content) + RNODE(content)->nd_rval = yylval.val; + yylval.val = content; +} +#else +#define flush_string_content(p, enc) ((void)(enc)) +#endif + +RUBY_FUNC_EXPORTED const unsigned int ruby_global_name_punct_bits[(0x7e - 0x20 + 31) / 32]; +/* this can be shared with ripper, since it's independent from struct + * parser_params. 
*/ +#ifndef RIPPER +#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0) +#define SPECIAL_PUNCT(idx) ( \ + BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \ + BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \ + BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \ + BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \ + BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \ + BIT('0', idx)) +const unsigned int ruby_global_name_punct_bits[] = { + SPECIAL_PUNCT(0), + SPECIAL_PUNCT(1), + SPECIAL_PUNCT(2), +}; +#undef BIT +#undef SPECIAL_PUNCT +#endif + +static enum yytokentype +parser_peek_variable_name(struct parser_params *p) +{ + int c; + const char *ptr = p->lex.pcur; + + if (ptr + 1 >= p->lex.pend) return 0; + c = *ptr++; + switch (c) { + case '$': + if ((c = *ptr) == '-') { + if (++ptr >= p->lex.pend) return 0; + c = *ptr; + } + else if (is_global_name_punct(c) || ISDIGIT(c)) { + return tSTRING_DVAR; + } + break; + case '@': + if ((c = *ptr) == '@') { + if (++ptr >= p->lex.pend) return 0; + c = *ptr; + } + break; + case '{': + p->lex.pcur = ptr; + p->command_start = TRUE; + return tSTRING_DBEG; + default: + return 0; + } + if (!ISASCII(c) || c == '_' || ISALPHA(c)) + return tSTRING_DVAR; + return 0; +} + +#define IS_ARG() IS_lex_state(EXPR_ARG_ANY) +#define IS_END() IS_lex_state(EXPR_END_ANY) +#define IS_BEG() (IS_lex_state(EXPR_BEG_ANY) || IS_lex_state_all(EXPR_ARG|EXPR_LABELED)) +#define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) +#define IS_LABEL_POSSIBLE() (\ + (IS_lex_state(EXPR_LABEL|EXPR_ENDFN) && !cmd_state) || \ + IS_ARG()) +#define IS_LABEL_SUFFIX(n) (peek_n(p, ':',(n)) && !peek_n(p, ':', (n)+1)) +#define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT) + +static inline enum yytokentype +parser_string_term(struct parser_params *p, int func) +{ + p->lex.strterm = 0; + if (func & STR_FUNC_REGEXP) { + set_yylval_num(regx_options(p)); + 
dispatch_scan_event(p, tREGEXP_END); + SET_LEX_STATE(EXPR_END); + return tREGEXP_END; + } + if ((func & STR_FUNC_LABEL) && IS_LABEL_SUFFIX(0)) { + nextc(p); + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); + return tLABEL_END; + } + SET_LEX_STATE(EXPR_END); + return tSTRING_END; +} + +static enum yytokentype +parse_string(struct parser_params *p, rb_strterm_literal_t *quote) { - int func = (int)quote->nd_func; - int term = nd_term(quote); - int paren = nd_paren(quote); + int func = (int)quote->u1.func; + int term = (int)quote->u3.term; + int paren = (int)quote->u2.paren; int c, space = 0; - rb_encoding *enc = parser->enc; + rb_encoding *enc = p->enc; + rb_encoding *base_enc = 0; + VALUE lit; - if (func == -1) return tSTRING_END; - c = nextc(); + if (func & STR_FUNC_TERM) { + if (func & STR_FUNC_QWORDS) nextc(p); /* delayed term */ + SET_LEX_STATE(EXPR_END); + p->lex.strterm = 0; + return func & STR_FUNC_REGEXP ? tREGEXP_END : tSTRING_END; + } + c = nextc(p); if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { - do {c = nextc();} while (ISSPACE(c)); + do {c = nextc(p);} while (ISSPACE(c)); space = 1; } - if (c == term && !quote->nd_nest) { + if (func & STR_FUNC_LIST) { + quote->u1.func &= ~STR_FUNC_LIST; + space = 1; + } + if (c == term && !quote->u0.nest) { if (func & STR_FUNC_QWORDS) { - quote->nd_func = -1; + quote->u1.func |= STR_FUNC_TERM; + pushback(p, c); /* dispatch the term at tSTRING_END */ + add_delayed_token(p, p->lex.ptok, p->lex.pcur); return ' '; } - if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; - set_yylval_num(regx_options()); - return tREGEXP_END; + return parser_string_term(p, func); } if (space) { - pushback(c); + pushback(p, c); + add_delayed_token(p, p->lex.ptok, p->lex.pcur); return ' '; } - newtok(); + newtok(p); if ((func & STR_FUNC_EXPAND) && c == '#') { - switch (c = nextc()) { - case '$': - case '@': - pushback(c); - return tSTRING_DVAR; - case '{': - command_start = TRUE; - return tSTRING_DBEG; - } - tokadd('#'); - } - pushback(c); - if 
(tokadd_string(func, term, paren, "e->nd_nest, - &enc) == -1) { - ruby_sourceline = nd_line(quote); - if (func & STR_FUNC_REGEXP) { - if (parser->eofp) - compile_error(PARSER_ARG "unterminated regexp meets end of file"); - return tREGEXP_END; - } - else { - if (parser->eofp) - compile_error(PARSER_ARG "unterminated string meets end of file"); - return tSTRING_END; + int t = parser_peek_variable_name(p); + if (t) return t; + tokadd(p, '#'); + c = nextc(p); + } + pushback(p, c); + if (tokadd_string(p, func, term, paren, "e->u0.nest, + &enc, &base_enc) == -1) { + if (p->eofp) { +#ifndef RIPPER +# define unterminated_literal(mesg) yyerror0(mesg) +#else +# define unterminated_literal(mesg) compile_error(p, mesg) +#endif + literal_flush(p, p->lex.pcur); + if (func & STR_FUNC_QWORDS) { + /* no content to add, bailing out here */ + unterminated_literal("unterminated list meets end of file"); + p->lex.strterm = 0; + return tSTRING_END; + } + if (func & STR_FUNC_REGEXP) { + unterminated_literal("unterminated regexp meets end of file"); + } + else { + unterminated_literal("unterminated string meets end of file"); + } + quote->u1.func |= STR_FUNC_TERM; } } - tokfix(); - set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); - -#ifdef RIPPER - if (!NIL_P(parser->delayed)) { - ptrdiff_t len = lex_p - parser->tokp; - if (len > 0) { - rb_enc_str_buf_cat(parser->delayed, parser->tokp, len, enc); - } - ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); - parser->tokp = lex_p; - } -#endif + tokfix(p); + lit = STR_NEW3(tok(p), toklen(p), enc, func); + set_yylval_str(lit); + flush_string_content(p, enc); return tSTRING_CONTENT; } -static int -parser_heredoc_identifier(struct parser_params *parser) +static enum yytokentype +heredoc_identifier(struct parser_params *p) { - int c = nextc(), term, func = 0; - long len; + /* + * term_len is length of `<<"END"` except `END`, + * in this case term_len is 4 (<, <, " and "). 
+ */ + long len, offset = p->lex.pcur - p->lex.pbeg; + int c = nextc(p), term, func = 0, quote = 0; + enum yytokentype token = tSTRING_BEG; + int indent = 0; if (c == '-') { - c = nextc(); + c = nextc(p); func = STR_FUNC_INDENT; + offset++; + } + else if (c == '~') { + c = nextc(p); + func = STR_FUNC_INDENT; + offset++; + indent = INT_MAX; } switch (c) { case '\'': @@ -6185,239 +7556,532 @@ parser_heredoc_identifier(struct parser_params *parser) case '"': func |= str_dquote; goto quoted; case '`': - func |= str_xquote; + token = tXSTRING_BEG; + func |= str_xquote; goto quoted; + quoted: - newtok(); - tokadd(func); + quote++; + offset++; term = c; - while ((c = nextc()) != -1 && c != term) { - if (tokadd_mbchar(c) == -1) return 0; - } - if (c == -1) { - compile_error(PARSER_ARG "unterminated here document identifier"); - return 0; + len = 0; + while ((c = nextc(p)) != term) { + if (c == -1 || c == '\r' || c == '\n') { + yyerror0("unterminated here document identifier"); + return -1; + } } break; default: - if (!parser_is_identchar()) { - pushback(c); + if (!parser_is_identchar(p)) { + pushback(p, c); if (func & STR_FUNC_INDENT) { - pushback('-'); + pushback(p, indent > 0 ? '~' : '-'); } return 0; } - newtok(); - term = '"'; - tokadd(func |= str_dquote); + func |= str_dquote; do { - if (tokadd_mbchar(c) == -1) return 0; - } while ((c = nextc()) != -1 && parser_is_identchar()); - pushback(c); + int n = parser_precise_mbclen(p, p->lex.pcur-1); + if (n < 0) return 0; + p->lex.pcur += --n; + } while ((c = nextc(p)) != -1 && parser_is_identchar(p)); + pushback(p, c); break; } - tokfix(); -#ifdef RIPPER - ripper_dispatch_scan_event(parser, tHEREDOC_BEG); -#endif - len = lex_p - lex_pbeg; - lex_goto_eol(parser); - lex_strterm = rb_node_newnode(NODE_HEREDOC, - STR_NEW(tok(), toklen()), /* nd_lit */ - len, /* nd_nth */ - lex_lastline); /* nd_orig */ - nd_set_line(lex_strterm, ruby_sourceline); - ripper_flush(parser); - return term == '`' ? 
tXSTRING_BEG : tSTRING_BEG; + len = p->lex.pcur - (p->lex.pbeg + offset) - quote; + if ((unsigned long)len >= HERETERM_LENGTH_MAX) + yyerror0("too long here document identifier"); + dispatch_scan_event(p, tHEREDOC_BEG); + lex_goto_eol(p); + + p->lex.strterm = new_strterm(0, 0, 0, p->lex.lastline); + p->lex.strterm->flags |= STRTERM_HEREDOC; + rb_strterm_heredoc_t *here = &p->lex.strterm->u.heredoc; + here->offset = offset; + here->sourceline = p->ruby_sourceline; + here->length = (int)len; + here->quote = quote; + here->func = func; + + token_flush(p); + p->heredoc_indent = indent; + p->heredoc_line_indent = 0; + return token; } static void -parser_heredoc_restore(struct parser_params *parser, NODE *here) +heredoc_restore(struct parser_params *p, rb_strterm_heredoc_t *here) { VALUE line; - line = here->nd_orig; - lex_lastline = line; - lex_pbeg = RSTRING_PTR(line); - lex_pend = lex_pbeg + RSTRING_LEN(line); - lex_p = lex_pbeg + here->nd_nth; - heredoc_end = ruby_sourceline; - ruby_sourceline = nd_line(here); - dispose_string(here->nd_lit); - rb_gc_force_recycle((VALUE)here); - ripper_flush(parser); + p->lex.strterm = 0; + line = here->lastline; + p->lex.lastline = line; + p->lex.pbeg = RSTRING_PTR(line); + p->lex.pend = p->lex.pbeg + RSTRING_LEN(line); + p->lex.pcur = p->lex.pbeg + here->offset + here->length + here->quote; + p->lex.ptok = p->lex.pbeg + here->offset - here->quote; + p->heredoc_end = p->ruby_sourceline; + p->ruby_sourceline = (int)here->sourceline; + if (p->eofp) p->lex.nextline = Qnil; + p->eofp = 0; +} + +static int +dedent_string(VALUE string, int width) +{ + char *str; + long len; + int i, col = 0; + + RSTRING_GETMEM(string, str, len); + for (i = 0; i < len && col < width; i++) { + if (str[i] == ' ') { + col++; + } + else if (str[i] == '\t') { + int n = TAB_WIDTH * (col / TAB_WIDTH + 1); + if (n > width) break; + col = n; + } + else { + break; + } + } + if (!i) return 0; + rb_str_modify(string); + str = RSTRING_PTR(string); + if 
(RSTRING_LEN(string) != len) + rb_fatal("literal string changed: %+"PRIsVALUE, string); + MEMMOVE(str, str + i, char, len - i); + rb_str_set_len(string, len - i); + return i; +} + +#ifndef RIPPER +static NODE * +heredoc_dedent(struct parser_params *p, NODE *root) +{ + NODE *node, *str_node, *prev_node; + int indent = p->heredoc_indent; + VALUE prev_lit = 0; + + if (indent <= 0) return root; + p->heredoc_indent = 0; + if (!root) return root; + + prev_node = node = str_node = root; + if (nd_type_p(root, NODE_LIST)) str_node = root->nd_head; + + while (str_node) { + VALUE lit = str_node->nd_lit; + if (str_node->flags & NODE_FL_NEWLINE) { + dedent_string(lit, indent); + } + if (!prev_lit) { + prev_lit = lit; + } + else if (!literal_concat0(p, prev_lit, lit)) { + return 0; + } + else { + NODE *end = node->nd_end; + node = prev_node->nd_next = node->nd_next; + if (!node) { + if (nd_type_p(prev_node, NODE_DSTR)) + nd_set_type(prev_node, NODE_STR); + break; + } + node->nd_end = end; + goto next_str; + } + + str_node = 0; + while ((node = (prev_node = node)->nd_next) != 0) { + next_str: + if (!nd_type_p(node, NODE_LIST)) break; + if ((str_node = node->nd_head) != 0) { + enum node_type type = nd_type(str_node); + if (type == NODE_STR || type == NODE_DSTR) break; + prev_lit = 0; + str_node = 0; + } + } + } + return root; +} +#else /* RIPPER */ +static VALUE +heredoc_dedent(struct parser_params *p, VALUE array) +{ + int indent = p->heredoc_indent; + + if (indent <= 0) return array; + p->heredoc_indent = 0; + dispatch2(heredoc_dedent, array, INT2NUM(indent)); + return array; +} + +/* + * call-seq: + * Ripper.dedent_string(input, width) -> Integer + * + * USE OF RIPPER LIBRARY ONLY. + * + * Strips up to +width+ leading whitespaces from +input+, + * and returns the stripped column width. 
+ */ +static VALUE +parser_dedent_string(VALUE self, VALUE input, VALUE width) +{ + int wid, col; + + StringValue(input); + wid = NUM2UINT(width); + col = dedent_string(input, wid); + return INT2NUM(col); } +#endif static int -parser_whole_match_p(struct parser_params *parser, - const char *eos, long len, int indent) +whole_match_p(struct parser_params *p, const char *eos, long len, int indent) { - const char *p = lex_pbeg; + const char *ptr = p->lex.pbeg; long n; if (indent) { - while (*p && ISSPACE(*p)) p++; + while (*ptr && ISSPACE(*ptr)) ptr++; + } + n = p->lex.pend - (ptr + len); + if (n < 0) return FALSE; + if (n > 0 && ptr[len] != '\n') { + if (ptr[len] != '\r') return FALSE; + if (n <= 1 || ptr[len+1] != '\n') return FALSE; + } + return strncmp(eos, ptr, len) == 0; +} + +static int +word_match_p(struct parser_params *p, const char *word, long len) +{ + if (strncmp(p->lex.pcur, word, len)) return 0; + if (p->lex.pcur + len == p->lex.pend) return 1; + int c = (unsigned char)p->lex.pcur[len]; + if (ISSPACE(c)) return 1; + switch (c) { + case '\0': case '\004': case '\032': return 1; + } + return 0; +} + +#define NUM_SUFFIX_R (1<<0) +#define NUM_SUFFIX_I (1<<1) +#define NUM_SUFFIX_ALL 3 + +static int +number_literal_suffix(struct parser_params *p, int mask) +{ + int c, result = 0; + const char *lastp = p->lex.pcur; + + while ((c = nextc(p)) != -1) { + if ((mask & NUM_SUFFIX_I) && c == 'i') { + result |= (mask & NUM_SUFFIX_I); + mask &= ~NUM_SUFFIX_I; + /* r after i, rational of complex is disallowed */ + mask &= ~NUM_SUFFIX_R; + continue; + } + if ((mask & NUM_SUFFIX_R) && c == 'r') { + result |= (mask & NUM_SUFFIX_R); + mask &= ~NUM_SUFFIX_R; + continue; + } + if (!ISASCII(c) || ISALPHA(c) || c == '_') { + p->lex.pcur = lastp; + literal_flush(p, p->lex.pcur); + return 0; + } + pushback(p, c); + break; + } + return result; +} + +static enum yytokentype +set_number_literal(struct parser_params *p, VALUE v, + enum yytokentype type, int suffix) +{ + if (suffix & 
NUM_SUFFIX_I) { + v = rb_complex_raw(INT2FIX(0), v); + type = tIMAGINARY; } - n = lex_pend - (p + len); - if (n < 0 || (n > 0 && p[len] != '\n' && p[len] != '\r')) return FALSE; - return strncmp(eos, p, len) == 0; + set_yylval_literal(v); + SET_LEX_STATE(EXPR_END); + return type; +} + +static enum yytokentype +set_integer_literal(struct parser_params *p, VALUE v, int suffix) +{ + enum yytokentype type = tINTEGER; + if (suffix & NUM_SUFFIX_R) { + v = rb_rational_raw1(v); + type = tRATIONAL; + } + return set_number_literal(p, v, type, suffix); } #ifdef RIPPER static void -ripper_dispatch_heredoc_end(struct parser_params *parser) +dispatch_heredoc_end(struct parser_params *p) { - if (!NIL_P(parser->delayed)) - ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); - lex_goto_eol(parser); - ripper_dispatch_ignored_scan_event(parser, tHEREDOC_END); + VALUE str; + if (has_delayed_token(p)) + dispatch_delayed_token(p, tSTRING_CONTENT); + str = STR_NEW(p->lex.ptok, p->lex.pend - p->lex.ptok); + ripper_dispatch1(p, ripper_token2eventid(tHEREDOC_END), str); + lex_goto_eol(p); + token_flush(p); } -#define dispatch_heredoc_end() ripper_dispatch_heredoc_end(parser) #else -#define dispatch_heredoc_end() ((void)0) +#define dispatch_heredoc_end(p) ((void)0) #endif -static int -parser_here_document(struct parser_params *parser, NODE *here) +static enum yytokentype +here_document(struct parser_params *p, rb_strterm_heredoc_t *here) { int c, func, indent = 0; - const char *eos, *p, *pend; + const char *eos, *ptr, *ptr_end; long len; VALUE str = 0; - rb_encoding *enc = parser->enc; + rb_encoding *enc = p->enc; + rb_encoding *base_enc = 0; + int bol; - eos = RSTRING_PTR(here->nd_lit); - len = RSTRING_LEN(here->nd_lit) - 1; - indent = (func = *eos++) & STR_FUNC_INDENT; + eos = RSTRING_PTR(here->lastline) + here->offset; + len = here->length; + indent = (func = here->func) & STR_FUNC_INDENT; - if ((c = nextc()) == -1) { + if ((c = nextc(p)) == -1) { error: - compile_error(PARSER_ARG 
"can't find string \"%s\" anywhere before EOF", eos); #ifdef RIPPER - if (NIL_P(parser->delayed)) { - ripper_dispatch_scan_event(parser, tSTRING_CONTENT); + if (!has_delayed_token(p)) { + dispatch_scan_event(p, tSTRING_CONTENT); } else { - if (str || - ((len = lex_p - parser->tokp) > 0 && - (str = STR_NEW3(parser->tokp, len, enc, func), 1))) { - rb_str_append(parser->delayed, str); + if ((len = p->lex.pcur - p->lex.ptok) > 0) { + if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) { + int cr = ENC_CODERANGE_UNKNOWN; + rb_str_coderange_scan_restartable(p->lex.ptok, p->lex.pcur, enc, &cr); + if (cr != ENC_CODERANGE_7BIT && + p->enc == rb_usascii_encoding() && + enc != rb_utf8_encoding()) { + enc = rb_ascii8bit_encoding(); + } + } + rb_enc_str_buf_cat(p->delayed.token, p->lex.ptok, len, enc); } - ripper_dispatch_delayed_token(parser, tSTRING_CONTENT); + dispatch_delayed_token(p, tSTRING_CONTENT); } - lex_goto_eol(parser); + lex_goto_eol(p); #endif - restore: - heredoc_restore(lex_strterm); - lex_strterm = 0; - return 0; + heredoc_restore(p, &p->lex.strterm->u.heredoc); + compile_error(p, "can't find string \"%.*s\" anywhere before EOF", + (int)len, eos); + token_flush(p); + p->lex.strterm = 0; + SET_LEX_STATE(EXPR_END); + return tSTRING_END; + } + bol = was_bol(p); + if (!bol) { + /* not beginning of line, cannot be the terminator */ } - if (was_bol() && whole_match_p(eos, len, indent)) { - dispatch_heredoc_end(); - heredoc_restore(lex_strterm); + else if (p->heredoc_line_indent == -1) { + /* `heredoc_line_indent == -1` means + * - "after an interpolation in the same line", or + * - "in a continuing line" + */ + p->heredoc_line_indent = 0; + } + else if (whole_match_p(p, eos, len, indent)) { + dispatch_heredoc_end(p); + restore: + heredoc_restore(p, &p->lex.strterm->u.heredoc); + token_flush(p); + p->lex.strterm = 0; + SET_LEX_STATE(EXPR_END); return tSTRING_END; } if (!(func & STR_FUNC_EXPAND)) { do { - p = RSTRING_PTR(lex_lastline); - pend = lex_pend; - if 
(pend > p) { - switch (pend[-1]) { + ptr = RSTRING_PTR(p->lex.lastline); + ptr_end = p->lex.pend; + if (ptr_end > ptr) { + switch (ptr_end[-1]) { case '\n': - if (--pend == p || pend[-1] != '\r') { - pend++; + if (--ptr_end == ptr || ptr_end[-1] != '\r') { + ptr_end++; break; } case '\r': - --pend; + --ptr_end; } } + + if (p->heredoc_indent > 0) { + long i = 0; + while (ptr + i < ptr_end && parser_update_heredoc_indent(p, ptr[i])) + i++; + p->heredoc_line_indent = 0; + } + if (str) - rb_str_cat(str, p, pend - p); + rb_str_cat(str, ptr, ptr_end - ptr); else - str = STR_NEW(p, pend - p); - if (pend < lex_pend) rb_str_cat(str, "\n", 1); - lex_goto_eol(parser); - if (nextc() == -1) { - if (str) dispose_string(str); + str = STR_NEW(ptr, ptr_end - ptr); + if (ptr_end < p->lex.pend) rb_str_cat(str, "\n", 1); + lex_goto_eol(p); + if (p->heredoc_indent > 0) { + goto flush_str; + } + if (nextc(p) == -1) { + if (str) { + str = 0; + } goto error; } - } while (!whole_match_p(eos, len, indent)); + } while (!whole_match_p(p, eos, len, indent)); } else { /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ - newtok(); + newtok(p); if (c == '#') { - switch (c = nextc()) { - case '$': - case '@': - pushback(c); - return tSTRING_DVAR; - case '{': - command_start = TRUE; - return tSTRING_DBEG; + int t = parser_peek_variable_name(p); + if (p->heredoc_line_indent != -1) { + if (p->heredoc_indent > p->heredoc_line_indent) { + p->heredoc_indent = p->heredoc_line_indent; + } + p->heredoc_line_indent = -1; } - tokadd('#'); + if (t) return t; + tokadd(p, '#'); + c = nextc(p); } do { - pushback(c); - if ((c = tokadd_string(func, '\n', 0, NULL, &enc)) == -1) { - if (parser->eofp) goto error; + pushback(p, c); + enc = p->enc; + if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc, &base_enc)) == -1) { + if (p->eofp) goto error; goto restore; } if (c != '\n') { - set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); + if (c == '\\') p->heredoc_line_indent = -1; + flush: + str = STR_NEW3(tok(p), 
toklen(p), enc, func); + flush_str: + set_yylval_str(str); +#ifndef RIPPER + if (bol) yylval.node->flags |= NODE_FL_NEWLINE; +#endif + flush_string_content(p, enc); return tSTRING_CONTENT; } - tokadd(nextc()); + tokadd(p, nextc(p)); + if (p->heredoc_indent > 0) { + lex_goto_eol(p); + goto flush; + } /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ - if ((c = nextc()) == -1) goto error; - } while (!whole_match_p(eos, len, indent)); - str = STR_NEW3(tok(), toklen(), enc, func); + if ((c = nextc(p)) == -1) goto error; + } while (!whole_match_p(p, eos, len, indent)); + str = STR_NEW3(tok(p), toklen(p), enc, func); } - dispatch_heredoc_end(); - heredoc_restore(lex_strterm); - lex_strterm = NEW_STRTERM(-1, 0, 0); + dispatch_heredoc_end(p); +#ifdef RIPPER + str = ripper_new_yylval(p, ripper_token2eventid(tSTRING_CONTENT), + yylval.val, str); +#endif + heredoc_restore(p, &p->lex.strterm->u.heredoc); + token_flush(p); + p->lex.strterm = NEW_STRTERM(func | STR_FUNC_TERM, 0, 0); set_yylval_str(str); +#ifndef RIPPER + if (bol) yylval.node->flags |= NODE_FL_NEWLINE; +#endif return tSTRING_CONTENT; } #include "lex.c" -static void -arg_ambiguous_gen(struct parser_params *parser) +static int +arg_ambiguous(struct parser_params *p, char c) { #ifndef RIPPER - rb_warning0("ambiguous first argument; put parentheses or even spaces"); + if (c == '/') { + rb_warning1("ambiguity between regexp and two divisions: wrap regexp in parentheses or add a space after `%c' operator", WARN_I(c)); + } + else { + rb_warning1("ambiguous first argument; put parentheses or a space even after `%c' operator", WARN_I(c)); + } #else - dispatch0(arg_ambiguous); + dispatch1(arg_ambiguous, rb_usascii_str_new(&c, 1)); #endif + return TRUE; } -#define arg_ambiguous() (arg_ambiguous_gen(parser), 1) static ID -formal_argument_gen(struct parser_params *parser, ID lhs) +#ifndef RIPPER +formal_argument(struct parser_params *p, ID lhs) +#else +formal_argument(struct parser_params *p, VALUE lhs) +#endif { + ID id 
= get_id(lhs); + + switch (id_type(id)) { + case ID_LOCAL: + break; #ifndef RIPPER - if (!is_local_id(lhs)) - yyerror("formal argument must be local variable"); +# define ERR(mesg) yyerror0(mesg) +#else +# define ERR(mesg) (dispatch2(param_error, WARN_S(mesg), lhs), ripper_error(p)) #endif - shadowing_lvar(lhs); + case ID_CONST: + ERR("formal argument cannot be a constant"); + return 0; + case ID_INSTANCE: + ERR("formal argument cannot be an instance variable"); + return 0; + case ID_GLOBAL: + ERR("formal argument cannot be a global variable"); + return 0; + case ID_CLASS: + ERR("formal argument cannot be a class variable"); + return 0; + default: + ERR("formal argument must be local variable"); + return 0; +#undef ERR + } + shadowing_lvar(p, id); return lhs; } static int -lvar_defined_gen(struct parser_params *parser, ID id) +lvar_defined(struct parser_params *p, ID id) { - return (dyna_in_block() && dvar_defined_get(id)) || local_id(id); + return (dyna_in_block(p) && dvar_defined(p, id)) || local_id(p, id); } /* emacsen -*- hack */ static long -parser_encode_length(struct parser_params *parser, const char *name, long len) +parser_encode_length(struct parser_params *p, const char *name, long len) { long nlen; @@ -6437,7 +8101,7 @@ parser_encode_length(struct parser_params *parser, const char *name, long len) } static void -parser_set_encode(struct parser_params *parser, const char *name) +parser_set_encode(struct parser_params *p, const char *name) { int idx = rb_enc_find_index(name); rb_encoding *enc; @@ -6448,7 +8112,7 @@ parser_set_encode(struct parser_params *parser, const char *name) error: excargs[0] = rb_eArgError; excargs[2] = rb_make_backtrace(); - rb_ary_unshift(excargs[2], rb_sprintf("%s:%d", ruby_sourcefile, ruby_sourceline)); + rb_ary_unshift(excargs[2], rb_sprintf("%"PRIsVALUE":%d", p->ruby_sourcefile_string, p->ruby_sourceline)); rb_exc_raise(rb_make_exception(3, excargs)); } enc = rb_enc_from_index(idx); @@ -6456,65 +8120,141 @@ 
parser_set_encode(struct parser_params *parser, const char *name) excargs[1] = rb_sprintf("%s is not ASCII compatible", rb_enc_name(enc)); goto error; } - parser->enc = enc; + p->enc = enc; #ifndef RIPPER - if (ruby_debug_lines) { - long i, n = RARRAY_LEN(ruby_debug_lines); - const VALUE *p = RARRAY_PTR(ruby_debug_lines); + if (p->debug_lines) { + VALUE lines = p->debug_lines; + long i, n = RARRAY_LEN(lines); for (i = 0; i < n; ++i) { - rb_enc_associate_index(*p, idx); + rb_enc_associate_index(RARRAY_AREF(lines, i), idx); } } #endif } static int -comment_at_top(struct parser_params *parser) +comment_at_top(struct parser_params *p) { - const char *p = lex_pbeg, *pend = lex_p - 1; - if (parser->line_count != (parser->has_shebang ? 2 : 1)) return 0; - while (p < pend) { - if (!ISSPACE(*p)) return 0; - p++; + const char *ptr = p->lex.pbeg, *ptr_end = p->lex.pcur - 1; + if (p->line_count != (p->has_shebang ? 2 : 1)) return 0; + while (ptr < ptr_end) { + if (!ISSPACE(*ptr)) return 0; + ptr++; } return 1; } -#ifndef RIPPER -typedef long (*rb_magic_comment_length_t)(struct parser_params *parser, const char *name, long len); -typedef void (*rb_magic_comment_setter_t)(struct parser_params *parser, const char *name, const char *val); +typedef long (*rb_magic_comment_length_t)(struct parser_params *p, const char *name, long len); +typedef void (*rb_magic_comment_setter_t)(struct parser_params *p, const char *name, const char *val); + +static int parser_invalid_pragma_value(struct parser_params *p, const char *name, const char *val); + +static void +magic_comment_encoding(struct parser_params *p, const char *name, const char *val) +{ + if (!comment_at_top(p)) { + return; + } + parser_set_encode(p, val); +} + +static int +parser_get_bool(struct parser_params *p, const char *name, const char *val) +{ + switch (*val) { + case 't': case 'T': + if (STRCASECMP(val, "true") == 0) { + return TRUE; + } + break; + case 'f': case 'F': + if (STRCASECMP(val, "false") == 0) { + return FALSE; 
+ } + break; + } + return parser_invalid_pragma_value(p, name, val); +} + +static int +parser_invalid_pragma_value(struct parser_params *p, const char *name, const char *val) +{ + rb_warning2("invalid value for %s: %s", WARN_S(name), WARN_S(val)); + return -1; +} static void -magic_comment_encoding(struct parser_params *parser, const char *name, const char *val) +parser_set_token_info(struct parser_params *p, const char *name, const char *val) { - if (!comment_at_top(parser)) { + int b = parser_get_bool(p, name, val); + if (b >= 0) p->token_info_enabled = b; +} + +static void +parser_set_compile_option_flag(struct parser_params *p, const char *name, const char *val) +{ + int b; + + if (p->token_seen) { + rb_warning1("`%s' is ignored after any tokens", WARN_S(name)); return; } - parser_set_encode(parser, val); + + b = parser_get_bool(p, name, val); + if (b < 0) return; + + if (!p->compile_option) + p->compile_option = rb_obj_hide(rb_ident_hash_new()); + rb_hash_aset(p->compile_option, ID2SYM(rb_intern(name)), + RBOOL(b)); } static void -parser_set_token_info(struct parser_params *parser, const char *name, const char *val) +parser_set_shareable_constant_value(struct parser_params *p, const char *name, const char *val) { - int *p = &parser->parser_token_info_enabled; + for (const char *s = p->lex.pbeg, *e = p->lex.pcur; s < e; ++s) { + if (*s == ' ' || *s == '\t') continue; + if (*s == '#') break; + rb_warning1("`%s' is ignored unless in comment-only line", WARN_S(name)); + return; + } switch (*val) { - case 't': case 'T': - if (strcasecmp(val, "true") == 0) { - *p = TRUE; + case 'n': case 'N': + if (STRCASECMP(val, "none") == 0) { + p->ctxt.shareable_constant_value = shareable_none; return; } break; - case 'f': case 'F': - if (strcasecmp(val, "false") == 0) { - *p = FALSE; + case 'l': case 'L': + if (STRCASECMP(val, "literal") == 0) { + p->ctxt.shareable_constant_value = shareable_literal; + return; + } + break; + case 'e': case 'E': + if (STRCASECMP(val, 
"experimental_copy") == 0) { + p->ctxt.shareable_constant_value = shareable_copy; + return; + } + if (STRCASECMP(val, "experimental_everything") == 0) { + p->ctxt.shareable_constant_value = shareable_everything; return; } break; } - rb_compile_warning(ruby_sourcefile, ruby_sourceline, "invalid value for %s: %s", name, val); + parser_invalid_pragma_value(p, name, val); } +# if WARN_PAST_SCOPE +static void +parser_set_past_scope(struct parser_params *p, const char *name, const char *val) +{ + int b = parser_get_bool(p, name, val); + if (b >= 0) p->past_scope_enabled = b; +} +# endif + struct magic_comment { const char *name; rb_magic_comment_setter_t func; @@ -6524,9 +8264,13 @@ struct magic_comment { static const struct magic_comment magic_comments[] = { {"coding", magic_comment_encoding, parser_encode_length}, {"encoding", magic_comment_encoding, parser_encode_length}, + {"frozen_string_literal", parser_set_compile_option_flag}, + {"shareable_constant_value", parser_set_shareable_constant_value}, {"warn_indent", parser_set_token_info}, +# if WARN_PAST_SCOPE + {"warn_past_scope", parser_set_past_scope}, +# endif }; -#endif static const char * magic_comment_marker(const char *str, long len) @@ -6562,8 +8306,9 @@ magic_comment_marker(const char *str, long len) } static int -parser_magic_comment(struct parser_params *parser, const char *str, long len) +parser_magic_comment(struct parser_params *p, const char *str, long len) { + int indicator = 0; VALUE name = 0, val = 0; const char *beg, *end, *vbeg, *vend; #define str_copy(_s, _p, _n) ((_s) \ @@ -6572,16 +8317,17 @@ parser_magic_comment(struct parser_params *parser, const char *str, long len) : (void)((_s) = STR_NEW((_p), (_n)))) if (len <= 7) return FALSE; - if (!(beg = magic_comment_marker(str, len))) return FALSE; - if (!(end = magic_comment_marker(beg, str + len - beg))) return FALSE; - str = beg; - len = end - beg - 3; + if (!!(beg = magic_comment_marker(str, len))) { + if (!(end = magic_comment_marker(beg, str + 
len - beg))) + return FALSE; + indicator = TRUE; + str = beg; + len = end - beg - 3; + } /* %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*" */ while (len > 0) { -#ifndef RIPPER - const struct magic_comment *p = magic_comments; -#endif + const struct magic_comment *mc = magic_comments; char *s; int i; long n = 0; @@ -6605,7 +8351,10 @@ parser_magic_comment(struct parser_params *parser, const char *str, long len) } for (end = str; len > 0 && ISSPACE(*str); str++, --len); if (!len) break; - if (*str != ':') continue; + if (*str != ':') { + if (!indicator) return FALSE; + continue; + } do str++; while (--len > 0 && ISSPACE(*str)); if (!len) break; @@ -6626,7 +8375,13 @@ parser_magic_comment(struct parser_params *parser, const char *str, long len) for (vbeg = str; len > 0 && *str != '"' && *str != ';' && !ISSPACE(*str); --len, str++); vend = str; } - while (len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++; + if (indicator) { + while (len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++; + } + else { + while (len > 0 && (ISSPACE(*str))) --len, str++; + if (len) return FALSE; + } n = end - beg; str_copy(name, beg, n); @@ -6634,19 +8389,18 @@ parser_magic_comment(struct parser_params *parser, const char *str, long len) for (i = 0; i < n; ++i) { if (s[i] == '-') s[i] = '_'; } -#ifndef RIPPER do { - if (STRNCASECMP(p->name, s, n) == 0) { + if (STRNCASECMP(mc->name, s, n) == 0 && !mc->name[n]) { n = vend - vbeg; - if (p->length) { - n = (*p->length)(parser, vbeg, n); + if (mc->length) { + n = (*mc->length)(p, vbeg, n); } str_copy(val, vbeg, n); - (*p->func)(parser, s, RSTRING_PTR(val)); + (*mc->func)(p, mc->name, RSTRING_PTR(val)); break; } - } while (++p < magic_comments + numberof(magic_comments)); -#else + } while (++mc < magic_comments + numberof(magic_comments)); +#ifdef RIPPER str_copy(val, vbeg, vend - vbeg); dispatch2(magic_comment, name, val); #endif @@ -6656,7 +8410,7 @@ parser_magic_comment(struct parser_params *parser, const char 
*str, long len) } static void -set_file_encoding(struct parser_params *parser, const char *str, const char *send) +set_file_encoding(struct parser_params *p, const char *str, const char *send) { int sep = 0; const char *beg = str; @@ -6681,6 +8435,7 @@ set_file_encoding(struct parser_params *parser, const char *str, const char *sen continue; } if (STRNCASECMP(str-6, "coding", 6) == 0) break; + sep = 0; } for (;;) { do { @@ -6693,95 +8448,835 @@ set_file_encoding(struct parser_params *parser, const char *str, const char *sen } beg = str; while ((*str == '-' || *str == '_' || ISALNUM(*str)) && ++str < send); - s = rb_str_new(beg, parser_encode_length(parser, beg, str - beg)); - parser_set_encode(parser, RSTRING_PTR(s)); + s = rb_str_new(beg, parser_encode_length(p, beg, str - beg)); + parser_set_encode(p, RSTRING_PTR(s)); rb_str_resize(s, 0); } static void -parser_prepare(struct parser_params *parser) +parser_prepare(struct parser_params *p) { - int c = nextc(); + int c = nextc0(p, FALSE); + p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose); switch (c) { case '#': - if (peek('!')) parser->has_shebang = 1; + if (peek(p, '!')) p->has_shebang = 1; break; case 0xef: /* UTF-8 BOM marker */ - if (lex_pend - lex_p >= 2 && - (unsigned char)lex_p[0] == 0xbb && - (unsigned char)lex_p[1] == 0xbf) { - parser->enc = rb_utf8_encoding(); - lex_p += 2; - lex_pbeg = lex_p; + if (p->lex.pend - p->lex.pcur >= 2 && + (unsigned char)p->lex.pcur[0] == 0xbb && + (unsigned char)p->lex.pcur[1] == 0xbf) { + p->enc = rb_utf8_encoding(); + p->lex.pcur += 2; +#ifndef RIPPER + if (p->debug_lines) { + rb_enc_associate(p->lex.lastline, p->enc); + } +#endif + p->lex.pbeg = p->lex.pcur; return; } break; case EOF: return; } - pushback(c); - parser->enc = rb_enc_get(lex_lastline); + pushback(p, c); + p->enc = rb_enc_get(p->lex.lastline); } -#define IS_ARG() IS_lex_state(EXPR_ARG_ANY) -#define IS_END() IS_lex_state(EXPR_END_ANY) -#define IS_BEG() IS_lex_state(EXPR_BEG_ANY) -#define 
IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) -#define IS_LABEL_POSSIBLE() ((IS_lex_state(EXPR_BEG) && !cmd_state) || IS_ARG()) -#define IS_LABEL_SUFFIX(n) (peek_n(':',(n)) && !peek_n(':', (n)+1)) -#define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT) - #ifndef RIPPER -#define ambiguous_operator(op, syn) ( \ - rb_warning0("`"op"' after local variable is interpreted as binary operator"), \ +#define ambiguous_operator(tok, op, syn) ( \ + rb_warning0("`"op"' after local variable or literal is interpreted as binary operator"), \ rb_warning0("even though it seems like "syn"")) #else -#define ambiguous_operator(op, syn) dispatch2(operator_ambiguous, ripper_intern(op), rb_str_new_cstr(syn)) +#define ambiguous_operator(tok, op, syn) \ + dispatch2(operator_ambiguous, TOKEN2VAL(tok), rb_str_new_cstr(syn)) #endif -#define warn_balanced(op, syn) ((void) \ - (last_state != EXPR_CLASS && last_state != EXPR_DOT && \ - last_state != EXPR_FNAME && last_state != EXPR_ENDFN && \ - last_state != EXPR_ENDARG && \ +#define warn_balanced(tok, op, syn) ((void) \ + (!IS_lex_state_for(last_state, EXPR_CLASS|EXPR_DOT|EXPR_FNAME|EXPR_ENDFN) && \ space_seen && !ISSPACE(c) && \ - (ambiguous_operator(op, syn), 0))) + (ambiguous_operator(tok, op, syn), 0)), \ + (enum yytokentype)(tok)) + +static VALUE +parse_rational(struct parser_params *p, char *str, int len, int seen_point) +{ + VALUE v; + char *point = &str[seen_point]; + size_t fraclen = len-seen_point-1; + memmove(point, point+1, fraclen+1); + v = rb_cstr_to_inum(str, 10, FALSE); + return rb_rational_new(v, rb_int_positive_pow(10, fraclen)); +} + +static enum yytokentype +no_digits(struct parser_params *p) +{ + yyerror0("numeric literal without digits"); + if (peek(p, '_')) nextc(p); + /* dummy 0, for tUMINUS_NUM at numeric */ + return set_integer_literal(p, INT2FIX(0), 0); +} + +static enum yytokentype +parse_numeric(struct parser_params *p, int c) +{ + int is_float, seen_point, seen_e, nondigit; + int suffix; + + is_float 
= seen_point = seen_e = nondigit = 0; + SET_LEX_STATE(EXPR_END); + newtok(p); + if (c == '-' || c == '+') { + tokadd(p, c); + c = nextc(p); + } + if (c == '0') { + int start = toklen(p); + c = nextc(p); + if (c == 'x' || c == 'X') { + /* hexadecimal */ + c = nextc(p); + if (c != -1 && ISXDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISXDIGIT(c)) break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + return no_digits(p); + } + else if (nondigit) goto trailing_uc; + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, rb_cstr_to_inum(tok(p), 16, FALSE), suffix); + } + if (c == 'b' || c == 'B') { + /* binary */ + c = nextc(p); + if (c == '0' || c == '1') { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c != '0' && c != '1') break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + return no_digits(p); + } + else if (nondigit) goto trailing_uc; + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, rb_cstr_to_inum(tok(p), 2, FALSE), suffix); + } + if (c == 'd' || c == 'D') { + /* decimal */ + c = nextc(p); + if (c != -1 && ISDIGIT(c)) { + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (!ISDIGIT(c)) break; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + } + pushback(p, c); + tokfix(p); + if (toklen(p) == start) { + return no_digits(p); + } + else if (nondigit) goto trailing_uc; + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, rb_cstr_to_inum(tok(p), 10, FALSE), suffix); + } + if (c == '_') { + /* 0_0 */ + goto octal_number; + } + if (c == 'o' || c == 'O') { + /* prefixed octal */ + c = nextc(p); + if (c == -1 || c == '_' || !ISDIGIT(c)) { + return no_digits(p); + } + } + 
if (c >= '0' && c <= '7') { + /* octal */ + octal_number: + do { + if (c == '_') { + if (nondigit) break; + nondigit = c; + continue; + } + if (c < '0' || c > '9') break; + if (c > '7') goto invalid_octal; + nondigit = 0; + tokadd(p, c); + } while ((c = nextc(p)) != -1); + if (toklen(p) > start) { + pushback(p, c); + tokfix(p); + if (nondigit) goto trailing_uc; + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, rb_cstr_to_inum(tok(p), 8, FALSE), suffix); + } + if (nondigit) { + pushback(p, c); + goto trailing_uc; + } + } + if (c > '7' && c <= '9') { + invalid_octal: + yyerror0("Invalid octal digit"); + } + else if (c == '.' || c == 'e' || c == 'E') { + tokadd(p, '0'); + } + else { + pushback(p, c); + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, INT2FIX(0), suffix); + } + } + + for (;;) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + nondigit = 0; + tokadd(p, c); + break; + + case '.': + if (nondigit) goto trailing_uc; + if (seen_point || seen_e) { + goto decode_num; + } + else { + int c0 = nextc(p); + if (c0 == -1 || !ISDIGIT(c0)) { + pushback(p, c0); + goto decode_num; + } + c = c0; + } + seen_point = toklen(p); + tokadd(p, '.'); + tokadd(p, c); + is_float++; + nondigit = 0; + break; + + case 'e': + case 'E': + if (nondigit) { + pushback(p, c); + c = nondigit; + goto decode_num; + } + if (seen_e) { + goto decode_num; + } + nondigit = c; + c = nextc(p); + if (c != '-' && c != '+' && !ISDIGIT(c)) { + pushback(p, c); + nondigit = 0; + goto decode_num; + } + tokadd(p, nondigit); + seen_e++; + is_float++; + tokadd(p, c); + nondigit = (c == '-' || c == '+') ? 
c : 0; + break; + + case '_': /* `_' in number just ignored */ + if (nondigit) goto decode_num; + nondigit = c; + break; + + default: + goto decode_num; + } + c = nextc(p); + } + + decode_num: + pushback(p, c); + if (nondigit) { + trailing_uc: + literal_flush(p, p->lex.pcur - 1); + YYLTYPE loc = RUBY_INIT_YYLLOC(); + compile_error(p, "trailing `%c' in number", nondigit); + parser_show_error_line(p, &loc); + } + tokfix(p); + if (is_float) { + enum yytokentype type = tFLOAT; + VALUE v; + + suffix = number_literal_suffix(p, seen_e ? NUM_SUFFIX_I : NUM_SUFFIX_ALL); + if (suffix & NUM_SUFFIX_R) { + type = tRATIONAL; + v = parse_rational(p, tok(p), toklen(p), seen_point); + } + else { + double d = strtod(tok(p), 0); + if (errno == ERANGE) { + rb_warning1("Float %s out of range", WARN_S(tok(p))); + errno = 0; + } + v = DBL2NUM(d); + } + return set_number_literal(p, v, type, suffix); + } + suffix = number_literal_suffix(p, NUM_SUFFIX_ALL); + return set_integer_literal(p, rb_cstr_to_inum(tok(p), 10, FALSE), suffix); +} + +static enum yytokentype +parse_qmark(struct parser_params *p, int space_seen) +{ + rb_encoding *enc; + register int c; + VALUE lit; + + if (IS_END()) { + SET_LEX_STATE(EXPR_VALUE); + return '?'; + } + c = nextc(p); + if (c == -1) { + compile_error(p, "incomplete character syntax"); + return 0; + } + if (rb_enc_isspace(c, p->enc)) { + if (!IS_ARG()) { + int c2 = escaped_control_code(c); + if (c2) { + WARN_SPACE_CHAR(c2, "?"); + } + } + ternary: + pushback(p, c); + SET_LEX_STATE(EXPR_VALUE); + return '?'; + } + newtok(p); + enc = p->enc; + if (!parser_isascii(p)) { + if (tokadd_mbchar(p, c) == -1) return 0; + } + else if ((rb_enc_isalnum(c, p->enc) || c == '_') && + p->lex.pcur < p->lex.pend && is_identchar(p->lex.pcur, p->lex.pend, p->enc)) { + if (space_seen) { + const char *start = p->lex.pcur - 1, *ptr = start; + do { + int n = parser_precise_mbclen(p, ptr); + if (n < 0) return -1; + ptr += n; + } while (ptr < p->lex.pend && is_identchar(ptr, 
p->lex.pend, p->enc)); + rb_warn2("`?' just followed by `%.*s' is interpreted as" \ + " a conditional operator, put a space after `?'", + WARN_I((int)(ptr - start)), WARN_S_L(start, (ptr - start))); + } + goto ternary; + } + else if (c == '\\') { + if (peek(p, 'u')) { + nextc(p); + enc = rb_utf8_encoding(); + tokadd_utf8(p, &enc, -1, 0, 0); + } + else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) { + nextc(p); + if (tokadd_mbchar(p, c) == -1) return 0; + } + else { + c = read_escape(p, 0, &enc); + tokadd(p, c); + } + } + else { + tokadd(p, c); + } + tokfix(p); + lit = STR_NEW3(tok(p), toklen(p), enc, 0); + set_yylval_str(lit); + SET_LEX_STATE(EXPR_END); + return tCHAR; +} + +static enum yytokentype +parse_percent(struct parser_params *p, const int space_seen, const enum lex_state_e last_state) +{ + register int c; + const char *ptok = p->lex.pcur; + + if (IS_BEG()) { + int term; + int paren; + + c = nextc(p); + quotation: + if (c == -1) goto unterminated; + if (!ISALNUM(c)) { + term = c; + if (!ISASCII(c)) goto unknown; + c = 'Q'; + } + else { + term = nextc(p); + if (rb_enc_isalnum(term, p->enc) || !parser_isascii(p)) { + unknown: + pushback(p, term); + c = parser_precise_mbclen(p, p->lex.pcur); + if (c < 0) return 0; + p->lex.pcur += c; + yyerror0("unknown type of %string"); + return 0; + } + } + if (term == -1) { + unterminated: + compile_error(p, "unterminated quoted string meets end of file"); + return 0; + } + paren = term; + if (term == '(') term = ')'; + else if (term == '[') term = ']'; + else if (term == '{') term = '}'; + else if (term == '<') term = '>'; + else paren = 0; + + p->lex.ptok = ptok-1; + switch (c) { + case 'Q': + p->lex.strterm = NEW_STRTERM(str_dquote, term, paren); + return tSTRING_BEG; + + case 'q': + p->lex.strterm = NEW_STRTERM(str_squote, term, paren); + return tSTRING_BEG; + + case 'W': + p->lex.strterm = NEW_STRTERM(str_dword, term, paren); + return tWORDS_BEG; + + case 'w': + p->lex.strterm = NEW_STRTERM(str_sword, term, 
paren); + return tQWORDS_BEG; + + case 'I': + p->lex.strterm = NEW_STRTERM(str_dword, term, paren); + return tSYMBOLS_BEG; + + case 'i': + p->lex.strterm = NEW_STRTERM(str_sword, term, paren); + return tQSYMBOLS_BEG; + + case 'x': + p->lex.strterm = NEW_STRTERM(str_xquote, term, paren); + return tXSTRING_BEG; + + case 'r': + p->lex.strterm = NEW_STRTERM(str_regexp, term, paren); + return tREGEXP_BEG; + + case 's': + p->lex.strterm = NEW_STRTERM(str_ssym, term, paren); + SET_LEX_STATE(EXPR_FNAME|EXPR_FITEM); + return tSYMBEG; + + default: + yyerror0("unknown type of %string"); + return 0; + } + } + if ((c = nextc(p)) == '=') { + set_yylval_id('%'); + SET_LEX_STATE(EXPR_BEG); + return tOP_ASGN; + } + if (IS_SPCARG(c) || (IS_lex_state(EXPR_FITEM) && c == 's')) { + goto quotation; + } + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); + pushback(p, c); + return warn_balanced('%', "%%", "string literal"); +} + +static int +tokadd_ident(struct parser_params *p, int c) +{ + do { + if (tokadd_mbchar(p, c) == -1) return -1; + c = nextc(p); + } while (parser_is_identchar(p)); + pushback(p, c); + return 0; +} + +static ID +tokenize_ident(struct parser_params *p, const enum lex_state_e last_state) +{ + ID ident = TOK_INTERN(); + + set_yylval_name(ident); + + return ident; +} static int -parser_yylex(struct parser_params *parser) +parse_numvar(struct parser_params *p) +{ + size_t len; + int overflow; + unsigned long n = ruby_scan_digits(tok(p)+1, toklen(p)-1, 10, &len, &overflow); + const unsigned long nth_ref_max = + ((FIXNUM_MAX < INT_MAX) ? FIXNUM_MAX : INT_MAX) >> 1; + /* NTH_REF is left-shifted to be ORed with back-ref flag and + * turned into a Fixnum, in compile.c */ + + if (overflow || n > nth_ref_max) { + /* compile_error()? 
*/ + rb_warn1("`%s' is too big for a number variable, always nil", WARN_S(tok(p))); + return 0; /* $0 is $PROGRAM_NAME, not NTH_REF */ + } + else { + return (int)n; + } +} + +static enum yytokentype +parse_gvar(struct parser_params *p, const enum lex_state_e last_state) +{ + const char *ptr = p->lex.pcur; + register int c; + + SET_LEX_STATE(EXPR_END); + p->lex.ptok = ptr - 1; /* from '$' */ + newtok(p); + c = nextc(p); + switch (c) { + case '_': /* $_: last read line string */ + c = nextc(p); + if (parser_is_identchar(p)) { + tokadd(p, '$'); + tokadd(p, '_'); + break; + } + pushback(p, c); + c = '_'; + /* fall through */ + case '~': /* $~: match-data */ + case '*': /* $*: argv */ + case '$': /* $$: pid */ + case '?': /* $?: last status */ + case '!': /* $!: error string */ + case '@': /* $@: error position */ + case '/': /* $/: input record separator */ + case '\\': /* $\: output record separator */ + case ';': /* $;: field separator */ + case ',': /* $,: output field separator */ + case '.': /* $.: last read line number */ + case '=': /* $=: ignorecase */ + case ':': /* $:: load path */ + case '<': /* $<: reading filename */ + case '>': /* $>: default output handle */ + case '\"': /* $": already loaded files */ + tokadd(p, '$'); + tokadd(p, c); + goto gvar; + + case '-': + tokadd(p, '$'); + tokadd(p, c); + c = nextc(p); + if (parser_is_identchar(p)) { + if (tokadd_mbchar(p, c) == -1) return 0; + } + else { + pushback(p, c); + pushback(p, '-'); + return '$'; + } + gvar: + set_yylval_name(TOK_INTERN()); + return tGVAR; + + case '&': /* $&: last match */ + case '`': /* $`: string before last match */ + case '\'': /* $': string after last match */ + case '+': /* $+: string matches last paren. 
*/ + if (IS_lex_state_for(last_state, EXPR_FNAME)) { + tokadd(p, '$'); + tokadd(p, c); + goto gvar; + } + set_yylval_node(NEW_BACK_REF(c, &_cur_loc)); + return tBACK_REF; + + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + tokadd(p, '$'); + do { + tokadd(p, c); + c = nextc(p); + } while (c != -1 && ISDIGIT(c)); + pushback(p, c); + if (IS_lex_state_for(last_state, EXPR_FNAME)) goto gvar; + tokfix(p); + c = parse_numvar(p); + set_yylval_node(NEW_NTH_REF(c, &_cur_loc)); + return tNTH_REF; + + default: + if (!parser_is_identchar(p)) { + YYLTYPE loc = RUBY_INIT_YYLLOC(); + if (c == -1 || ISSPACE(c)) { + compile_error(p, "`$' without identifiers is not allowed as a global variable name"); + } + else { + pushback(p, c); + compile_error(p, "`$%c' is not allowed as a global variable name", c); + } + parser_show_error_line(p, &loc); + set_yylval_noname(); + return tGVAR; + } + /* fall through */ + case '0': + tokadd(p, '$'); + } + + if (tokadd_ident(p, c)) return 0; + SET_LEX_STATE(EXPR_END); + tokenize_ident(p, last_state); + return tGVAR; +} + +#ifndef RIPPER +static bool +parser_numbered_param(struct parser_params *p, int n) +{ + if (n < 0) return false; + + if (DVARS_TERMINAL_P(p->lvtbl->args) || DVARS_TERMINAL_P(p->lvtbl->args->prev)) { + return false; + } + if (p->max_numparam == ORDINAL_PARAM) { + compile_error(p, "ordinary parameter is defined"); + return false; + } + struct vtable *args = p->lvtbl->args; + if (p->max_numparam < n) { + p->max_numparam = n; + } + while (n > args->pos) { + vtable_add(args, NUMPARAM_IDX_TO_ID(args->pos+1)); + } + return true; +} +#endif + +static enum yytokentype +parse_atmark(struct parser_params *p, const enum lex_state_e last_state) +{ + const char *ptr = p->lex.pcur; + enum yytokentype result = tIVAR; + register int c = nextc(p); + YYLTYPE loc; + + p->lex.ptok = ptr - 1; /* from '@' */ + newtok(p); + tokadd(p, '@'); + if (c == '@') { + result = tCVAR; + tokadd(p, '@'); + c = nextc(p); 
+ } + SET_LEX_STATE(IS_lex_state_for(last_state, EXPR_FNAME) ? EXPR_ENDFN : EXPR_END); + if (c == -1 || !parser_is_identchar(p)) { + pushback(p, c); + RUBY_SET_YYLLOC(loc); + if (result == tIVAR) { + compile_error(p, "`@' without identifiers is not allowed as an instance variable name"); + } + else { + compile_error(p, "`@@' without identifiers is not allowed as a class variable name"); + } + parser_show_error_line(p, &loc); + set_yylval_noname(); + SET_LEX_STATE(EXPR_END); + return result; + } + else if (ISDIGIT(c)) { + pushback(p, c); + RUBY_SET_YYLLOC(loc); + if (result == tIVAR) { + compile_error(p, "`@%c' is not allowed as an instance variable name", c); + } + else { + compile_error(p, "`@@%c' is not allowed as a class variable name", c); + } + parser_show_error_line(p, &loc); + set_yylval_noname(); + SET_LEX_STATE(EXPR_END); + return result; + } + + if (tokadd_ident(p, c)) return 0; + tokenize_ident(p, last_state); + return result; +} + +static enum yytokentype +parse_ident(struct parser_params *p, int c, int cmd_state) +{ + enum yytokentype result; + int mb = ENC_CODERANGE_7BIT; + const enum lex_state_e last_state = p->lex.state; + ID ident; + + do { + if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN; + if (tokadd_mbchar(p, c) == -1) return 0; + c = nextc(p); + } while (parser_is_identchar(p)); + if ((c == '!' || c == '?') && !peek(p, '=')) { + result = tFID; + tokadd(p, c); + } + else if (c == '=' && IS_lex_state(EXPR_FNAME) && + (!peek(p, '~') && !peek(p, '>') && (!peek(p, '=') || (peek_n(p, '>', 1))))) { + result = tIDENTIFIER; + tokadd(p, c); + } + else { + result = tCONSTANT; /* assume provisionally */ + pushback(p, c); + } + tokfix(p); + + if (IS_LABEL_POSSIBLE()) { + if (IS_LABEL_SUFFIX(0)) { + SET_LEX_STATE(EXPR_ARG|EXPR_LABELED); + nextc(p); + set_yylval_name(TOK_INTERN()); + return tLABEL; + } + } + if (mb == ENC_CODERANGE_7BIT && !IS_lex_state(EXPR_DOT)) { + const struct kwtable *kw; + + /* See if it is a reserved word. 
*/ + kw = rb_reserved_word(tok(p), toklen(p)); + if (kw) { + enum lex_state_e state = p->lex.state; + if (IS_lex_state_for(state, EXPR_FNAME)) { + SET_LEX_STATE(EXPR_ENDFN); + set_yylval_name(rb_intern2(tok(p), toklen(p))); + return kw->id[0]; + } + SET_LEX_STATE(kw->state); + if (IS_lex_state(EXPR_BEG)) { + p->command_start = TRUE; + } + if (kw->id[0] == keyword_do) { + if (lambda_beginning_p()) { + p->lex.lpar_beg = -1; /* make lambda_beginning_p() == FALSE in the body of "-> do ... end" */ + return keyword_do_LAMBDA; + } + if (COND_P()) return keyword_do_cond; + if (CMDARG_P() && !IS_lex_state_for(state, EXPR_CMDARG)) + return keyword_do_block; + return keyword_do; + } + if (IS_lex_state_for(state, (EXPR_BEG | EXPR_LABELED))) + return kw->id[0]; + else { + if (kw->id[0] != kw->id[1]) + SET_LEX_STATE(EXPR_BEG | EXPR_LABEL); + return kw->id[1]; + } + } + } + + if (IS_lex_state(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) { + if (cmd_state) { + SET_LEX_STATE(EXPR_CMDARG); + } + else { + SET_LEX_STATE(EXPR_ARG); + } + } + else if (p->lex.state == EXPR_FNAME) { + SET_LEX_STATE(EXPR_ENDFN); + } + else { + SET_LEX_STATE(EXPR_END); + } + + ident = tokenize_ident(p, last_state); + if (result == tCONSTANT && is_local_id(ident)) result = tIDENTIFIER; + if (!IS_lex_state_for(last_state, EXPR_DOT|EXPR_FNAME) && + (result == tIDENTIFIER) && /* not EXPR_FNAME, not attrasgn */ + lvar_defined(p, ident)) { + SET_LEX_STATE(EXPR_END|EXPR_LABEL); + } + return result; +} + +static enum yytokentype +parser_yylex(struct parser_params *p) { register int c; int space_seen = 0; int cmd_state; + int label; enum lex_state_e last_state; - rb_encoding *enc; - int mb; -#ifdef RIPPER int fallthru = FALSE; -#endif + int token_seen = p->token_seen; - if (lex_strterm) { - int token; - if (nd_type(lex_strterm) == NODE_HEREDOC) { - token = here_document(lex_strterm); - if (token == tSTRING_END) { - lex_strterm = 0; - lex_state = EXPR_END; - } + if (p->lex.strterm) { + if (p->lex.strterm->flags & 
STRTERM_HEREDOC) { + return here_document(p, &p->lex.strterm->u.heredoc); } else { - token = parse_string(lex_strterm); - if (token == tSTRING_END || token == tREGEXP_END) { - rb_gc_force_recycle((VALUE)lex_strterm); - lex_strterm = 0; - lex_state = EXPR_END; - } + token_flush(p); + return parse_string(p, &p->lex.strterm->u.literal); } - return token; } - cmd_state = command_start; - command_start = FALSE; + cmd_state = p->command_start; + p->command_start = FALSE; + p->token_seen = TRUE; retry: - last_state = lex_state; - switch (c = nextc()) { + last_state = p->lex.state; +#ifndef RIPPER + token_flush(p); +#endif + switch (c = nextc(p)) { case '\0': /* NUL */ case '\004': /* ^D */ case '\032': /* ^Z */ @@ -6789,11 +9284,18 @@ parser_yylex(struct parser_params *parser) return 0; /* white spaces */ - case ' ': case '\t': case '\f': case '\r': + case '\r': + if (!p->cr_seen) { + p->cr_seen = TRUE; + /* carried over with p->lex.nextline for nextc() */ + rb_warn0("encountered \\r in middle of line, treated as a mere space"); + } + /* fall through */ + case ' ': case '\t': case '\f': case '\13': /* '\v' */ space_seen = 1; #ifdef RIPPER - while ((c = nextc())) { + while ((c = nextc(p))) { switch (c) { case ' ': case '\t': case '\f': case '\r': case '\13': /* '\v' */ @@ -6803,73 +9305,88 @@ parser_yylex(struct parser_params *parser) } } outofloop: - pushback(c); - ripper_dispatch_scan_event(parser, tSP); + pushback(p, c); + dispatch_scan_event(p, tSP); #endif goto retry; case '#': /* it's a comment */ + p->token_seen = token_seen; /* no magic_comment in shebang line */ - if (!parser_magic_comment(parser, lex_p, lex_pend - lex_p)) { - if (comment_at_top(parser)) { - set_file_encoding(parser, lex_p, lex_pend); + if (!parser_magic_comment(p, p->lex.pcur, p->lex.pend - p->lex.pcur)) { + if (comment_at_top(p)) { + set_file_encoding(p, p->lex.pcur, p->lex.pend); } } - lex_p = lex_pend; -#ifdef RIPPER - ripper_dispatch_scan_event(parser, tCOMMENT); + lex_goto_eol(p); + 
dispatch_scan_event(p, tCOMMENT); fallthru = TRUE; -#endif /* fall through */ case '\n': - if (IS_lex_state(EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)) { -#ifdef RIPPER + p->token_seen = token_seen; + c = (IS_lex_state(EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT) && + !IS_lex_state(EXPR_LABELED)); + if (c || IS_lex_state_all(EXPR_ARG|EXPR_LABELED)) { if (!fallthru) { - ripper_dispatch_scan_event(parser, tIGNORED_NL); + dispatch_scan_event(p, tIGNORED_NL); } fallthru = FALSE; -#endif + if (!c && p->ctxt.in_kwarg) { + goto normal_newline; + } goto retry; } - while ((c = nextc())) { - switch (c) { + while (1) { + switch (c = nextc(p)) { case ' ': case '\t': case '\f': case '\r': case '\13': /* '\v' */ space_seen = 1; break; + case '#': + pushback(p, c); + if (space_seen) dispatch_scan_event(p, tSP); + goto retry; + case '&': case '.': { - if ((c = nextc()) != '.') { - pushback(c); - pushback('.'); - goto retry; - } + dispatch_delayed_token(p, tIGNORED_NL); + if (peek(p, '.') == (c == '&')) { + pushback(p, c); + dispatch_scan_event(p, tSP); + goto retry; + } } default: - --ruby_sourceline; - lex_nextline = lex_lastline; + p->ruby_sourceline--; + p->lex.nextline = p->lex.lastline; case -1: /* EOF no decrement*/ - lex_goto_eol(parser); -#ifdef RIPPER +#ifndef RIPPER + if (p->lex.prevline && !p->eofp) p->lex.lastline = p->lex.prevline; + p->lex.pbeg = RSTRING_PTR(p->lex.lastline); + p->lex.pend = p->lex.pcur = p->lex.pbeg + RSTRING_LEN(p->lex.lastline); + pushback(p, 1); /* always pushback */ + p->lex.ptok = p->lex.pcur; +#else + lex_goto_eol(p); if (c != -1) { - parser->tokp = lex_p; + p->lex.ptok = p->lex.pcur; } #endif goto normal_newline; } } normal_newline: - command_start = TRUE; - lex_state = EXPR_BEG; + p->command_start = TRUE; + SET_LEX_STATE(EXPR_BEG); return '\n'; case '*': - if ((c = nextc()) == '*') { - if ((c = nextc()) == '=') { - set_yylval_id(tPOW); - lex_state = EXPR_BEG; + if ((c = nextc(p)) == '*') { + if ((c = nextc(p)) == '=') { + 
set_yylval_id(idPow); + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); if (IS_SPCARG(c)) { rb_warning0("`**' interpreted as argument prefix"); c = tDSTAR; @@ -6878,17 +9395,16 @@ parser_yylex(struct parser_params *parser) c = tDSTAR; } else { - warn_balanced("**", "argument prefix"); - c = tPOW; + c = warn_balanced((enum ruby_method_ids)tPOW, "**", "argument prefix"); } } else { if (c == '=') { set_yylval_id('*'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); if (IS_SPCARG(c)) { rb_warning0("`*' interpreted as argument prefix"); c = tSTAR; @@ -6897,23 +9413,22 @@ parser_yylex(struct parser_params *parser) c = tSTAR; } else { - warn_balanced("*", "argument prefix"); - c = '*'; + c = warn_balanced('*', "*", "argument prefix"); } } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); return c; case '!': - c = nextc(); + c = nextc(p); if (IS_AFTER_OPERATOR()) { - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); if (c == '@') { return '!'; } } else { - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); } if (c == '=') { return tNEQ; @@ -6921,52 +9436,45 @@ parser_yylex(struct parser_params *parser) if (c == '~') { return tNMATCH; } - pushback(c); + pushback(p, c); return '!'; case '=': - if (was_bol()) { + if (was_bol(p)) { /* skip embedded rd document */ - if (strncmp(lex_p, "begin", 5) == 0 && ISSPACE(lex_p[5])) { -#ifdef RIPPER - int first_p = TRUE; + if (word_match_p(p, "begin", 5)) { + int first_p = TRUE; - lex_goto_eol(parser); - ripper_dispatch_scan_event(parser, tEMBDOC_BEG); -#endif + lex_goto_eol(p); + dispatch_scan_event(p, tEMBDOC_BEG); for (;;) { - lex_goto_eol(parser); -#ifdef RIPPER - if (!first_p) { - ripper_dispatch_scan_event(parser, tEMBDOC); - } - first_p = FALSE; -#endif - c = nextc(); + lex_goto_eol(p); + if (!first_p) { + dispatch_scan_event(p, tEMBDOC); + } + first_p = FALSE; + c = nextc(p); if (c == 
-1) { - compile_error(PARSER_ARG "embedded document meets end of file"); + compile_error(p, "embedded document meets end of file"); return 0; } - if (c != '=') continue; - if (strncmp(lex_p, "end", 3) == 0 && - (lex_p + 3 == lex_pend || ISSPACE(lex_p[3]))) { + if (c == '=' && word_match_p(p, "end", 3)) { break; } + pushback(p, c); } - lex_goto_eol(parser); -#ifdef RIPPER - ripper_dispatch_scan_event(parser, tEMBDOC_END); -#endif + lex_goto_eol(p); + dispatch_scan_event(p, tEMBDOC_END); goto retry; } } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - if ((c = nextc()) == '=') { - if ((c = nextc()) == '=') { + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); + if ((c = nextc(p)) == '=') { + if ((c = nextc(p)) == '=') { return tEQQ; } - pushback(c); + pushback(p, c); return tEQ; } if (c == '~') { @@ -6975,608 +9483,360 @@ parser_yylex(struct parser_params *parser) else if (c == '>') { return tASSOC; } - pushback(c); + pushback(p, c); return '='; case '<': - last_state = lex_state; - c = nextc(); + c = nextc(p); if (c == '<' && !IS_lex_state(EXPR_DOT | EXPR_CLASS) && !IS_END() && - (!IS_ARG() || space_seen)) { - int token = heredoc_identifier(); - if (token) return token; + (!IS_ARG() || IS_lex_state(EXPR_LABELED) || space_seen)) { + int token = heredoc_identifier(p); + if (token) return token < 0 ? 
0 : token; } if (IS_AFTER_OPERATOR()) { - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); } else { if (IS_lex_state(EXPR_CLASS)) - command_start = TRUE; - lex_state = EXPR_BEG; + p->command_start = TRUE; + SET_LEX_STATE(EXPR_BEG); } if (c == '=') { - if ((c = nextc()) == '>') { + if ((c = nextc(p)) == '>') { return tCMP; } - pushback(c); + pushback(p, c); return tLEQ; } if (c == '<') { - if ((c = nextc()) == '=') { - set_yylval_id(tLSHFT); - lex_state = EXPR_BEG; + if ((c = nextc(p)) == '=') { + set_yylval_id(idLTLT); + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); - warn_balanced("<<", "here document"); - return tLSHFT; + pushback(p, c); + return warn_balanced((enum ruby_method_ids)tLSHFT, "<<", "here document"); } - pushback(c); + pushback(p, c); return '<'; case '>': - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - if ((c = nextc()) == '=') { + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); + if ((c = nextc(p)) == '=') { return tGEQ; } if (c == '>') { - if ((c = nextc()) == '=') { - set_yylval_id(tRSHFT); - lex_state = EXPR_BEG; + if ((c = nextc(p)) == '=') { + set_yylval_id(idGTGT); + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); return tRSHFT; } - pushback(c); + pushback(p, c); return '>'; case '"': - lex_strterm = NEW_STRTERM(str_dquote, '"', 0); + label = (IS_LABEL_POSSIBLE() ? str_label : 0); + p->lex.strterm = NEW_STRTERM(str_dquote | label, '"', 0); + p->lex.ptok = p->lex.pcur-1; return tSTRING_BEG; case '`': if (IS_lex_state(EXPR_FNAME)) { - lex_state = EXPR_ENDFN; + SET_LEX_STATE(EXPR_ENDFN); return c; } if (IS_lex_state(EXPR_DOT)) { if (cmd_state) - lex_state = EXPR_CMDARG; + SET_LEX_STATE(EXPR_CMDARG); else - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); return c; } - lex_strterm = NEW_STRTERM(str_xquote, '`', 0); + p->lex.strterm = NEW_STRTERM(str_xquote, '`', 0); return tXSTRING_BEG; case '\'': - lex_strterm = NEW_STRTERM(str_squote, '\'', 0); + label = (IS_LABEL_POSSIBLE() ? 
str_label : 0); + p->lex.strterm = NEW_STRTERM(str_squote | label, '\'', 0); + p->lex.ptok = p->lex.pcur-1; return tSTRING_BEG; case '?': - if (IS_END()) { - lex_state = EXPR_VALUE; - return '?'; - } - c = nextc(); - if (c == -1) { - compile_error(PARSER_ARG "incomplete character syntax"); - return 0; - } - if (rb_enc_isspace(c, parser->enc)) { - if (!IS_ARG()) { - int c2 = 0; - switch (c) { - case ' ': - c2 = 's'; - break; - case '\n': - c2 = 'n'; - break; - case '\t': - c2 = 't'; - break; - case '\v': - c2 = 'v'; - break; - case '\r': - c2 = 'r'; - break; - case '\f': - c2 = 'f'; - break; - } - if (c2) { - rb_warnI("invalid character syntax; use ?\\%c", c2); - } - } - ternary: - pushback(c); - lex_state = EXPR_VALUE; - return '?'; - } - newtok(); - enc = parser->enc; - if (!parser_isascii()) { - if (tokadd_mbchar(c) == -1) return 0; - } - else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && - lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) { - goto ternary; - } - else if (c == '\\') { - if (peek('u')) { - nextc(); - c = parser_tokadd_utf8(parser, &enc, 0, 0, 0); - if (0x80 <= c) { - tokaddmbc(c, enc); - } - else { - tokadd(c); - } - } - else if (!lex_eol_p() && !(c = *lex_p, ISASCII(c))) { - nextc(); - if (tokadd_mbchar(c) == -1) return 0; - } - else { - c = read_escape(0, &enc); - tokadd(c); - } - } - else { - tokadd(c); - } - tokfix(); - set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0)); - lex_state = EXPR_END; - return tCHAR; + return parse_qmark(p, space_seen); case '&': - if ((c = nextc()) == '&') { - lex_state = EXPR_BEG; - if ((c = nextc()) == '=') { - set_yylval_id(tANDOP); - lex_state = EXPR_BEG; + if ((c = nextc(p)) == '&') { + SET_LEX_STATE(EXPR_BEG); + if ((c = nextc(p)) == '=') { + set_yylval_id(idANDOP); + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); return tANDOP; } else if (c == '=') { set_yylval_id('&'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + else 
if (c == '.') { + set_yylval_id(idANDDOT); + SET_LEX_STATE(EXPR_DOT); + return tANDDOT; + } + pushback(p, c); if (IS_SPCARG(c)) { - rb_warning0("`&' interpreted as argument prefix"); + if ((c != ':') || + (c = peekc_n(p, 1)) == -1 || + !(c == '\'' || c == '"' || + is_identchar((p->lex.pcur+1), p->lex.pend, p->enc))) { + rb_warning0("`&' interpreted as argument prefix"); + } c = tAMPER; } else if (IS_BEG()) { c = tAMPER; } else { - warn_balanced("&", "argument prefix"); - c = '&'; + c = warn_balanced('&', "&", "argument prefix"); } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); return c; case '|': - if ((c = nextc()) == '|') { - lex_state = EXPR_BEG; - if ((c = nextc()) == '=') { - set_yylval_id(tOROP); - lex_state = EXPR_BEG; + if ((c = nextc(p)) == '|') { + SET_LEX_STATE(EXPR_BEG); + if ((c = nextc(p)) == '=') { + set_yylval_id(idOROP); + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); + if (IS_lex_state_for(last_state, EXPR_BEG)) { + c = '|'; + pushback(p, '|'); + return c; + } return tOROP; } if (c == '=') { set_yylval_id('|'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - pushback(c); + SET_LEX_STATE(IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG|EXPR_LABEL); + pushback(p, c); return '|'; case '+': - c = nextc(); + c = nextc(p); if (IS_AFTER_OPERATOR()) { - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); if (c == '@') { return tUPLUS; } - pushback(c); + pushback(p, c); return '+'; } if (c == '=') { set_yylval_id('+'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) { - lex_state = EXPR_BEG; - pushback(c); + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p, '+'))) { + SET_LEX_STATE(EXPR_BEG); + pushback(p, c); if (c != -1 && ISDIGIT(c)) { - c = '+'; - goto start_num; + return parse_numeric(p, '+'); } return tUPLUS; } - lex_state = EXPR_BEG; - pushback(c); - warn_balanced("+", "unary operator"); - return '+'; + SET_LEX_STATE(EXPR_BEG); + pushback(p, c); + return warn_balanced('+', "+", "unary operator"); case '-': - c = nextc(); + c = nextc(p); if (IS_AFTER_OPERATOR()) { - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); if (c == '@') { return tUMINUS; } - pushback(c); + pushback(p, c); return '-'; } if (c == '=') { set_yylval_id('-'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } if (c == '>') { - lex_state = EXPR_ENDFN; + SET_LEX_STATE(EXPR_ENDFN); return tLAMBDA; } - if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) { - lex_state = EXPR_BEG; - pushback(c); + if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p, '-'))) { + SET_LEX_STATE(EXPR_BEG); + pushback(p, c); if (c != -1 && ISDIGIT(c)) { return tUMINUS_NUM; } return tUMINUS; } - lex_state = EXPR_BEG; - pushback(c); - warn_balanced("-", "unary operator"); - return '-'; - - case '.': - lex_state = EXPR_BEG; - if ((c = nextc()) == '.') { - if ((c = nextc()) == '.') { - return tDOT3; + SET_LEX_STATE(EXPR_BEG); + pushback(p, c); + return warn_balanced('-', "-", "unary operator"); + + case '.': { + int is_beg = IS_BEG(); + SET_LEX_STATE(EXPR_BEG); + if ((c = nextc(p)) == '.') { + if ((c = nextc(p)) == '.') { + if (p->ctxt.in_argdef) { + 
SET_LEX_STATE(EXPR_ENDARG); + return tBDOT3; + } + if (p->lex.paren_nest == 0 && looking_at_eol_p(p)) { + rb_warn0("... at EOL, should be parenthesized?"); + } + else if (p->lex.lpar_beg >= 0 && p->lex.lpar_beg+1 == p->lex.paren_nest) { + if (IS_lex_state_for(last_state, EXPR_LABEL)) + return tDOT3; + } + return is_beg ? tBDOT3 : tDOT3; } - pushback(c); - return tDOT2; + pushback(p, c); + return is_beg ? tBDOT2 : tDOT2; } - pushback(c); + pushback(p, c); if (c != -1 && ISDIGIT(c)) { - yyerror("no .<digit> floating literal anymore; put 0 before dot"); + char prev = p->lex.pcur-1 > p->lex.pbeg ? *(p->lex.pcur-2) : 0; + parse_numeric(p, '.'); + if (ISDIGIT(prev)) { + yyerror0("unexpected fraction part after numeric literal"); + } + else { + yyerror0("no .<digit> floating literal anymore; put 0 before dot"); + } + SET_LEX_STATE(EXPR_END); + p->lex.ptok = p->lex.pcur; + goto retry; } - lex_state = EXPR_DOT; + set_yylval_id('.'); + SET_LEX_STATE(EXPR_DOT); return '.'; + } - start_num: case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - { - int is_float, seen_point, seen_e, nondigit; - - is_float = seen_point = seen_e = nondigit = 0; - lex_state = EXPR_END; - newtok(); - if (c == '-' || c == '+') { - tokadd(c); - c = nextc(); - } - if (c == '0') { -#define no_digits() do {yyerror("numeric literal without digits"); return 0;} while (0) - int start = toklen(); - c = nextc(); - if (c == 'x' || c == 'X') { - /* hexadecimal */ - c = nextc(); - if (c != -1 && ISXDIGIT(c)) { - do { - if (c == '_') { - if (nondigit) break; - nondigit = c; - continue; - } - if (!ISXDIGIT(c)) break; - nondigit = 0; - tokadd(c); - } while ((c = nextc()) != -1); - } - pushback(c); - tokfix(); - if (toklen() == start) { - no_digits(); - } - else if (nondigit) goto trailing_uc; - set_yylval_literal(rb_cstr_to_inum(tok(), 16, FALSE)); - return tINTEGER; - } - if (c == 'b' || c == 'B') { - /* binary */ - c = nextc(); - if (c == '0' || c == '1') { - do { - 
if (c == '_') { - if (nondigit) break; - nondigit = c; - continue; - } - if (c != '0' && c != '1') break; - nondigit = 0; - tokadd(c); - } while ((c = nextc()) != -1); - } - pushback(c); - tokfix(); - if (toklen() == start) { - no_digits(); - } - else if (nondigit) goto trailing_uc; - set_yylval_literal(rb_cstr_to_inum(tok(), 2, FALSE)); - return tINTEGER; - } - if (c == 'd' || c == 'D') { - /* decimal */ - c = nextc(); - if (c != -1 && ISDIGIT(c)) { - do { - if (c == '_') { - if (nondigit) break; - nondigit = c; - continue; - } - if (!ISDIGIT(c)) break; - nondigit = 0; - tokadd(c); - } while ((c = nextc()) != -1); - } - pushback(c); - tokfix(); - if (toklen() == start) { - no_digits(); - } - else if (nondigit) goto trailing_uc; - set_yylval_literal(rb_cstr_to_inum(tok(), 10, FALSE)); - return tINTEGER; - } - if (c == '_') { - /* 0_0 */ - goto octal_number; - } - if (c == 'o' || c == 'O') { - /* prefixed octal */ - c = nextc(); - if (c == -1 || c == '_' || !ISDIGIT(c)) { - no_digits(); - } - } - if (c >= '0' && c <= '7') { - /* octal */ - octal_number: - do { - if (c == '_') { - if (nondigit) break; - nondigit = c; - continue; - } - if (c < '0' || c > '9') break; - if (c > '7') goto invalid_octal; - nondigit = 0; - tokadd(c); - } while ((c = nextc()) != -1); - if (toklen() > start) { - pushback(c); - tokfix(); - if (nondigit) goto trailing_uc; - set_yylval_literal(rb_cstr_to_inum(tok(), 8, FALSE)); - return tINTEGER; - } - if (nondigit) { - pushback(c); - goto trailing_uc; - } - } - if (c > '7' && c <= '9') { - invalid_octal: - yyerror("Invalid octal digit"); - } - else if (c == '.' 
|| c == 'e' || c == 'E') { - tokadd('0'); - } - else { - pushback(c); - set_yylval_literal(INT2FIX(0)); - return tINTEGER; - } - } - - for (;;) { - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - nondigit = 0; - tokadd(c); - break; - - case '.': - if (nondigit) goto trailing_uc; - if (seen_point || seen_e) { - goto decode_num; - } - else { - int c0 = nextc(); - if (c0 == -1 || !ISDIGIT(c0)) { - pushback(c0); - goto decode_num; - } - c = c0; - } - tokadd('.'); - tokadd(c); - is_float++; - seen_point++; - nondigit = 0; - break; - - case 'e': - case 'E': - if (nondigit) { - pushback(c); - c = nondigit; - goto decode_num; - } - if (seen_e) { - goto decode_num; - } - tokadd(c); - seen_e++; - is_float++; - nondigit = c; - c = nextc(); - if (c != '-' && c != '+') continue; - tokadd(c); - nondigit = c; - break; - - case '_': /* `_' in number just ignored */ - if (nondigit) goto decode_num; - nondigit = c; - break; - - default: - goto decode_num; - } - c = nextc(); - } - - decode_num: - pushback(c); - if (nondigit) { - char tmp[30]; - trailing_uc: - snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit); - yyerror(tmp); - } - tokfix(); - if (is_float) { - double d = strtod(tok(), 0); - if (errno == ERANGE) { - rb_warningS("Float %s out of range", tok()); - errno = 0; - } - set_yylval_literal(DBL2NUM(d)); - return tFLOAT; - } - set_yylval_literal(rb_cstr_to_inum(tok(), 10, FALSE)); - return tINTEGER; - } + return parse_numeric(p, c); case ')': + COND_POP(); + CMDARG_POP(); + SET_LEX_STATE(EXPR_ENDFN); + p->lex.paren_nest--; + return c; + case ']': - paren_nest--; + COND_POP(); + CMDARG_POP(); + SET_LEX_STATE(EXPR_END); + p->lex.paren_nest--; + return c; + case '}': - COND_LEXPOP(); - CMDARG_LEXPOP(); - if (c == ')') - lex_state = EXPR_ENDFN; - else - lex_state = EXPR_ENDARG; - if (c == '}') { - if (!brace_nest--) c = tSTRING_DEND; - } + /* tSTRING_DEND does COND_POP and CMDARG_POP in the yacc's 
rule */ + if (!p->lex.brace_nest--) return tSTRING_DEND; + COND_POP(); + CMDARG_POP(); + SET_LEX_STATE(EXPR_END); + p->lex.paren_nest--; return c; case ':': - c = nextc(); + c = nextc(p); if (c == ':') { if (IS_BEG() || IS_lex_state(EXPR_CLASS) || IS_SPCARG(-1)) { - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tCOLON3; } - lex_state = EXPR_DOT; + set_yylval_id(idCOLON2); + SET_LEX_STATE(EXPR_DOT); return tCOLON2; } - if (IS_END() || ISSPACE(c)) { - pushback(c); - warn_balanced(":", "symbol literal"); - lex_state = EXPR_BEG; - return ':'; + if (IS_END() || ISSPACE(c) || c == '#') { + pushback(p, c); + c = warn_balanced(':', ":", "symbol literal"); + SET_LEX_STATE(EXPR_BEG); + return c; } switch (c) { case '\'': - lex_strterm = NEW_STRTERM(str_ssym, c, 0); + p->lex.strterm = NEW_STRTERM(str_ssym, c, 0); break; case '"': - lex_strterm = NEW_STRTERM(str_dsym, c, 0); + p->lex.strterm = NEW_STRTERM(str_dsym, c, 0); break; default: - pushback(c); + pushback(p, c); break; } - lex_state = EXPR_FNAME; + SET_LEX_STATE(EXPR_FNAME); return tSYMBEG; case '/': - if (IS_lex_state(EXPR_BEG_ANY)) { - lex_strterm = NEW_STRTERM(str_regexp, '/', 0); + if (IS_BEG()) { + p->lex.strterm = NEW_STRTERM(str_regexp, '/', 0); return tREGEXP_BEG; } - if ((c = nextc()) == '=') { + if ((c = nextc(p)) == '=') { set_yylval_id('/'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - pushback(c); + pushback(p, c); if (IS_SPCARG(c)) { - (void)arg_ambiguous(); - lex_strterm = NEW_STRTERM(str_regexp, '/', 0); + arg_ambiguous(p, '/'); + p->lex.strterm = NEW_STRTERM(str_regexp, '/', 0); return tREGEXP_BEG; } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - warn_balanced("/", "regexp literal"); - return '/'; + SET_LEX_STATE(IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG); + return warn_balanced('/', "/", "regexp literal"); case '^': - if ((c = nextc()) == '=') { + if ((c = nextc(p)) == '=') { set_yylval_id('^'); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); return tOP_ASGN; } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - pushback(c); + SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG); + pushback(p, c); return '^'; case ';': - lex_state = EXPR_BEG; - command_start = TRUE; + SET_LEX_STATE(EXPR_BEG); + p->command_start = TRUE; return ';'; case ',': - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); return ','; case '~': if (IS_AFTER_OPERATOR()) { - if ((c = nextc()) != '@') { - pushback(c); + if ((c = nextc(p)) != '@') { + pushback(p, c); } - lex_state = EXPR_ARG; + SET_LEX_STATE(EXPR_ARG); } else { - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG); } return '~'; @@ -7584,473 +9844,168 @@ parser_yylex(struct parser_params *parser) if (IS_BEG()) { c = tLPAREN; } - else if (IS_SPCARG(-1)) { + else if (!space_seen) { + /* foo( ... 
) => method call, no ambiguity */ + } + else if (IS_ARG() || IS_lex_state_all(EXPR_END|EXPR_LABEL)) { c = tLPAREN_ARG; } - paren_nest++; + else if (IS_lex_state(EXPR_ENDFN) && !lambda_beginning_p()) { + rb_warning0("parentheses after method name is interpreted as " + "an argument list, not a decomposed argument"); + } + p->lex.paren_nest++; COND_PUSH(0); CMDARG_PUSH(0); - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); return c; case '[': - paren_nest++; + p->lex.paren_nest++; if (IS_AFTER_OPERATOR()) { - lex_state = EXPR_ARG; - if ((c = nextc()) == ']') { - if ((c = nextc()) == '=') { + if ((c = nextc(p)) == ']') { + p->lex.paren_nest--; + SET_LEX_STATE(EXPR_ARG); + if ((c = nextc(p)) == '=') { return tASET; } - pushback(c); + pushback(p, c); return tAREF; } - pushback(c); + pushback(p, c); + SET_LEX_STATE(EXPR_ARG|EXPR_LABEL); return '['; } else if (IS_BEG()) { c = tLBRACK; } - else if (IS_ARG() && space_seen) { + else if (IS_ARG() && (space_seen || IS_lex_state(EXPR_LABELED))) { c = tLBRACK; } - lex_state = EXPR_BEG; + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); COND_PUSH(0); CMDARG_PUSH(0); return c; case '{': - ++brace_nest; - if (lpar_beg && lpar_beg == paren_nest) { - lex_state = EXPR_BEG; - lpar_beg = 0; - --paren_nest; - COND_PUSH(0); - CMDARG_PUSH(0); - return tLAMBEG; - } - if (IS_ARG() || IS_lex_state(EXPR_END | EXPR_ENDFN)) + ++p->lex.brace_nest; + if (lambda_beginning_p()) + c = tLAMBEG; + else if (IS_lex_state(EXPR_LABELED)) + c = tLBRACE; /* hash */ + else if (IS_lex_state(EXPR_ARG_ANY | EXPR_END | EXPR_ENDFN)) c = '{'; /* block (primary) */ else if (IS_lex_state(EXPR_ENDARG)) c = tLBRACE_ARG; /* block (expr) */ else c = tLBRACE; /* hash */ + if (c != tLBRACE) { + p->command_start = TRUE; + SET_LEX_STATE(EXPR_BEG); + } + else { + SET_LEX_STATE(EXPR_BEG|EXPR_LABEL); + } + ++p->lex.paren_nest; /* after lambda_beginning_p() */ COND_PUSH(0); CMDARG_PUSH(0); - lex_state = EXPR_BEG; - if (c != tLBRACE) command_start = TRUE; return c; case '\\': - c = 
nextc(); + c = nextc(p); if (c == '\n') { space_seen = 1; -#ifdef RIPPER - ripper_dispatch_scan_event(parser, tSP); -#endif + dispatch_scan_event(p, tSP); goto retry; /* skip \\n */ } - pushback(c); + if (c == ' ') return tSP; + if (ISSPACE(c)) return c; + pushback(p, c); return '\\'; case '%': - if (IS_lex_state(EXPR_BEG_ANY)) { - int term; - int paren; - - c = nextc(); - quotation: - if (c == -1 || !ISALNUM(c)) { - term = c; - c = 'Q'; - } - else { - term = nextc(); - if (rb_enc_isalnum(term, parser->enc) || !parser_isascii()) { - yyerror("unknown type of %string"); - return 0; - } - } - if (c == -1 || term == -1) { - compile_error(PARSER_ARG "unterminated quoted string meets end of file"); - return 0; - } - paren = term; - if (term == '(') term = ')'; - else if (term == '[') term = ']'; - else if (term == '{') term = '}'; - else if (term == '<') term = '>'; - else paren = 0; - - switch (c) { - case 'Q': - lex_strterm = NEW_STRTERM(str_dquote, term, paren); - return tSTRING_BEG; - - case 'q': - lex_strterm = NEW_STRTERM(str_squote, term, paren); - return tSTRING_BEG; - - case 'W': - lex_strterm = NEW_STRTERM(str_dword, term, paren); - do {c = nextc();} while (ISSPACE(c)); - pushback(c); - return tWORDS_BEG; - - case 'w': - lex_strterm = NEW_STRTERM(str_sword, term, paren); - do {c = nextc();} while (ISSPACE(c)); - pushback(c); - return tQWORDS_BEG; - - case 'I': - lex_strterm = NEW_STRTERM(str_dword, term, paren); - do {c = nextc();} while (ISSPACE(c)); - pushback(c); - return tSYMBOLS_BEG; - - case 'i': - lex_strterm = NEW_STRTERM(str_sword, term, paren); - do {c = nextc();} while (ISSPACE(c)); - pushback(c); - return tQSYMBOLS_BEG; - - case 'x': - lex_strterm = NEW_STRTERM(str_xquote, term, paren); - return tXSTRING_BEG; - - case 'r': - lex_strterm = NEW_STRTERM(str_regexp, term, paren); - return tREGEXP_BEG; - - case 's': - lex_strterm = NEW_STRTERM(str_ssym, term, paren); - lex_state = EXPR_FNAME; - return tSYMBEG; - - default: - yyerror("unknown type of 
%string"); - return 0; - } - } - if ((c = nextc()) == '=') { - set_yylval_id('%'); - lex_state = EXPR_BEG; - return tOP_ASGN; - } - if (IS_SPCARG(c)) { - goto quotation; - } - lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; - pushback(c); - warn_balanced("%%", "string literal"); - return '%'; + return parse_percent(p, space_seen, last_state); case '$': - lex_state = EXPR_END; - newtok(); - c = nextc(); - switch (c) { - case '_': /* $_: last read line string */ - c = nextc(); - if (parser_is_identchar()) { - tokadd('$'); - tokadd('_'); - break; - } - pushback(c); - c = '_'; - /* fall through */ - case '~': /* $~: match-data */ - case '*': /* $*: argv */ - case '$': /* $$: pid */ - case '?': /* $?: last status */ - case '!': /* $!: error string */ - case '@': /* $@: error position */ - case '/': /* $/: input record separator */ - case '\\': /* $\: output record separator */ - case ';': /* $;: field separator */ - case ',': /* $,: output field separator */ - case '.': /* $.: last read line number */ - case '=': /* $=: ignorecase */ - case ':': /* $:: load path */ - case '<': /* $<: reading filename */ - case '>': /* $>: default output handle */ - case '\"': /* $": already loaded files */ - tokadd('$'); - tokadd(c); - tokfix(); - set_yylval_name(rb_intern(tok())); - return tGVAR; - - case '-': - tokadd('$'); - tokadd(c); - c = nextc(); - if (parser_is_identchar()) { - if (tokadd_mbchar(c) == -1) return 0; - } - else { - pushback(c); - } - gvar: - tokfix(); - set_yylval_name(rb_intern(tok())); - return tGVAR; - - case '&': /* $&: last match */ - case '`': /* $`: string before last match */ - case '\'': /* $': string after last match */ - case '+': /* $+: string matches last paren. 
*/ - if (last_state == EXPR_FNAME) { - tokadd('$'); - tokadd(c); - goto gvar; - } - set_yylval_node(NEW_BACK_REF(c)); - return tBACK_REF; - - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - tokadd('$'); - do { - tokadd(c); - c = nextc(); - } while (c != -1 && ISDIGIT(c)); - pushback(c); - if (last_state == EXPR_FNAME) goto gvar; - tokfix(); - set_yylval_node(NEW_NTH_REF(atoi(tok()+1))); - return tNTH_REF; - - default: - if (!parser_is_identchar()) { - pushback(c); - return '$'; - } - case '0': - tokadd('$'); - } - break; + return parse_gvar(p, last_state); case '@': - c = nextc(); - newtok(); - tokadd('@'); - if (c == '@') { - tokadd('@'); - c = nextc(); - } - if (c != -1 && ISDIGIT(c)) { - if (tokidx == 1) { - compile_error(PARSER_ARG "`@%c' is not allowed as an instance variable name", c); - } - else { - compile_error(PARSER_ARG "`@@%c' is not allowed as a class variable name", c); - } - return 0; - } - if (!parser_is_identchar()) { - pushback(c); - return '@'; - } - break; + return parse_atmark(p, last_state); case '_': - if (was_bol() && whole_match_p("__END__", 7, 0)) { - ruby__end__seen = 1; - parser->eofp = Qtrue; + if (was_bol(p) && whole_match_p(p, "__END__", 7, 0)) { + p->ruby__end__seen = 1; + p->eofp = 1; #ifndef RIPPER return -1; #else - lex_goto_eol(parser); - ripper_dispatch_scan_event(parser, k__END__); + lex_goto_eol(p); + dispatch_scan_event(p, k__END__); return 0; #endif } - newtok(); + newtok(p); break; default: - if (!parser_is_identchar()) { - rb_compile_error(PARSER_ARG "Invalid char `\\x%02X' in expression", c); + if (!parser_is_identchar(p)) { + compile_error(p, "Invalid char `\\x%02X' in expression", c); + token_flush(p); goto retry; } - newtok(); + newtok(p); break; } - mb = ENC_CODERANGE_7BIT; - do { - if (!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN; - if (tokadd_mbchar(c) == -1) return 0; - c = nextc(); - } while (parser_is_identchar()); - switch (tok()[0]) { - case '@': case '$': - 
pushback(c); - break; - default: - if ((c == '!' || c == '?') && !peek('=')) { - tokadd(c); - } - else { - pushback(c); - } - } - tokfix(); - - { - int result = 0; + return parse_ident(p, c, cmd_state); +} - last_state = lex_state; - switch (tok()[0]) { - case '$': - lex_state = EXPR_END; - result = tGVAR; - break; - case '@': - lex_state = EXPR_END; - if (tok()[1] == '@') - result = tCVAR; - else - result = tIVAR; - break; +static enum yytokentype +yylex(YYSTYPE *lval, YYLTYPE *yylloc, struct parser_params *p) +{ + enum yytokentype t; - default: - if (toklast() == '!' || toklast() == '?') { - result = tFID; - } - else { - if (IS_lex_state(EXPR_FNAME)) { - if ((c = nextc()) == '=' && !peek('~') && !peek('>') && - (!peek('=') || (peek_n('>', 1)))) { - result = tIDENTIFIER; - tokadd(c); - tokfix(); - } - else { - pushback(c); - } - } - if (result == 0 && ISUPPER(tok()[0])) { - result = tCONSTANT; - } - else { - result = tIDENTIFIER; - } - } + p->lval = lval; + lval->val = Qundef; + t = parser_yylex(p); - if (IS_LABEL_POSSIBLE()) { - if (IS_LABEL_SUFFIX(0)) { - lex_state = EXPR_BEG; - nextc(); - set_yylval_name(TOK_INTERN(!ENC_SINGLE(mb))); - return tLABEL; - } - } - if (mb == ENC_CODERANGE_7BIT && !IS_lex_state(EXPR_DOT)) { - const struct kwtable *kw; - - /* See if it is a reserved word. 
*/ - kw = rb_reserved_word(tok(), toklen()); - if (kw) { - enum lex_state_e state = lex_state; - lex_state = kw->state; - if (state == EXPR_FNAME) { - set_yylval_name(rb_intern(kw->name)); - return kw->id[0]; - } - if (lex_state == EXPR_BEG) { - command_start = TRUE; - } - if (kw->id[0] == keyword_do) { - if (lpar_beg && lpar_beg == paren_nest) { - lpar_beg = 0; - --paren_nest; - return keyword_do_LAMBDA; - } - if (COND_P()) return keyword_do_cond; - if (CMDARG_P() && state != EXPR_CMDARG) - return keyword_do_block; - if (state & (EXPR_BEG | EXPR_ENDARG)) - return keyword_do_block; - return keyword_do; - } - if (state & (EXPR_BEG | EXPR_VALUE)) - return kw->id[0]; - else { - if (kw->id[0] != kw->id[1]) - lex_state = EXPR_BEG; - return kw->id[1]; - } - } - } + if (p->lex.strterm && (p->lex.strterm->flags & STRTERM_HEREDOC)) + RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(*yylloc); + else + RUBY_SET_YYLLOC(*yylloc); - if (IS_lex_state(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) { - if (cmd_state) { - lex_state = EXPR_CMDARG; - } - else { - lex_state = EXPR_ARG; - } - } - else if (lex_state == EXPR_FNAME) { - lex_state = EXPR_ENDFN; - } - else { - lex_state = EXPR_END; - } - } - { - ID ident = TOK_INTERN(!ENC_SINGLE(mb)); + if (has_delayed_token(p)) + dispatch_delayed_token(p, t); + else if (t != 0) + dispatch_scan_event(p, t); - set_yylval_name(ident); - if (last_state != EXPR_DOT && is_local_id(ident) && lvar_defined(ident)) { - lex_state = EXPR_END; - } - } - return result; - } + return t; } -#if YYPURE -static int -yylex(void *lval, void *p) -#else -yylex(void *p) -#endif +#define LVAR_USED ((ID)1 << (sizeof(ID) * CHAR_BIT - 1)) + +static NODE* +node_newnode(struct parser_params *p, enum node_type type, VALUE a0, VALUE a1, VALUE a2, const rb_code_location_t *loc) { - struct parser_params *parser = (struct parser_params*)p; - int t; + NODE *n = rb_ast_newnode(p->ast, type); -#if YYPURE - parser->parser_yylval = lval; - parser->parser_yylval->val = Qundef; -#endif - t = 
parser_yylex(parser); -#ifdef RIPPER - if (!NIL_P(parser->delayed)) { - ripper_dispatch_delayed_token(parser, t); - return t; - } - if (t != 0) - ripper_dispatch_scan_event(parser, t); -#endif + rb_node_init(n, type, a0, a1, a2); - return t; + nd_set_loc(n, loc); + nd_set_node_id(n, parser_get_node_id(p)); + return n; } -#ifndef RIPPER -static NODE* -node_newnode(struct parser_params *parser, enum node_type type, VALUE a0, VALUE a1, VALUE a2) +static NODE * +nd_set_loc(NODE *nd, const YYLTYPE *loc) { - NODE *n = (rb_node_newnode)(type, a0, a1, a2); - nd_set_line(n, ruby_sourceline); - return n; + nd->nd_loc = *loc; + nd_set_line(nd, loc->beg_pos.lineno); + return nd; } +#ifndef RIPPER static enum node_type nodetype(NODE *node) /* for debug */ { @@ -8078,26 +10033,23 @@ fixpos(NODE *node, NODE *orig) { if (!node) return; if (!orig) return; - if (orig == (NODE*)1) return; nd_set_line(node, nd_line(orig)); } static void -parser_warning(struct parser_params *parser, NODE *node, const char *mesg) +parser_warning(struct parser_params *p, NODE *node, const char *mesg) { - rb_compile_warning(ruby_sourcefile, nd_line(node), "%s", mesg); + rb_compile_warning(p->ruby_sourcefile, nd_line(node), "%s", mesg); } -#define parser_warning(node, mesg) parser_warning(parser, (node), (mesg)) static void -parser_warn(struct parser_params *parser, NODE *node, const char *mesg) +parser_warn(struct parser_params *p, NODE *node, const char *mesg) { - rb_compile_warn(ruby_sourcefile, nd_line(node), "%s", mesg); + rb_compile_warn(p->ruby_sourcefile, nd_line(node), "%s", mesg); } -#define parser_warn(node, mesg) parser_warn(parser, (node), (mesg)) static NODE* -block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) +block_append(struct parser_params *p, NODE *head, NODE *tail) { NODE *end, *h = head, *nd; @@ -8111,12 +10063,11 @@ block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) case NODE_TRUE: case NODE_FALSE: case NODE_NIL: - parser_warning(h, "unused 
literal ignored"); + parser_warning(p, h, "unused literal ignored"); return tail; default: - h = end = NEW_BLOCK(head); + h = end = NEW_BLOCK(head, &head->nd_loc); end->nd_end = end; - fixpos(end, head); head = end; break; case NODE_BLOCK: @@ -8132,7 +10083,7 @@ block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) case NODE_REDO: case NODE_RETRY: if (RTEST(ruby_verbose)) { - parser_warning(tail, "statement not reached"); + parser_warning(p, tail, "statement not reached"); } break; @@ -8140,22 +10091,23 @@ block_append_gen(struct parser_params *parser, NODE *head, NODE *tail) break; } - if (nd_type(tail) != NODE_BLOCK) { - tail = NEW_BLOCK(tail); + if (!nd_type_p(tail, NODE_BLOCK)) { + tail = NEW_BLOCK(tail, &tail->nd_loc); tail->nd_end = tail; } end->nd_next = tail; h->nd_end = tail->nd_end; + nd_set_last_loc(head, nd_last_loc(tail)); return head; } /* append item to the list */ static NODE* -list_append_gen(struct parser_params *parser, NODE *list, NODE *item) +list_append(struct parser_params *p, NODE *list, NODE *item) { NODE *last; - if (list == 0) return NEW_LIST(item); + if (list == 0) return NEW_LIST(item, &item->nd_loc); if (list->nd_next) { last = list->nd_next->nd_end; } @@ -8164,14 +10116,17 @@ list_append_gen(struct parser_params *parser, NODE *list, NODE *item) } list->nd_alen += 1; - last->nd_next = NEW_LIST(item); + last->nd_next = NEW_LIST(item, &item->nd_loc); list->nd_next->nd_end = last->nd_next; + + nd_set_last_loc(list, nd_last_loc(item)); + return list; } /* concat two lists */ static NODE* -list_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) +list_concat(NODE *head, NODE *tail) { NODE *last; @@ -8191,15 +10146,17 @@ list_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) head->nd_next->nd_end = tail; } + nd_set_last_loc(head, nd_last_loc(tail)); + return head; } static int -literal_concat0(struct parser_params *parser, VALUE head, VALUE tail) +literal_concat0(struct parser_params *p, VALUE 
head, VALUE tail) { if (NIL_P(tail)) return 1; if (!rb_enc_compatible(head, tail)) { - compile_error(PARSER_ARG "string literal encodings differ (%s / %s)", + compile_error(p, "string literal encodings differ (%s / %s)", rb_enc_name(rb_enc_get(head)), rb_enc_name(rb_enc_get(tail))); rb_str_resize(head, 0); @@ -8210,12 +10167,24 @@ literal_concat0(struct parser_params *parser, VALUE head, VALUE tail) return 1; } +static VALUE +string_literal_head(enum node_type htype, NODE *head) +{ + if (htype != NODE_DSTR) return Qfalse; + if (head->nd_next) { + head = head->nd_next->nd_end->nd_head; + if (!head || !nd_type_p(head, NODE_STR)) return Qfalse; + } + const VALUE lit = head->nd_lit; + ASSUME(lit != Qfalse); + return lit; +} + /* concat two string literals */ static NODE * -literal_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) +literal_concat(struct parser_params *p, NODE *head, NODE *tail, const YYLTYPE *loc) { enum node_type htype; - NODE *headlast; VALUE lit; if (!head) return tail; @@ -8223,61 +10192,69 @@ literal_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) htype = nd_type(head); if (htype == NODE_EVSTR) { - NODE *node = NEW_DSTR(Qnil); - head = list_append(node, head); + head = new_dstr(p, head, loc); htype = NODE_DSTR; } + if (p->heredoc_indent > 0) { + switch (htype) { + case NODE_STR: + nd_set_type(head, NODE_DSTR); + case NODE_DSTR: + return list_append(p, head, tail); + default: + break; + } + } switch (nd_type(tail)) { case NODE_STR: - if (htype == NODE_DSTR && (headlast = head->nd_next->nd_end->nd_head) && - nd_type(headlast) == NODE_STR) { + if ((lit = string_literal_head(htype, head)) != Qfalse) { htype = NODE_STR; - lit = headlast->nd_lit; } else { lit = head->nd_lit; } if (htype == NODE_STR) { - if (!literal_concat0(parser, lit, tail->nd_lit)) { + if (!literal_concat0(p, lit, tail->nd_lit)) { error: - rb_gc_force_recycle((VALUE)head); - rb_gc_force_recycle((VALUE)tail); + rb_discard_node(p, head); + 
rb_discard_node(p, tail); return 0; } - rb_gc_force_recycle((VALUE)tail); + rb_discard_node(p, tail); } else { - list_append(head, tail); + list_append(p, head, tail); } break; case NODE_DSTR: if (htype == NODE_STR) { - if (!literal_concat0(parser, head->nd_lit, tail->nd_lit)) + if (!literal_concat0(p, head->nd_lit, tail->nd_lit)) goto error; tail->nd_lit = head->nd_lit; - rb_gc_force_recycle((VALUE)head); + rb_discard_node(p, head); head = tail; } else if (NIL_P(tail->nd_lit)) { append: head->nd_alen += tail->nd_alen - 1; - head->nd_next->nd_end->nd_next = tail->nd_next; - head->nd_next->nd_end = tail->nd_next->nd_end; - rb_gc_force_recycle((VALUE)tail); - } - else if (htype == NODE_DSTR && (headlast = head->nd_next->nd_end->nd_head) && - nd_type(headlast) == NODE_STR) { - lit = headlast->nd_lit; - if (!literal_concat0(parser, lit, tail->nd_lit)) + if (!head->nd_next) { + head->nd_next = tail->nd_next; + } + else if (tail->nd_next) { + head->nd_next->nd_end->nd_next = tail->nd_next; + head->nd_next->nd_end = tail->nd_next->nd_end; + } + rb_discard_node(p, tail); + } + else if ((lit = string_literal_head(htype, head)) != Qfalse) { + if (!literal_concat0(p, lit, tail->nd_lit)) goto error; tail->nd_lit = Qnil; goto append; } else { - nd_set_type(tail, NODE_ARRAY); - tail->nd_head = NEW_STR(tail->nd_lit); - list_concat(head, tail); + list_concat(head, NEW_NODE(NODE_LIST, NEW_STR(tail->nd_lit, loc), tail->nd_alen, tail->nd_next, loc)); } break; @@ -8286,240 +10263,718 @@ literal_concat_gen(struct parser_params *parser, NODE *head, NODE *tail) nd_set_type(head, NODE_DSTR); head->nd_alen = 1; } - list_append(head, tail); + list_append(p, head, tail); break; } return head; } static NODE * -evstr2dstr_gen(struct parser_params *parser, NODE *node) +evstr2dstr(struct parser_params *p, NODE *node) { - if (nd_type(node) == NODE_EVSTR) { - node = list_append(NEW_DSTR(Qnil), node); + if (nd_type_p(node, NODE_EVSTR)) { + node = new_dstr(p, node, &node->nd_loc); } return node; } 
static NODE * -new_evstr_gen(struct parser_params *parser, NODE *node) +new_evstr(struct parser_params *p, NODE *node, const YYLTYPE *loc) { NODE *head = node; if (node) { switch (nd_type(node)) { - case NODE_STR: case NODE_DSTR: case NODE_EVSTR: + case NODE_STR: + nd_set_type(node, NODE_DSTR); + return node; + case NODE_DSTR: + break; + case NODE_EVSTR: return node; } } - return NEW_EVSTR(head); + return NEW_EVSTR(head, loc); +} + +static NODE * +new_dstr(struct parser_params *p, NODE *node, const YYLTYPE *loc) +{ + VALUE lit = STR_NEW0(); + NODE *dstr = NEW_DSTR(lit, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, lit); + return list_append(p, dstr, node); } static NODE * -call_bin_op_gen(struct parser_params *parser, NODE *recv, ID id, NODE *arg1) +call_bin_op(struct parser_params *p, NODE *recv, ID id, NODE *arg1, + const YYLTYPE *op_loc, const YYLTYPE *loc) { + NODE *expr; value_expr(recv); value_expr(arg1); - return NEW_CALL(recv, id, NEW_LIST(arg1)); + expr = NEW_OPCALL(recv, id, NEW_LIST(arg1, &arg1->nd_loc), loc); + nd_set_line(expr, op_loc->beg_pos.lineno); + return expr; } static NODE * -call_uni_op_gen(struct parser_params *parser, NODE *recv, ID id) +call_uni_op(struct parser_params *p, NODE *recv, ID id, const YYLTYPE *op_loc, const YYLTYPE *loc) { + NODE *opcall; value_expr(recv); - return NEW_CALL(recv, id, 0); + opcall = NEW_OPCALL(recv, id, 0, loc); + nd_set_line(opcall, op_loc->beg_pos.lineno); + return opcall; +} + +static NODE * +new_qcall(struct parser_params* p, ID atype, NODE *recv, ID mid, NODE *args, const YYLTYPE *op_loc, const YYLTYPE *loc) +{ + NODE *qcall = NEW_QCALL(atype, recv, mid, args, loc); + nd_set_line(qcall, op_loc->beg_pos.lineno); + return qcall; +} + +static NODE* +new_command_qcall(struct parser_params* p, ID atype, NODE *recv, ID mid, NODE *args, NODE *block, const YYLTYPE *op_loc, const YYLTYPE *loc) +{ + NODE *ret; + if (block) block_dup_check(p, args, block); + ret = new_qcall(p, atype, recv, mid, args, op_loc, loc); + if (block) 
ret = method_add_block(p, ret, block, loc); + fixpos(ret, recv); + return ret; } +#define nd_once_body(node) (nd_type_p((node), NODE_ONCE) ? (node)->nd_body : node) static NODE* -match_op_gen(struct parser_params *parser, NODE *node1, NODE *node2) +match_op(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *op_loc, const YYLTYPE *loc) { + NODE *n; + int line = op_loc->beg_pos.lineno; + value_expr(node1); value_expr(node2); - if (node1) { - switch (nd_type(node1)) { + if (node1 && (n = nd_once_body(node1)) != 0) { + switch (nd_type(n)) { case NODE_DREGX: - case NODE_DREGX_ONCE: - return NEW_MATCH2(node1, node2); + { + NODE *match = NEW_MATCH2(node1, node2, loc); + nd_set_line(match, line); + return match; + } case NODE_LIT: - if (TYPE(node1->nd_lit) == T_REGEXP) { - return NEW_MATCH2(node1, node2); + if (RB_TYPE_P(n->nd_lit, T_REGEXP)) { + const VALUE lit = n->nd_lit; + NODE *match = NEW_MATCH2(node1, node2, loc); + match->nd_args = reg_named_capture_assign(p, lit, loc); + nd_set_line(match, line); + return match; } } } - if (node2) { - switch (nd_type(node2)) { - case NODE_DREGX: - case NODE_DREGX_ONCE: - return NEW_MATCH3(node2, node1); + if (node2 && (n = nd_once_body(node2)) != 0) { + NODE *match3; + switch (nd_type(n)) { case NODE_LIT: - if (TYPE(node2->nd_lit) == T_REGEXP) { - return NEW_MATCH3(node2, node1); - } + if (!RB_TYPE_P(n->nd_lit, T_REGEXP)) break; + /* fallthru */ + case NODE_DREGX: + match3 = NEW_MATCH3(node2, node1, loc); + return match3; } } - return NEW_CALL(node1, tMATCH, NEW_LIST(node2)); + n = NEW_CALL(node1, tMATCH, NEW_LIST(node2, &node2->nd_loc), loc); + nd_set_line(n, line); + return n; +} + +# if WARN_PAST_SCOPE +static int +past_dvar_p(struct parser_params *p, ID id) +{ + struct vtable *past = p->lvtbl->past; + while (past) { + if (vtable_included(past, id)) return 1; + past = past->prev; + } + return 0; +} +# endif + +static int +numparam_nested_p(struct parser_params *p) +{ + struct local_vars *local = p->lvtbl; + NODE 
*outer = local->numparam.outer; + NODE *inner = local->numparam.inner; + if (outer || inner) { + NODE *used = outer ? outer : inner; + compile_error(p, "numbered parameter is already used in\n" + "%s:%d: %s block here", + p->ruby_sourcefile, nd_line(used), + outer ? "outer" : "inner"); + parser_show_error_line(p, &used->nd_loc); + return 1; + } + return 0; } static NODE* -gettable_gen(struct parser_params *parser, ID id) +gettable(struct parser_params *p, ID id, const YYLTYPE *loc) { + ID *vidp = NULL; + NODE *node; switch (id) { case keyword_self: - return NEW_SELF(); + return NEW_SELF(loc); case keyword_nil: - return NEW_NIL(); + return NEW_NIL(loc); case keyword_true: - return NEW_TRUE(); + return NEW_TRUE(loc); case keyword_false: - return NEW_FALSE(); + return NEW_FALSE(loc); case keyword__FILE__: - return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile), - rb_filesystem_encoding())); + { + VALUE file = p->ruby_sourcefile_string; + if (NIL_P(file)) + file = rb_str_new(0, 0); + else + file = rb_str_dup(file); + node = NEW_STR(file, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, file); + } + return node; case keyword__LINE__: - return NEW_LIT(INT2FIX(ruby_sourceline)); + return NEW_LIT(INT2FIX(p->tokline), loc); case keyword__ENCODING__: - return NEW_LIT(rb_enc_from_encoding(parser->enc)); + node = NEW_LIT(rb_enc_from_encoding(p->enc), loc); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit); + return node; + } switch (id_type(id)) { case ID_LOCAL: - if (dyna_in_block() && dvar_defined(id)) return NEW_DVAR(id); - if (local_id(id)) return NEW_LVAR(id); + if (dyna_in_block(p) && dvar_defined_ref(p, id, &vidp)) { + if (NUMPARAM_ID_P(id) && numparam_nested_p(p)) return 0; + if (id == p->cur_arg) { + compile_error(p, "circular argument reference - %"PRIsWARN, rb_id2str(id)); + return 0; + } + if (vidp) *vidp |= LVAR_USED; + node = NEW_DVAR(id, loc); + return node; + } + if (local_id_ref(p, id, &vidp)) { + if (id == p->cur_arg) { + compile_error(p, 
"circular argument reference - %"PRIsWARN, rb_id2str(id)); + return 0; + } + if (vidp) *vidp |= LVAR_USED; + node = NEW_LVAR(id, loc); + return node; + } + if (dyna_in_block(p) && NUMPARAM_ID_P(id) && + parser_numbered_param(p, NUMPARAM_ID_TO_IDX(id))) { + if (numparam_nested_p(p)) return 0; + node = NEW_DVAR(id, loc); + struct local_vars *local = p->lvtbl; + if (!local->numparam.current) local->numparam.current = node; + return node; + } +# if WARN_PAST_SCOPE + if (!p->ctxt.in_defined && RTEST(ruby_verbose) && past_dvar_p(p, id)) { + rb_warning1("possible reference to past scope - %"PRIsWARN, rb_id2str(id)); + } +# endif /* method call without arguments */ - return NEW_VCALL(id); + return NEW_VCALL(id, loc); case ID_GLOBAL: - return NEW_GVAR(id); + return NEW_GVAR(id, loc); case ID_INSTANCE: - return NEW_IVAR(id); + return NEW_IVAR(id, loc); case ID_CONST: - return NEW_CONST(id); + return NEW_CONST(id, loc); case ID_CLASS: - return NEW_CVAR(id); + return NEW_CVAR(id, loc); } - compile_error(PARSER_ARG "identifier %s is not valid to get", rb_id2name(id)); + compile_error(p, "identifier %"PRIsVALUE" is not valid to get", rb_id2str(id)); return 0; } + +static NODE * +opt_arg_append(NODE *opt_list, NODE *opt) +{ + NODE *opts = opt_list; + opts->nd_loc.end_pos = opt->nd_loc.end_pos; + + while (opts->nd_next) { + opts = opts->nd_next; + opts->nd_loc.end_pos = opt->nd_loc.end_pos; + } + opts->nd_next = opt; + + return opt_list; +} + +static NODE * +kwd_append(NODE *kwlist, NODE *kw) +{ + if (kwlist) { + NODE *kws = kwlist; + kws->nd_loc.end_pos = kw->nd_loc.end_pos; + while (kws->nd_next) { + kws = kws->nd_next; + kws->nd_loc.end_pos = kw->nd_loc.end_pos; + } + kws->nd_next = kw; + } + return kwlist; +} + +static NODE * +new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc) +{ + return NEW_DEFINED(remove_begin_all(expr), loc); +} + +static NODE* +symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol) +{ + enum node_type type = nd_type(symbol); 
+ switch (type) { + case NODE_DSTR: + nd_set_type(symbol, NODE_DSYM); + break; + case NODE_STR: + nd_set_type(symbol, NODE_LIT); + RB_OBJ_WRITTEN(p->ast, Qnil, symbol->nd_lit = rb_str_intern(symbol->nd_lit)); + break; + default: + compile_error(p, "unexpected node as symbol: %s", ruby_node_name(type)); + } + return list_append(p, symbols, symbol); +} + +static NODE * +new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc) +{ + NODE *list, *prev; + VALUE lit; + + if (!node) { + node = NEW_LIT(reg_compile(p, STR_NEW0(), options), loc); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit); + return node; + } + switch (nd_type(node)) { + case NODE_STR: + { + VALUE src = node->nd_lit; + nd_set_type(node, NODE_LIT); + nd_set_loc(node, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit = reg_compile(p, src, options)); + } + break; + default: + lit = STR_NEW0(); + node = NEW_NODE(NODE_DSTR, lit, 1, NEW_LIST(node, loc), loc); + RB_OBJ_WRITTEN(p->ast, Qnil, lit); + /* fall through */ + case NODE_DSTR: + nd_set_type(node, NODE_DREGX); + nd_set_loc(node, loc); + node->nd_cflag = options & RE_OPTION_MASK; + if (!NIL_P(node->nd_lit)) reg_fragment_check(p, node->nd_lit, options); + for (list = (prev = node)->nd_next; list; list = list->nd_next) { + NODE *frag = list->nd_head; + enum node_type type = nd_type(frag); + if (type == NODE_STR || (type == NODE_DSTR && !frag->nd_next)) { + VALUE tail = frag->nd_lit; + if (reg_fragment_check(p, tail, options) && prev && !NIL_P(prev->nd_lit)) { + VALUE lit = prev == node ? 
prev->nd_lit : prev->nd_head->nd_lit; + if (!literal_concat0(p, lit, tail)) { + return NEW_NIL(loc); /* dummy node on error */ + } + rb_str_resize(tail, 0); + prev->nd_next = list->nd_next; + rb_discard_node(p, list->nd_head); + rb_discard_node(p, list); + list = prev; + } + else { + prev = list; + } + } + else { + prev = 0; + } + } + if (!node->nd_next) { + VALUE src = node->nd_lit; + nd_set_type(node, NODE_LIT); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit = reg_compile(p, src, options)); + } + if (options & RE_OPTION_ONCE) { + node = NEW_NODE(NODE_ONCE, 0, node, 0, loc); + } + break; + } + return node; +} + +static NODE * +new_kw_arg(struct parser_params *p, NODE *k, const YYLTYPE *loc) +{ + if (!k) return 0; + return NEW_KW_ARG(0, (k), loc); +} + +static NODE * +new_xstring(struct parser_params *p, NODE *node, const YYLTYPE *loc) +{ + if (!node) { + VALUE lit = STR_NEW0(); + NODE *xstr = NEW_XSTR(lit, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, lit); + return xstr; + } + switch (nd_type(node)) { + case NODE_STR: + nd_set_type(node, NODE_XSTR); + nd_set_loc(node, loc); + break; + case NODE_DSTR: + nd_set_type(node, NODE_DXSTR); + nd_set_loc(node, loc); + break; + default: + node = NEW_NODE(NODE_DXSTR, Qnil, 1, NEW_LIST(node, loc), loc); + break; + } + return node; +} + +static void +check_literal_when(struct parser_params *p, NODE *arg, const YYLTYPE *loc) +{ + VALUE lit; + + if (!arg || !p->case_labels) return; + + lit = rb_node_case_when_optimizable_literal(arg); + if (lit == Qundef) return; + if (nd_type_p(arg, NODE_STR)) { + RB_OBJ_WRITTEN(p->ast, Qnil, arg->nd_lit = lit); + } + + if (NIL_P(p->case_labels)) { + p->case_labels = rb_obj_hide(rb_hash_new()); + } + else { + VALUE line = rb_hash_lookup(p->case_labels, lit); + if (!NIL_P(line)) { + rb_warning1("duplicated `when' clause with line %d is ignored", + WARN_IVAL(line)); + return; + } + } + rb_hash_aset(p->case_labels, lit, INT2NUM(p->ruby_sourceline)); +} + #else /* !RIPPER */ static int -id_is_var_gen(struct 
parser_params *parser, ID id) +id_is_var(struct parser_params *p, ID id) { if (is_notop_id(id)) { switch (id & ID_SCOPE_MASK) { case ID_GLOBAL: case ID_INSTANCE: case ID_CONST: case ID_CLASS: return 1; case ID_LOCAL: - if (dyna_in_block() && dvar_defined(id)) return 1; - if (local_id(id)) return 1; + if (dyna_in_block(p)) { + if (NUMPARAM_ID_P(id) || dvar_defined(p, id)) return 1; + } + if (local_id(p, id)) return 1; /* method call without arguments */ return 0; } } - compile_error(PARSER_ARG "identifier %s is not valid to get", rb_id2name(id)); + compile_error(p, "identifier %"PRIsVALUE" is not valid to get", rb_id2str(id)); return 0; } -#endif /* !RIPPER */ -#if PARSER_DEBUG -static const char * -lex_state_name(enum lex_state_e state) +static VALUE +new_regexp(struct parser_params *p, VALUE re, VALUE opt, const YYLTYPE *loc) { - static const char names[][12] = { - "EXPR_BEG", "EXPR_END", "EXPR_ENDARG", "EXPR_ENDFN", "EXPR_ARG", - "EXPR_CMDARG", "EXPR_MID", "EXPR_FNAME", "EXPR_DOT", "EXPR_CLASS", - "EXPR_VALUE", - }; + VALUE src = 0, err; + int options = 0; + if (ripper_is_node_yylval(re)) { + src = RNODE(re)->nd_cval; + re = RNODE(re)->nd_rval; + } + if (ripper_is_node_yylval(opt)) { + options = (int)RNODE(opt)->nd_tag; + opt = RNODE(opt)->nd_rval; + } + if (src && NIL_P(parser_reg_compile(p, src, options, &err))) { + compile_error(p, "%"PRIsVALUE, err); + } + return dispatch2(regexp_literal, re, opt); +} +#endif /* !RIPPER */ - if ((unsigned)state & ~(~0u << EXPR_MAX_STATE)) - return names[ffs(state)]; - return NULL; +static inline enum lex_state_e +parser_set_lex_state(struct parser_params *p, enum lex_state_e ls, int line) +{ + if (p->debug) { + ls = rb_parser_trace_lex_state(p, p->lex.state, ls, line); + } + return p->lex.state = ls; } -#endif -#ifdef RIPPER +#ifndef RIPPER +static const char rb_parser_lex_state_names[][8] = { + "BEG", "END", "ENDARG", "ENDFN", "ARG", + "CMDARG", "MID", "FNAME", "DOT", "CLASS", + "LABEL", "LABELED","FITEM", +}; + static VALUE 
-assignable_gen(struct parser_params *parser, VALUE lhs) -#else -static NODE* -assignable_gen(struct parser_params *parser, ID id, NODE *val) -#endif +append_lex_state_name(enum lex_state_e state, VALUE buf) { -#ifdef RIPPER - ID id = get_id(lhs); -# define assignable_result(x) get_value(lhs) -# define parser_yyerror(parser, x) dispatch1(assign_error, lhs) -#else -# define assignable_result(x) (x) -#endif - if (!id) return assignable_result(0); + int i, sep = 0; + unsigned int mask = 1; + static const char none[] = "NONE"; + + for (i = 0; i < EXPR_MAX_STATE; ++i, mask <<= 1) { + if ((unsigned)state & mask) { + if (sep) { + rb_str_cat(buf, "|", 1); + } + sep = 1; + rb_str_cat_cstr(buf, rb_parser_lex_state_names[i]); + } + } + if (!sep) { + rb_str_cat(buf, none, sizeof(none)-1); + } + return buf; +} + +static void +flush_debug_buffer(struct parser_params *p, VALUE out, VALUE str) +{ + VALUE mesg = p->debug_buffer; + + if (!NIL_P(mesg) && RSTRING_LEN(mesg)) { + p->debug_buffer = Qnil; + rb_io_puts(1, &mesg, out); + } + if (!NIL_P(str) && RSTRING_LEN(str)) { + rb_io_write(p->debug_output, str); + } +} + +enum lex_state_e +rb_parser_trace_lex_state(struct parser_params *p, enum lex_state_e from, + enum lex_state_e to, int line) +{ + VALUE mesg; + mesg = rb_str_new_cstr("lex_state: "); + append_lex_state_name(from, mesg); + rb_str_cat_cstr(mesg, " -> "); + append_lex_state_name(to, mesg); + rb_str_catf(mesg, " at line %d\n", line); + flush_debug_buffer(p, p->debug_output, mesg); + return to; +} + +VALUE +rb_parser_lex_state_name(enum lex_state_e state) +{ + return rb_fstring(append_lex_state_name(state, rb_str_new(0, 0))); +} + +static void +append_bitstack_value(stack_type stack, VALUE mesg) +{ + if (stack == 0) { + rb_str_cat_cstr(mesg, "0"); + } + else { + stack_type mask = (stack_type)1U << (CHAR_BIT * sizeof(stack_type) - 1); + for (; mask && !(stack & mask); mask >>= 1) continue; + for (; mask; mask >>= 1) rb_str_cat(mesg, stack & mask ? 
"1" : "0", 1); + } +} + +void +rb_parser_show_bitstack(struct parser_params *p, stack_type stack, + const char *name, int line) +{ + VALUE mesg = rb_sprintf("%s: ", name); + append_bitstack_value(stack, mesg); + rb_str_catf(mesg, " at line %d\n", line); + flush_debug_buffer(p, p->debug_output, mesg); +} + +void +rb_parser_fatal(struct parser_params *p, const char *fmt, ...) +{ + va_list ap; + VALUE mesg = rb_str_new_cstr("internal parser error: "); + + va_start(ap, fmt); + rb_str_vcatf(mesg, fmt, ap); + va_end(ap); + yyerror0(RSTRING_PTR(mesg)); + RB_GC_GUARD(mesg); + + mesg = rb_str_new(0, 0); + append_lex_state_name(p->lex.state, mesg); + compile_error(p, "lex.state: %"PRIsVALUE, mesg); + rb_str_resize(mesg, 0); + append_bitstack_value(p->cond_stack, mesg); + compile_error(p, "cond_stack: %"PRIsVALUE, mesg); + rb_str_resize(mesg, 0); + append_bitstack_value(p->cmdarg_stack, mesg); + compile_error(p, "cmdarg_stack: %"PRIsVALUE, mesg); + if (p->debug_output == rb_ractor_stdout()) + p->debug_output = rb_ractor_stderr(); + p->debug = TRUE; +} + +static YYLTYPE * +rb_parser_set_pos(YYLTYPE *yylloc, int sourceline, int beg_pos, int end_pos) +{ + yylloc->beg_pos.lineno = sourceline; + yylloc->beg_pos.column = beg_pos; + yylloc->end_pos.lineno = sourceline; + yylloc->end_pos.column = end_pos; + return yylloc; +} + +YYLTYPE * +rb_parser_set_location_from_strterm_heredoc(struct parser_params *p, rb_strterm_heredoc_t *here, YYLTYPE *yylloc) +{ + int sourceline = here->sourceline; + int beg_pos = (int)here->offset - here->quote + - (rb_strlen_lit("<<-") - !(here->func & STR_FUNC_INDENT)); + int end_pos = (int)here->offset + here->length + here->quote; + + return rb_parser_set_pos(yylloc, sourceline, beg_pos, end_pos); +} + +YYLTYPE * +rb_parser_set_location_of_none(struct parser_params *p, YYLTYPE *yylloc) +{ + int sourceline = p->ruby_sourceline; + int beg_pos = (int)(p->lex.ptok - p->lex.pbeg); + int end_pos = (int)(p->lex.ptok - p->lex.pbeg); + return 
rb_parser_set_pos(yylloc, sourceline, beg_pos, end_pos); +} + +YYLTYPE * +rb_parser_set_location(struct parser_params *p, YYLTYPE *yylloc) +{ + int sourceline = p->ruby_sourceline; + int beg_pos = (int)(p->lex.ptok - p->lex.pbeg); + int end_pos = (int)(p->lex.pcur - p->lex.pbeg); + return rb_parser_set_pos(yylloc, sourceline, beg_pos, end_pos); +} +#endif /* !RIPPER */ + +static int +assignable0(struct parser_params *p, ID id, const char **err) +{ + if (!id) return -1; switch (id) { case keyword_self: - yyerror("Can't change the value of self"); - goto error; + *err = "Can't change the value of self"; + return -1; case keyword_nil: - yyerror("Can't assign to nil"); - goto error; + *err = "Can't assign to nil"; + return -1; case keyword_true: - yyerror("Can't assign to true"); - goto error; + *err = "Can't assign to true"; + return -1; case keyword_false: - yyerror("Can't assign to false"); - goto error; + *err = "Can't assign to false"; + return -1; case keyword__FILE__: - yyerror("Can't assign to __FILE__"); - goto error; + *err = "Can't assign to __FILE__"; + return -1; case keyword__LINE__: - yyerror("Can't assign to __LINE__"); - goto error; + *err = "Can't assign to __LINE__"; + return -1; case keyword__ENCODING__: - yyerror("Can't assign to __ENCODING__"); - goto error; + *err = "Can't assign to __ENCODING__"; + return -1; } switch (id_type(id)) { case ID_LOCAL: - if (dyna_in_block()) { - if (dvar_curr(id)) { - return assignable_result(NEW_DASGN_CURR(id, val)); - } - else if (dvar_defined(id)) { - return assignable_result(NEW_DASGN(id, val)); - } - else if (local_id(id)) { - return assignable_result(NEW_LASGN(id, val)); - } - else { - dyna_var(id); - return assignable_result(NEW_DASGN_CURR(id, val)); + if (dyna_in_block(p)) { + if (p->max_numparam > NO_PARAM && NUMPARAM_ID_P(id)) { + compile_error(p, "Can't assign to numbered parameter _%d", + NUMPARAM_ID_TO_IDX(id)); + return -1; } + if (dvar_curr(p, id)) return NODE_DASGN; + if (dvar_defined(p, id)) return 
NODE_DASGN; + if (local_id(p, id)) return NODE_LASGN; + dyna_var(p, id); + return NODE_DASGN; } else { - if (!local_id(id)) { - local_var(id); - } - return assignable_result(NEW_LASGN(id, val)); + if (!local_id(p, id)) local_var(p, id); + return NODE_LASGN; } break; - case ID_GLOBAL: - return assignable_result(NEW_GASGN(id, val)); - case ID_INSTANCE: - return assignable_result(NEW_IASGN(id, val)); + case ID_GLOBAL: return NODE_GASGN; + case ID_INSTANCE: return NODE_IASGN; case ID_CONST: - if (!in_def && !in_single) - return assignable_result(NEW_CDECL(id, val, 0)); - yyerror("dynamic constant assignment"); - break; - case ID_CLASS: - return assignable_result(NEW_CVASGN(id, val)); + if (!p->ctxt.in_def) return NODE_CDECL; + *err = "dynamic constant assignment"; + return -1; + case ID_CLASS: return NODE_CVASGN; default: - compile_error(PARSER_ARG "identifier %s is not valid to set", rb_id2name(id)); + compile_error(p, "identifier %"PRIsVALUE" is not valid to set", rb_id2str(id)); } - error: - return assignable_result(0); -#undef assignable_result -#undef parser_yyerror + return -1; +} + +#ifndef RIPPER +static NODE* +assignable(struct parser_params *p, ID id, NODE *val, const YYLTYPE *loc) +{ + const char *err = 0; + int node_type = assignable0(p, id, &err); + switch (node_type) { + case NODE_DASGN: return NEW_DASGN(id, val, loc); + case NODE_LASGN: return NEW_LASGN(id, val, loc); + case NODE_GASGN: return NEW_GASGN(id, val, loc); + case NODE_IASGN: return NEW_IASGN(id, val, loc); + case NODE_CDECL: return NEW_CDECL(id, val, 0, loc); + case NODE_CVASGN: return NEW_CVASGN(id, val, loc); + } + if (err) yyerror1(loc, err); + return NEW_BEGIN(0, loc); } +#else +static VALUE +assignable(struct parser_params *p, VALUE lhs) +{ + const char *err = 0; + assignable0(p, get_id(lhs), &err); + if (err) lhs = assign_error(p, err, lhs); + return lhs; +} +#endif static int is_private_local_id(ID name) @@ -8532,163 +10987,469 @@ is_private_local_id(ID name) return RSTRING_PTR(s)[0] 
== '_'; } -#define LVAR_USED ((int)1 << (sizeof(int) * CHAR_BIT - 1)) - -static ID -shadowing_lvar_gen(struct parser_params *parser, ID name) +static int +shadowing_lvar_0(struct parser_params *p, ID name) { - if (is_private_local_id(name)) return name; - if (dyna_in_block()) { - if (dvar_curr(name)) { - yyerror("duplicated argument name"); + if (dyna_in_block(p)) { + if (dvar_curr(p, name)) { + if (is_private_local_id(name)) return 1; + yyerror0("duplicated argument name"); } - else if (dvar_defined_get(name) || local_id(name)) { - rb_warningS("shadowing outer local variable - %s", rb_id2name(name)); - vtable_add(lvtbl->vars, name); - if (lvtbl->used) { - vtable_add(lvtbl->used, (ID)ruby_sourceline | LVAR_USED); + else if (dvar_defined(p, name) || local_id(p, name)) { + vtable_add(p->lvtbl->vars, name); + if (p->lvtbl->used) { + vtable_add(p->lvtbl->used, (ID)p->ruby_sourceline | LVAR_USED); } + return 0; } } else { - if (local_id(name)) { - yyerror("duplicated argument name"); + if (local_id(p, name)) { + if (is_private_local_id(name)) return 1; + yyerror0("duplicated argument name"); } } + return 1; +} + +static ID +shadowing_lvar(struct parser_params *p, ID name) +{ + shadowing_lvar_0(p, name); return name; } static void -new_bv_gen(struct parser_params *parser, ID name) +new_bv(struct parser_params *p, ID name) { if (!name) return; if (!is_local_id(name)) { - compile_error(PARSER_ARG "invalid local variable - %s", - rb_id2name(name)); + compile_error(p, "invalid local variable - %"PRIsVALUE, + rb_id2str(name)); return; } - shadowing_lvar(name); - dyna_var(name); + if (!shadowing_lvar_0(p, name)) return; + dyna_var(p, name); } #ifndef RIPPER static NODE * -aryset_gen(struct parser_params *parser, NODE *recv, NODE *idx) +aryset(struct parser_params *p, NODE *recv, NODE *idx, const YYLTYPE *loc) { - if (recv && nd_type(recv) == NODE_SELF) - recv = (NODE *)1; - return NEW_ATTRASGN(recv, tASET, idx); + return NEW_ATTRASGN(recv, tASET, idx, loc); } static void 
-block_dup_check_gen(struct parser_params *parser, NODE *node1, NODE *node2) +block_dup_check(struct parser_params *p, NODE *node1, NODE *node2) { - if (node2 && node1 && nd_type(node1) == NODE_BLOCK_PASS) { - compile_error(PARSER_ARG "both block arg and actual block given"); + if (node2 && node1 && nd_type_p(node1, NODE_BLOCK_PASS)) { + compile_error(p, "both block arg and actual block given"); } } -ID -rb_id_attrset(ID id) -{ - id &= ~ID_SCOPE_MASK; - id |= ID_ATTRSET; - return id; -} - static NODE * -attrset_gen(struct parser_params *parser, NODE *recv, ID id) +attrset(struct parser_params *p, NODE *recv, ID atype, ID id, const YYLTYPE *loc) { - if (recv && nd_type(recv) == NODE_SELF) - recv = (NODE *)1; - return NEW_ATTRASGN(recv, rb_id_attrset(id), 0); + if (!CALL_Q_P(atype)) id = rb_id_attrset(id); + return NEW_ATTRASGN(recv, id, 0, loc); } static void -rb_backref_error_gen(struct parser_params *parser, NODE *node) +rb_backref_error(struct parser_params *p, NODE *node) { switch (nd_type(node)) { case NODE_NTH_REF: - compile_error(PARSER_ARG "Can't set variable $%ld", node->nd_nth); + compile_error(p, "Can't set variable $%ld", node->nd_nth); break; case NODE_BACK_REF: - compile_error(PARSER_ARG "Can't set variable $%c", (int)node->nd_nth); + compile_error(p, "Can't set variable $%c", (int)node->nd_nth); break; } } +#else +static VALUE +backref_error(struct parser_params *p, NODE *ref, VALUE expr) +{ + VALUE mesg = rb_str_new_cstr("Can't set variable "); + rb_str_append(mesg, ref->nd_cval); + return dispatch2(assign_error, mesg, expr); +} +#endif + +#ifndef RIPPER +static NODE * +arg_append(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *loc) +{ + if (!node1) return NEW_LIST(node2, &node2->nd_loc); + switch (nd_type(node1)) { + case NODE_LIST: + return list_append(p, node1, node2); + case NODE_BLOCK_PASS: + node1->nd_head = arg_append(p, node1->nd_head, node2, loc); + node1->nd_loc.end_pos = node1->nd_head->nd_loc.end_pos; + return node1; + 
case NODE_ARGSPUSH: + node1->nd_body = list_append(p, NEW_LIST(node1->nd_body, &node1->nd_body->nd_loc), node2); + node1->nd_loc.end_pos = node1->nd_body->nd_loc.end_pos; + nd_set_type(node1, NODE_ARGSCAT); + return node1; + case NODE_ARGSCAT: + if (!nd_type_p(node1->nd_body, NODE_LIST)) break; + node1->nd_body = list_append(p, node1->nd_body, node2); + node1->nd_loc.end_pos = node1->nd_body->nd_loc.end_pos; + return node1; + } + return NEW_ARGSPUSH(node1, node2, loc); +} static NODE * -arg_concat_gen(struct parser_params *parser, NODE *node1, NODE *node2) +arg_concat(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *loc) { if (!node2) return node1; switch (nd_type(node1)) { case NODE_BLOCK_PASS: if (node1->nd_head) - node1->nd_head = arg_concat(node1->nd_head, node2); + node1->nd_head = arg_concat(p, node1->nd_head, node2, loc); else - node1->nd_head = NEW_LIST(node2); + node1->nd_head = NEW_LIST(node2, loc); return node1; case NODE_ARGSPUSH: - if (nd_type(node2) != NODE_ARRAY) break; - node1->nd_body = list_concat(NEW_LIST(node1->nd_body), node2); + if (!nd_type_p(node2, NODE_LIST)) break; + node1->nd_body = list_concat(NEW_LIST(node1->nd_body, loc), node2); nd_set_type(node1, NODE_ARGSCAT); return node1; case NODE_ARGSCAT: - if (nd_type(node2) != NODE_ARRAY || - nd_type(node1->nd_body) != NODE_ARRAY) break; + if (!nd_type_p(node2, NODE_LIST) || + !nd_type_p(node1->nd_body, NODE_LIST)) break; node1->nd_body = list_concat(node1->nd_body, node2); return node1; } - return NEW_ARGSCAT(node1, node2); + return NEW_ARGSCAT(node1, node2, loc); } static NODE * -arg_append_gen(struct parser_params *parser, NODE *node1, NODE *node2) +last_arg_append(struct parser_params *p, NODE *args, NODE *last_arg, const YYLTYPE *loc) { - if (!node1) return NEW_LIST(node2); - switch (nd_type(node1)) { - case NODE_ARRAY: - return list_append(node1, node2); - case NODE_BLOCK_PASS: - node1->nd_head = arg_append(node1->nd_head, node2); - return node1; - case NODE_ARGSPUSH: - 
node1->nd_body = list_append(NEW_LIST(node1->nd_body), node2); - nd_set_type(node1, NODE_ARGSCAT); - return node1; + NODE *n1; + if ((n1 = splat_array(args)) != 0) { + return list_append(p, n1, last_arg); } - return NEW_ARGSPUSH(node1, node2); + return arg_append(p, args, last_arg, loc); +} + +static NODE * +rest_arg_append(struct parser_params *p, NODE *args, NODE *rest_arg, const YYLTYPE *loc) +{ + NODE *n1; + if ((nd_type_p(rest_arg, NODE_LIST)) && (n1 = splat_array(args)) != 0) { + return list_concat(n1, rest_arg); + } + return arg_concat(p, args, rest_arg, loc); } static NODE * splat_array(NODE* node) { - if (nd_type(node) == NODE_SPLAT) node = node->nd_head; - if (nd_type(node) == NODE_ARRAY) return node; + if (nd_type_p(node, NODE_SPLAT)) node = node->nd_head; + if (nd_type_p(node, NODE_LIST)) return node; return 0; } +static void +mark_lvar_used(struct parser_params *p, NODE *rhs) +{ + ID *vidp = NULL; + if (!rhs) return; + switch (nd_type(rhs)) { + case NODE_LASGN: + if (local_id_ref(p, rhs->nd_vid, &vidp)) { + if (vidp) *vidp |= LVAR_USED; + } + break; + case NODE_DASGN: + if (dvar_defined_ref(p, rhs->nd_vid, &vidp)) { + if (vidp) *vidp |= LVAR_USED; + } + break; +#if 0 + case NODE_MASGN: + for (rhs = rhs->nd_head; rhs; rhs = rhs->nd_next) { + mark_lvar_used(p, rhs->nd_head); + } + break; +#endif + } +} + +static NODE * +const_decl_path(struct parser_params *p, NODE **dest) +{ + NODE *n = *dest; + if (!nd_type_p(n, NODE_CALL)) { + const YYLTYPE *loc = &n->nd_loc; + VALUE path; + if (n->nd_vid) { + path = rb_id2str(n->nd_vid); + } + else { + n = n->nd_else; + path = rb_ary_new(); + for (; n && nd_type_p(n, NODE_COLON2); n = n->nd_head) { + rb_ary_push(path, rb_id2str(n->nd_mid)); + } + if (n && nd_type_p(n, NODE_CONST)) { + // Const::Name + rb_ary_push(path, rb_id2str(n->nd_vid)); + } + else if (n && nd_type_p(n, NODE_COLON3)) { + // ::Const::Name + rb_ary_push(path, rb_str_new(0, 0)); + } + else { + // expression::Name + rb_ary_push(path, 
rb_str_new_cstr("...")); + } + path = rb_ary_join(rb_ary_reverse(path), rb_str_new_cstr("::")); + path = rb_fstring(path); + } + *dest = n = NEW_LIT(path, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, n->nd_lit); + } + return n; +} + +extern VALUE rb_mRubyVMFrozenCore; + +static NODE * +make_shareable_node(struct parser_params *p, NODE *value, bool copy, const YYLTYPE *loc) +{ + NODE *fcore = NEW_LIT(rb_mRubyVMFrozenCore, loc); + + if (copy) { + return NEW_CALL(fcore, rb_intern("make_shareable_copy"), + NEW_LIST(value, loc), loc); + } + else { + return NEW_CALL(fcore, rb_intern("make_shareable"), + NEW_LIST(value, loc), loc); + } +} + +static NODE * +ensure_shareable_node(struct parser_params *p, NODE **dest, NODE *value, const YYLTYPE *loc) +{ + NODE *fcore = NEW_LIT(rb_mRubyVMFrozenCore, loc); + NODE *args = NEW_LIST(value, loc); + args = list_append(p, args, const_decl_path(p, dest)); + return NEW_CALL(fcore, rb_intern("ensure_shareable"), args, loc); +} + +static int is_static_content(NODE *node); + +static VALUE +shareable_literal_value(NODE *node) +{ + if (!node) return Qnil; + enum node_type type = nd_type(node); + switch (type) { + case NODE_TRUE: + return Qtrue; + case NODE_FALSE: + return Qfalse; + case NODE_NIL: + return Qnil; + case NODE_LIT: + return node->nd_lit; + default: + return Qundef; + } +} + +#ifndef SHAREABLE_BARE_EXPRESSION +#define SHAREABLE_BARE_EXPRESSION 1 +#endif + +static NODE * +shareable_literal_constant(struct parser_params *p, enum shareability shareable, + NODE **dest, NODE *value, const YYLTYPE *loc, size_t level) +{ +# define shareable_literal_constant_next(n) \ + shareable_literal_constant(p, shareable, dest, (n), &(n)->nd_loc, level+1) + VALUE lit = Qnil; + + if (!value) return 0; + enum node_type type = nd_type(value); + switch (type) { + case NODE_TRUE: + case NODE_FALSE: + case NODE_NIL: + case NODE_LIT: + return value; + + case NODE_DSTR: + if (shareable == shareable_literal) { + value = NEW_CALL(value, idUMinus, 0, loc); + } + 
return value; + + case NODE_STR: + lit = rb_fstring(value->nd_lit); + nd_set_type(value, NODE_LIT); + RB_OBJ_WRITE(p->ast, &value->nd_lit, lit); + return value; + + case NODE_ZLIST: + lit = rb_ary_new(); + OBJ_FREEZE_RAW(lit); + NODE *n = NEW_LIT(lit, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, n->nd_lit); + return n; + + case NODE_LIST: + lit = rb_ary_new(); + for (NODE *n = value; n; n = n->nd_next) { + NODE *elt = n->nd_head; + if (elt) { + elt = shareable_literal_constant_next(elt); + if (elt) { + n->nd_head = elt; + } + else if (RTEST(lit)) { + rb_ary_clear(lit); + lit = Qfalse; + } + } + if (RTEST(lit)) { + VALUE e = shareable_literal_value(elt); + if (e != Qundef) { + rb_ary_push(lit, e); + } + else { + rb_ary_clear(lit); + lit = Qnil; /* make shareable at runtime */ + } + } + } + break; + + case NODE_HASH: + if (!value->nd_brace) return 0; + lit = rb_hash_new(); + for (NODE *n = value->nd_head; n; n = n->nd_next->nd_next) { + NODE *key = n->nd_head; + NODE *val = n->nd_next->nd_head; + if (key) { + key = shareable_literal_constant_next(key); + if (key) { + n->nd_head = key; + } + else if (RTEST(lit)) { + rb_hash_clear(lit); + lit = Qfalse; + } + } + if (val) { + val = shareable_literal_constant_next(val); + if (val) { + n->nd_next->nd_head = val; + } + else if (RTEST(lit)) { + rb_hash_clear(lit); + lit = Qfalse; + } + } + if (RTEST(lit)) { + VALUE k = shareable_literal_value(key); + VALUE v = shareable_literal_value(val); + if (k != Qundef && v != Qundef) { + rb_hash_aset(lit, k, v); + } + else { + rb_hash_clear(lit); + lit = Qnil; /* make shareable at runtime */ + } + } + } + break; + + default: + if (shareable == shareable_literal && + (SHAREABLE_BARE_EXPRESSION || level > 0)) { + return ensure_shareable_node(p, dest, value, loc); + } + return 0; + } + + /* Array or Hash */ + if (!lit) return 0; + if (NIL_P(lit)) { + // if shareable_literal, all elements should have been ensured + // as shareable + value = make_shareable_node(p, value, false, loc); + } + else { 
+ value = NEW_LIT(rb_ractor_make_shareable(lit), loc); + RB_OBJ_WRITTEN(p->ast, Qnil, value->nd_lit); + } + + return value; +# undef shareable_literal_constant_next +} + +static NODE * +shareable_constant_value(struct parser_params *p, enum shareability shareable, + NODE *lhs, NODE *value, const YYLTYPE *loc) +{ + if (!value) return 0; + switch (shareable) { + case shareable_none: + return value; + + case shareable_literal: + { + NODE *lit = shareable_literal_constant(p, shareable, &lhs, value, loc, 0); + if (lit) return lit; + return value; + } + break; + + case shareable_copy: + case shareable_everything: + { + NODE *lit = shareable_literal_constant(p, shareable, &lhs, value, loc, 0); + if (lit) return lit; + return make_shareable_node(p, value, shareable == shareable_copy, loc); + } + break; + + default: + UNREACHABLE_RETURN(0); + } +} + static NODE * -node_assign_gen(struct parser_params *parser, NODE *lhs, NODE *rhs) +node_assign(struct parser_params *p, NODE *lhs, NODE *rhs, struct lex_context ctxt, const YYLTYPE *loc) { if (!lhs) return 0; switch (nd_type(lhs)) { + case NODE_CDECL: + rhs = shareable_constant_value(p, ctxt.shareable_constant_value, lhs, rhs, loc); + /* fallthru */ + case NODE_GASGN: case NODE_IASGN: - case NODE_IASGN2: case NODE_LASGN: case NODE_DASGN: - case NODE_DASGN_CURR: case NODE_MASGN: - case NODE_CDECL: case NODE_CVASGN: lhs->nd_value = rhs; + nd_set_loc(lhs, loc); break; case NODE_ATTRASGN: - case NODE_CALL: - lhs->nd_args = arg_append(lhs->nd_args, rhs); + lhs->nd_args = arg_append(p, lhs->nd_args, rhs, loc); + nd_set_loc(lhs, loc); break; default: @@ -8699,29 +11460,33 @@ node_assign_gen(struct parser_params *parser, NODE *lhs, NODE *rhs) return lhs; } -static int -value_expr_gen(struct parser_params *parser, NODE *node) +static NODE * +value_expr_check(struct parser_params *p, NODE *node) { - int cond = 0; + NODE *void_node = 0, *vn; if (!node) { rb_warning0("empty expression"); } while (node) { switch (nd_type(node)) { - case 
NODE_DEFN: - case NODE_DEFS: - parser_warning(node, "void value expression"); - return FALSE; - case NODE_RETURN: case NODE_BREAK: case NODE_NEXT: case NODE_REDO: case NODE_RETRY: - if (!cond) yyerror("void value expression"); - /* or "control never reach"? */ - return FALSE; + return void_node ? void_node : node; + + case NODE_CASE3: + if (!node->nd_body || !nd_type_p(node->nd_body, NODE_IN)) { + compile_error(p, "unexpected node"); + return NULL; + } + if (node->nd_body->nd_body) { + return NULL; + } + /* single line pattern matching */ + return void_node ? void_node : node; case NODE_BLOCK: while (node->nd_next) { @@ -8735,42 +11500,59 @@ value_expr_gen(struct parser_params *parser, NODE *node) break; case NODE_IF: + case NODE_UNLESS: if (!node->nd_body) { - node = node->nd_else; - break; + return NULL; } else if (!node->nd_else) { - node = node->nd_body; - break; + return NULL; } - if (!value_expr(node->nd_body)) return FALSE; + vn = value_expr_check(p, node->nd_body); + if (!vn) return NULL; + if (!void_node) void_node = vn; node = node->nd_else; break; case NODE_AND: case NODE_OR: - cond = 1; - node = node->nd_2nd; + node = node->nd_1st; break; + case NODE_LASGN: + case NODE_DASGN: + case NODE_MASGN: + mark_lvar_used(p, node); + return NULL; + default: - return TRUE; + return NULL; } } - return TRUE; + return NULL; } +static int +value_expr_gen(struct parser_params *p, NODE *node) +{ + NODE *void_node = value_expr_check(p, node); + if (void_node) { + yyerror1(&void_node->nd_loc, "void value expression"); + /* or "control never reach"? 
*/ + return FALSE; + } + return TRUE; +} static void -void_expr_gen(struct parser_params *parser, NODE *node) +void_expr(struct parser_params *p, NODE *node) { const char *useless = 0; if (!RTEST(ruby_verbose)) return; - if (!node) return; + if (!node || !(node = nd_once_body(node))) return; switch (nd_type(node)) { - case NODE_CALL: + case NODE_OPCALL: switch (node->nd_mid) { case '+': case '-': @@ -8811,7 +11593,6 @@ void_expr_gen(struct parser_params *parser, NODE *node) case NODE_STR: case NODE_DSTR: case NODE_DREGX: - case NODE_DREGX_ONCE: useless = "a literal"; break; case NODE_COLON2: @@ -8842,51 +11623,58 @@ void_expr_gen(struct parser_params *parser, NODE *node) } if (useless) { - int line = ruby_sourceline; - - ruby_sourceline = nd_line(node); - rb_warnS("possibly useless use of %s in void context", useless); - ruby_sourceline = line; + rb_warn1L(nd_line(node), "possibly useless use of %s in void context", WARN_S(useless)); } } -static void -void_stmts_gen(struct parser_params *parser, NODE *node) +static NODE * +void_stmts(struct parser_params *p, NODE *node) { - if (!RTEST(ruby_verbose)) return; - if (!node) return; - if (nd_type(node) != NODE_BLOCK) return; + NODE *const n = node; + if (!RTEST(ruby_verbose)) return n; + if (!node) return n; + if (!nd_type_p(node, NODE_BLOCK)) return n; - for (;;) { - if (!node->nd_next) return; - void_expr0(node->nd_head); + while (node->nd_next) { + void_expr(p, node->nd_head); node = node->nd_next; } + return n; } static NODE * remove_begin(NODE *node) { NODE **n = &node, *n1 = node; - while (n1 && nd_type(n1) == NODE_BEGIN && n1->nd_body) { + while (n1 && nd_type_p(n1, NODE_BEGIN) && n1->nd_body) { + *n = n1 = n1->nd_body; + } + return node; +} + +static NODE * +remove_begin_all(NODE *node) +{ + NODE **n = &node, *n1 = node; + while (n1 && nd_type_p(n1, NODE_BEGIN)) { *n = n1 = n1->nd_body; } return node; } static void -reduce_nodes_gen(struct parser_params *parser, NODE **body) +reduce_nodes(struct parser_params 
*p, NODE **body) { NODE *node = *body; if (!node) { - *body = NEW_NIL(); + *body = NEW_NIL(&NULL_LOC); return; } #define subnodes(n1, n2) \ ((!node->n1) ? (node->n2 ? (body = &node->n2, 1) : 0) : \ (!node->n2) ? (body = &node->n1, 1) : \ - (reduce_nodes(&node->n1), body = &node->n2, 1)) + (reduce_nodes(p, &node->n1), body = &node->n2, 1)) while (node) { int newline = (int)(node->flags & NODE_FL_NEWLINE); @@ -8907,6 +11695,7 @@ reduce_nodes_gen(struct parser_params *parser, NODE **body) body = &node->nd_end->nd_head; break; case NODE_IF: + case NODE_UNLESS: if (subnodes(nd_body, nd_else)) break; return; case NODE_CASE: @@ -8942,7 +11731,7 @@ is_static_content(NODE *node) switch (nd_type(node)) { case NODE_HASH: if (!(node = node->nd_head)) break; - case NODE_ARRAY: + case NODE_LIST: do { if (!is_static_content(node->nd_head)) return 0; } while ((node = node->nd_next) != 0); @@ -8951,7 +11740,7 @@ is_static_content(NODE *node) case NODE_NIL: case NODE_TRUE: case NODE_FALSE: - case NODE_ZARRAY: + case NODE_ZLIST: break; default: return 0; @@ -8960,16 +11749,12 @@ is_static_content(NODE *node) } static int -assign_in_cond(struct parser_params *parser, NODE *node) +assign_in_cond(struct parser_params *p, NODE *node) { switch (nd_type(node)) { case NODE_MASGN: - yyerror("multiple assignment in conditional"); - return 1; - case NODE_LASGN: case NODE_DASGN: - case NODE_DASGN_CURR: case NODE_GASGN: case NODE_IASGN: break; @@ -8981,56 +11766,28 @@ assign_in_cond(struct parser_params *parser, NODE *node) if (!node->nd_value) return 1; if (is_static_content(node->nd_value)) { /* reports always */ - parser_warn(node->nd_value, "found = in conditional, should be =="); + parser_warn(p, node->nd_value, "found `= literal' in conditional, should be =="); } return 1; } -static void -warn_unless_e_option(struct parser_params *parser, NODE *node, const char *str) -{ - if (!e_option_supplied(parser)) parser_warn(node, str); -} - -static void -warning_unless_e_option(struct parser_params 
*parser, NODE *node, const char *str) -{ - if (!e_option_supplied(parser)) parser_warning(node, str); -} +enum cond_type { + COND_IN_OP, + COND_IN_COND, + COND_IN_FF +}; -static void -fixup_nodes(NODE **rootnode) -{ - NODE *node, *next, *head; - - for (node = *rootnode; node; node = next) { - enum node_type type; - VALUE val; - - next = node->nd_next; - head = node->nd_head; - rb_gc_force_recycle((VALUE)node); - *rootnode = next; - switch (type = nd_type(head)) { - case NODE_DOT2: - case NODE_DOT3: - val = rb_range_new(head->nd_beg->nd_lit, head->nd_end->nd_lit, - type == NODE_DOT3); - rb_gc_force_recycle((VALUE)head->nd_beg); - rb_gc_force_recycle((VALUE)head->nd_end); - nd_set_type(head, NODE_LIT); - head->nd_lit = val; - break; - default: - break; - } +#define SWITCH_BY_COND_TYPE(t, w, arg) \ + switch (t) { \ + case COND_IN_OP: break; \ + case COND_IN_COND: rb_##w##0(arg "literal in condition"); break; \ + case COND_IN_FF: rb_##w##0(arg "literal in flip-flop"); break; \ } -} -static NODE *cond0(struct parser_params*,NODE*); +static NODE *cond0(struct parser_params*,NODE*,enum cond_type,const YYLTYPE*); static NODE* -range_op(struct parser_params *parser, NODE *node) +range_op(struct parser_params *p, NODE *node, const YYLTYPE *loc) { enum node_type type; @@ -9039,83 +11796,65 @@ range_op(struct parser_params *parser, NODE *node) type = nd_type(node); value_expr(node); if (type == NODE_LIT && FIXNUM_P(node->nd_lit)) { - warn_unless_e_option(parser, node, "integer literal in conditional range"); - return NEW_CALL(node, tEQ, NEW_LIST(NEW_GVAR(rb_intern("$.")))); - } - return cond0(parser, node); -} - -static int -literal_node(NODE *node) -{ - if (!node) return 1; /* same as NODE_NIL */ - switch (nd_type(node)) { - case NODE_LIT: - case NODE_STR: - case NODE_DSTR: - case NODE_EVSTR: - case NODE_DREGX: - case NODE_DREGX_ONCE: - case NODE_DSYM: - return 2; - case NODE_TRUE: - case NODE_FALSE: - case NODE_NIL: - return 1; + if (!e_option_supplied(p)) parser_warn(p, 
node, "integer literal in flip-flop"); + ID lineno = rb_intern("$."); + return NEW_CALL(node, tEQ, NEW_LIST(NEW_GVAR(lineno, loc), loc), loc); } - return 0; + return cond0(p, node, COND_IN_FF, loc); } static NODE* -cond0(struct parser_params *parser, NODE *node) +cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *loc) { if (node == 0) return 0; - assign_in_cond(parser, node); + if (!(node = nd_once_body(node))) return 0; + assign_in_cond(p, node); switch (nd_type(node)) { case NODE_DSTR: case NODE_EVSTR: case NODE_STR: - rb_warn0("string literal in condition"); + SWITCH_BY_COND_TYPE(type, warn, "string ") break; case NODE_DREGX: - case NODE_DREGX_ONCE: - warning_unless_e_option(parser, node, "regex literal in condition"); - return NEW_MATCH2(node, NEW_GVAR(rb_intern("$_"))); + if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warning, "regex ") + + return NEW_MATCH2(node, NEW_GVAR(idLASTLINE, loc), loc); case NODE_AND: case NODE_OR: - node->nd_1st = cond0(parser, node->nd_1st); - node->nd_2nd = cond0(parser, node->nd_2nd); + node->nd_1st = cond0(p, node->nd_1st, COND_IN_COND, loc); + node->nd_2nd = cond0(p, node->nd_2nd, COND_IN_COND, loc); break; case NODE_DOT2: case NODE_DOT3: - node->nd_beg = range_op(parser, node->nd_beg); - node->nd_end = range_op(parser, node->nd_end); - if (nd_type(node) == NODE_DOT2) nd_set_type(node,NODE_FLIP2); - else if (nd_type(node) == NODE_DOT3) nd_set_type(node, NODE_FLIP3); - if (!e_option_supplied(parser)) { - int b = literal_node(node->nd_beg); - int e = literal_node(node->nd_end); - if ((b == 1 && e == 1) || (b + e >= 2 && RTEST(ruby_verbose))) { - parser_warn(node, "range literal in condition"); - } - } + node->nd_beg = range_op(p, node->nd_beg, loc); + node->nd_end = range_op(p, node->nd_end, loc); + if (nd_type_p(node, NODE_DOT2)) nd_set_type(node,NODE_FLIP2); + else if (nd_type_p(node, NODE_DOT3)) nd_set_type(node, NODE_FLIP3); break; case NODE_DSYM: - parser_warning(node, "literal in 
condition"); + warn_symbol: + SWITCH_BY_COND_TYPE(type, warning, "symbol ") break; case NODE_LIT: - if (TYPE(node->nd_lit) == T_REGEXP) { - warn_unless_e_option(parser, node, "regex literal in condition"); + if (RB_TYPE_P(node->nd_lit, T_REGEXP)) { + if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warn, "regex ") nd_set_type(node, NODE_MATCH); } + else if (node->nd_lit == Qtrue || + node->nd_lit == Qfalse) { + /* booleans are OK, e.g., while true */ + } + else if (SYMBOL_P(node->nd_lit)) { + goto warn_symbol; + } else { - parser_warning(node, "literal in condition"); + SWITCH_BY_COND_TYPE(type, warning, "") } default: break; @@ -9124,41 +11863,78 @@ cond0(struct parser_params *parser, NODE *node) } static NODE* -cond_gen(struct parser_params *parser, NODE *node) +cond(struct parser_params *p, NODE *node, const YYLTYPE *loc) { if (node == 0) return 0; - return cond0(parser, node); + return cond0(p, node, COND_IN_COND, loc); +} + +static NODE* +method_cond(struct parser_params *p, NODE *node, const YYLTYPE *loc) +{ + if (node == 0) return 0; + return cond0(p, node, COND_IN_OP, loc); +} + +static NODE* +new_nil_at(struct parser_params *p, const rb_code_position_t *pos) +{ + YYLTYPE loc = {*pos, *pos}; + return NEW_NIL(&loc); +} + +static NODE* +new_if(struct parser_params *p, NODE *cc, NODE *left, NODE *right, const YYLTYPE *loc) +{ + if (!cc) return right; + cc = cond0(p, cc, COND_IN_COND, loc); + return newline_node(NEW_IF(cc, left, right, loc)); +} + +static NODE* +new_unless(struct parser_params *p, NODE *cc, NODE *left, NODE *right, const YYLTYPE *loc) +{ + if (!cc) return right; + cc = cond0(p, cc, COND_IN_COND, loc); + return newline_node(NEW_UNLESS(cc, left, right, loc)); } static NODE* -logop_gen(struct parser_params *parser, enum node_type type, NODE *left, NODE *right) +logop(struct parser_params *p, ID id, NODE *left, NODE *right, + const YYLTYPE *op_loc, const YYLTYPE *loc) { + enum node_type type = id == idAND || id == idANDOP ? 
NODE_AND : NODE_OR; + NODE *op; value_expr(left); - if (left && (enum node_type)nd_type(left) == type) { + if (left && nd_type_p(left, type)) { NODE *node = left, *second; - while ((second = node->nd_2nd) != 0 && (enum node_type)nd_type(second) == type) { + while ((second = node->nd_2nd) != 0 && nd_type_p(second, type)) { node = second; } - node->nd_2nd = NEW_NODE(type, second, right, 0); + node->nd_2nd = NEW_NODE(type, second, right, 0, loc); + nd_set_line(node->nd_2nd, op_loc->beg_pos.lineno); + left->nd_loc.end_pos = loc->end_pos; return left; } - return NEW_NODE(type, left, right, 0); + op = NEW_NODE(type, left, right, 0, loc); + nd_set_line(op, op_loc->beg_pos.lineno); + return op; } static void -no_blockarg(struct parser_params *parser, NODE *node) +no_blockarg(struct parser_params *p, NODE *node) { - if (node && nd_type(node) == NODE_BLOCK_PASS) { - compile_error(PARSER_ARG "block argument should not be given"); + if (node && nd_type_p(node, NODE_BLOCK_PASS)) { + compile_error(p, "block argument should not be given"); } } static NODE * -ret_args_gen(struct parser_params *parser, NODE *node) +ret_args(struct parser_params *p, NODE *node) { if (node) { - no_blockarg(parser, node); - if (nd_type(node) == NODE_ARRAY) { + no_blockarg(p, node); + if (nd_type_p(node, NODE_LIST)) { if (node->nd_next == 0) { node = node->nd_head; } @@ -9171,398 +11947,979 @@ ret_args_gen(struct parser_params *parser, NODE *node) } static NODE * -new_yield_gen(struct parser_params *parser, NODE *node) +new_yield(struct parser_params *p, NODE *node, const YYLTYPE *loc) { - if (node) no_blockarg(parser, node); + if (node) no_blockarg(p, node); - return NEW_YIELD(node); + return NEW_YIELD(node, loc); } -static NODE* -negate_lit(NODE *node) +static VALUE +negate_lit(struct parser_params *p, VALUE lit) { - switch (TYPE(node->nd_lit)) { - case T_FIXNUM: - node->nd_lit = LONG2FIX(-FIX2LONG(node->nd_lit)); - break; - case T_BIGNUM: - node->nd_lit = rb_funcall(node->nd_lit,tUMINUS,0,0); - 
break; - case T_FLOAT: + if (FIXNUM_P(lit)) { + return LONG2FIX(-FIX2LONG(lit)); + } + if (SPECIAL_CONST_P(lit)) { #if USE_FLONUM - if (FLONUM_P(node->nd_lit)) { - node->nd_lit = DBL2NUM(-RFLOAT_VALUE(node->nd_lit)); - } - else { - RFLOAT(node->nd_lit)->float_value = -RFLOAT_VALUE(node->nd_lit); + if (FLONUM_P(lit)) { + return DBL2NUM(-RFLOAT_VALUE(lit)); } -#else - RFLOAT(node->nd_lit)->float_value = -RFLOAT_VALUE(node->nd_lit); #endif + goto unknown; + } + switch (BUILTIN_TYPE(lit)) { + case T_BIGNUM: + BIGNUM_NEGATE(lit); + lit = rb_big_norm(lit); + break; + case T_RATIONAL: + RATIONAL_SET_NUM(lit, negate_lit(p, RRATIONAL(lit)->num)); break; + case T_COMPLEX: + RCOMPLEX_SET_REAL(lit, negate_lit(p, RCOMPLEX(lit)->real)); + RCOMPLEX_SET_IMAG(lit, negate_lit(p, RCOMPLEX(lit)->imag)); + break; + case T_FLOAT: + lit = DBL2NUM(-RFLOAT_VALUE(lit)); + break; + unknown: default: + rb_parser_fatal(p, "unknown literal type (%s) passed to negate_lit", + rb_builtin_class_name(lit)); break; } - return node; + return lit; } static NODE * arg_blk_pass(NODE *node1, NODE *node2) { if (node2) { + if (!node1) return node2; node2->nd_head = node1; + nd_set_first_lineno(node2, nd_first_lineno(node1)); + nd_set_first_column(node2, nd_first_column(node1)); return node2; } return node1; } +static bool +args_info_empty_p(struct rb_args_info *args) +{ + if (args->pre_args_num) return false; + if (args->post_args_num) return false; + if (args->rest_arg) return false; + if (args->opt_args) return false; + if (args->block_arg) return false; + if (args->kw_args) return false; + if (args->kw_rest_arg) return false; + return true; +} static NODE* -new_args_gen(struct parser_params *parser, NODE *m, NODE *o, ID r, NODE *p, NODE *tail) +new_args(struct parser_params *p, NODE *pre_args, NODE *opt_args, ID rest_arg, NODE *post_args, NODE *tail, const YYLTYPE *loc) { - int saved_line = ruby_sourceline; + int saved_line = p->ruby_sourceline; struct rb_args_info *args = tail->nd_ainfo; - 
args->pre_args_num = m ? rb_long2int(m->nd_plen) : 0; - args->pre_init = m ? m->nd_next : 0; + if (args->block_arg == idFWD_BLOCK) { + if (rest_arg) { + yyerror1(&tail->nd_loc, "... after rest argument"); + return tail; + } + rest_arg = idFWD_REST; + } + + args->pre_args_num = pre_args ? rb_long2int(pre_args->nd_plen) : 0; + args->pre_init = pre_args ? pre_args->nd_next : 0; - args->post_args_num = p ? rb_long2int(p->nd_plen) : 0; - args->post_init = p ? p->nd_next : 0; - args->first_post_arg = p ? p->nd_pid : 0; + args->post_args_num = post_args ? rb_long2int(post_args->nd_plen) : 0; + args->post_init = post_args ? post_args->nd_next : 0; + args->first_post_arg = post_args ? post_args->nd_pid : 0; - args->rest_arg = r; + args->rest_arg = rest_arg; - args->opt_args = o; + args->opt_args = opt_args; - ruby_sourceline = saved_line; + args->ruby2_keywords = rest_arg == idFWD_REST; + + p->ruby_sourceline = saved_line; + nd_set_loc(tail, loc); return tail; } static NODE* -new_args_tail_gen(struct parser_params *parser, NODE *k, ID kr, ID b) +new_args_tail(struct parser_params *p, NODE *kw_args, ID kw_rest_arg, ID block, const YYLTYPE *kw_rest_loc) +{ + int saved_line = p->ruby_sourceline; + NODE *node; + VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer(); + struct rb_args_info *args = ZALLOC(struct rb_args_info); + rb_imemo_tmpbuf_set_ptr(tmpbuf, args); + args->imemo = tmpbuf; + node = NEW_NODE(NODE_ARGS, 0, 0, args, &NULL_LOC); + RB_OBJ_WRITTEN(p->ast, Qnil, tmpbuf); + if (p->error_p) return node; + + args->block_arg = block; + args->kw_args = kw_args; + + if (kw_args) { + /* + * def foo(k1: 1, kr1:, k2: 2, **krest, &b) + * variable order: k1, kr1, k2, &b, internal_id, krest + * #=> <reorder> + * variable order: kr1, k1, k2, internal_id, krest, &b + */ + ID kw_bits = internal_id(p), *required_kw_vars, *kw_vars; + struct vtable *vtargs = p->lvtbl->args; + NODE *kwn = kw_args; + + if (block) block = vtargs->tbl[vtargs->pos-1]; + vtable_pop(vtargs, !!block + 
!!kw_rest_arg); + required_kw_vars = kw_vars = &vtargs->tbl[vtargs->pos]; + while (kwn) { + if (!NODE_REQUIRED_KEYWORD_P(kwn->nd_body)) + --kw_vars; + --required_kw_vars; + kwn = kwn->nd_next; + } + + for (kwn = kw_args; kwn; kwn = kwn->nd_next) { + ID vid = kwn->nd_body->nd_vid; + if (NODE_REQUIRED_KEYWORD_P(kwn->nd_body)) { + *required_kw_vars++ = vid; + } + else { + *kw_vars++ = vid; + } + } + + arg_var(p, kw_bits); + if (kw_rest_arg) arg_var(p, kw_rest_arg); + if (block) arg_var(p, block); + + args->kw_rest_arg = NEW_DVAR(kw_rest_arg, kw_rest_loc); + args->kw_rest_arg->nd_cflag = kw_bits; + } + else if (kw_rest_arg == idNil) { + args->no_kwarg = 1; + } + else if (kw_rest_arg) { + args->kw_rest_arg = NEW_DVAR(kw_rest_arg, kw_rest_loc); + } + + p->ruby_sourceline = saved_line; + return node; +} + +static NODE * +args_with_numbered(struct parser_params *p, NODE *args, int max_numparam) +{ + if (max_numparam > NO_PARAM) { + if (!args) { + YYLTYPE loc = RUBY_INIT_YYLLOC(); + args = new_args_tail(p, 0, 0, 0, 0); + nd_set_loc(args, &loc); + } + args->nd_ainfo->pre_args_num = max_numparam; + } + return args; +} + +static NODE* +new_array_pattern(struct parser_params *p, NODE *constant, NODE *pre_arg, NODE *aryptn, const YYLTYPE *loc) +{ + struct rb_ary_pattern_info *apinfo = aryptn->nd_apinfo; + + aryptn->nd_pconst = constant; + + if (pre_arg) { + NODE *pre_args = NEW_LIST(pre_arg, loc); + if (apinfo->pre_args) { + apinfo->pre_args = list_concat(pre_args, apinfo->pre_args); + } + else { + apinfo->pre_args = pre_args; + } + } + return aryptn; +} + +static NODE* +new_array_pattern_tail(struct parser_params *p, NODE *pre_args, int has_rest, ID rest_arg, NODE *post_args, const YYLTYPE *loc) +{ + int saved_line = p->ruby_sourceline; + NODE *node; + VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer(); + struct rb_ary_pattern_info *apinfo = ZALLOC(struct rb_ary_pattern_info); + rb_imemo_tmpbuf_set_ptr(tmpbuf, apinfo); + node = NEW_NODE(NODE_ARYPTN, 0, tmpbuf, apinfo, loc); + 
RB_OBJ_WRITTEN(p->ast, Qnil, tmpbuf); + + apinfo->pre_args = pre_args; + + if (has_rest) { + if (rest_arg) { + apinfo->rest_arg = assignable(p, rest_arg, 0, loc); + } + else { + apinfo->rest_arg = NODE_SPECIAL_NO_NAME_REST; + } + } + else { + apinfo->rest_arg = NULL; + } + + apinfo->post_args = post_args; + + p->ruby_sourceline = saved_line; + return node; +} + +static NODE* +new_find_pattern(struct parser_params *p, NODE *constant, NODE *fndptn, const YYLTYPE *loc) { - int saved_line = ruby_sourceline; - struct rb_args_info *args; - NODE *kw_rest_arg = 0; + fndptn->nd_pconst = constant; + + return fndptn; +} + +static NODE* +new_find_pattern_tail(struct parser_params *p, ID pre_rest_arg, NODE *args, ID post_rest_arg, const YYLTYPE *loc) +{ + int saved_line = p->ruby_sourceline; NODE *node; + VALUE tmpbuf = rb_imemo_tmpbuf_auto_free_pointer(); + struct rb_fnd_pattern_info *fpinfo = ZALLOC(struct rb_fnd_pattern_info); + rb_imemo_tmpbuf_set_ptr(tmpbuf, fpinfo); + node = NEW_NODE(NODE_FNDPTN, 0, tmpbuf, fpinfo, loc); + RB_OBJ_WRITTEN(p->ast, Qnil, tmpbuf); + + fpinfo->pre_rest_arg = pre_rest_arg ? assignable(p, pre_rest_arg, 0, loc) : NODE_SPECIAL_NO_NAME_REST; + fpinfo->args = args; + fpinfo->post_rest_arg = post_rest_arg ? 
assignable(p, post_rest_arg, 0, loc) : NODE_SPECIAL_NO_NAME_REST; + + p->ruby_sourceline = saved_line; + return node; +} - args = ALLOC(struct rb_args_info); - MEMZERO(args, struct rb_args_info, 1); - node = NEW_NODE(NODE_ARGS, 0, 0, args); +static NODE* +new_hash_pattern(struct parser_params *p, NODE *constant, NODE *hshptn, const YYLTYPE *loc) +{ + hshptn->nd_pconst = constant; + return hshptn; +} - args->block_arg = b; - args->kw_args = k; - if (k && !kr) kr = internal_id(); - if (kr) { - arg_var(kr); - kw_rest_arg = NEW_DVAR(kr); +static NODE* +new_hash_pattern_tail(struct parser_params *p, NODE *kw_args, ID kw_rest_arg, const YYLTYPE *loc) +{ + int saved_line = p->ruby_sourceline; + NODE *node, *kw_rest_arg_node; + + if (kw_rest_arg == idNil) { + kw_rest_arg_node = NODE_SPECIAL_NO_REST_KEYWORD; + } + else if (kw_rest_arg) { + kw_rest_arg_node = assignable(p, kw_rest_arg, 0, loc); + } + else { + kw_rest_arg_node = NULL; } - args->kw_rest_arg = kw_rest_arg; - ruby_sourceline = saved_line; + node = NEW_NODE(NODE_HSHPTN, 0, kw_args, kw_rest_arg_node, loc); + + p->ruby_sourceline = saved_line; return node; } static NODE* -dsym_node_gen(struct parser_params *parser, NODE *node) +dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc) { VALUE lit; if (!node) { - return NEW_LIT(ID2SYM(idNULL)); + return NEW_LIT(ID2SYM(idNULL), loc); } switch (nd_type(node)) { case NODE_DSTR: nd_set_type(node, NODE_DSYM); + nd_set_loc(node, loc); break; case NODE_STR: lit = node->nd_lit; - node->nd_lit = ID2SYM(rb_intern_str(lit)); + RB_OBJ_WRITTEN(p->ast, Qnil, node->nd_lit = ID2SYM(rb_intern_str(lit))); nd_set_type(node, NODE_LIT); + nd_set_loc(node, loc); break; default: - node = NEW_NODE(NODE_DSYM, Qnil, 1, NEW_LIST(node)); + node = NEW_NODE(NODE_DSYM, Qnil, 1, NEW_LIST(node, loc), loc); break; } return node; } + +static int +append_literal_keys(st_data_t k, st_data_t v, st_data_t h) +{ + NODE *node = (NODE *)v; + NODE **result = (NODE **)h; + node->nd_alen = 2; + 
node->nd_next->nd_end = node->nd_next; + node->nd_next->nd_next = 0; + if (*result) + list_concat(*result, node); + else + *result = node; + return ST_CONTINUE; +} + +static bool +hash_literal_key_p(VALUE k) +{ + switch (OBJ_BUILTIN_TYPE(k)) { + case T_NODE: + return false; + default: + return true; + } +} + +static int +literal_cmp(VALUE val, VALUE lit) +{ + if (val == lit) return 0; + if (!hash_literal_key_p(val) || !hash_literal_key_p(lit)) return -1; + return rb_iseq_cdhash_cmp(val, lit); +} + +static st_index_t +literal_hash(VALUE a) +{ + if (!hash_literal_key_p(a)) return (st_index_t)a; + return rb_iseq_cdhash_hash(a); +} + +static const struct st_hash_type literal_type = { + literal_cmp, + literal_hash, +}; + +static NODE * +remove_duplicate_keys(struct parser_params *p, NODE *hash) +{ + st_table *literal_keys = st_init_table_with_size(&literal_type, hash->nd_alen / 2); + NODE *result = 0; + NODE *last_expr = 0; + rb_code_location_t loc = hash->nd_loc; + while (hash && hash->nd_head && hash->nd_next) { + NODE *head = hash->nd_head; + NODE *value = hash->nd_next; + NODE *next = value->nd_next; + st_data_t key = (st_data_t)head; + st_data_t data; + value->nd_next = 0; + if (nd_type_p(head, NODE_LIT) && + st_delete(literal_keys, (key = (st_data_t)head->nd_lit, &key), &data)) { + NODE *dup_value = ((NODE *)data)->nd_next; + rb_compile_warn(p->ruby_sourcefile, nd_line((NODE *)data), + "key %+"PRIsVALUE" is duplicated and overwritten on line %d", + head->nd_lit, nd_line(head)); + if (dup_value == last_expr) { + value->nd_head = block_append(p, dup_value->nd_head, value->nd_head); + } + else { + last_expr->nd_head = block_append(p, dup_value->nd_head, last_expr->nd_head); + } + } + st_insert(literal_keys, (st_data_t)key, (st_data_t)hash); + last_expr = nd_type_p(head, NODE_LIT) ? 
value : head; + hash = next; + } + st_foreach(literal_keys, append_literal_keys, (st_data_t)&result); + st_free_table(literal_keys); + if (hash) { + if (!result) result = hash; + else list_concat(result, hash); + } + result->nd_loc = loc; + return result; +} + +static NODE * +new_hash(struct parser_params *p, NODE *hash, const YYLTYPE *loc) +{ + if (hash) hash = remove_duplicate_keys(p, hash); + return NEW_HASH(hash, loc); +} +#endif + +static void +error_duplicate_pattern_variable(struct parser_params *p, ID id, const YYLTYPE *loc) +{ + if (is_private_local_id(id)) { + return; + } + if (st_is_member(p->pvtbl, id)) { + yyerror1(loc, "duplicated variable name"); + } + else { + st_insert(p->pvtbl, (st_data_t)id, 0); + } +} + +static void +error_duplicate_pattern_key(struct parser_params *p, VALUE key, const YYLTYPE *loc) +{ + if (!p->pktbl) { + p->pktbl = st_init_numtable(); + } + else if (st_is_member(p->pktbl, key)) { + yyerror1(loc, "duplicated key name"); + return; + } + st_insert(p->pktbl, (st_data_t)key, 0); +} + +#ifndef RIPPER +static NODE * +new_unique_key_hash(struct parser_params *p, NODE *hash, const YYLTYPE *loc) +{ + return NEW_HASH(hash, loc); +} #endif /* !RIPPER */ #ifndef RIPPER static NODE * -new_op_assign_gen(struct parser_params *parser, NODE *lhs, ID op, NODE *rhs) +new_op_assign(struct parser_params *p, NODE *lhs, ID op, NODE *rhs, struct lex_context ctxt, const YYLTYPE *loc) { NODE *asgn; if (lhs) { ID vid = lhs->nd_vid; + YYLTYPE lhs_loc = lhs->nd_loc; + int shareable = ctxt.shareable_constant_value; + if (shareable) { + switch (nd_type(lhs)) { + case NODE_CDECL: + case NODE_COLON2: + case NODE_COLON3: + break; + default: + shareable = 0; + break; + } + } if (op == tOROP) { + rhs = shareable_constant_value(p, shareable, lhs, rhs, &rhs->nd_loc); lhs->nd_value = rhs; - asgn = NEW_OP_ASGN_OR(gettable(vid), lhs); - if (is_asgn_or_id(vid)) { - asgn->nd_aid = vid; + nd_set_loc(lhs, loc); + asgn = NEW_OP_ASGN_OR(gettable(p, vid, &lhs_loc), lhs, 
loc); + if (is_notop_id(vid)) { + switch (id_type(vid)) { + case ID_GLOBAL: + case ID_INSTANCE: + case ID_CLASS: + asgn->nd_aid = vid; + } } } else if (op == tANDOP) { + if (shareable) { + rhs = shareable_constant_value(p, shareable, lhs, rhs, &rhs->nd_loc); + } lhs->nd_value = rhs; - asgn = NEW_OP_ASGN_AND(gettable(vid), lhs); + nd_set_loc(lhs, loc); + asgn = NEW_OP_ASGN_AND(gettable(p, vid, &lhs_loc), lhs, loc); } else { asgn = lhs; - asgn->nd_value = NEW_CALL(gettable(vid), op, NEW_LIST(rhs)); + rhs = NEW_CALL(gettable(p, vid, &lhs_loc), op, NEW_LIST(rhs, &rhs->nd_loc), loc); + if (shareable) { + rhs = shareable_constant_value(p, shareable, lhs, rhs, &rhs->nd_loc); + } + asgn->nd_value = rhs; + nd_set_loc(asgn, loc); } } else { - asgn = NEW_BEGIN(0); + asgn = NEW_BEGIN(0, loc); } return asgn; } static NODE * -new_attr_op_assign_gen(struct parser_params *parser, NODE *lhs, ID attr, ID op, NODE *rhs) +new_ary_op_assign(struct parser_params *p, NODE *ary, + NODE *args, ID op, NODE *rhs, const YYLTYPE *args_loc, const YYLTYPE *loc) +{ + NODE *asgn; + + args = make_list(args, args_loc); + if (nd_type_p(args, NODE_BLOCK_PASS)) { + args = NEW_ARGSCAT(args, rhs, loc); + } + else { + args = arg_concat(p, args, rhs, loc); + } + asgn = NEW_OP_ASGN1(ary, op, args, loc); + fixpos(asgn, ary); + return asgn; +} + +static NODE * +new_attr_op_assign(struct parser_params *p, NODE *lhs, + ID atype, ID attr, ID op, NODE *rhs, const YYLTYPE *loc) +{ + NODE *asgn; + + asgn = NEW_OP_ASGN2(lhs, CALL_Q_P(atype), attr, op, rhs, loc); + fixpos(asgn, lhs); + return asgn; +} + +static NODE * +new_const_op_assign(struct parser_params *p, NODE *lhs, ID op, NODE *rhs, struct lex_context ctxt, const YYLTYPE *loc) { NODE *asgn; - if (op == tOROP) { - op = 0; + if (lhs) { + rhs = shareable_constant_value(p, ctxt.shareable_constant_value, lhs, rhs, loc); + asgn = NEW_OP_CDECL(lhs, op, rhs, loc); } - else if (op == tANDOP) { - op = 1; + else { + asgn = NEW_BEGIN(0, loc); } - asgn = 
NEW_OP_ASGN2(lhs, attr, op, rhs); fixpos(asgn, lhs); return asgn; } + +static NODE * +const_decl(struct parser_params *p, NODE *path, const YYLTYPE *loc) +{ + if (p->ctxt.in_def) { + yyerror1(loc, "dynamic constant assignment"); + } + return NEW_CDECL(0, 0, (path), loc); +} #else static VALUE -new_op_assign_gen(struct parser_params *parser, VALUE lhs, VALUE op, VALUE rhs) +const_decl(struct parser_params *p, VALUE path) +{ + if (p->ctxt.in_def) { + path = assign_error(p, "dynamic constant assignment", path); + } + return path; +} + +static VALUE +assign_error(struct parser_params *p, const char *mesg, VALUE a) { - return dispatch3(opassign, lhs, op, rhs); + a = dispatch2(assign_error, ERR_MESG(), a); + ripper_error(p); + return a; } static VALUE -new_attr_op_assign_gen(struct parser_params *parser, VALUE lhs, VALUE type, VALUE attr, VALUE op, VALUE rhs) +var_field(struct parser_params *p, VALUE a) +{ + return ripper_new_yylval(p, get_id(a), dispatch1(var_field, a), 0); +} +#endif + +#ifndef RIPPER +static NODE * +new_bodystmt(struct parser_params *p, NODE *head, NODE *rescue, NODE *rescue_else, NODE *ensure, const YYLTYPE *loc) { - VALUE recv = dispatch3(field, lhs, type, attr); - return dispatch3(opassign, recv, op, rhs); + NODE *result = head; + if (rescue) { + NODE *tmp = rescue_else ? 
rescue_else : rescue; + YYLTYPE rescue_loc = code_loc_gen(&head->nd_loc, &tmp->nd_loc); + + result = NEW_RESCUE(head, rescue, rescue_else, &rescue_loc); + nd_set_line(result, rescue->nd_loc.beg_pos.lineno); + } + else if (rescue_else) { + result = block_append(p, result, rescue_else); + } + if (ensure) { + result = NEW_ENSURE(result, ensure, loc); + } + fixpos(result, head); + return result; } #endif static void -warn_unused_var(struct parser_params *parser, struct local_vars *local) +warn_unused_var(struct parser_params *p, struct local_vars *local) { - int i, cnt; - ID *v, *u; + int cnt; if (!local->used) return; - v = local->vars->tbl; - u = local->used->tbl; cnt = local->used->pos; if (cnt != local->vars->pos) { - rb_bug("local->used->pos != local->vars->pos"); + rb_parser_fatal(p, "local->used->pos != local->vars->pos"); } - for (i = 0; i < cnt; ++i) { +#ifndef RIPPER + ID *v = local->vars->tbl; + ID *u = local->used->tbl; + for (int i = 0; i < cnt; ++i) { if (!v[i] || (u[i] & LVAR_USED)) continue; if (is_private_local_id(v[i])) continue; - rb_warn4S(ruby_sourcefile, (int)u[i], "assigned but unused variable - %s", rb_id2name(v[i])); + rb_warn1L((int)u[i], "assigned but unused variable - %"PRIsWARN, rb_id2str(v[i])); } +#endif } static void -local_push_gen(struct parser_params *parser, int inherit_dvars) +local_push(struct parser_params *p, int toplevel_scope) { struct local_vars *local; + int inherits_dvars = toplevel_scope && compile_for_eval; + int warn_unused_vars = RTEST(ruby_verbose); local = ALLOC(struct local_vars); - local->prev = lvtbl; + local->prev = p->lvtbl; local->args = vtable_alloc(0); - local->vars = vtable_alloc(inherit_dvars ? DVARS_INHERIT : DVARS_TOPSCOPE); - local->used = !inherit_dvars && RTEST(ruby_verbose) ? vtable_alloc(0) : 0; - lvtbl = local; + local->vars = vtable_alloc(inherits_dvars ? 
DVARS_INHERIT : DVARS_TOPSCOPE); +#ifndef RIPPER + if (toplevel_scope && compile_for_eval) warn_unused_vars = 0; + if (toplevel_scope && e_option_supplied(p)) warn_unused_vars = 0; + local->numparam.outer = 0; + local->numparam.inner = 0; + local->numparam.current = 0; +#endif + local->used = warn_unused_vars ? vtable_alloc(0) : 0; + +# if WARN_PAST_SCOPE + local->past = 0; +# endif + CMDARG_PUSH(0); + COND_PUSH(0); + p->lvtbl = local; } static void -local_pop_gen(struct parser_params *parser) +local_pop(struct parser_params *p) { - struct local_vars *local = lvtbl->prev; - if (lvtbl->used) { - warn_unused_var(parser, lvtbl); - vtable_free(lvtbl->used); + struct local_vars *local = p->lvtbl->prev; + if (p->lvtbl->used) { + warn_unused_var(p, p->lvtbl); + vtable_free(p->lvtbl->used); } - vtable_free(lvtbl->args); - vtable_free(lvtbl->vars); - xfree(lvtbl); - lvtbl = local; +# if WARN_PAST_SCOPE + while (p->lvtbl->past) { + struct vtable *past = p->lvtbl->past; + p->lvtbl->past = past->prev; + vtable_free(past); + } +# endif + vtable_free(p->lvtbl->args); + vtable_free(p->lvtbl->vars); + CMDARG_POP(); + COND_POP(); + ruby_sized_xfree(p->lvtbl, sizeof(*p->lvtbl)); + p->lvtbl = local; } #ifndef RIPPER -static ID* -vtable_tblcpy(ID *buf, const struct vtable *src) +static rb_ast_id_table_t * +local_tbl(struct parser_params *p) { - int i, cnt = vtable_size(src); + int cnt_args = vtable_size(p->lvtbl->args); + int cnt_vars = vtable_size(p->lvtbl->vars); + int cnt = cnt_args + cnt_vars; + int i, j; + rb_ast_id_table_t *tbl; - if (cnt > 0) { - buf[0] = cnt; - for (i = 0; i < cnt; i++) { - buf[i] = src->tbl[i]; - } - return buf; + if (cnt <= 0) return 0; + tbl = rb_ast_new_local_table(p->ast, cnt); + MEMCPY(tbl->ids, p->lvtbl->args->tbl, ID, cnt_args); + /* remove IDs duplicated to warn shadowing */ + for (i = 0, j = cnt_args; i < cnt_vars; ++i) { + ID id = p->lvtbl->vars->tbl[i]; + if (!vtable_included(p->lvtbl->args, id)) { + tbl->ids[j++] = id; + } } - return 0; + if (j < 
cnt) { + tbl = rb_ast_resize_latest_local_table(p->ast, j); + } + + return tbl; } -static ID* -local_tbl_gen(struct parser_params *parser) +static NODE* +node_newnode_with_locals(struct parser_params *p, enum node_type type, VALUE a1, VALUE a2, const rb_code_location_t *loc) { - int cnt = vtable_size(lvtbl->args) + vtable_size(lvtbl->vars); - ID *buf; + rb_ast_id_table_t *a0; + NODE *n; - if (cnt <= 0) return 0; - buf = ALLOC_N(ID, cnt + 1); - vtable_tblcpy(buf+1, lvtbl->args); - vtable_tblcpy(buf+vtable_size(lvtbl->args)+1, lvtbl->vars); - buf[0] = cnt; - return buf; + a0 = local_tbl(p); + n = NEW_NODE(type, a0, a1, a2, loc); + return n; } + #endif -static int -arg_var_gen(struct parser_params *parser, ID id) +static void +numparam_name(struct parser_params *p, ID id) { - vtable_add(lvtbl->args, id); - return vtable_size(lvtbl->args) - 1; + if (!NUMPARAM_ID_P(id)) return; + compile_error(p, "_%d is reserved for numbered parameter", + NUMPARAM_ID_TO_IDX(id)); } -static int -local_var_gen(struct parser_params *parser, ID id) +static void +arg_var(struct parser_params *p, ID id) { - vtable_add(lvtbl->vars, id); - if (lvtbl->used) { - vtable_add(lvtbl->used, (ID)ruby_sourceline); + numparam_name(p, id); + vtable_add(p->lvtbl->args, id); +} + +static void +local_var(struct parser_params *p, ID id) +{ + numparam_name(p, id); + vtable_add(p->lvtbl->vars, id); + if (p->lvtbl->used) { + vtable_add(p->lvtbl->used, (ID)p->ruby_sourceline); } - return vtable_size(lvtbl->vars) - 1; } static int -local_id_gen(struct parser_params *parser, ID id) +local_id_ref(struct parser_params *p, ID id, ID **vidrefp) { struct vtable *vars, *args, *used; - vars = lvtbl->vars; - args = lvtbl->args; - used = lvtbl->used; + vars = p->lvtbl->vars; + args = p->lvtbl->args; + used = p->lvtbl->used; - while (vars && POINTER_P(vars->prev)) { + while (vars && !DVARS_TERMINAL_P(vars->prev)) { vars = vars->prev; args = args->prev; if (used) used = used->prev; } if (vars && vars->prev == DVARS_INHERIT) 
{ - return rb_local_defined(id); + return rb_local_defined(id, p->parent_iseq); } else if (vtable_included(args, id)) { return 1; } else { int i = vtable_included(vars, id); - if (i && used) used->tbl[i-1] |= LVAR_USED; + if (i && used && vidrefp) *vidrefp = &used->tbl[i-1]; return i != 0; } } +static int +local_id(struct parser_params *p, ID id) +{ + return local_id_ref(p, id, NULL); +} + +static int +check_forwarding_args(struct parser_params *p) +{ + if (local_id(p, idFWD_REST) && +#if idFWD_KWREST + local_id(p, idFWD_KWREST) && +#endif + local_id(p, idFWD_BLOCK)) return TRUE; + compile_error(p, "unexpected ..."); + return FALSE; +} + +static void +add_forwarding_args(struct parser_params *p) +{ + arg_var(p, idFWD_REST); +#if idFWD_KWREST + arg_var(p, idFWD_KWREST); +#endif + arg_var(p, idFWD_BLOCK); +} + +#ifndef RIPPER +static NODE * +new_args_forward_call(struct parser_params *p, NODE *leading, const YYLTYPE *loc, const YYLTYPE *argsloc) +{ + NODE *splat = NEW_SPLAT(NEW_LVAR(idFWD_REST, loc), loc); +#if idFWD_KWREST + NODE *kwrest = list_append(p, NEW_LIST(0, loc), NEW_LVAR(idFWD_KWREST, loc)); +#endif + NODE *block = NEW_BLOCK_PASS(NEW_LVAR(idFWD_BLOCK, loc), loc); + NODE *args = leading ? 
rest_arg_append(p, leading, splat, argsloc) : splat; +#if idFWD_KWREST + args = arg_append(p, splat, new_hash(p, kwrest, loc), loc); +#endif + return arg_blk_pass(args, block); +} +#endif + +static NODE * +numparam_push(struct parser_params *p) +{ +#ifndef RIPPER + struct local_vars *local = p->lvtbl; + NODE *inner = local->numparam.inner; + if (!local->numparam.outer) { + local->numparam.outer = local->numparam.current; + } + local->numparam.inner = 0; + local->numparam.current = 0; + return inner; +#else + return 0; +#endif +} + +static void +numparam_pop(struct parser_params *p, NODE *prev_inner) +{ +#ifndef RIPPER + struct local_vars *local = p->lvtbl; + if (prev_inner) { + /* prefer first one */ + local->numparam.inner = prev_inner; + } + else if (local->numparam.current) { + /* current and inner are exclusive */ + local->numparam.inner = local->numparam.current; + } + if (p->max_numparam > NO_PARAM) { + /* current and outer are exclusive */ + local->numparam.current = local->numparam.outer; + local->numparam.outer = 0; + } + else { + /* no numbered parameter */ + local->numparam.current = 0; + } +#endif +} + static const struct vtable * -dyna_push_gen(struct parser_params *parser) +dyna_push(struct parser_params *p) { - lvtbl->args = vtable_alloc(lvtbl->args); - lvtbl->vars = vtable_alloc(lvtbl->vars); - if (lvtbl->used) { - lvtbl->used = vtable_alloc(lvtbl->used); + p->lvtbl->args = vtable_alloc(p->lvtbl->args); + p->lvtbl->vars = vtable_alloc(p->lvtbl->vars); + if (p->lvtbl->used) { + p->lvtbl->used = vtable_alloc(p->lvtbl->used); } - return lvtbl->args; + return p->lvtbl->args; } static void -dyna_pop_1(struct parser_params *parser) +dyna_pop_vtable(struct parser_params *p, struct vtable **vtblp) +{ + struct vtable *tmp = *vtblp; + *vtblp = tmp->prev; +# if WARN_PAST_SCOPE + if (p->past_scope_enabled) { + tmp->prev = p->lvtbl->past; + p->lvtbl->past = tmp; + return; + } +# endif + vtable_free(tmp); +} + +static void +dyna_pop_1(struct parser_params *p) { 
struct vtable *tmp; - if ((tmp = lvtbl->used) != 0) { - warn_unused_var(parser, lvtbl); - lvtbl->used = lvtbl->used->prev; + if ((tmp = p->lvtbl->used) != 0) { + warn_unused_var(p, p->lvtbl); + p->lvtbl->used = p->lvtbl->used->prev; vtable_free(tmp); } - tmp = lvtbl->args; - lvtbl->args = lvtbl->args->prev; - vtable_free(tmp); - tmp = lvtbl->vars; - lvtbl->vars = lvtbl->vars->prev; - vtable_free(tmp); + dyna_pop_vtable(p, &p->lvtbl->args); + dyna_pop_vtable(p, &p->lvtbl->vars); } static void -dyna_pop_gen(struct parser_params *parser, const struct vtable *lvargs) +dyna_pop(struct parser_params *p, const struct vtable *lvargs) { - while (lvtbl->args != lvargs) { - dyna_pop_1(parser); - if (!lvtbl->args) { - struct local_vars *local = lvtbl->prev; - xfree(lvtbl); - lvtbl = local; + while (p->lvtbl->args != lvargs) { + dyna_pop_1(p); + if (!p->lvtbl->args) { + struct local_vars *local = p->lvtbl->prev; + ruby_sized_xfree(p->lvtbl, sizeof(*p->lvtbl)); + p->lvtbl = local; } } - dyna_pop_1(parser); + dyna_pop_1(p); } static int -dyna_in_block_gen(struct parser_params *parser) +dyna_in_block(struct parser_params *p) { - return POINTER_P(lvtbl->vars) && lvtbl->vars->prev != DVARS_TOPSCOPE; + return !DVARS_TERMINAL_P(p->lvtbl->vars) && p->lvtbl->vars->prev != DVARS_TOPSCOPE; } static int -dvar_defined_gen(struct parser_params *parser, ID id, int get) +dvar_defined_ref(struct parser_params *p, ID id, ID **vidrefp) { struct vtable *vars, *args, *used; int i; - args = lvtbl->args; - vars = lvtbl->vars; - used = lvtbl->used; + args = p->lvtbl->args; + vars = p->lvtbl->vars; + used = p->lvtbl->used; - while (POINTER_P(vars)) { + while (!DVARS_TERMINAL_P(vars)) { if (vtable_included(args, id)) { return 1; } if ((i = vtable_included(vars, id)) != 0) { - if (used) used->tbl[i-1] |= LVAR_USED; + if (used && vidrefp) *vidrefp = &used->tbl[i-1]; return 1; } args = args->prev; vars = vars->prev; - if (get) used = 0; + if (!vidrefp) used = 0; if (used) used = used->prev; } - if (vars == 
DVARS_INHERIT) { - return rb_dvar_defined(id); + if (vars == DVARS_INHERIT && !NUMPARAM_ID_P(id)) { + return rb_dvar_defined(id, p->parent_iseq); } return 0; } static int -dvar_curr_gen(struct parser_params *parser, ID id) +dvar_defined(struct parser_params *p, ID id) { - return (vtable_included(lvtbl->args, id) || - vtable_included(lvtbl->vars, id)); + return dvar_defined_ref(p, id, NULL); +} + +static int +dvar_curr(struct parser_params *p, ID id) +{ + return (vtable_included(p->lvtbl->args, id) || + vtable_included(p->lvtbl->vars, id)); } -#ifndef RIPPER static void -reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) +reg_fragment_enc_error(struct parser_params* p, VALUE str, int c) +{ + compile_error(p, + "regexp encoding option '%c' differs from source encoding '%s'", + c, rb_enc_name(rb_enc_get(str))); +} + +#ifndef RIPPER +int +rb_reg_fragment_setenc(struct parser_params* p, VALUE str, int options) { int c = RE_OPTION_ENCODING_IDX(options); @@ -9583,7 +12940,7 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) } rb_enc_associate(str, rb_ascii8bit_encoding()); } - else if (parser->enc == rb_usascii_encoding()) { + else if (p->enc == rb_usascii_encoding()) { if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { /* raise in re.c */ rb_enc_associate(str, rb_usascii_encoding()); @@ -9592,24 +12949,28 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options) rb_enc_associate(str, rb_ascii8bit_encoding()); } } - return; + return 0; error: - compile_error(PARSER_ARG - "regexp encoding option '%c' differs from source encoding '%s'", - c, rb_enc_name(rb_enc_get(str))); + return c; +} + +static void +reg_fragment_setenc(struct parser_params* p, VALUE str, int options) +{ + int c = rb_reg_fragment_setenc(p, str, options); + if (c) reg_fragment_enc_error(p, str, c); } static int -reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options) +reg_fragment_check(struct 
parser_params* p, VALUE str, int options) { VALUE err; - reg_fragment_setenc(str, options); + reg_fragment_setenc(p, str, options); err = rb_reg_check_preprocess(str); if (err != Qnil) { err = rb_obj_as_string(err); - compile_error(PARSER_ARG "%s", RSTRING_PTR(err)); - RB_GC_GUARD(err); + compile_error(p, "%"PRIsVALUE, err); return 0; } return 1; @@ -9619,8 +12980,7 @@ typedef struct { struct parser_params* parser; rb_encoding *enc; NODE *succ_block; - NODE *fail_block; - int num; + const YYLTYPE *loc; } reg_named_capture_assign_t; static int @@ -9628,959 +12988,172 @@ reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end, int back_num, int *back_refs, OnigRegex regex, void *arg0) { reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0; - struct parser_params* parser = arg->parser; + struct parser_params* p = arg->parser; rb_encoding *enc = arg->enc; long len = name_end - name; const char *s = (const char *)name; ID var; + NODE *node, *succ; - arg->num++; - - if (arg->succ_block == 0) { - arg->succ_block = NEW_BEGIN(0); - arg->fail_block = NEW_BEGIN(0); - } - - if (!len || (*name != '_' && ISASCII(*name) && !rb_enc_islower(*name, enc)) || - (len < MAX_WORD_LENGTH && rb_reserved_word(s, (int)len)) || - !rb_enc_symname2_p(s, len, enc)) { + if (!len) return ST_CONTINUE; + if (rb_enc_symname_type(s, len, enc, (1U<<ID_LOCAL)) != ID_LOCAL) return ST_CONTINUE; + + var = intern_cstr(s, len, enc); + if (len < MAX_WORD_LENGTH && rb_reserved_word(s, (int)len)) { + if (!lvar_defined(p, var)) return ST_CONTINUE; } - var = rb_intern3(s, len, enc); - if (dvar_defined(var) || local_id(var)) { - rb_warningS("named capture conflicts a local variable - %s", - rb_id2name(var)); - } - arg->succ_block = block_append(arg->succ_block, - newline_node(node_assign(assignable(var,0), - NEW_CALL( - gettable(rb_intern("$~")), - idAREF, - NEW_LIST(NEW_LIT(ID2SYM(var)))) - ))); - arg->fail_block = block_append(arg->fail_block, - 
newline_node(node_assign(assignable(var,0), NEW_LIT(Qnil)))); + node = node_assign(p, assignable(p, var, 0, arg->loc), NEW_LIT(ID2SYM(var), arg->loc), NO_LEX_CTXT, arg->loc); + succ = arg->succ_block; + if (!succ) succ = NEW_BEGIN(0, arg->loc); + succ = block_append(p, succ, node); + arg->succ_block = succ; return ST_CONTINUE; } static NODE * -reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match) +reg_named_capture_assign(struct parser_params* p, VALUE regexp, const YYLTYPE *loc) { reg_named_capture_assign_t arg; - arg.parser = parser; + arg.parser = p; arg.enc = rb_enc_get(regexp); arg.succ_block = 0; - arg.fail_block = 0; - arg.num = 0; - onig_foreach_name(RREGEXP(regexp)->ptr, reg_named_capture_assign_iter, (void*)&arg); - - if (arg.num == 0) - return match; - - return - block_append( - newline_node(match), - NEW_IF(gettable(rb_intern("$~")), - block_append( - newline_node(arg.succ_block), - newline_node( - NEW_CALL( - gettable(rb_intern("$~")), - rb_intern("begin"), - NEW_LIST(NEW_LIT(INT2FIX(0)))))), - block_append( - newline_node(arg.fail_block), - newline_node( - NEW_LIT(Qnil))))); + arg.loc = loc; + onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg); + + if (!arg.succ_block) return 0; + return arg.succ_block->nd_next; +} + +static VALUE +parser_reg_compile(struct parser_params* p, VALUE str, int options) +{ + reg_fragment_setenc(p, str, options); + return rb_parser_reg_compile(p, str, options); +} + +VALUE +rb_parser_reg_compile(struct parser_params* p, VALUE str, int options) +{ + return rb_reg_compile(str, options & RE_OPTION_MASK, p->ruby_sourcefile, p->ruby_sourceline); } static VALUE -reg_compile_gen(struct parser_params* parser, VALUE str, int options) +reg_compile(struct parser_params* p, VALUE str, int options) { VALUE re; VALUE err; - reg_fragment_setenc(str, options); err = rb_errinfo(); - re = rb_reg_compile(str, options & RE_OPTION_MASK, ruby_sourcefile, ruby_sourceline); + re = 
parser_reg_compile(p, str, options); if (NIL_P(re)) { - ID mesg = rb_intern("mesg"); - VALUE m = rb_attr_get(rb_errinfo(), mesg); + VALUE m = rb_attr_get(rb_errinfo(), idMesg); rb_set_errinfo(err); - if (!NIL_P(err)) { - rb_str_append(rb_str_cat(rb_attr_get(err, mesg), "\n", 1), m); - } - else { - compile_error(PARSER_ARG "%s", RSTRING_PTR(m)); - } + compile_error(p, "%"PRIsVALUE, m); return Qnil; } return re; } - -void -rb_gc_mark_parser(void) -{ -} - -NODE* -rb_parser_append_print(VALUE vparser, NODE *node) -{ - NODE *prelude = 0; - NODE *scope = node; - struct parser_params *parser; - - if (!node) return node; - - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - - node = node->nd_body; - - if (nd_type(node) == NODE_PRELUDE) { - prelude = node; - node = node->nd_body; - } - - node = block_append(node, - NEW_FCALL(rb_intern("print"), - NEW_ARRAY(NEW_GVAR(rb_intern("$_"))))); - if (prelude) { - prelude->nd_body = node; - scope->nd_body = prelude; - } - else { - scope->nd_body = node; - } - - return scope; -} - -NODE * -rb_parser_while_loop(VALUE vparser, NODE *node, int chop, int split) -{ - NODE *prelude = 0; - NODE *scope = node; - struct parser_params *parser; - - if (!node) return node; - - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - - node = node->nd_body; - - if (nd_type(node) == NODE_PRELUDE) { - prelude = node; - node = node->nd_body; - } - if (split) { - node = block_append(NEW_GASGN(rb_intern("$F"), - NEW_CALL(NEW_GVAR(rb_intern("$_")), - rb_intern("split"), 0)), - node); - } - if (chop) { - node = block_append(NEW_CALL(NEW_GVAR(rb_intern("$_")), - rb_intern("chop!"), 0), node); - } - - node = NEW_OPT_N(node); - - if (prelude) { - prelude->nd_body = node; - scope->nd_body = prelude; - } - else { - scope->nd_body = node; - } - - return scope; -} - -static const struct { - ID token; - const char *name; -} op_tbl[] = { - {tDOT2, ".."}, - {tDOT3, "..."}, - {'+', "+(binary)"}, - {'-', 
"-(binary)"}, - {tPOW, "**"}, - {tDSTAR, "**"}, - {tUPLUS, "+@"}, - {tUMINUS, "-@"}, - {tCMP, "<=>"}, - {tGEQ, ">="}, - {tLEQ, "<="}, - {tEQ, "=="}, - {tEQQ, "==="}, - {tNEQ, "!="}, - {tMATCH, "=~"}, - {tNMATCH, "!~"}, - {tAREF, "[]"}, - {tASET, "[]="}, - {tLSHFT, "<<"}, - {tRSHFT, ">>"}, - {tCOLON2, "::"}, -}; - -#define op_tbl_count numberof(op_tbl) - -#ifndef ENABLE_SELECTOR_NAMESPACE -#define ENABLE_SELECTOR_NAMESPACE 0 -#endif - -static struct symbols { - ID last_id; - st_table *sym_id; - st_table *id_str; -#if ENABLE_SELECTOR_NAMESPACE - st_table *ivar2_id; - st_table *id_ivar2; -#endif - VALUE op_sym[tLAST_OP_ID]; -} global_symbols = {tLAST_TOKEN}; - -static const struct st_hash_type symhash = { - rb_str_hash_cmp, - rb_str_hash, -}; - -#if ENABLE_SELECTOR_NAMESPACE -struct ivar2_key { - ID id; - VALUE klass; -}; - -static int -ivar2_cmp(struct ivar2_key *key1, struct ivar2_key *key2) +#else +static VALUE +parser_reg_compile(struct parser_params* p, VALUE str, int options, VALUE *errmsg) { - if (key1->id == key2->id && key1->klass == key2->klass) { - return 0; + VALUE err = rb_errinfo(); + VALUE re; + str = ripper_is_node_yylval(str) ? 
RNODE(str)->nd_cval : str; + int c = rb_reg_fragment_setenc(p, str, options); + if (c) reg_fragment_enc_error(p, str, c); + re = rb_parser_reg_compile(p, str, options); + if (NIL_P(re)) { + *errmsg = rb_attr_get(rb_errinfo(), idMesg); + rb_set_errinfo(err); } - return 1; -} - -static int -ivar2_hash(struct ivar2_key *key) -{ - return (key->id << 8) ^ (key->klass >> 2); + return re; } - -static const struct st_hash_type ivar2_hash_type = { - ivar2_cmp, - ivar2_hash, -}; #endif +#ifndef RIPPER void -Init_sym(void) -{ - global_symbols.sym_id = st_init_table_with_size(&symhash, 1000); - global_symbols.id_str = st_init_numtable_with_size(1000); -#if ENABLE_SELECTOR_NAMESPACE - global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000); - global_symbols.id_ivar2 = st_init_numtable_with_size(1000); -#endif - - (void)nodetype; - (void)nodeline; -#if PARSER_DEBUG - (void)lex_state_name(-1); -#endif - - Init_id(); -} - -void -rb_gc_mark_symbols(void) +rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split) { - rb_mark_tbl(global_symbols.id_str); - rb_gc_mark_locations(global_symbols.op_sym, - global_symbols.op_sym + numberof(global_symbols.op_sym)); -} -#endif /* !RIPPER */ - -static ID -internal_id_gen(struct parser_params *parser) -{ - ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars); - id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1; - return ID_INTERNAL | (id << ID_SCOPE_SHIFT); + struct parser_params *p; + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + p->do_print = print; + p->do_loop = loop; + p->do_chomp = chomp; + p->do_split = split; } -#ifndef RIPPER -static int -is_special_global_name(const char *m, const char *e, rb_encoding *enc) +static NODE * +parser_append_options(struct parser_params *p, NODE *node) { - int mb = 0; + static const YYLTYPE default_location = {{1, 0}, {1, 0}}; + const YYLTYPE *const LOC = &default_location; - if (m >= e) return 0; - switch (*m) { 
- case '~': case '*': case '$': case '?': case '!': case '@': - case '/': case '\\': case ';': case ',': case '.': case '=': - case ':': case '<': case '>': case '\"': - case '&': case '`': case '\'': case '+': - case '0': - ++m; - break; - case '-': - ++m; - if (m < e && is_identchar(m, e, enc)) { - if (!ISASCII(*m)) mb = 1; - m += rb_enc_mbclen(m, e, enc); - } - break; - default: - if (!rb_enc_isdigit(*m, enc)) return 0; - do { - if (!ISASCII(*m)) mb = 1; - ++m; - } while (m < e && rb_enc_isdigit(*m, enc)); + if (p->do_print) { + NODE *print = NEW_FCALL(rb_intern("print"), + NEW_LIST(NEW_GVAR(idLASTLINE, LOC), LOC), + LOC); + node = block_append(p, node, print); } - return m == e ? mb + 1 : 0; -} - -int -rb_symname_p(const char *name) -{ - return rb_enc_symname_p(name, rb_ascii8bit_encoding()); -} - -int -rb_enc_symname_p(const char *name, rb_encoding *enc) -{ - return rb_enc_symname2_p(name, strlen(name), enc); -} - -static int -rb_enc_symname_type(const char *name, long len, rb_encoding *enc) -{ - const char *m = name; - const char *e = m + len; - int type = ID_JUNK; - - if (!m || len <= 0) return -1; - switch (*m) { - case '\0': - return -1; - - case '$': - type = ID_GLOBAL; - if (is_special_global_name(++m, e, enc)) return type; - goto id; - - case '@': - type = ID_INSTANCE; - if (*++m == '@') { - ++m; - type = ID_CLASS; - } - goto id; - - case '<': - switch (*++m) { - case '<': ++m; break; - case '=': if (*++m == '>') ++m; break; - default: break; - } - break; - - case '>': - switch (*++m) { - case '>': case '=': ++m; break; - } - break; - case '=': - switch (*++m) { - case '~': ++m; break; - case '=': if (*++m == '=') ++m; break; - default: return -1; + if (p->do_loop) { + if (p->do_split) { + ID ifs = rb_intern("$;"); + ID fields = rb_intern("$F"); + NODE *args = NEW_LIST(NEW_GVAR(ifs, LOC), LOC); + NODE *split = NEW_GASGN(fields, + NEW_CALL(NEW_GVAR(idLASTLINE, LOC), + rb_intern("split"), args, LOC), + LOC); + node = block_append(p, split, node); } - 
break; - - case '*': - if (*++m == '*') ++m; - break; - - case '+': case '-': - if (*++m == '@') ++m; - break; - - case '|': case '^': case '&': case '/': case '%': case '~': case '`': - ++m; - break; - - case '[': - if (*++m != ']') return -1; - if (*++m == '=') ++m; - break; - - case '!': - if (len == 1) return ID_JUNK; - switch (*++m) { - case '=': case '~': ++m; break; - default: return -1; + if (p->do_chomp) { + NODE *chomp = NEW_CALL(NEW_GVAR(idLASTLINE, LOC), + rb_intern("chomp!"), 0, LOC); + node = block_append(p, chomp, node); } - break; - default: - type = rb_enc_isupper(*m, enc) ? ID_CONST : ID_LOCAL; - id: - if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m))) - return -1; - while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); - switch (*m) { - case '!': case '?': - if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1; - type = ID_JUNK; - ++m; - break; - case '=': - if (type != ID_CONST && type != ID_LOCAL) return -1; - type = ID_ATTRSET; - ++m; - break; - } - break; + node = NEW_WHILE(NEW_VCALL(idGets, LOC), node, 1, LOC); } - return m == e ? 
type : -1; -} - -int -rb_enc_symname2_p(const char *name, long len, rb_encoding *enc) -{ - return rb_enc_symname_type(name, len, enc) != -1; -} - -static int -rb_str_symname_type(VALUE name) -{ - const char *ptr = StringValuePtr(name); - long len = RSTRING_LEN(name); - int type = rb_enc_symname_type(ptr, len, rb_enc_get(name)); - RB_GC_GUARD(name); - return type; -} - -static ID -register_symid(ID id, const char *name, long len, rb_encoding *enc) -{ - VALUE str = rb_enc_str_new(name, len, enc); - return register_symid_str(id, str); -} - -static ID -register_symid_str(ID id, VALUE str) -{ - OBJ_FREEZE(str); - st_add_direct(global_symbols.sym_id, (st_data_t)str, id); - st_add_direct(global_symbols.id_str, id, (st_data_t)str); - return id; -} -static int -sym_check_asciionly(VALUE str) -{ - if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE; - switch (rb_enc_str_coderange(str)) { - case ENC_CODERANGE_BROKEN: - rb_raise(rb_eEncodingError, "invalid encoding symbol"); - case ENC_CODERANGE_7BIT: - return TRUE; - } - return FALSE; + return node; } -/* - * _str_ itself will be registered at the global symbol table. _str_ - * can be modified before the registration, since the encoding will be - * set to ASCII-8BIT if it is a special global name. 
- */ -static ID intern_str(VALUE str); - -ID -rb_intern3(const char *name, long len, rb_encoding *enc) +void +rb_init_parse(void) { - VALUE str; - st_data_t data; - struct RString fake_str; - fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; - fake_str.basic.klass = rb_cString; - fake_str.as.heap.len = len; - fake_str.as.heap.ptr = (char *)name; - fake_str.as.heap.aux.capa = len; - str = (VALUE)&fake_str; - rb_enc_associate(str, enc); - OBJ_FREEZE(str); - - if (st_lookup(global_symbols.sym_id, str, &data)) - return (ID)data; - - str = rb_enc_str_new(name, len, enc); /* make true string */ - return intern_str(str); + /* just to suppress unused-function warnings */ + (void)nodetype; + (void)nodeline; } static ID -intern_str(VALUE str) -{ - const char *name, *m, *e; - long len, last; - rb_encoding *enc, *symenc; - unsigned char c; - ID id; - int mb; - - RSTRING_GETMEM(str, name, len); - m = name; - e = m + len; - enc = rb_enc_get(str); - symenc = enc; - - if (rb_cString && !rb_enc_asciicompat(enc)) { - id = ID_JUNK; - goto new_id; - } - last = len-1; - id = 0; - switch (*m) { - case '$': - id |= ID_GLOBAL; - if ((mb = is_special_global_name(++m, e, enc)) != 0) { - if (!--mb) symenc = rb_usascii_encoding(); - goto new_id; - } - break; - case '@': - if (m[1] == '@') { - m++; - id |= ID_CLASS; - } - else { - id |= ID_INSTANCE; - } - m++; - break; - default: - c = m[0]; - if (c != '_' && rb_enc_isascii(c, enc) && rb_enc_ispunct(c, enc)) { - /* operators */ - int i; - - if (len == 1) { - id = c; - goto id_register; - } - for (i = 0; i < op_tbl_count; i++) { - if (*op_tbl[i].name == *m && - strcmp(op_tbl[i].name, m) == 0) { - id = op_tbl[i].token; - goto id_register; - } - } - } - - if (m[last] == '=') { - /* attribute assignment */ - id = rb_intern3(name, last, enc); - if (id > tLAST_OP_ID && !is_attrset_id(id)) { - enc = rb_enc_get(rb_id2str(id)); - id = rb_id_attrset(id); - goto id_register; - } - id = ID_ATTRSET; - } - else if (rb_enc_isupper(m[0], enc)) { - id = 
ID_CONST; - } - else { - id = ID_LOCAL; - } - break; - } - if (!rb_enc_isdigit(*m, enc)) { - while (m <= name + last && is_identchar(m, e, enc)) { - if (ISASCII(*m)) { - m++; - } - else { - m += rb_enc_mbclen(m, e, enc); - } - } - } - if (m - name < len) id = ID_JUNK; - if (sym_check_asciionly(str)) symenc = rb_usascii_encoding(); - new_id: - if (symenc != enc) rb_enc_associate(str, symenc); - if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) { - if (len > 20) { - rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.20s...)", - name); - } - else { - rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %.*s)", - (int)len, name); - } - } - id |= ++global_symbols.last_id << ID_SCOPE_SHIFT; - id_register: - return register_symid_str(id, str); -} - -ID -rb_intern2(const char *name, long len) -{ - return rb_intern3(name, len, rb_usascii_encoding()); -} - -#undef rb_intern -ID -rb_intern(const char *name) -{ - return rb_intern2(name, strlen(name)); -} - -ID -rb_intern_str(VALUE str) -{ - st_data_t id; - - if (st_lookup(global_symbols.sym_id, str, &id)) - return (ID)id; - return intern_str(rb_str_dup(str)); -} - -VALUE -rb_id2str(ID id) -{ - st_data_t data; - - if (id < tLAST_TOKEN) { - int i = 0; - - if (id < INT_MAX && rb_ispunct((int)id)) { - VALUE str = global_symbols.op_sym[i = (int)id]; - if (!str) { - char name[2]; - name[0] = (char)id; - name[1] = 0; - str = rb_usascii_str_new(name, 1); - OBJ_FREEZE(str); - global_symbols.op_sym[i] = str; - } - return str; - } - for (i = 0; i < op_tbl_count; i++) { - if (op_tbl[i].token == id) { - VALUE str = global_symbols.op_sym[i]; - if (!str) { - str = rb_usascii_str_new2(op_tbl[i].name); - OBJ_FREEZE(str); - global_symbols.op_sym[i] = str; - } - return str; - } - } - } - - if (st_lookup(global_symbols.id_str, id, &data)) { - VALUE str = (VALUE)data; - if (RBASIC(str)->klass == 0) - RBASIC(str)->klass = rb_cString; - return str; - } - - if (is_attrset_id(id)) { - ID id2 = (id & 
~ID_SCOPE_MASK) | ID_LOCAL; - VALUE str; - - while (!(str = rb_id2str(id2))) { - if (!is_local_id(id2)) return 0; - id2 = (id & ~ID_SCOPE_MASK) | ID_CONST; - } - str = rb_str_dup(str); - rb_str_cat(str, "=", 1); - rb_intern_str(str); - if (st_lookup(global_symbols.id_str, id, &data)) { - VALUE str = (VALUE)data; - if (RBASIC(str)->klass == 0) - RBASIC(str)->klass = rb_cString; - return str; - } - } - return 0; -} - -const char * -rb_id2name(ID id) -{ - VALUE str = rb_id2str(id); - - if (!str) return 0; - return RSTRING_PTR(str); -} - -static int -symbols_i(VALUE sym, ID value, VALUE ary) -{ - rb_ary_push(ary, ID2SYM(value)); - return ST_CONTINUE; -} - -/* - * call-seq: - * Symbol.all_symbols => array - * - * Returns an array of all the symbols currently in Ruby's symbol - * table. - * - * Symbol.all_symbols.size #=> 903 - * Symbol.all_symbols[1,20] #=> [:floor, :ARGV, :Binding, :symlink, - * :chown, :EOFError, :$;, :String, - * :LOCK_SH, :"setuid?", :$<, - * :default_proc, :compact, :extend, - * :Tms, :getwd, :$=, :ThreadGroup, - * :wait2, :$>] - */ - -VALUE -rb_sym_all_symbols(void) -{ - VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries); - - st_foreach(global_symbols.sym_id, symbols_i, ary); - return ary; -} - -int -rb_is_const_id(ID id) +internal_id(struct parser_params *p) { - return is_const_id(id); + return rb_make_temporary_id(vtable_size(p->lvtbl->args) + vtable_size(p->lvtbl->vars)); } - -int -rb_is_class_id(ID id) -{ - return is_class_id(id); -} - -int -rb_is_global_id(ID id) -{ - return is_global_id(id); -} - -int -rb_is_instance_id(ID id) -{ - return is_instance_id(id); -} - -int -rb_is_attrset_id(ID id) -{ - return is_attrset_id(id); -} - -int -rb_is_local_id(ID id) -{ - return is_local_id(id); -} - -int -rb_is_junk_id(ID id) -{ - return is_junk_id(id); -} - -/** - * Returns ID for the given name if it is interned already, or 0. 
- * - * \param namep the pointer to the name object - * \return the ID for *namep - * \pre the object referred by \p namep must be a Symbol or - * a String, or possible to convert with to_str method. - * \post the object referred by \p namep is a Symbol or a - * String if non-zero value is returned, or is a String - * if 0 is returned. - */ -ID -rb_check_id(volatile VALUE *namep) -{ - st_data_t id; - VALUE tmp; - VALUE name = *namep; - - if (SYMBOL_P(name)) { - return SYM2ID(name); - } - else if (!RB_TYPE_P(name, T_STRING)) { - tmp = rb_check_string_type(name); - if (NIL_P(tmp)) { - tmp = rb_inspect(name); - rb_raise(rb_eTypeError, "%s is not a symbol", - RSTRING_PTR(tmp)); - } - name = tmp; - *namep = name; - } - - sym_check_asciionly(name); - - if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) - return (ID)id; - - if (rb_is_attrset_name(name)) { - struct RString fake_str; - const VALUE localname = (VALUE)&fake_str; - /* make local name by chopping '=' */ - fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; - fake_str.basic.klass = rb_cString; - fake_str.as.heap.len = RSTRING_LEN(name) - 1; - fake_str.as.heap.ptr = RSTRING_PTR(name); - fake_str.as.heap.aux.capa = fake_str.as.heap.len; - rb_enc_copy(localname, name); - OBJ_FREEZE(localname); - - if (st_lookup(global_symbols.sym_id, (st_data_t)localname, &id)) { - return rb_id_attrset((ID)id); - } - RB_GC_GUARD(name); - } - - return (ID)0; -} - -ID -rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc) -{ - st_data_t id; - struct RString fake_str; - const VALUE name = (VALUE)&fake_str; - fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; - fake_str.basic.klass = rb_cString; - fake_str.as.heap.len = len; - fake_str.as.heap.ptr = (char *)ptr; - fake_str.as.heap.aux.capa = len; - rb_enc_associate(name, enc); - - sym_check_asciionly(name); - - if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) - return (ID)id; - - if (rb_is_attrset_name(name)) { - fake_str.as.heap.len = len - 1; - if 
(st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) { - return rb_id_attrset((ID)id); - } - } - - return (ID)0; -} - -int -rb_is_const_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_CONST; -} - -int -rb_is_class_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_CLASS; -} - -int -rb_is_global_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_GLOBAL; -} - -int -rb_is_instance_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_INSTANCE; -} - -int -rb_is_attrset_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_ATTRSET; -} - -int -rb_is_local_name(VALUE name) -{ - return rb_str_symname_type(name) == ID_LOCAL; -} - -int -rb_is_method_name(VALUE name) -{ - switch (rb_str_symname_type(name)) { - case ID_LOCAL: case ID_ATTRSET: case ID_JUNK: - return TRUE; - } - return FALSE; -} - -int -rb_is_junk_name(VALUE name) -{ - return rb_str_symname_type(name) == -1; -} - #endif /* !RIPPER */ static void -parser_initialize(struct parser_params *parser) -{ - parser->eofp = Qfalse; - - parser->parser_lex_strterm = 0; - parser->parser_cond_stack = 0; - parser->parser_cmdarg_stack = 0; - parser->parser_class_nest = 0; - parser->parser_paren_nest = 0; - parser->parser_lpar_beg = 0; - parser->parser_brace_nest = 0; - parser->parser_in_single = 0; - parser->parser_in_def = 0; - parser->parser_in_defined = 0; - parser->parser_compile_for_eval = 0; - parser->parser_cur_mid = 0; - parser->parser_tokenbuf = NULL; - parser->parser_tokidx = 0; - parser->parser_toksiz = 0; - parser->parser_heredoc_end = 0; - parser->parser_command_start = TRUE; - parser->parser_deferred_nodes = 0; - parser->parser_lex_pbeg = 0; - parser->parser_lex_p = 0; - parser->parser_lex_pend = 0; - parser->parser_lvtbl = 0; - parser->parser_ruby__end__seen = 0; - parser->parser_ruby_sourcefile = 0; -#ifndef RIPPER - parser->is_ripper = 0; - parser->parser_eval_tree_begin = 0; - parser->parser_eval_tree = 0; +parser_initialize(struct parser_params *p) +{ + /* 
note: we rely on TypedData_Make_Struct to set most fields to 0 */ + p->command_start = TRUE; + p->ruby_sourcefile_string = Qnil; + p->lex.lpar_beg = -1; /* make lambda_beginning_p() == FALSE at first */ + p->node_id = 0; +#ifdef RIPPER + p->delayed.token = Qnil; + p->result = Qnil; + p->parsing_thread = Qnil; #else - parser->is_ripper = 1; - parser->parser_ruby_sourcefile_string = Qnil; - parser->delayed = Qnil; - - parser->result = Qnil; - parser->parsing_thread = Qnil; - parser->toplevel_p = TRUE; -#endif -#ifdef YYMALLOC - parser->heap = NULL; + p->error_buffer = Qfalse; #endif - parser->enc = rb_usascii_encoding(); + p->debug_buffer = Qnil; + p->debug_output = rb_ractor_stdout(); + p->enc = rb_utf8_encoding(); } #ifdef RIPPER @@ -10593,22 +13166,26 @@ parser_mark(void *ptr) { struct parser_params *p = (struct parser_params*)ptr; - rb_gc_mark((VALUE)p->parser_lex_strterm); - rb_gc_mark((VALUE)p->parser_deferred_nodes); - rb_gc_mark(p->parser_lex_input); - rb_gc_mark(p->parser_lex_lastline); - rb_gc_mark(p->parser_lex_nextline); + rb_gc_mark(p->lex.input); + rb_gc_mark(p->lex.prevline); + rb_gc_mark(p->lex.lastline); + rb_gc_mark(p->lex.nextline); + rb_gc_mark(p->ruby_sourcefile_string); + rb_gc_mark((VALUE)p->lex.strterm); + rb_gc_mark((VALUE)p->ast); + rb_gc_mark(p->case_labels); #ifndef RIPPER - rb_gc_mark((VALUE)p->parser_eval_tree_begin) ; - rb_gc_mark((VALUE)p->parser_eval_tree) ; rb_gc_mark(p->debug_lines); + rb_gc_mark(p->compile_option); + rb_gc_mark(p->error_buffer); #else - rb_gc_mark(p->parser_ruby_sourcefile_string); - rb_gc_mark(p->delayed); + rb_gc_mark(p->delayed.token); rb_gc_mark(p->value); rb_gc_mark(p->result); rb_gc_mark(p->parsing_thread); #endif + rb_gc_mark(p->debug_buffer); + rb_gc_mark(p->debug_output); #ifdef YYMALLOC rb_gc_mark((VALUE)p->heap); #endif @@ -10620,18 +13197,22 @@ parser_free(void *ptr) struct parser_params *p = (struct parser_params*)ptr; struct local_vars *local, *prev; - if (p->parser_tokenbuf) { - 
xfree(p->parser_tokenbuf); + if (p->tokenbuf) { + ruby_sized_xfree(p->tokenbuf, p->toksiz); } - for (local = p->parser_lvtbl; local; local = prev) { + for (local = p->lvtbl; local; local = prev) { if (local->vars) xfree(local->vars); prev = local->prev; xfree(local); } -#ifndef RIPPER - xfree(p->parser_ruby_sourcefile); -#endif - xfree(p); + { + token_info *ptinfo; + while ((ptinfo = p->token_info) != 0) { + p->token_info = ptinfo->next; + xfree(ptinfo); + } + } + xfree(ptr); } static size_t @@ -10641,31 +13222,26 @@ parser_memsize(const void *ptr) struct local_vars *local; size_t size = sizeof(*p); - if (!ptr) return 0; - size += p->parser_toksiz; - for (local = p->parser_lvtbl; local; local = local->prev) { + size += p->toksiz; + for (local = p->lvtbl; local; local = local->prev) { size += sizeof(*local); if (local->vars) size += local->vars->capa * sizeof(ID); } -#ifndef RIPPER - if (p->parser_ruby_sourcefile) { - size += strlen(p->parser_ruby_sourcefile) + 1; - } -#endif return size; } -static +static const rb_data_type_t parser_data_type = { #ifndef RIPPER -const -#endif -rb_data_type_t parser_data_type = { "parser", +#else + "ripper", +#endif { parser_mark, parser_free, parser_memsize, }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; #ifndef RIPPER @@ -10677,55 +13253,98 @@ rb_reserved_word(const char *str, unsigned int len) return reserved_word(str, len); } -static struct parser_params * -parser_new(void) +VALUE +rb_parser_new(void) { struct parser_params *p; - - p = ALLOC_N(struct parser_params, 1); - MEMZERO(p, struct parser_params, 1); + VALUE parser = TypedData_Make_Struct(0, struct parser_params, + &parser_data_type, p); parser_initialize(p); - return p; + return parser; } VALUE -rb_parser_new(void) +rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main) +{ + struct parser_params *p; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + p->error_buffer = main ? 
Qfalse : Qnil; + p->parent_iseq = base; + return vparser; +} + +void +rb_parser_keep_script_lines(VALUE vparser) +{ + struct parser_params *p; + + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + p->keep_script_lines = 1; +} +#endif + +#ifdef RIPPER +#define rb_parser_end_seen_p ripper_parser_end_seen_p +#define rb_parser_encoding ripper_parser_encoding +#define rb_parser_get_yydebug ripper_parser_get_yydebug +#define rb_parser_set_yydebug ripper_parser_set_yydebug +#define rb_parser_get_debug_output ripper_parser_get_debug_output +#define rb_parser_set_debug_output ripper_parser_set_debug_output +static VALUE ripper_parser_end_seen_p(VALUE vparser); +static VALUE ripper_parser_encoding(VALUE vparser); +static VALUE ripper_parser_get_yydebug(VALUE self); +static VALUE ripper_parser_set_yydebug(VALUE self, VALUE flag); +static VALUE ripper_parser_get_debug_output(VALUE self); +static VALUE ripper_parser_set_debug_output(VALUE self, VALUE output); + +/* + * call-seq: + * ripper.error? -> Boolean + * + * Return true if parsed source has errors. + */ +static VALUE +ripper_error_p(VALUE vparser) { - struct parser_params *p = parser_new(); + struct parser_params *p; - return TypedData_Wrap_Struct(0, &parser_data_type, p); + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + return RBOOL(p->error_p); } +#endif /* * call-seq: - * ripper#end_seen? -> Boolean + * ripper.end_seen? -> Boolean * * Return true if parsed source ended by +\_\_END\_\_+. */ VALUE rb_parser_end_seen_p(VALUE vparser) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - return ruby__end__seen ? Qtrue : Qfalse; + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + return RBOOL(p->ruby__end__seen); } /* * call-seq: - * ripper#encoding -> encoding + * ripper.encoding -> encoding * * Return encoding of the source. 
*/ VALUE rb_parser_encoding(VALUE vparser) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser); - return rb_enc_from_encoding(parser->enc); + TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p); + return rb_enc_from_encoding(p->enc); } +#ifdef RIPPER /* * call-seq: * ripper.yydebug -> true or false @@ -10735,11 +13354,12 @@ rb_parser_encoding(VALUE vparser) VALUE rb_parser_get_yydebug(VALUE self) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - return yydebug ? Qtrue : Qfalse; + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + return RBOOL(p->debug); } +#endif /* * call-seq: @@ -10750,53 +13370,86 @@ rb_parser_get_yydebug(VALUE self) VALUE rb_parser_set_yydebug(VALUE self, VALUE flag) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - yydebug = RTEST(flag); + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + p->debug = RTEST(flag); return flag; } +/* + * call-seq: + * ripper.debug_output -> obj + * + * Get debug output. + */ +VALUE +rb_parser_get_debug_output(VALUE self) +{ + struct parser_params *p; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + return p->debug_output; +} + +/* + * call-seq: + * ripper.debug_output = obj + * + * Set debug output. 
+ */ +VALUE +rb_parser_set_debug_output(VALUE self, VALUE output) +{ + struct parser_params *p; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + return p->debug_output = output; +} + +#ifndef RIPPER #ifdef YYMALLOC #define HEAPCNT(n, size) ((n) * (size) / sizeof(YYSTYPE)) -#define NEWHEAP() rb_node_newnode(NODE_ALLOCA, 0, (VALUE)parser->heap, 0) -#define ADD2HEAP(n, c, p) ((parser->heap = (n))->u1.node = (p), \ - (n)->u3.cnt = (c), (p)) +/* Keep the order; NEWHEAP then xmalloc and ADD2HEAP to get rid of + * potential memory leak */ +#define NEWHEAP() rb_imemo_tmpbuf_parser_heap(0, p->heap, 0) +#define ADD2HEAP(new, cnt, ptr) ((p->heap = (new))->ptr = (ptr), \ + (new)->cnt = (cnt), (ptr)) void * -rb_parser_malloc(struct parser_params *parser, size_t size) +rb_parser_malloc(struct parser_params *p, size_t size) { size_t cnt = HEAPCNT(1, size); - NODE *n = NEWHEAP(); + rb_imemo_tmpbuf_t *n = NEWHEAP(); void *ptr = xmalloc(size); return ADD2HEAP(n, cnt, ptr); } void * -rb_parser_calloc(struct parser_params *parser, size_t nelem, size_t size) +rb_parser_calloc(struct parser_params *p, size_t nelem, size_t size) { size_t cnt = HEAPCNT(nelem, size); - NODE *n = NEWHEAP(); + rb_imemo_tmpbuf_t *n = NEWHEAP(); void *ptr = xcalloc(nelem, size); return ADD2HEAP(n, cnt, ptr); } void * -rb_parser_realloc(struct parser_params *parser, void *ptr, size_t size) +rb_parser_realloc(struct parser_params *p, void *ptr, size_t size) { - NODE *n; + rb_imemo_tmpbuf_t *n; size_t cnt = HEAPCNT(1, size); - if (ptr && (n = parser->heap) != NULL) { + if (ptr && (n = p->heap) != NULL) { do { - if (n->u1.node == ptr) { - n->u1.node = ptr = xrealloc(ptr, size); - if (n->u3.cnt) n->u3.cnt = cnt; + if (n->ptr == ptr) { + n->ptr = ptr = xrealloc(ptr, size); + if (n->cnt) n->cnt = cnt; return ptr; } - } while ((n = n->u2.node) != NULL); + } while ((n = n->next) != NULL); } n = NEWHEAP(); ptr = xrealloc(ptr, size); @@ -10804,27 +13457,134 @@ rb_parser_realloc(struct 
parser_params *parser, void *ptr, size_t size) } void -rb_parser_free(struct parser_params *parser, void *ptr) +rb_parser_free(struct parser_params *p, void *ptr) { - NODE **prev = &parser->heap, *n; + rb_imemo_tmpbuf_t **prev = &p->heap, *n; while ((n = *prev) != NULL) { - if (n->u1.node == ptr) { - *prev = n->u2.node; - rb_gc_force_recycle((VALUE)n); + if (n->ptr == ptr) { + *prev = n->next; break; } - prev = &n->u2.node; + prev = &n->next; } - xfree(ptr); } #endif + +void +rb_parser_printf(struct parser_params *p, const char *fmt, ...) +{ + va_list ap; + VALUE mesg = p->debug_buffer; + + if (NIL_P(mesg)) p->debug_buffer = mesg = rb_str_new(0, 0); + va_start(ap, fmt); + rb_str_vcatf(mesg, fmt, ap); + va_end(ap); + if (RSTRING_END(mesg)[-1] == '\n') { + rb_io_write(p->debug_output, mesg); + p->debug_buffer = Qnil; + } +} + +static void +parser_compile_error(struct parser_params *p, const char *fmt, ...) +{ + va_list ap; + + rb_io_flush(p->debug_output); + p->error_p = 1; + va_start(ap, fmt); + p->error_buffer = + rb_syntax_error_append(p->error_buffer, + p->ruby_sourcefile_string, + p->ruby_sourceline, + rb_long2int(p->lex.pcur - p->lex.pbeg), + p->enc, fmt, ap); + va_end(ap); +} + +static size_t +count_char(const char *str, int c) +{ + int n = 0; + while (str[n] == c) ++n; + return n; +} + +/* + * strip enclosing double-quotes, same as the default yytnamerr except + * for that single-quotes matching back-quotes do not stop stripping. 
+ * + * "\"`class' keyword\"" => "`class' keyword" + */ +RUBY_FUNC_EXPORTED size_t +rb_yytnamerr(struct parser_params *p, char *yyres, const char *yystr) +{ + if (*yystr == '"') { + size_t yyn = 0, bquote = 0; + const char *yyp = yystr; + + while (*++yyp) { + switch (*yyp) { + case '`': + if (!bquote) { + bquote = count_char(yyp+1, '`') + 1; + if (yyres) memcpy(&yyres[yyn], yyp, bquote); + yyn += bquote; + yyp += bquote - 1; + break; + } + goto default_char; + + case '\'': + if (bquote && count_char(yyp+1, '\'') + 1 == bquote) { + if (yyres) memcpy(yyres + yyn, yyp, bquote); + yyn += bquote; + yyp += bquote - 1; + bquote = 0; + break; + } + if (yyp[1] && yyp[1] != '\'' && yyp[2] == '\'') { + if (yyres) memcpy(yyres + yyn, yyp, 3); + yyn += 3; + yyp += 2; + break; + } + goto do_not_strip_quotes; + + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default_char: + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + case '\0': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + } + do_not_strip_quotes: ; + } + + if (!yyres) return strlen(yystr); + + return (YYSIZE_T)(yystpcpy(yyres, yystr) - yyres); +} #endif #ifdef RIPPER #ifdef RIPPER_DEBUG -extern int rb_is_pointer_to_heap(VALUE); - /* :nodoc: */ static VALUE ripper_validate_object(VALUE self, VALUE x) @@ -10833,26 +13593,31 @@ ripper_validate_object(VALUE self, VALUE x) if (x == Qtrue) return x; if (x == Qnil) return x; if (x == Qundef) - rb_raise(rb_eArgError, "Qundef given"); + rb_raise(rb_eArgError, "Qundef given"); if (FIXNUM_P(x)) return x; if (SYMBOL_P(x)) return x; - if (!rb_is_pointer_to_heap(x)) - rb_raise(rb_eArgError, "invalid pointer: %p", x); - switch (TYPE(x)) { + switch (BUILTIN_TYPE(x)) { case T_STRING: case T_OBJECT: case T_ARRAY: case T_BIGNUM: case T_FLOAT: - return x; + case T_COMPLEX: + case T_RATIONAL: + break; case T_NODE: - if (nd_type(x) != NODE_LASGN) { - rb_raise(rb_eArgError, "NODE 
given: %p", x); + if (!nd_type_p((NODE *)x, NODE_RIPPER)) { + rb_raise(rb_eArgError, "NODE given: %p", (void *)x); } - return ((NODE *)x)->nd_rval; + x = ((NODE *)x)->nd_rval; + break; default: - rb_raise(rb_eArgError, "wrong type of ruby object: %p (%s)", - x, rb_obj_classname(x)); + rb_raise(rb_eArgError, "wrong type of ruby object: %p (%s)", + (void *)x, rb_obj_classname(x)); + } + if (!RBASIC_CLASS(x)) { + rb_raise(rb_eArgError, "hidden ruby object: %p (%s)", + (void *)x, rb_builtin_type_name(TYPE(x))); } return x; } @@ -10861,58 +13626,58 @@ ripper_validate_object(VALUE self, VALUE x) #define validate(x) ((x) = get_value(x)) static VALUE -ripper_dispatch0(struct parser_params *parser, ID mid) +ripper_dispatch0(struct parser_params *p, ID mid) { - return rb_funcall(parser->value, mid, 0); + return rb_funcall(p->value, mid, 0); } static VALUE -ripper_dispatch1(struct parser_params *parser, ID mid, VALUE a) +ripper_dispatch1(struct parser_params *p, ID mid, VALUE a) { validate(a); - return rb_funcall(parser->value, mid, 1, a); + return rb_funcall(p->value, mid, 1, a); } static VALUE -ripper_dispatch2(struct parser_params *parser, ID mid, VALUE a, VALUE b) +ripper_dispatch2(struct parser_params *p, ID mid, VALUE a, VALUE b) { validate(a); validate(b); - return rb_funcall(parser->value, mid, 2, a, b); + return rb_funcall(p->value, mid, 2, a, b); } static VALUE -ripper_dispatch3(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c) +ripper_dispatch3(struct parser_params *p, ID mid, VALUE a, VALUE b, VALUE c) { validate(a); validate(b); validate(c); - return rb_funcall(parser->value, mid, 3, a, b, c); + return rb_funcall(p->value, mid, 3, a, b, c); } static VALUE -ripper_dispatch4(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d) +ripper_dispatch4(struct parser_params *p, ID mid, VALUE a, VALUE b, VALUE c, VALUE d) { validate(a); validate(b); validate(c); validate(d); - return rb_funcall(parser->value, mid, 4, a, b, c, d); + return 
rb_funcall(p->value, mid, 4, a, b, c, d); } static VALUE -ripper_dispatch5(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e) +ripper_dispatch5(struct parser_params *p, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e) { validate(a); validate(b); validate(c); validate(d); validate(e); - return rb_funcall(parser->value, mid, 5, a, b, c, d, e); + return rb_funcall(p->value, mid, 5, a, b, c, d, e); } static VALUE -ripper_dispatch7(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e, VALUE f, VALUE g) +ripper_dispatch7(struct parser_params *p, ID mid, VALUE a, VALUE b, VALUE c, VALUE d, VALUE e, VALUE f, VALUE g) { validate(a); validate(b); @@ -10921,106 +13686,7 @@ ripper_dispatch7(struct parser_params *parser, ID mid, VALUE a, VALUE b, VALUE c validate(e); validate(f); validate(g); - return rb_funcall(parser->value, mid, 7, a, b, c, d, e, f, g); -} - -static const struct kw_assoc { - ID id; - const char *name; -} keyword_to_name[] = { - {keyword_class, "class"}, - {keyword_module, "module"}, - {keyword_def, "def"}, - {keyword_undef, "undef"}, - {keyword_begin, "begin"}, - {keyword_rescue, "rescue"}, - {keyword_ensure, "ensure"}, - {keyword_end, "end"}, - {keyword_if, "if"}, - {keyword_unless, "unless"}, - {keyword_then, "then"}, - {keyword_elsif, "elsif"}, - {keyword_else, "else"}, - {keyword_case, "case"}, - {keyword_when, "when"}, - {keyword_while, "while"}, - {keyword_until, "until"}, - {keyword_for, "for"}, - {keyword_break, "break"}, - {keyword_next, "next"}, - {keyword_redo, "redo"}, - {keyword_retry, "retry"}, - {keyword_in, "in"}, - {keyword_do, "do"}, - {keyword_do_cond, "do"}, - {keyword_do_block, "do"}, - {keyword_return, "return"}, - {keyword_yield, "yield"}, - {keyword_super, "super"}, - {keyword_self, "self"}, - {keyword_nil, "nil"}, - {keyword_true, "true"}, - {keyword_false, "false"}, - {keyword_and, "and"}, - {keyword_or, "or"}, - {keyword_not, "not"}, - {modifier_if, "if"}, - 
{modifier_unless, "unless"}, - {modifier_while, "while"}, - {modifier_until, "until"}, - {modifier_rescue, "rescue"}, - {keyword_alias, "alias"}, - {keyword_defined, "defined?"}, - {keyword_BEGIN, "BEGIN"}, - {keyword_END, "END"}, - {keyword__LINE__, "__LINE__"}, - {keyword__FILE__, "__FILE__"}, - {keyword__ENCODING__, "__ENCODING__"}, - {0, NULL} -}; - -static const char* -keyword_id_to_str(ID id) -{ - const struct kw_assoc *a; - - for (a = keyword_to_name; a->id; a++) { - if (a->id == id) - return a->name; - } - return NULL; -} - -#undef ripper_id2sym -static VALUE -ripper_id2sym(ID id) -{ - const char *name; - char buf[8]; - - if (id <= 256) { - buf[0] = (char)id; - buf[1] = '\0'; - return ID2SYM(rb_intern2(buf, 1)); - } - if ((name = keyword_id_to_str(id))) { - return ID2SYM(rb_intern(name)); - } - switch (id) { - case tOROP: - name = "||"; - break; - case tANDOP: - name = "&&"; - break; - default: - name = rb_id2name(id); - if (!name) { - rb_bug("cannot convert ID to string: %ld", (unsigned long)id); - } - return ID2SYM(id); - } - return ID2SYM(rb_intern(name)); + return rb_funcall(p->value, mid, 7, a, b, c, d, e, f, g); } static ID @@ -11029,7 +13695,7 @@ ripper_get_id(VALUE v) NODE *nd; if (!RB_TYPE_P(v, T_NODE)) return 0; nd = (NODE *)v; - if (nd_type(nd) != NODE_LASGN) return 0; + if (!nd_type_p(nd, NODE_RIPPER)) return 0; return nd->nd_vid; } @@ -11040,12 +13706,18 @@ ripper_get_value(VALUE v) if (v == Qundef) return Qnil; if (!RB_TYPE_P(v, T_NODE)) return v; nd = (NODE *)v; - if (nd_type(nd) != NODE_LASGN) return Qnil; + if (!nd_type_p(nd, NODE_RIPPER)) return Qnil; return nd->nd_rval; } static void -ripper_compile_error(struct parser_params *parser, const char *fmt, ...) +ripper_error(struct parser_params *p) +{ + p->error_p = TRUE; +} + +static void +ripper_compile_error(struct parser_params *p, const char *fmt, ...) { VALUE str; va_list args; @@ -11053,44 +13725,24 @@ ripper_compile_error(struct parser_params *parser, const char *fmt, ...) 
va_start(args, fmt); str = rb_vsprintf(fmt, args); va_end(args); - rb_funcall(parser->value, rb_intern("compile_error"), 1, str); -} - -static void -ripper_warn0(struct parser_params *parser, const char *fmt) -{ - rb_funcall(parser->value, rb_intern("warn"), 1, STR_NEW2(fmt)); -} - -static void -ripper_warnI(struct parser_params *parser, const char *fmt, int a) -{ - rb_funcall(parser->value, rb_intern("warn"), 2, - STR_NEW2(fmt), INT2NUM(a)); -} - -static void -ripper_warnS(struct parser_params *parser, const char *fmt, const char *str) -{ - rb_funcall(parser->value, rb_intern("warn"), 2, - STR_NEW2(fmt), STR_NEW2(str)); -} - -static void -ripper_warning0(struct parser_params *parser, const char *fmt) -{ - rb_funcall(parser->value, rb_intern("warning"), 1, STR_NEW2(fmt)); + rb_funcall(p->value, rb_intern("compile_error"), 1, str); + ripper_error(p); } -static void -ripper_warningS(struct parser_params *parser, const char *fmt, const char *str) +static VALUE +ripper_lex_get_generic(struct parser_params *p, VALUE src) { - rb_funcall(parser->value, rb_intern("warning"), 2, - STR_NEW2(fmt), STR_NEW2(str)); + VALUE line = rb_funcallv_public(src, id_gets, 0, 0); + if (!NIL_P(line) && !RB_TYPE_P(line, T_STRING)) { + rb_raise(rb_eTypeError, + "gets returned %"PRIsVALUE" (expected String or nil)", + rb_obj_class(line)); + } + return line; } static VALUE -ripper_lex_get_generic(struct parser_params *parser, VALUE src) +ripper_lex_io_get(struct parser_params *p, VALUE src) { return rb_io_gets(src); } @@ -11099,16 +13751,13 @@ static VALUE ripper_s_allocate(VALUE klass) { struct parser_params *p; - VALUE self; - - p = ALLOC_N(struct parser_params, 1); - MEMZERO(p, struct parser_params, 1); - self = TypedData_Wrap_Struct(klass, &parser_data_type, p); + VALUE self = TypedData_Make_Struct(klass, struct parser_params, + &parser_data_type, p); p->value = self; return self; } -#define ripper_initialized_p(r) ((r)->parser_lex_input != 0) +#define ripper_initialized_p(r) 
((r)->lex.input != 0) /* * call-seq: @@ -11123,92 +13772,94 @@ ripper_s_allocate(VALUE klass) static VALUE ripper_initialize(int argc, VALUE *argv, VALUE self) { - struct parser_params *parser; + struct parser_params *p; VALUE src, fname, lineno; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); rb_scan_args(argc, argv, "12", &src, &fname, &lineno); if (RB_TYPE_P(src, T_FILE)) { - parser->parser_lex_gets = ripper_lex_get_generic; + p->lex.gets = ripper_lex_io_get; + } + else if (rb_respond_to(src, id_gets)) { + p->lex.gets = ripper_lex_get_generic; } else { StringValue(src); - parser->parser_lex_gets = lex_get_str; + p->lex.gets = lex_get_str; } - parser->parser_lex_input = src; - parser->eofp = Qfalse; + p->lex.input = src; + p->eofp = 0; if (NIL_P(fname)) { fname = STR_NEW2("(ripper)"); + OBJ_FREEZE(fname); } else { - StringValue(fname); + StringValueCStr(fname); + fname = rb_str_new_frozen(fname); } - parser_initialize(parser); + parser_initialize(p); - parser->parser_ruby_sourcefile_string = fname; - parser->parser_ruby_sourcefile = RSTRING_PTR(fname); - parser->parser_ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1; + p->ruby_sourcefile_string = fname; + p->ruby_sourcefile = RSTRING_PTR(fname); + p->ruby_sourceline = NIL_P(lineno) ? 
0 : NUM2INT(lineno) - 1; return Qnil; } -struct ripper_args { - struct parser_params *parser; - int argc; - VALUE *argv; -}; - static VALUE ripper_parse0(VALUE parser_v) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, parser); - parser_prepare(parser); - ripper_yyparse((void*)parser); - return parser->result; + TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, p); + parser_prepare(p); + p->ast = rb_ast_new(); + ripper_yyparse((void*)p); + rb_ast_dispose(p->ast); + p->ast = 0; + return p->result; } static VALUE ripper_ensure(VALUE parser_v) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, parser); - parser->parsing_thread = Qnil; + TypedData_Get_Struct(parser_v, struct parser_params, &parser_data_type, p); + p->parsing_thread = Qnil; return Qnil; } /* * call-seq: - * ripper#parse + * ripper.parse * * Start parsing and returns the value of the root action. 
*/ static VALUE ripper_parse(VALUE self) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - if (!ripper_initialized_p(parser)) { + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { rb_raise(rb_eArgError, "method called for uninitialized object"); } - if (!NIL_P(parser->parsing_thread)) { - if (parser->parsing_thread == rb_thread_current()) + if (!NIL_P(p->parsing_thread)) { + if (p->parsing_thread == rb_thread_current()) rb_raise(rb_eArgError, "Ripper#parse is not reentrant"); else rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe"); } - parser->parsing_thread = rb_thread_current(); + p->parsing_thread = rb_thread_current(); rb_ensure(ripper_parse0, self, ripper_ensure, self); - return parser->result; + return p->result; } /* * call-seq: - * ripper#column -> Integer + * ripper.column -> Integer * * Return column number of current parsing line. * This number starts from 0. @@ -11216,39 +13867,39 @@ ripper_parse(VALUE self) static VALUE ripper_column(VALUE self) { - struct parser_params *parser; + struct parser_params *p; long col; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - if (!ripper_initialized_p(parser)) { + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { rb_raise(rb_eArgError, "method called for uninitialized object"); } - if (NIL_P(parser->parsing_thread)) return Qnil; - col = parser->tokp - parser->parser_lex_pbeg; + if (NIL_P(p->parsing_thread)) return Qnil; + col = p->lex.ptok - p->lex.pbeg; return LONG2NUM(col); } /* * call-seq: - * ripper#filename -> String + * ripper.filename -> String * * Return current parsing filename. 
*/ static VALUE ripper_filename(VALUE self) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - if (!ripper_initialized_p(parser)) { + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { rb_raise(rb_eArgError, "method called for uninitialized object"); } - return parser->parser_ruby_sourcefile_string; + return p->ruby_sourcefile_string; } /* * call-seq: - * ripper#lineno -> Integer + * ripper.lineno -> Integer * * Return line number of current parsing line. * This number starts from 1. @@ -11256,14 +13907,55 @@ ripper_filename(VALUE self) static VALUE ripper_lineno(VALUE self) { - struct parser_params *parser; + struct parser_params *p; - TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser); - if (!ripper_initialized_p(parser)) { + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { rb_raise(rb_eArgError, "method called for uninitialized object"); } - if (NIL_P(parser->parsing_thread)) return Qnil; - return INT2NUM(parser->parser_ruby_sourceline); + if (NIL_P(p->parsing_thread)) return Qnil; + return INT2NUM(p->ruby_sourceline); +} + +/* + * call-seq: + * ripper.state -> Integer + * + * Return scanner state of current token. + */ +static VALUE +ripper_state(VALUE self) +{ + struct parser_params *p; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(p->parsing_thread)) return Qnil; + return INT2NUM(p->lex.state); +} + +/* + * call-seq: + * ripper.token -> String + * + * Return the current token string. 
+ */ +static VALUE +ripper_token(VALUE self) +{ + struct parser_params *p; + long pos, len; + + TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p); + if (!ripper_initialized_p(p)) { + rb_raise(rb_eArgError, "method called for uninitialized object"); + } + if (NIL_P(p->parsing_thread)) return Qnil; + pos = p->lex.ptok - p->lex.pbeg; + len = p->lex.pcur - p->lex.ptok; + return rb_str_subseq(p->lex.lastline, pos, len); } #ifdef RIPPER_DEBUG @@ -11273,7 +13965,7 @@ ripper_assert_Qundef(VALUE self, VALUE obj, VALUE msg) { StringValue(msg); if (obj == Qundef) { - rb_raise(rb_eArgError, "%s", RSTRING_PTR(msg)); + rb_raise(rb_eArgError, "%"PRIsVALUE, msg); } return Qnil; } @@ -11286,17 +13978,29 @@ ripper_value(VALUE self, VALUE obj) } #endif +/* + * call-seq: + * Ripper.lex_state_name(integer) -> string + * + * Returns a string representation of lex_state. + */ +static VALUE +ripper_lex_state_name(VALUE self, VALUE state) +{ + return rb_parser_lex_state_name(NUM2INT(state)); +} void Init_ripper(void) { - parser_data_type.parent = RTYPEDDATA_TYPE(rb_parser_new()); - ripper_init_eventids1(); ripper_init_eventids2(); - /* ensure existing in symbol table */ - (void)rb_intern("||"); - (void)rb_intern("&&"); + id_warn = rb_intern_const("warn"); + id_warning = rb_intern_const("warning"); + id_gets = rb_intern_const("gets"); + id_assoc = rb_intern_const("=>"); + + (void)yystpcpy; /* may not used in newer bison */ InitVM(ripper); } @@ -11307,6 +14011,7 @@ InitVM_ripper(void) VALUE Ripper; Ripper = rb_define_class("Ripper", rb_cObject); + /* version of Ripper */ rb_define_const(Ripper, "Version", rb_usascii_str_new2(RIPPER_VERSION)); rb_define_alloc_func(Ripper, ripper_s_allocate); rb_define_method(Ripper, "initialize", ripper_initialize, -1); @@ -11314,16 +14019,30 @@ InitVM_ripper(void) rb_define_method(Ripper, "column", ripper_column, 0); rb_define_method(Ripper, "filename", ripper_filename, 0); rb_define_method(Ripper, "lineno", ripper_lineno, 0); + 
rb_define_method(Ripper, "state", ripper_state, 0); + rb_define_method(Ripper, "token", ripper_token, 0); rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0); rb_define_method(Ripper, "encoding", rb_parser_encoding, 0); rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0); rb_define_method(Ripper, "yydebug=", rb_parser_set_yydebug, 1); + rb_define_method(Ripper, "debug_output", rb_parser_get_debug_output, 0); + rb_define_method(Ripper, "debug_output=", rb_parser_set_debug_output, 1); + rb_define_method(Ripper, "error?", ripper_error_p, 0); #ifdef RIPPER_DEBUG - rb_define_method(rb_mKernel, "assert_Qundef", ripper_assert_Qundef, 2); - rb_define_method(rb_mKernel, "rawVALUE", ripper_value, 1); - rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); + rb_define_method(Ripper, "assert_Qundef", ripper_assert_Qundef, 2); + rb_define_method(Ripper, "rawVALUE", ripper_value, 1); + rb_define_method(Ripper, "validate_object", ripper_validate_object, 1); #endif + rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2); + rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2); + + rb_define_singleton_method(Ripper, "lex_state_name", ripper_lex_state_name, 1); + +<% @exprs.each do |expr, desc| -%> + /* <%=desc%> */ + rb_define_const(Ripper, "<%=expr%>", INT2NUM(<%=expr%>)); +<% end %> ripper_init_eventids1_table(Ripper); ripper_init_eventids2_table(Ripper); @@ -11340,3 +14059,10 @@ InitVM_ripper(void) } #endif /* RIPPER */ + +/* + * Local variables: + * mode: c + * c-file-style: "ruby" + * End: + */ |
