diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2002-06-26 08:01:00 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2002-06-26 08:01:00 +0000 |
commit | 0fdfc0bbef8f28882ec2f8946a76ee31497ee5c6 (patch) | |
tree | 985f79b99e1f4a854c53713a2f99d03b24f662c0 /parse.y | |
parent | 6037d1cf3d92a2f5fc76818263343b582d0228b6 (diff) |
* parse.y (words, qwords): word list literal rules.
* parse.y (parse_string): ditto.
* parse.y (yylex): %W: word list literal with interpolation. [new]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2598 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'parse.y')
-rw-r--r-- | parse.y | 251 |
1 files changed, 128 insertions, 123 deletions
@@ -161,6 +161,9 @@ static void top_local_setup(); #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ +#define nd_func u1.id +#define nd_term u2.id +#define nd_paren u3.id %} @@ -222,11 +225,12 @@ static void top_local_setup(); %token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR %token <val> tINTEGER tFLOAT tSTRING_CONTENT -%token <node> tNTH_REF tBACK_REF tQWORDS +%token <node> tNTH_REF tBACK_REF %token <num> tREGEXP_END %type <node> singleton strings string string1 xstring regexp %type <node> string_contents xstring_contents string_content +%type <node> words qwords word_list qword_list word %type <val> literal numeric %type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call %type <node> expr_value arg_value primary_value block_call_value @@ -267,7 +271,7 @@ static void top_local_setup(); %token tLBRACE_ARG /* { */ %token tSTAR /* * */ %token tAMPER /* & */ -%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG +%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG %token tSTRING_DBEG tSTRING_DVAR tSTRING_END /* @@ -1292,7 +1296,8 @@ primary : literal | strings | xstring | regexp - | tQWORDS + | words + | qwords | var_ref | backref | tFID @@ -1851,6 +1856,54 @@ regexp : tREGEXP_BEG xstring_contents tREGEXP_END } ; +words : tWORDS_BEG ' ' tSTRING_END + { + $$ = NEW_ZARRAY(); + } + | tWORDS_BEG word_list tSTRING_END + { + $$ = $2; + } + ; + +word_list : /* none */ + { + lex_strnest = 0; + $$ = 0; + } + | word_list word ' ' + { + $$ = list_append($1, $2); + } + ; + +word : string_content + | word string_content + { + $$ = literal_concat($1, $2); + } + +qwords : tQWORDS_BEG ' ' tSTRING_END + { + $$ = NEW_ZARRAY(); + } + | tQWORDS_BEG qword_list tSTRING_END + { + $$ = $2; + } + ; + +qword_list : /* none */ + { + lex_strnest = 0; + $$ = 0; + } + | qword_list tSTRING_CONTENT ' ' + { + $$ = list_append($1, NEW_STR($2)); + } + ; + string_contents : /* none */ { lex_strnest = 0; @@ -1916,8 +1969,8 @@ term_push : /* none */ { if (($$ = quoted_term) == -1 && nd_type(lex_strterm) == NODE_STRTERM && - !lex_strterm->u3.id) { - quoted_term = lex_strterm->u2.id; + !lex_strterm->nd_paren) { + quoted_term = lex_strterm->nd_term; } } ; @@ -2739,6 +2792,21 @@ regx_options() return options | kcode; } +#define STR_FUNC_ESCAPE 0x01 +#define STR_FUNC_EXPAND 0x02 +#define STR_FUNC_REGEXP 0x04 +#define STR_FUNC_QWORDS 0x08 +#define STR_FUNC_INDENT 0x20 + +enum string_type { + str_squote = (0), + str_dquote = (STR_FUNC_EXPAND), + str_xquote = (STR_FUNC_ESCAPE|STR_FUNC_EXPAND), + str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), + str_sword = (STR_FUNC_QWORDS), + str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), +}; + static int tokadd_string(func, term, paren) int func, term, paren; @@ -2756,7 +2824,7 @@ tokadd_string(func, term, paren) } --lex_strnest; } - else if (func != '\'' && c == '#' && lex_p < lex_pend) { + else if ((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) { int c2 = *lex_p; if (c2 == '$' || c2 == '@' || c2 == '{') { pushback(c); @@ -2774,19 +2842,19 @@ tokadd_string(func, term, paren) continue; case '\\': - if (func == '/') tokadd(c); + if (func & STR_FUNC_ESCAPE) tokadd(c); break; default: - if (func == '/') { + if (func & STR_FUNC_REGEXP) { pushback(c); if (tokadd_escape(term) < 0) return -1; continue; } - else if (func != '\'') { + else if (func & STR_FUNC_EXPAND) { pushback(c); - if (func != '"') tokadd('\\'); + if (func & STR_FUNC_ESCAPE) tokadd('\\'); c = read_escape(); } else if (c != term && !(paren && c == paren)) { @@ -2802,91 +2870,13 @@ tokadd_string(func, term, paren) c = nextc(); } } - tokadd(c); - } - return c; -} - -static int -parse_quotedwords(term, paren) - int term, paren; -{ - NODE *qwords = 0; - int strstart; - int c; - int nest = 0; - - strstart = ruby_sourceline; - newtok(); - - while (c = nextc(),ISSPACE(c)) - ; /* skip preceding spaces */ - pushback(c); - while ((c = nextc()) != term || nest > 0) { - if (c == -1) { - ruby_sourceline = strstart; - rb_compile_error("unterminated string meets end of file"); - return 0; - } - if (paren) { - if (c == paren) nest++; - if (c == term && nest-- == 0) break; - } - if (ismbchar(c)) { - int i, len = mbclen(c)-1; - - for (i = 0; i < len; i++) { - tokadd(c); - c = nextc(); - } - } - else if (c == '\\') { - c = nextc(); - if (QUOTED_TERM_P(c)) break; - switch (c) { - case '\n': - continue; - case '\\': - c = '\\'; - break; - default: - if (c == term || (paren && c == paren)) { - tokadd(c); - continue; - } - if (!ISSPACE(c)) - tokadd('\\'); - break; - } - } - else if (ISSPACE(c)) { - NODE *str; - - tokfix(); - str = NEW_STR(rb_str_new(tok(), toklen())); - newtok(); - if (!qwords) qwords = NEW_LIST(str); - else list_append(qwords, str); - while (c = nextc(),ISSPACE(c)) - ; /* skip continuous spaces */ + else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { pushback(c); - continue; + break; } tokadd(c); } - - tokfix(); - if (toklen() > 0) { - NODE *str; - - str = NEW_STR(rb_str_new(tok(), toklen())); - if (!qwords) qwords = NEW_LIST(str); - else list_append(qwords, str); - } - if (!qwords) qwords = NEW_ZARRAY(); - yylval.node = qwords; - lex_state = EXPR_END; - return tQWORDS; + return c; } #define NEW_STRTERM(func, term, paren) \ @@ -2896,17 +2886,25 @@ static int parse_string(quote) NODE *quote; { - int func = quote->u1.id; - int term = quote->u2.id; - int paren = quote->u3.id; - int c; + int func = quote->nd_func; + int term = quote->nd_term; + int paren = quote->nd_paren; + int c, space = 0; if (func == -1) return tSTRING_END; c = nextc(); + if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { + do {c = nextc();} while (ISSPACE(c)); + space = 1; + } if (c == term) { if (!lex_strnest) { eos: - if (func != '/') return tSTRING_END; + if (func & STR_FUNC_QWORDS) { + quote->nd_func = -1; + return ' '; + } + if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; yylval.num = regx_options(); return tREGEXP_END; } @@ -2914,8 +2912,12 @@ parse_string(quote) if (c == '\\' && WHEN_QUOTED_TERM(peek(quoted_term_char))) { if ((c = nextc()) == term) goto eos; } + if (space) { + pushback(c); + return ' '; + } newtok(); - if (func != '\'' && c == '#') { + if ((func & STR_FUNC_EXPAND) && c == '#') { switch (c = nextc()) { case '$': case '@': @@ -2938,12 +2940,10 @@ parse_string(quote) return tSTRING_CONTENT; } -#define INDENTED_HEREDOC 0x20 - static int heredoc_identifier() { - int c = nextc(), term, indent = 0, len; + int c = nextc(), term, func = 0, len; if (c == '-') { c = nextc(); @@ -2952,7 +2952,7 @@ heredoc_identifier() pushback('-'); return 0; } - indent = INDENTED_HEREDOC; + func = STR_FUNC_INDENT; } else if (ISSPACE(c)) { not_heredoc: @@ -2961,10 +2961,14 @@ heredoc_identifier() } switch (c) { case '\'': + func |= str_squote; goto qutoed; case '"': + func |= str_dquote; goto qutoed; case '`': + func |= str_xquote; + qutoed: newtok(); - tokadd(c ^ indent); + tokadd(func); term = c; while ((c = nextc()) != -1 && c != term) { len = mbclen(c); @@ -2980,7 +2984,7 @@ heredoc_identifier() if (!is_identchar(c)) goto not_heredoc; newtok(); term = '"'; - tokadd(term ^ indent); + tokadd(func |= str_dquote); do { len = mbclen(c); do {tokadd(c);} while (--len > 0 && (c = nextc()) != -1); @@ -3040,15 +3044,7 @@ here_document(here) eos = RSTRING(here->nd_lit)->ptr; len = RSTRING(here->nd_lit)->len - 1; - switch (func = *eos++) { - case '\'': case '"': case '`': - indent = 0; - break; - default: - func ^= INDENTED_HEREDOC; - indent = 1; - break; - } + indent = (func = *eos++) & STR_FUNC_INDENT; if ((c = nextc()) == -1) { error: @@ -3062,7 +3058,7 @@ here_document(here) return tSTRING_END; } - if (func == '\'') { + if (!(func & STR_FUNC_EXPAND)) { do { line = lex_lastline; if (str) @@ -3334,7 +3330,7 @@ yylex() return '>'; case '"': - lex_strterm = NEW_STRTERM('"', '"', 0); + lex_strterm = NEW_STRTERM(str_dquote, '"', 0); return tSTRING_BEG; case '`': @@ -3349,11 +3345,11 @@ yylex() lex_state = EXPR_ARG; return c; } - lex_strterm = NEW_STRTERM('`', '`', 0); + lex_strterm = NEW_STRTERM(str_xquote, '`', 0); return tXSTRING_BEG; case '\'': - lex_strterm = NEW_STRTERM('\'', '\'', 0); + lex_strterm = NEW_STRTERM(str_squote, '\'', 0); return tSTRING_BEG; case '?': @@ -3755,7 +3751,7 @@ yylex() case '/': if (lex_state == EXPR_BEG || lex_state == EXPR_MID) { - lex_strterm = NEW_STRTERM('/', '/', 0); + lex_strterm = NEW_STRTERM(str_regexp, '/', 0); return tREGEXP_BEG; } if ((c = nextc()) == '=') { @@ -3767,7 +3763,7 @@ yylex() if (IS_ARG() && space_seen) { if (!ISSPACE(c)) { arg_ambiguous(); - lex_strterm = NEW_STRTERM('/', '/', 0); + lex_strterm = NEW_STRTERM(str_regexp, '/', 0); return tREGEXP_BEG; } } @@ -3924,22 +3920,31 @@ yylex() switch (c) { case 'Q': - lex_strterm = NEW_STRTERM('"', term, paren); + lex_strterm = NEW_STRTERM(str_dquote, term, paren); return tSTRING_BEG; case 'q': - lex_strterm = NEW_STRTERM('\'', term, paren); + lex_strterm = NEW_STRTERM(str_squote, term, paren); return tSTRING_BEG; + case 'W': + lex_strterm = NEW_STRTERM(str_dquote | STR_FUNC_QWORDS, term, paren); + do {c = nextc();} while (ISSPACE(c)); + pushback(c); + return tWORDS_BEG; + case 'w': - return parse_quotedwords(term, paren); + lex_strterm = NEW_STRTERM(str_squote | STR_FUNC_QWORDS, term, paren); + do {c = nextc();} while (ISSPACE(c)); + pushback(c); + return tQWORDS_BEG; case 'x': - lex_strterm = NEW_STRTERM('`', term, paren); + lex_strterm = NEW_STRTERM(str_xquote, term, paren); return tXSTRING_BEG; case 'r': - lex_strterm = NEW_STRTERM('/', term, paren); + lex_strterm = NEW_STRTERM(str_regexp, term, paren); return tREGEXP_BEG; default: |