From a056098cb719312452eca309bec914406f9e5ca3 Mon Sep 17 00:00:00 2001 From: charliesome Date: Mon, 2 Sep 2013 07:11:41 +0000 Subject: * NEWS: Add note about frozen string literals * compile.c (case_when_optimizable_literal): optimize NODE_LIT strings in when clauses of case statements * ext/ripper/eventids2.c: add tSTRING_SUFFIX * parse.y: add 'f' suffix on string literals for frozen strings * test/ripper/test_scanner_events.rb: add scanner tests * test/ruby/test_string.rb: add frozen string tests [Feature #8579] [ruby-core:55699] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@42773 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 17 ++++++ NEWS | 2 + compile.c | 3 +- ext/ripper/eventids2.c | 3 ++ parse.y | 107 ++++++++++++++++++++++++++++++++++--- test/ripper/test_scanner_events.rb | 9 ++++ test/ruby/test_string.rb | 43 +++++++++++++++ 7 files changed, 175 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index ea884c3015..3c2d5c962f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Mon Sep 2 16:06:00 2013 Charlie Somerville + + * NEWS: Add note about frozen string literals + + * compile.c (case_when_optimizable_literal): optimize NODE_LIT strings + in when clauses of case statements + + * ext/ripper/eventids2.c: add tSTRING_SUFFIX + + * parse.y: add 'f' suffix on string literals for frozen strings + + * test/ripper/test_scanner_events.rb: add scanner tests + + * test/ruby/test_string.rb: add frozen string tests + + [Feature #8579] [ruby-core:55699] + Mon Sep 2 14:39:29 2013 Akinori MUSHA * ruby.c (Process#setproctitle): [DOC] Fix and improve rdoc. diff --git a/NEWS b/NEWS index 3f1ba30aea..5d142f1a7f 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,8 @@ with all sufficient information, see the ChangeLog file. * def-expr now returns the symbol of its name instead of nil. +* Added 'f' suffix for string literals that returns a frozen String object. + === Core classes updates (outstanding ones only) * Bignum diff --git a/compile.c b/compile.c index bed329881e..92791f82a2 100644 --- a/compile.c +++ b/compile.c @@ -2504,7 +2504,8 @@ case_when_optimizable_literal(NODE * node) modf(RFLOAT_VALUE(v), &ival) == 0.0) { return FIXABLE(ival) ? LONG2FIX((long)ival) : rb_dbl2big(ival); } - if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) { + if (SYMBOL_P(v) || RB_TYPE_P(v, T_STRING) || + rb_obj_is_kind_of(v, rb_cNumeric)) { return v; } break; diff --git a/ext/ripper/eventids2.c b/ext/ripper/eventids2.c index 423f9d7e29..2d09573506 100644 --- a/ext/ripper/eventids2.c +++ b/ext/ripper/eventids2.c @@ -37,6 +37,7 @@ static ID ripper_id_symbeg; static ID ripper_id_tstring_beg; static ID ripper_id_tstring_content; static ID ripper_id_tstring_end; +static ID ripper_id_tstring_suffix; static ID ripper_id_words_beg; static ID ripper_id_qwords_beg; static ID ripper_id_qsymbols_beg; @@ -94,6 +95,7 @@ ripper_init_eventids2(void) ripper_id_tstring_beg = rb_intern_const("on_tstring_beg"); ripper_id_tstring_content = rb_intern_const("on_tstring_content"); ripper_id_tstring_end = rb_intern_const("on_tstring_end"); + ripper_id_tstring_suffix = rb_intern_const("on_tstring_suffix"); ripper_id_words_beg = rb_intern_const("on_words_beg"); ripper_id_qwords_beg = rb_intern_const("on_qwords_beg"); ripper_id_qsymbols_beg = rb_intern_const("on_qsymbols_beg"); @@ -252,6 +254,7 @@ static const struct token_assoc { {tSTRING_DEND, &ripper_id_embexpr_end}, {tSTRING_DVAR, &ripper_id_embvar}, {tSTRING_END, &ripper_id_tstring_end}, + {tSTRING_SUFFIX, &ripper_id_tstring_suffix}, {tSYMBEG, &ripper_id_symbeg}, {tUMINUS, &ripper_id_op}, {tUMINUS_NUM, &ripper_id_op}, diff --git a/parse.y b/parse.y index 496e37b9d4..31ed7598e6 100644 --- a/parse.y +++ b/parse.y @@ -400,6 +400,8 @@ static NODE *new_evstr_gen(struct parser_params*,NODE*); #define new_evstr(n) new_evstr_gen(parser,(n)) static NODE *evstr2dstr_gen(struct parser_params*,NODE*); #define evstr2dstr(n) evstr2dstr_gen(parser,(n)) +static NODE *str_suffix_gen(struct parser_params*, NODE*, long); +#define str_suffix(n,o) str_suffix_gen(parser,(n),(o)) static NODE *splat_array(NODE*); static NODE *call_bin_op_gen(struct parser_params*,NODE*,ID,NODE*); @@ -531,6 +533,9 @@ static int lvar_defined_gen(struct parser_params*, ID); #define RE_OPTION_MASK 0xff #define RE_OPTION_ARG_ENCODING_NONE 32 +#define STR_OPTION_FROZEN 1 +#define STR_OPTION_BINARY 0 /* disabled */ + #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ #define SIGN_EXTEND(x,n) (((1<<(n)-1)^((x)&~(~0<<(n))))-(1<<(n)-1)) @@ -758,7 +763,7 @@ static void token_info_pop(struct parser_params*, const char *token); %token tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL %token tINTEGER tFLOAT tRATIONAL tIMAGINARY tSTRING_CONTENT tCHAR %token tNTH_REF tBACK_REF -%token tREGEXP_END +%token tREGEXP_END tSTRING_SUFFIX %type singleton strings string string1 xstring regexp %type string_contents xstring_contents regexp_contents string_content @@ -784,6 +789,7 @@ static void token_info_pop(struct parser_params*, const char *token); %type fsym keyword_variable user_variable sym symbol operation operation2 operation3 %type cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg %type f_kwrest f_label +%type opt_string_sfx /*%%%*/ /*% %type program reswords then do dot_or_colon @@ -3806,7 +3812,7 @@ literal : numeric | dsym ; -strings : string +strings : string opt_string_sfx { /*%%%*/ NODE *node = $1; @@ -3816,6 +3822,7 @@ strings : string else { node = evstr2dstr(node); } + node = str_suffix(node, $2); $$ = node; /*% $$ = $1; @@ -3845,6 +3852,10 @@ string1 : tSTRING_BEG string_contents tSTRING_END } ; +opt_string_sfx : tSTRING_SUFFIX + | /* none */ {$$ = 0;} + ; + xstring : tXSTRING_BEG xstring_contents tSTRING_END { /*%%%*/ @@ -5008,6 +5019,7 @@ none : /* none */ # define yylval (*((YYSTYPE*)(parser->parser_yylval))) static int parser_regx_options(struct parser_params*); +static int parser_str_options(struct parser_params*); static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); static int parser_parse_string(struct parser_params*,NODE*); @@ -5023,6 +5035,7 @@ static int parser_here_document(struct parser_params*,NODE*); # define read_escape(flags,e) parser_read_escape(parser, (flags), (e)) # define tokadd_escape(e) parser_tokadd_escape(parser, (e)) # define regx_options() parser_regx_options(parser) +# define str_options() parser_str_options(parser) # define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,(f),(t),(p),(n),(e)) # define parse_string(n) parser_parse_string(parser,(n)) # define tokaddmbc(c, enc) parser_tokaddmbc(parser, (c), (enc)) @@ -5532,10 +5545,11 @@ rb_parser_compile_file_path(volatile VALUE vparser, VALUE fname, VALUE file, int #define STR_FUNC_QWORDS 0x08 #define STR_FUNC_SYMBOL 0x10 #define STR_FUNC_INDENT 0x20 +#define STR_FUNC_OPTION 0x40 enum string_type { - str_squote = (0), - str_dquote = (STR_FUNC_EXPAND), + str_squote = (STR_FUNC_OPTION), + str_dquote = (STR_FUNC_EXPAND|STR_FUNC_OPTION), str_xquote = (STR_FUNC_EXPAND), str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), str_sword = (STR_FUNC_QWORDS), @@ -5982,6 +5996,40 @@ parser_regx_options(struct parser_params *parser) return options | RE_OPTION_ENCODING(kcode); } +static int +parser_str_options(struct parser_params *parser) +{ + int c, options = 0; + + newtok(); + while (c = nextc(), ISALPHA(c)) { + switch (c) { +#if STR_OPTION_FROZEN + case 'f': + options |= STR_OPTION_FROZEN; + break; +#endif +#if STR_OPTION_BINARY + case 'b': + options |= STR_OPTION_BINARY; + break; +#endif + default: + tokadd(c); + break; + } + } + pushback(c); + + if (toklen()) { + tokfix(); + compile_error(PARSER_ARG "unknown string option%s - %s", + toklen() > 1 ? "s" : "", tok()); + } + + return options; +} + static void dispose_string(VALUE str) { @@ -6248,6 +6296,10 @@ parser_parse_string(struct parser_params *parser, NODE *quote) rb_encoding *enc = current_enc; if (func == -1) return tSTRING_END; + if (func == 0) { + set_yylval_num(term); + return tSTRING_SUFFIX; + } c = nextc(); if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { do {c = nextc();} while (ISSPACE(c)); @@ -6256,11 +6308,18 @@ parser_parse_string(struct parser_params *parser, NODE *quote) if (c == term && !quote->nd_nest) { if (func & STR_FUNC_QWORDS) { quote->nd_func = -1; + quote->u2.id = 0; return ' '; } - if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; - set_yylval_num(regx_options()); - return tREGEXP_END; + if (func & STR_FUNC_REGEXP) { + set_yylval_num(regx_options()); + return tREGEXP_END; + } + if ((func & STR_FUNC_OPTION) && (func = str_options()) != 0) { + quote->nd_func = 0; + quote->u2.id = func; + } + return tSTRING_END; } if (space) { pushback(c); @@ -6948,7 +7007,8 @@ parser_yylex(struct parser_params *parser) } else { token = parse_string(lex_strterm); - if (token == tSTRING_END || token == tREGEXP_END) { + if ((token == tSTRING_END && lex_strterm->nd_func) || + token == tSTRING_SUFFIX || token == tREGEXP_END) { rb_gc_force_recycle((VALUE)lex_strterm); lex_strterm = 0; lex_state = EXPR_END; @@ -8497,6 +8557,37 @@ evstr2dstr_gen(struct parser_params *parser, NODE *node) return node; } +static NODE * +str_suffix_gen(struct parser_params *parser, NODE *node, long opt) +{ + if (nd_type(node) == NODE_STR) { +#if STR_OPTION_BINARY + if (opt & STR_OPTION_BINARY) { + rb_enc_associate_index(node->nd_lit, ENCINDEX_ASCII); + } +#endif +#if STR_OPTION_FROZEN + if (opt & STR_OPTION_FROZEN) { + OBJ_FREEZE(node->nd_lit); + nd_set_type(node, NODE_LIT); + } +#endif + } + else { +#if STR_OPTION_BINARY + if (opt & STR_OPTION_BINARY) { + node = NEW_CALL(node, rb_intern("b"), 0); + } +#endif +#if STR_OPTION_FROZEN + if (opt & STR_OPTION_FROZEN) { + node = NEW_CALL(node, rb_intern("freeze"), 0); + } +#endif + } + return node; +} + static NODE * new_evstr_gen(struct parser_params *parser, NODE *node) { diff --git a/test/ripper/test_scanner_events.rb b/test/ripper/test_scanner_events.rb index 2474588f76..3eed35718b 100644 --- a/test/ripper/test_scanner_events.rb +++ b/test/ripper/test_scanner_events.rb @@ -591,6 +591,15 @@ class TestRipper::ScannerEvents < Test::Unit::TestCase scan('tstring_end', '%Q[abcdef]') end + def test_tstring_suffix + assert_equal ['"f'], + scan('tstring_end', '"abcdef"f') + assert_equal [']f'], + scan('tstring_end', '%q[abcdef]f') + assert_equal [']f'], + scan('tstring_end', '%Q[abcdef]f') + end + def test_regexp_beg assert_equal [], scan('regexp_beg', '') diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 83bf78adc9..faf28f5c8e 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2192,6 +2192,49 @@ class TestString < Test::Unit::TestCase assert_equal(false, "\u3042".byteslice(0, 2).valid_encoding?) assert_equal(false, ("\u3042"*10).byteslice(0, 20).valid_encoding?) end + + def test_unknown_string_option + assert_raises(SyntaxError) do + eval(%{ + "hello"x + }) + end + end + + def test_frozen_string + assert_equal "hello", "hello"f + + assert_predicate "hello"f, :frozen? + + f = -> { "hello"f } + + assert_equal f.call.object_id, f.call.object_id + end + + def test_frozen_dstring + assert_equal "hello123", "hello#{123}"f + + assert_predicate "hello#{123}"f, :frozen? + + i = 0 + f = -> { "#{i += 1}"f } + assert_equal "1", f.call + assert_equal "2", f.call + end + + def test_frozen_string_cannot_be_adjacent + assert_raises(SyntaxError) do + eval(%{ + "hello"f "world" + }) + end + + assert_raises(SyntaxError) do + eval(%{ + "hello"f "world" + }) + end + end end class TestString2 < TestString -- cgit v1.2.3