From bab2e6cfdf341baa65533be655563d5cb819f6ba Mon Sep 17 00:00:00 2001 From: why Date: Tue, 25 Nov 2003 19:51:38 +0000 Subject: * ext/syck/token.c: removed YYTOKTMP references which were causing buffer overflows on large block scalars, comments, quoted scalars and plain scalars. * ext/syck/rubyext.c: dynamic changing of buffer size. * ext/syck/syck.h: default buffer size of 4k. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5032 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/syck/bytecode.c | 102 ++++++++++++++++++++++++++++++---------------------- ext/syck/rubyext.c | 53 ++++++++++++++++++++------- ext/syck/syck.c | 4 +-- ext/syck/syck.h | 4 +-- ext/syck/token.c | 62 ++++++++++++++++---------------- 5 files changed, 134 insertions(+), 91 deletions(-) (limited to 'ext/syck') diff --git a/ext/syck/bytecode.c b/ext/syck/bytecode.c index 848f69f918..df15658a73 100644 --- a/ext/syck/bytecode.c +++ b/ext/syck/bytecode.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Fri Oct 17 12:13:58 2003 */ +/* Generated by re2c 0.5 on Sun Nov 23 14:51:02 2003 */ #line 1 "bytecode.re" /* * bytecode.re @@ -38,6 +38,11 @@ char *get_inline( SyckParser *parser ); */ #define YYPOS(n) YYCURSOR = YYTOKEN + n +/* + * Track line numbers + */ +#define CHK_NL(ptr) if ( *( ptr - 1 ) == '\n' && ptr > YYLINECTPTR ) { YYLINEPTR = ptr; YYLINE++; YYLINECTPTR = YYLINEPTR; } + /* * I like seeing the level operations as macros... */ @@ -144,7 +149,7 @@ sycklex_bytecode_utf8( YYSTYPE *sycklval, SyckParser *parser ) return t; } -#line 168 +#line 173 lvl = CURRENT_LEVEL(); @@ -182,7 +187,7 @@ yy3: yyaccept = 0; default: goto yy4; } yy4: -#line 194 +#line 200 { YYPOS(0); goto Document; } @@ -190,9 +195,10 @@ yy5: yych = *++YYCURSOR; goto yy4; yy6: yych = *++YYCURSOR; yy7: -#line 182 +#line 187 { if ( lvl->status == syck_lvl_header ) { + CHK_NL(YYCURSOR); goto Directive; } else @@ -208,10 +214,8 @@ yy8: yych = *++YYCURSOR; default: goto yy2; } } -#line 198 - +#line 204 - lvl->status = syck_lvl_doc; Document: { @@ -279,21 +283,22 @@ yy16: yych = *++YYCURSOR; } yy17: yych = *++YYCURSOR; yy18: -#line 282 +#line 289 { ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_str); goto Scalar; } yy19: yych = *++YYCURSOR; yy20: -#line 286 +#line 293 { ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_open); sycklval->name = get_inline( parser ); syck_hdlr_remove_anchor( parser, sycklval->name ); + CHK_NL(YYCURSOR); return YAML_ANCHOR; } yy21: yych = *++YYCURSOR; yy22: -#line 292 +#line 300 { ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_str); sycklval->name = get_inline( parser ); POP_LEVEL(); @@ -302,10 +307,11 @@ yy22: } yy23: yych = *++YYCURSOR; yy24: -#line 299 +#line 307 { char *qstr; ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_open); qstr = get_inline( parser ); + CHK_NL(YYCURSOR); if ( qstr[0] == '!' ) { int qidx = strlen( qstr ); @@ -363,12 +369,13 @@ yy24: } yy25: yych = *++YYCURSOR; yy26: -#line 358 +#line 367 { goto Comment; } yy27: yych = *++YYCURSOR; yy28: -#line 360 - { if ( lvl->status == syck_lvl_seq ) +#line 369 + { CHK_NL(YYCURSOR); + if ( lvl->status == syck_lvl_seq ) { return YAML_INDENT; } @@ -386,14 +393,14 @@ yy29: yych = *++YYCURSOR; } yy30: yych = *++YYCURSOR; yy31: -#line 372 +#line 382 { ENSURE_YAML_IEND(lvl, -1); YYPOS(0); return 0; } yy32: yych = *++YYCURSOR; yy33: -#line 247 +#line 253 { if ( lvl->status == syck_lvl_seq && lvl->ncount == 0 ) { lvl->ncount++; @@ -426,6 +433,7 @@ yy33: FORCE_NEXT_TOKEN(YAML_INDENT); } } + CHK_NL(YYCURSOR); return YAML_IEND; } yy34: yych = *++YYCURSOR; @@ -435,13 +443,14 @@ yy34: yych = *++YYCURSOR; } yy35: yych = *++YYCURSOR; yy36: -#line 233 +#line 238 { int complex = 0; if ( lvl->ncount % 2 == 0 && ( lvl->status == syck_lvl_map || lvl->status == syck_lvl_seq ) ) { complex = 1; } ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_seq); + CHK_NL(YYCURSOR); if ( complex ) { FORCE_NEXT_TOKEN( YAML_IOPEN ); @@ -456,13 +465,14 @@ yy37: yych = *++YYCURSOR; } yy38: yych = *++YYCURSOR; yy39: -#line 219 +#line 223 { int complex = 0; if ( lvl->ncount % 2 == 0 && ( lvl->status == syck_lvl_map || lvl->status == syck_lvl_seq ) ) { complex = 1; } ADD_BYTE_LEVEL(lvl, lvl->spaces + 1, syck_lvl_map); + CHK_NL(YYCURSOR); if ( complex ) { FORCE_NEXT_TOKEN( YAML_IOPEN ); @@ -477,7 +487,7 @@ yy40: yych = *++YYCURSOR; } yy41: yych = *++YYCURSOR; yy42: -#line 214 +#line 218 { ENSURE_YAML_IEND(lvl, -1); YYPOS(0); return 0; @@ -493,7 +503,7 @@ yy44: yych = *++YYCURSOR; default: goto yy11; } } -#line 377 +#line 387 } @@ -600,7 +610,7 @@ yy48: yyaccept = 0; default: goto yy49; } yy49: -#line 389 +#line 400 { YYCURSOR = YYTOKTMP; return YAML_DOCSEP; } @@ -850,15 +860,16 @@ yy55: switch(yych){ } yy56: yych = *++YYCURSOR; yy57: -#line 387 - { goto Directive; } +#line 397 + { CHK_NL(YYCURSOR); + goto Directive; } yy58: yych = *++YYCURSOR; switch(yych){ case '\n': goto yy56; default: goto yy47; } } -#line 392 +#line 403 } @@ -883,22 +894,23 @@ yy59: } yy61:yy62: yych = *++YYCURSOR; yy63: -#line 402 - { goto Document; } +#line 413 + { CHK_NL(YYCURSOR); + goto Document; } yy64: yych = *++YYCURSOR; switch(yych){ case '\n': goto yy67; default: goto yy65; } yy65: -#line 404 +#line 416 { goto Comment; } yy66: yych = *++YYCURSOR; goto yy65; yy67: yych = *++YYCURSOR; goto yy63; } -#line 406 +#line 418 } @@ -937,7 +949,7 @@ yy70: yych = *++YYCURSOR; default: goto yy71; } yy71: -#line 447 +#line 462 { YYCURSOR = tok; goto ScalarEnd; } @@ -947,13 +959,13 @@ yy72: yych = *++YYCURSOR; default: goto yy73; } yy73: -#line 455 +#line 470 { CAT(str, cap, idx, tok[0]); goto Scalar2; } yy74: yych = *++YYCURSOR; yy75: -#line 451 +#line 466 { YYCURSOR = tok; goto ScalarEnd; } @@ -968,8 +980,9 @@ yy77: yych = *++YYCURSOR; } yy78: yych = *++YYCURSOR; yy79: -#line 424 - { goto Scalar2; } +#line 436 + { CHK_NL(tok+1); + goto Scalar2; } yy80: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; @@ -987,8 +1000,9 @@ yy81: switch(yych){ default: goto yy82; } yy82: -#line 426 - { if ( tok + 2 < YYCURSOR ) +#line 439 + { CHK_NL(tok+1); + if ( tok + 2 < YYCURSOR ) { char *count = tok + 2; int total = strtod( count, NULL ); @@ -1006,12 +1020,13 @@ yy82: } yy83: yych = *++YYCURSOR; yy84: -#line 443 - { CAT(str, cap, idx, '\0'); +#line 457 + { CHK_NL(tok+1); + CAT(str, cap, idx, '\0'); goto Scalar2; } } -#line 459 +#line 474 ScalarEnd: @@ -1061,21 +1076,22 @@ yy85: } yy87: yych = *++YYCURSOR; yy88: -#line 494 - { return str; } +#line 509 + { CHK_NL(YYCURSOR); + return str; } yy89: yych = *++YYCURSOR; switch(yych){ case '\n': goto yy94; default: goto yy90; } yy90: -#line 500 +#line 516 { CAT(str, cap, idx, tok[0]); goto Inline; } yy91: yych = *++YYCURSOR; yy92: -#line 496 +#line 512 { YYCURSOR = tok; return str; } @@ -1084,7 +1100,7 @@ yy93: yych = *++YYCURSOR; yy94: yych = *++YYCURSOR; goto yy88; } -#line 504 +#line 520 } diff --git a/ext/syck/rubyext.c b/ext/syck/rubyext.c index 4f5d4610e8..589033507d 100644 --- a/ext/syck/rubyext.c +++ b/ext/syck/rubyext.c @@ -45,14 +45,10 @@ typedef struct { #define RUBY_DOMAIN "ruby.yaml.org,2002" -#ifndef StringValue -#define StringValue(v) -#endif - /* * symbols and constants */ -static ID s_new, s_utc, s_at, s_to_f, s_read, s_binmode, s_call, s_transfer, s_update, s_dup, s_match, s_keys, s_to_str, s_unpack, s_tr_bang, s_anchors, s_default_set; +static ID s_new, s_utc, s_at, s_to_f, s_to_i, s_read, s_binmode, s_call, s_transfer, s_update, s_dup, s_match, s_keys, s_to_str, s_unpack, s_tr_bang, s_anchors, s_default_set; static VALUE sym_model, sym_generic, sym_input, sym_bytecode; static VALUE sym_scalar, sym_seq, sym_map; VALUE cDate, cParser, cLoader, cNode, cPrivateType, cDomainType, cBadAlias, cDefaultKey, cMergeKey, cEmitter; @@ -127,20 +123,20 @@ rb_syck_io_str_read( char *buf, SyckIoStr *str, long max_size, long skip ) ASSERT( str != NULL ); max_size -= skip; - if ( max_size < 0 ) max_size = 0; - if ( max_size > 0 ) + if ( max_size <= 0 ) max_size = 0; + else { /* * call io#read. */ VALUE src = (VALUE)str->ptr; VALUE n = LONG2NUM(max_size); - VALUE str = rb_funcall2(src, s_read, 1, &n); - if (!NIL_P(str)) + VALUE str2 = rb_funcall2(src, s_read, 1, &n); + if (!NIL_P(str2)) { - len = RSTRING(str)->len; - memcpy( buf + skip, RSTRING(str)->ptr, len ); + len = RSTRING(str2)->len; + memcpy( buf + skip, RSTRING(str2)->ptr, len ); } } len += skip; @@ -654,7 +650,8 @@ rb_syck_bad_anchor_handler(p, a) SyckParser *p; char *a; { - SyckNode *badanc = syck_new_map( rb_str_new2( "name" ), rb_str_new2( a ) ); + VALUE anchor_name = rb_str_new2( a ); + SyckNode *badanc = syck_new_map( rb_str_new2( "name" ), anchor_name ); badanc->type_id = syck_strndup( "tag:ruby.yaml.org,2002:object:YAML::Syck::BadAlias", 53 ); return badanc; } @@ -735,6 +732,35 @@ syck_parser_initialize( self, options ) return self; } +/* + * YAML::Syck::Parser.bufsize = Integer + */ +static VALUE +syck_parser_bufsize_set( self, size ) + VALUE self, size; +{ + SyckParser *parser; + + Data_Get_Struct(self, SyckParser, parser); + if ( rb_respond_to( size, s_to_i ) ) { + parser->bufsize = NUM2INT(rb_funcall(size, s_to_i, 0)); + } + return self; +} + +/* + * YAML::Syck::Parser.bufsize => Integer + */ +static VALUE +syck_parser_bufsize_get( self ) + VALUE self; +{ + SyckParser *parser; + + Data_Get_Struct(self, SyckParser, parser); + return INT2FIX( parser->bufsize ); +} + /* * YAML::Syck::Parser.load( IO or String ) */ @@ -1333,6 +1359,7 @@ Init_syck() s_utc = rb_intern("utc"); s_at = rb_intern("at"); s_to_f = rb_intern("to_f"); + s_to_i = rb_intern("to_i"); s_read = rb_intern("read"); s_anchors = rb_intern("anchors"); s_binmode = rb_intern("binmode"); @@ -1373,6 +1400,8 @@ Init_syck() rb_define_method( cLoader, "add_builtin_type", syck_loader_add_builtin_type, -1 ); rb_define_method( cLoader, "add_ruby_type", syck_loader_add_ruby_type, -1 ); rb_define_method( cLoader, "add_private_type", syck_loader_add_private_type, -1 ); + rb_define_method( cLoader, "bufsize=", syck_parser_bufsize_set, 1 ); + rb_define_method( cLoader, "bufsize", syck_parser_bufsize_get, 0 ); rb_define_method( cLoader, "detect_implicit", syck_loader_detect_implicit, 1 ); rb_define_method( cLoader, "transfer", syck_loader_transfer, 2 ); diff --git a/ext/syck/syck.c b/ext/syck/syck.c index 8d319379a3..c9aad7a8a5 100644 --- a/ext/syck/syck.c +++ b/ext/syck/syck.c @@ -71,9 +71,9 @@ syck_io_str_read( char *buf, SyckIoStr *str, long max_size, long skip ) if ( max_size >= 0 ) { max_size -= skip; - if ( max_size < 0 ) max_size = 0; + if ( max_size <= 0 ) max_size = 0; + else str->ptr += max_size - 1; - str->ptr += max_size; if ( str->ptr > str->end ) { str->ptr = str->end; diff --git a/ext/syck/syck.h b/ext/syck/syck.h index 5b42b778b0..0383c5ad47 100644 --- a/ext/syck/syck.h +++ b/ext/syck/syck.h @@ -13,7 +13,7 @@ #define SYCK_YAML_MAJOR 1 #define SYCK_YAML_MINOR 0 -#define SYCK_VERSION "0.41" +#define SYCK_VERSION "0.42" #define YAML_DOMAIN "yaml.org,2002" #include @@ -47,7 +47,7 @@ extern "C" { #endif #define ALLOC_CT 8 -#define SYCK_BUFFERSIZE 16384 +#define SYCK_BUFFERSIZE 4096 #define S_ALLOC_N(type,n) (type*)malloc(sizeof(type)*(n)) #define S_ALLOC(type) (type*)malloc(sizeof(type)) #define S_REALLOC_N(var,type,n) (var)=(type*)realloc((char*)(var),sizeof(type)*(n)) diff --git a/ext/syck/token.c b/ext/syck/token.c index aa7771de75..f9e0601e52 100644 --- a/ext/syck/token.c +++ b/ext/syck/token.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Thu Oct 16 14:12:55 2003 */ +/* Generated by re2c 0.5 on Tue Nov 25 12:10:28 2003 */ #line 1 "token.re" /* * token.re @@ -119,7 +119,7 @@ #define RETURN_IMPLICIT() \ { \ SyckNode *n = syck_alloc_str(); \ - YYCURSOR = YYTOKTMP; \ + YYCURSOR = YYTOKEN; \ n->data.str->ptr = qstr; \ n->data.str->len = qidx; \ sycklval->nodeData = n; \ @@ -1387,7 +1387,7 @@ Plain: GET_TRUE_YAML_INDENT(parentIndent); Plain2: - YYTOKTMP = YYCURSOR; + YYTOKEN = YYCURSOR; Plain3: @@ -1416,7 +1416,7 @@ yy111: #line 488 { int indt_len, nl_count = 0; SyckLevel *lvl; - char *tok = YYTOKTMP; + char *tok = YYTOKEN; GOBBLE_UP_YAML_INDENT( indt_len, tok ); lvl = CURRENT_LEVEL(); @@ -1425,9 +1425,9 @@ yy111: RETURN_IMPLICIT(); } - while ( YYTOKTMP < YYCURSOR ) + while ( YYTOKEN < YYCURSOR ) { - if ( is_newline( YYTOKTMP++ ) ) + if ( is_newline( YYTOKEN++ ) ) nl_count++; } if ( nl_count <= 1 ) @@ -1452,7 +1452,7 @@ yy112: yych = *++YYCURSOR; } yy113: #line 542 - { QUOTECATS(qstr, qcapa, qidx, YYTOKTMP, YYCURSOR - YYTOKTMP); + { QUOTECATS(qstr, qcapa, qidx, YYTOKEN, YYCURSOR - YYTOKEN); goto Plain2; } yy114: yyaccept = 1; @@ -1472,7 +1472,7 @@ yy116: { YYCURSOR--; } - QUOTECATS(qstr, qcapa, qidx, YYTOKTMP, YYCURSOR - YYTOKTMP); + QUOTECATS(qstr, qcapa, qidx, YYTOKEN, YYCURSOR - YYTOKEN); goto Plain2; } RETURN_IMPLICIT(); @@ -1568,7 +1568,7 @@ SingleQuote: char *qstr = S_ALLOC_N( char, qcapa ); SingleQuote2: - YYTOKTMP = YYCURSOR; + YYTOKEN = YYCURSOR; { YYCTYPE yych; @@ -1593,7 +1593,7 @@ yy141: { int indt_len; int nl_count = 0; SyckLevel *lvl; - GOBBLE_UP_YAML_INDENT( indt_len, YYTOKTMP ); + GOBBLE_UP_YAML_INDENT( indt_len, YYTOKEN ); lvl = CURRENT_LEVEL(); if ( lvl->status != syck_lvl_str ) @@ -1605,9 +1605,9 @@ yy141: /* Error! */ } - while ( YYTOKTMP < YYCURSOR ) + while ( YYTOKEN < YYCURSOR ) { - if ( is_newline( YYTOKTMP++ ) ) + if ( is_newline( YYTOKEN++ ) ) nl_count++; } if ( nl_count <= 1 ) @@ -1701,7 +1701,7 @@ DoubleQuote: char *qstr = S_ALLOC_N( char, qcapa ); DoubleQuote2: - YYTOKTMP = YYCURSOR; + YYTOKEN = YYCURSOR; { @@ -1728,7 +1728,7 @@ yy157: { int indt_len; int nl_count = 0; SyckLevel *lvl; - GOBBLE_UP_YAML_INDENT( indt_len, YYTOKTMP ); + GOBBLE_UP_YAML_INDENT( indt_len, YYTOKEN ); lvl = CURRENT_LEVEL(); if ( lvl->status != syck_lvl_str ) @@ -1742,9 +1742,9 @@ yy157: if ( keep_nl == 1 ) { - while ( YYTOKTMP < YYCURSOR ) + while ( YYTOKEN < YYCURSOR ) { - if ( is_newline( YYTOKTMP++ ) ) + if ( is_newline( YYTOKEN++ ) ) nl_count++; } if ( nl_count <= 1 ) @@ -1903,7 +1903,7 @@ yy175: yych = *++YYCURSOR; yy176: #line 693 { long ch; - char *chr_text = syck_strndup( YYTOKTMP, 4 ); + char *chr_text = syck_strndup( YYTOKEN, 4 ); chr_text[0] = '0'; ch = strtol( chr_text, NULL, 16 ); free( chr_text ); @@ -2145,7 +2145,7 @@ ScalarBlock: YYTOKEN = YYCURSOR; ScalarBlock2: - YYTOKTMP = YYCURSOR; + YYTOKEN = YYCURSOR; { YYCTYPE yych; @@ -2168,7 +2168,7 @@ yy199: yyaccept = 0; yy200: #line 854 { char *pacer; - char *tok = YYTOKTMP; + char *tok = YYTOKEN; int indt_len = 0, nl_count = 0, fold_nl = 0, nl_begin = 0; GOBBLE_UP_YAML_INDENT( indt_len, tok ); lvl = CURRENT_LEVEL(); @@ -2183,7 +2183,7 @@ yy200: } else if ( lvl->status != syck_lvl_block ) { - YYCURSOR = YYTOKTMP; + YYCURSOR = YYTOKEN; RETURN_YAML_BLOCK(); } @@ -2196,7 +2196,7 @@ yy200: fold_nl = 1; } - pacer = YYTOKTMP; + pacer = YYTOKEN; while ( pacer < YYCURSOR ) { if ( is_newline( pacer++ ) ) @@ -2227,7 +2227,7 @@ yy200: if ( indt_len < lvl->spaces ) { POP_LEVEL(); - YYCURSOR = YYTOKTMP; + YYCURSOR = YYTOKEN; RETURN_YAML_BLOCK(); } goto ScalarBlock2; @@ -2239,7 +2239,7 @@ yy201: yych = *++YYCURSOR; } yy202: #line 940 - { QUOTECAT(qstr, qcapa, qidx, *YYTOKTMP); + { QUOTECAT(qstr, qcapa, qidx, *YYTOKEN); goto ScalarBlock2; } yy203: yych = *++YYCURSOR; @@ -2249,11 +2249,11 @@ yy204: if ( lvl->status != syck_lvl_block ) { eat_comments( parser ); - YYTOKTMP = YYCURSOR; + YYTOKEN = YYCURSOR; } else { - QUOTECAT(qstr, qcapa, qidx, *YYTOKTMP); + QUOTECAT(qstr, qcapa, qidx, *YYTOKEN); } goto ScalarBlock2; } @@ -2298,11 +2298,9 @@ yy211: YYCURSOR = YYMARKER; void eat_comments( SyckParser *parser ) { - char *tok; - Comment: { - tok = YYCURSOR; + YYTOKEN = YYCURSOR; { YYCTYPE yych; @@ -2320,8 +2318,8 @@ yy212: } yy214: yych = *++YYCURSOR; yy215: -#line 963 - { YYCURSOR = tok; +#line 961 + { YYCURSOR = YYTOKEN; return; } yy216: yyaccept = 0; @@ -2333,7 +2331,7 @@ yy217: yych = *++YYCURSOR; default: goto yy218; } yy218: -#line 967 +#line 965 { goto Comment; } yy219: yych = *++YYCURSOR; @@ -2359,7 +2357,7 @@ yy223: YYCURSOR = YYMARKER; case 0: goto yy215; } } -#line 970 +#line 968 } -- cgit v1.2.3