diff options
author | tenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-03-29 01:25:11 +0000 |
---|---|---|
committer | tenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-03-29 01:25:11 +0000 |
commit | 7d984d76bac857a54c1775b203c944da6cb20ac4 (patch) | |
tree | facf5936314b9853ec789343165b79a432efa5e3 /ext/psych/parser.c | |
parent | c857c7379c8f08d8ca985efa40e8682ab4686984 (diff) |
merge revision(s) 32578,33401,33403,33404,33531,33655,33679,33809,33900,33965,34067,34069,34087,34328,34330,34527,34772,34783,34839,34914,34953,34954,35153: [Backport #6212]
* ext/psych/lib/psych.rb: updating version to match gem
* ext/psych/psych.gemspec: ditto
* ext/psych/lib/psych/visitors/to_ruby.rb: fixing deprecation warning
* ext/psych/lib/psych.rb: define a new BadAlias error class.
* ext/psych/lib/psych/visitors/to_ruby.rb: raise an exception when
deserializing an alias that does not exist.
* test/psych/test_merge_keys.rb: corresponding test.
* ext/psych/lib/psych.rb (load, parse): stop parsing or loading after
the first document has been parsed.
* test/psych/test_stream.rb: pertinent tests.
* ext/psych/lib/psych.rb (parse_stream, load_stream): if a block is
given, documents will be yielded to the block as they are parsed.
[ruby-core:42404] [Bug #5978]
* ext/psych/lib/psych/handlers/document_stream.rb: add a handler that
yields documents as they are parsed
* test/psych/test_stream.rb: corresponding tests.
* ext/psych/lib/psych/core_ext.rb: only extend Kernel if IRB is loaded
in order to stop method pollution.
* ext/psych/lib/psych.rb: open YAML files with UTF-8 external
encoding by default. [ruby-core:42967]
* test/psych/test_tainted.rb: ditto
* ext/psych/parser.c: prevent a memory leak by protecting calls to
handler callbacks.
* test/psych/test_parser.rb: test to demonstrate leak.
* ext/psych/parser.c: set parser encoding based on the YAML input
rather than user configuration.
* test/psych/test_encoding.rb: corresponding tests.
* test/psych/test_parser.rb: ditto
* test/psych/test_tainted.rb: ditto
* ext/psych/parser.c: removed external encoding setter, allow parser
to be reused.
* ext/psych/lib/psych/parser.rb: added external encoding setter.
* test/psych/test_parser.rb: test parser reuse
* ext/psych/lib/psych/visitors/to_ruby.rb: Added support for loading
subclasses of String with ivars
* ext/psych/lib/psych/visitors/yaml_tree.rb: Added support for dumping
subclasses of String with ivars
* test/psych/test_string.rb: corresponding tests
* ext/psych/lib/psych/visitors/to_ruby.rb: Added ability to load array
subclasses with ivars.
* ext/psych/lib/psych/visitors/yaml_tree.rb: Added ability to dump
array subclasses with ivars.
* test/psych/test_array.rb: corresponding tests
* ext/psych/emitter.c: fixing clang warnings. Thanks Joey!
* ext/psych/lib/psych/visitors/to_ruby.rb: BigDecimals can be restored
from YAML.
* ext/psych/lib/psych/visitors/yaml_tree.rb: BigDecimals can be dumped
to YAML.
* test/psych/test_numeric.rb: tests for BigDecimal serialization
* ext/psych/lib/psych/scalar_scanner.rb: Strings that look like dates
should be treated as strings and not dates.
* test/psych/test_scalar_scanner.rb: corresponding tests.
* ext/psych/lib/psych.rb (module Psych): parse and load methods take
an optional file name that is used when raising Psych::SyntaxError
exceptions
* ext/psych/lib/psych/syntax_error.rb (module Psych): allow nil file
names and handle nil file names in the exception message
* test/psych/test_exception.rb (module Psych): Tests for changes.
* ext/psych/parser.c (parse): parse method can take an optional file
name for use in exception messages.
* test/psych/test_parser.rb: corresponding tests.
* ext/psych/lib/psych.rb: remove autoload from psych
* ext/psych/lib/psych/json.rb: ditto
* ext/psych/lib/psych/tree_builder.rb: dump complex numbers,
rationals, etc with reference ids.
* ext/psych/lib/psych/visitors/yaml_tree.rb: ditto
* ext/psych/lib/psych/visitors/to_ruby.rb: loading complex numbers,
rationals, etc with reference ids.
* test/psych/test_object_references.rb: corresponding tests
* ext/psych/lib/psych/scalar_scanner.rb: make sure strings that look
like base 60 numbers are serialized as quoted strings.
* test/psych/test_string.rb: test for change.
* ext/psych/parser.c: remove unused variable.
* ext/psych/lib/psych/syntax_error.rb: Add file, line, offset, and
message attributes during parse failure.
* ext/psych/parser.c: Update parser to raise exception with correct
values.
* test/psych/test_exception.rb: corresponding tests.
* ext/psych/parser.c (parse): Use context_mark for indicating error
line and column.
* ext/psych/lib/psych/scalar_scanner.rb: use normal begin / rescue
since postfix rescue cannot receive the exception class. Thanks
nagachika!
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35165 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/psych/parser.c')
-rw-r--r-- | ext/psych/parser.c | 303 |
1 files changed, 241 insertions, 62 deletions
diff --git a/ext/psych/parser.c b/ext/psych/parser.c index e68768f562..9808c6b60e 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -59,6 +59,163 @@ static VALUE allocate(VALUE klass) return Data_Wrap_Struct(klass, 0, dealloc, parser); } +static VALUE make_exception(yaml_parser_t * parser, VALUE path) +{ + size_t line, column; + + line = parser->context_mark.line + 1; + column = parser->context_mark.column + 1; + + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + INT2NUM(line), + INT2NUM(column), + INT2NUM(parser->problem_offset), + parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, + parser->context ? rb_usascii_str_new2(parser->context) : Qnil); +} + +#ifdef HAVE_RUBY_ENCODING_H +static VALUE transcode_string(VALUE src, int * parser_encoding) +{ + int utf8 = rb_utf8_encindex(); + int utf16le = rb_enc_find_index("UTF16_LE"); + int utf16be = rb_enc_find_index("UTF16_BE"); + int source_encoding = rb_enc_get_index(src); + + if (source_encoding == utf8) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (source_encoding == utf16le) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (source_encoding == utf16be) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + src = rb_str_export_to_enc(src, rb_utf8_encoding()); + RB_GC_GUARD(src); + + *parser_encoding = YAML_UTF8_ENCODING; + return src; +} + +static VALUE transcode_io(VALUE src, int * parser_encoding) +{ + VALUE io_external_encoding; + int io_external_enc_index; + + io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); + + /* if no encoding is returned, assume ascii8bit. 
*/ + if (NIL_P(io_external_encoding)) { + io_external_enc_index = rb_ascii8bit_encindex(); + } else { + io_external_enc_index = rb_to_encoding_index(io_external_encoding); + } + + /* Treat US-ASCII as utf_8 */ + if (io_external_enc_index == rb_usascii_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_utf8_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + /* Just guess on ASCII-8BIT */ + if (io_external_enc_index == rb_ascii8bit_encindex()) { + *parser_encoding = YAML_ANY_ENCODING; + return src; + } + + rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s", + rb_enc_name(rb_enc_from_index(io_external_enc_index))); + + return Qnil; +} + +#endif + +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static VALUE protected_end_sequence(VALUE handler) +{ + return 
rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + /* * call-seq: * parser.parse(yaml) @@ -68,27 +225,48 @@ static VALUE allocate(VALUE klass) * * See Psych::Parser and Psych::Parser#handler */ -static VALUE parse(VALUE self, VALUE yaml) +static VALUE parse(int argc, VALUE *argv, VALUE self) { + VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; int tainted = 0; + int state = 0; + int parser_encoding = YAML_ANY_ENCODING; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); + if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { + if(rb_respond_to(yaml, id_path)) + path = rb_funcall(yaml, id_path, 0); + else + path = rb_str_new2("<unknown>"); + } + Data_Get_Struct(self, yaml_parser_t, parser); + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + if (OBJ_TAINTED(yaml)) tainted = 1; - if(rb_respond_to(yaml, id_read)) { + if (rb_respond_to(yaml, id_read)) { +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_io(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input(parser, io_reader, (void *)yaml); if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; } else { StringValue(yaml); +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_string(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), @@ 
-98,32 +276,28 @@ static VALUE parse(VALUE self, VALUE yaml) while(!done) { if(!yaml_parser_parse(parser, &event)) { - VALUE path; - size_t line = parser->mark.line; - size_t column = parser->mark.column; - - if(rb_respond_to(yaml, id_path)) - path = rb_funcall(yaml, id_path, 0); - else - path = rb_str_new2("<unknown>"); + VALUE exception; + exception = make_exception(parser, path); yaml_parser_delete(parser); yaml_parser_initialize(parser); - rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d", - StringValuePtr(path), - (int)line, (int)column); + rb_exc_raise(exception); } switch(event.type) { - case YAML_STREAM_START_EVENT: - - rb_funcall(handler, id_start_stream, 1, - INT2NUM((long)event.data.stream_start.encoding) - ); - break; + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM((long)event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; case YAML_DOCUMENT_START_EVENT: { + VALUE args[4]; /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ @@ -161,19 +335,25 @@ static VALUE parse(VALUE self, VALUE yaml) rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } - rb_funcall(handler, id_start_document, 3, - version, tag_directives, - event.data.document_start.implicit == 1 ? Qtrue : Qfalse - ); + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); } break; case YAML_DOCUMENT_END_EVENT: - rb_funcall(handler, id_end_document, 1, - event.data.document_end.implicit == 1 ? Qtrue : Qfalse - ); + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? 
Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } break; case YAML_ALIAS_EVENT: { + VALUE args[2]; VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); @@ -183,11 +363,14 @@ static VALUE parse(VALUE self, VALUE yaml) #endif } - rb_funcall(handler, id_alias, 1, alias); + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); } break; case YAML_SCALAR_EVENT: { + VALUE args[7]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; @@ -225,12 +408,19 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.scalar.style); - rb_funcall(handler, id_scalar, 6, - val, anchor, tag, plain_implicit, quoted_implicit, style); + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); } break; case YAML_SEQUENCE_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -256,15 +446,21 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.sequence_start.style); - rb_funcall(handler, id_start_sequence, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); } break; case YAML_SEQUENCE_END_EVENT: - rb_funcall(handler, id_end_sequence, 0); + rb_protect(protected_end_sequence, handler, &state); break; case YAML_MAPPING_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -289,22 +485,28 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.mapping_start.style); - rb_funcall(handler, id_start_mapping, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] 
= implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); } break; case YAML_MAPPING_END_EVENT: - rb_funcall(handler, id_end_mapping, 0); + rb_protect(protected_end_mapping, handler, &state); break; case YAML_NO_EVENT: - rb_funcall(handler, id_empty, 0); + rb_protect(protected_empty, handler, &state); break; case YAML_STREAM_END_EVENT: - rb_funcall(handler, id_end_stream, 0); + rb_protect(protected_end_stream, handler, &state); done = 1; break; } yaml_event_delete(&event); + if (state) rb_jump_tag(state); } return self; @@ -312,29 +514,6 @@ static VALUE parse(VALUE self, VALUE yaml) /* * call-seq: - * parser.external_encoding=(encoding) - * - * Set the encoding for this parser to +encoding+ - */ -static VALUE set_external_encoding(VALUE self, VALUE encoding) -{ - yaml_parser_t * parser; - VALUE exception; - - Data_Get_Struct(self, yaml_parser_t, parser); - - if(parser->encoding) { - exception = rb_const_get_at(mPsych, rb_intern("Exception")); - rb_raise(exception, "don't set the encoding twice!"); - } - - yaml_parser_set_encoding(parser, NUM2INT(encoding)); - - return encoding; -} - -/* - * call-seq: * parser.mark # => #<Psych::Parser::Mark> * * Returns a Psych::Parser::Mark object that contains line, column, and index @@ -376,11 +555,11 @@ void Init_psych_parser() /* UTF-16-BE Encoding with BOM */ rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING)); + rb_require("psych/syntax_error"); ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError); - rb_define_method(cPsychParser, "parse", parse, 1); + rb_define_method(cPsychParser, "parse", parse, -1); rb_define_method(cPsychParser, "mark", mark, 0); - rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1); id_read = rb_intern("read"); id_path = rb_intern("path"); |