summaryrefslogtreecommitdiff
path: root/ext/psych/parser.c
diff options
context:
space:
mode:
authortenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-03-29 01:25:11 +0000
committertenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-03-29 01:25:11 +0000
commit7d984d76bac857a54c1775b203c944da6cb20ac4 (patch)
treefacf5936314b9853ec789343165b79a432efa5e3 /ext/psych/parser.c
parentc857c7379c8f08d8ca985efa40e8682ab4686984 (diff)
merge revision(s) 32578,33401,33403,33404,33531,33655,33679,33809,33900,33965,34067,34069,34087,34328,34330,34527,34772,34783,34839,34914,34953,34954,35153: [Backport #6212]
* ext/psych/lib/psych.rb: updating version to match gem * ext/psych/psych.gemspec: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: fixing deprecation warning * ext/psych/lib/psych.rb: define a new BadAlias error class. * ext/psych/lib/psych/visitors/to_ruby.rb: raise an exception when deserializing an alias that does not exist. * test/psych/test_merge_keys.rb: corresponding test. * ext/psych/lib/psych.rb (load, parse): stop parsing or loading after the first document has been parsed. * test/psych/test_stream.rb: pertinent tests. * ext/psych/lib/psych.rb (parse_stream, load_stream): if a block is given, documents will be yielded to the block as they are parsed. [ruby-core:42404] [Bug #5978] * ext/psych/lib/psych/handlers/document_stream.rb: add a handler that yields documents as they are parsed * test/psych/test_stream.rb: corresponding tests. * ext/psych/lib/psych/core_ext.rb: only extend Kernel if IRB is loaded in order to stop method pollution. * ext/psych/lib/psych.rb: default open YAML files with utf8 external encoding. [ruby-core:42967] * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: prevent a memory leak by protecting calls to handler callbacks. * test/psych/test_parser.rb: test to demonstrate leak. * ext/psych/parser.c: set parser encoding based on the YAML input rather than user configuration. * test/psych/test_encoding.rb: corresponding tests. * test/psych/test_parser.rb: ditto * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: removed external encoding setter, allow parser to be reused. * ext/psych/lib/psych/parser.rb: added external encoding setter. * test/psych/test_parser.rb: test parser reuse * ext/psych/lib/psych/visitors/to_ruby.rb: Added support for loading subclasses of String with ivars * ext/psych/lib/psych/visitors/yaml_tree.rb: Added support for dumping subclasses of String with ivars * test/psych/test_string.rb: corresponding tests * ext/psych/lib/psych/visitors/to_ruby.rb: Added ability to load array subclasses with ivars. 
* ext/psych/lib/psych/visitors/yaml_tree.rb: Added ability to dump array subclasses with ivars. * test/psych/test_array.rb: corresponding tests * ext/psych/emitter.c: fixing clang warnings. Thanks Joey! * ext/psych/lib/psych/visitors/to_ruby.rb: BigDecimals can be restored from YAML. * ext/psych/lib/psych/visitors/yaml_tree.rb: BigDecimals can be dumped to YAML. * test/psych/test_numeric.rb: tests for BigDecimal serialization * ext/psych/lib/psych/scalar_scanner.rb: Strings that look like dates should be treated as strings and not dates. * test/psych/test_scalar_scanner.rb: corresponding tests. * ext/psych/lib/psych.rb (module Psych): parse and load methods take an optional file name that is used when raising Psych::SyntaxError exceptions * ext/psych/lib/psych/syntax_error.rb (module Psych): allow nil file names and handle nil file names in the exception message * test/psych/test_exception.rb (module Psych): Tests for changes. * ext/psych/parser.c (parse): parse method can take an optional file name for use in exception messages. * test/psych/test_parser.rb: corresponding tests. * ext/psych/lib/psych.rb: remove autoload from psych * ext/psych/lib/psych/json.rb: ditto * ext/psych/lib/psych/tree_builder.rb: dump complex numbers, rationals, etc with reference ids. * ext/psych/lib/psych/visitors/yaml_tree.rb: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: loading complex numbers, rationals, etc with reference ids. * test/psych/test_object_references.rb: corresponding tests * ext/psych/lib/psych/scalar_scanner.rb: make sure strings that look like base 60 numbers are serialized as quoted strings. * test/psych/test_string.rb: test for change. * ext/psych/parser.c: remove unused variable. * ext/psych/lib/psych/syntax_error.rb: Add file, line, offset, and message attributes during parse failure. * ext/psych/parser.c: Update parser to raise exception with correct values. * test/psych/test_exception.rb: corresponding tests. 
* ext/psych/parser.c (parse): Use context_mark for indicating error line and column. * ext/psych/lib/psych/scalar_scanner.rb: use normal begin / rescue since postfix rescue cannot receive the exception class. Thanks nagachika! git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35165 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/psych/parser.c')
-rw-r--r--ext/psych/parser.c303
1 files changed, 241 insertions, 62 deletions
diff --git a/ext/psych/parser.c b/ext/psych/parser.c
index e68768f562..9808c6b60e 100644
--- a/ext/psych/parser.c
+++ b/ext/psych/parser.c
@@ -59,6 +59,163 @@ static VALUE allocate(VALUE klass)
return Data_Wrap_Struct(klass, 0, dealloc, parser);
}
+/*
+ * Build the exception object raised when libyaml fails to parse.
+ *
+ * parser - parser whose error fields (context_mark, problem_offset,
+ *          problem, context) are read
+ * path   - Ruby value passed through as the first constructor argument
+ *
+ * Returns the result of calling "new" on ePsychSyntaxError with six
+ * arguments: path, line, column, problem offset, problem and context.
+ * Line and column are context_mark's values plus one. problem and context
+ * are passed as nil when the corresponding parser field is NULL.
+ */
+static VALUE make_exception(yaml_parser_t * parser, VALUE path)
+{
+    size_t line, column;
+
+    line = parser->context_mark.line + 1;
+    column = parser->context_mark.column + 1;
+
+    /* line and column are size_t, so convert with SIZET2NUM rather than
+     * INT2NUM, which would narrow them to int first. problem_offset is
+     * converted the same way (size_t in libyaml's yaml.h). */
+    return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
+            path,
+            SIZET2NUM(line),
+            SIZET2NUM(column),
+            SIZET2NUM(parser->problem_offset),
+            parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
+            parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
+}
+
+#ifdef HAVE_RUBY_ENCODING_H
+/*
+ * Choose the libyaml encoding for a String source, transcoding if needed.
+ *
+ * src             - Ruby String holding the YAML text
+ * parser_encoding - out parameter; set to the YAML_*_ENCODING constant the
+ *                   parser should use for the returned string
+ *
+ * A string whose encoding index is UTF-8, UTF-16LE or UTF-16BE is returned
+ * unchanged with the matching constant. Any other string is exported to
+ * UTF-8 with rb_str_export_to_enc and reported as YAML_UTF8_ENCODING.
+ */
+static VALUE transcode_string(VALUE src, int * parser_encoding)
+{
+    int utf8 = rb_utf8_encindex();
+    /* Use the hyphenated names, the same ones transcode_io looks up.
+     * The previous "UTF16_LE" / "UTF16_BE" spellings are not encoding
+     * names, so the UTF-16 branches below could never match. */
+    int utf16le = rb_enc_find_index("UTF-16LE");
+    int utf16be = rb_enc_find_index("UTF-16BE");
+    int source_encoding = rb_enc_get_index(src);
+
+    if (source_encoding == utf8) {
+        *parser_encoding = YAML_UTF8_ENCODING;
+        return src;
+    }
+
+    if (source_encoding == utf16le) {
+        *parser_encoding = YAML_UTF16LE_ENCODING;
+        return src;
+    }
+
+    if (source_encoding == utf16be) {
+        *parser_encoding = YAML_UTF16BE_ENCODING;
+        return src;
+    }
+
+    /* Everything else is converted to UTF-8 before parsing. */
+    src = rb_str_export_to_enc(src, rb_utf8_encoding());
+    RB_GC_GUARD(src);
+
+    *parser_encoding = YAML_UTF8_ENCODING;
+    return src;
+}
+
+/*
+ * Choose the libyaml encoding for an IO-like source.
+ *
+ * src             - object that responds to external_encoding
+ * parser_encoding - out parameter; set to the YAML_*_ENCODING constant
+ *
+ * Returns src unchanged. Raises ArgumentError when the external encoding is
+ * not US-ASCII, UTF-8, UTF-16LE, UTF-16BE or ASCII-8BIT.
+ */
+static VALUE transcode_io(VALUE src, int * parser_encoding)
+{
+    VALUE ext_enc = rb_funcall(src, rb_intern("external_encoding"), 0);
+    /* if no encoding is returned, assume ascii8bit. */
+    int enc_idx = NIL_P(ext_enc) ? rb_ascii8bit_encindex()
+                                 : rb_to_encoding_index(ext_enc);
+
+    if (enc_idx == rb_usascii_encindex() || enc_idx == rb_utf8_encindex()) {
+        /* US-ASCII is treated as UTF-8 */
+        *parser_encoding = YAML_UTF8_ENCODING;
+    } else if (enc_idx == rb_enc_find_index("UTF-16LE")) {
+        *parser_encoding = YAML_UTF16LE_ENCODING;
+    } else if (enc_idx == rb_enc_find_index("UTF-16BE")) {
+        *parser_encoding = YAML_UTF16BE_ENCODING;
+    } else if (enc_idx == rb_ascii8bit_encindex()) {
+        /* Just guess on ASCII-8BIT */
+        *parser_encoding = YAML_ANY_ENCODING;
+    } else {
+        /* rb_raise does not return, so the return below is never reached
+         * on this path. */
+        rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
+                 rb_enc_name(rb_enc_from_index(enc_idx)));
+    }
+
+    return src;
+}
+
+#endif
+
+/*
+ * rb_protect callback. pointer is a VALUE array {handler, encoding};
+ * calls id_start_stream on the handler with the single argument.
+ */
+static VALUE protected_start_stream(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+    VALUE encoding = packed[1];
+
+    return rb_funcall(receiver, id_start_stream, 1, encoding);
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array whose first slot is the
+ * handler and whose next three slots are the id_start_document arguments.
+ */
+static VALUE protected_start_document(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+
+    return rb_funcall3(receiver, id_start_document, 3, &packed[1]);
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array {handler, implicit};
+ * calls id_end_document on the handler with the single argument.
+ */
+static VALUE protected_end_document(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+    VALUE implicit = packed[1];
+
+    return rb_funcall(receiver, id_end_document, 1, implicit);
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array {handler, alias};
+ * calls id_alias on the handler with the single argument.
+ */
+static VALUE protected_alias(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+    VALUE anchor_name = packed[1];
+
+    return rb_funcall(receiver, id_alias, 1, anchor_name);
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array whose first slot is the
+ * handler and whose next six slots are the id_scalar arguments.
+ */
+static VALUE protected_scalar(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+
+    return rb_funcall3(receiver, id_scalar, 6, &packed[1]);
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array whose first slot is the
+ * handler and whose next four slots are the id_start_sequence arguments.
+ */
+static VALUE protected_start_sequence(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+
+    return rb_funcall3(receiver, id_start_sequence, 4, &packed[1]);
+}
+
+/* rb_protect callback: calls id_end_sequence on handler with no arguments. */
+static VALUE protected_end_sequence(VALUE handler)
+{
+    VALUE result = rb_funcall(handler, id_end_sequence, 0);
+
+    return result;
+}
+
+/*
+ * rb_protect callback. pointer is a VALUE array whose first slot is the
+ * handler and whose next four slots are the id_start_mapping arguments.
+ */
+static VALUE protected_start_mapping(VALUE pointer)
+{
+    VALUE *packed = (VALUE *)pointer;
+    VALUE receiver = packed[0];
+
+    return rb_funcall3(receiver, id_start_mapping, 4, &packed[1]);
+}
+
+/* rb_protect callback: calls id_end_mapping on handler with no arguments. */
+static VALUE protected_end_mapping(VALUE handler)
+{
+    VALUE result = rb_funcall(handler, id_end_mapping, 0);
+
+    return result;
+}
+
+/* rb_protect callback: calls id_empty on handler with no arguments. */
+static VALUE protected_empty(VALUE handler)
+{
+    VALUE result = rb_funcall(handler, id_empty, 0);
+
+    return result;
+}
+
+/* rb_protect callback: calls id_end_stream on handler with no arguments. */
+static VALUE protected_end_stream(VALUE handler)
+{
+    VALUE result = rb_funcall(handler, id_end_stream, 0);
+
+    return result;
+}
+
/*
* call-seq:
* parser.parse(yaml)
@@ -68,27 +225,48 @@ static VALUE allocate(VALUE klass)
*
* See Psych::Parser and Psych::Parser#handler
*/
-static VALUE parse(VALUE self, VALUE yaml)
+static VALUE parse(int argc, VALUE *argv, VALUE self)
{
+ VALUE yaml, path;
yaml_parser_t * parser;
yaml_event_t event;
int done = 0;
int tainted = 0;
+ int state = 0;
+ int parser_encoding = YAML_ANY_ENCODING;
#ifdef HAVE_RUBY_ENCODING_H
int encoding = rb_utf8_encindex();
rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
VALUE handler = rb_iv_get(self, "@handler");
+ if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
+ if(rb_respond_to(yaml, id_path))
+ path = rb_funcall(yaml, id_path, 0);
+ else
+ path = rb_str_new2("<unknown>");
+ }
+
Data_Get_Struct(self, yaml_parser_t, parser);
+ yaml_parser_delete(parser);
+ yaml_parser_initialize(parser);
+
if (OBJ_TAINTED(yaml)) tainted = 1;
- if(rb_respond_to(yaml, id_read)) {
+ if (rb_respond_to(yaml, id_read)) {
+#ifdef HAVE_RUBY_ENCODING_H
+ yaml = transcode_io(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+#endif
yaml_parser_set_input(parser, io_reader, (void *)yaml);
if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
} else {
StringValue(yaml);
+#ifdef HAVE_RUBY_ENCODING_H
+ yaml = transcode_string(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+#endif
yaml_parser_set_input_string(
parser,
(const unsigned char *)RSTRING_PTR(yaml),
@@ -98,32 +276,28 @@ static VALUE parse(VALUE self, VALUE yaml)
while(!done) {
if(!yaml_parser_parse(parser, &event)) {
- VALUE path;
- size_t line = parser->mark.line;
- size_t column = parser->mark.column;
-
- if(rb_respond_to(yaml, id_path))
- path = rb_funcall(yaml, id_path, 0);
- else
- path = rb_str_new2("<unknown>");
+ VALUE exception;
+ exception = make_exception(parser, path);
yaml_parser_delete(parser);
yaml_parser_initialize(parser);
- rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d",
- StringValuePtr(path),
- (int)line, (int)column);
+ rb_exc_raise(exception);
}
switch(event.type) {
- case YAML_STREAM_START_EVENT:
-
- rb_funcall(handler, id_start_stream, 1,
- INT2NUM((long)event.data.stream_start.encoding)
- );
- break;
+ case YAML_STREAM_START_EVENT:
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = INT2NUM((long)event.data.stream_start.encoding);
+ rb_protect(protected_start_stream, (VALUE)args, &state);
+ }
+ break;
case YAML_DOCUMENT_START_EVENT:
{
+ VALUE args[4];
/* Get a list of tag directives (if any) */
VALUE tag_directives = rb_ary_new();
/* Grab the document version */
@@ -161,19 +335,25 @@ static VALUE parse(VALUE self, VALUE yaml)
rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
}
}
- rb_funcall(handler, id_start_document, 3,
- version, tag_directives,
- event.data.document_start.implicit == 1 ? Qtrue : Qfalse
- );
+ args[0] = handler;
+ args[1] = version;
+ args[2] = tag_directives;
+ args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_start_document, (VALUE)args, &state);
}
break;
case YAML_DOCUMENT_END_EVENT:
- rb_funcall(handler, id_end_document, 1,
- event.data.document_end.implicit == 1 ? Qtrue : Qfalse
- );
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_end_document, (VALUE)args, &state);
+ }
break;
case YAML_ALIAS_EVENT:
{
+ VALUE args[2];
VALUE alias = Qnil;
if(event.data.alias.anchor) {
alias = rb_str_new2((const char *)event.data.alias.anchor);
@@ -183,11 +363,14 @@ static VALUE parse(VALUE self, VALUE yaml)
#endif
}
- rb_funcall(handler, id_alias, 1, alias);
+ args[0] = handler;
+ args[1] = alias;
+ rb_protect(protected_alias, (VALUE)args, &state);
}
break;
case YAML_SCALAR_EVENT:
{
+ VALUE args[7];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE plain_implicit, quoted_implicit, style;
@@ -225,12 +408,19 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.scalar.style);
- rb_funcall(handler, id_scalar, 6,
- val, anchor, tag, plain_implicit, quoted_implicit, style);
+ args[0] = handler;
+ args[1] = val;
+ args[2] = anchor;
+ args[3] = tag;
+ args[4] = plain_implicit;
+ args[5] = quoted_implicit;
+ args[6] = style;
+ rb_protect(protected_scalar, (VALUE)args, &state);
}
break;
case YAML_SEQUENCE_START_EVENT:
{
+ VALUE args[5];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE implicit, style;
@@ -256,15 +446,21 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.sequence_start.style);
- rb_funcall(handler, id_start_sequence, 4,
- anchor, tag, implicit, style);
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_sequence, (VALUE)args, &state);
}
break;
case YAML_SEQUENCE_END_EVENT:
- rb_funcall(handler, id_end_sequence, 0);
+ rb_protect(protected_end_sequence, handler, &state);
break;
case YAML_MAPPING_START_EVENT:
{
+ VALUE args[5];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE implicit, style;
@@ -289,22 +485,28 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.mapping_start.style);
- rb_funcall(handler, id_start_mapping, 4,
- anchor, tag, implicit, style);
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_mapping, (VALUE)args, &state);
}
break;
case YAML_MAPPING_END_EVENT:
- rb_funcall(handler, id_end_mapping, 0);
+ rb_protect(protected_end_mapping, handler, &state);
break;
case YAML_NO_EVENT:
- rb_funcall(handler, id_empty, 0);
+ rb_protect(protected_empty, handler, &state);
break;
case YAML_STREAM_END_EVENT:
- rb_funcall(handler, id_end_stream, 0);
+ rb_protect(protected_end_stream, handler, &state);
done = 1;
break;
}
yaml_event_delete(&event);
+ if (state) rb_jump_tag(state);
}
return self;
@@ -312,29 +514,6 @@ static VALUE parse(VALUE self, VALUE yaml)
/*
* call-seq:
- * parser.external_encoding=(encoding)
- *
- * Set the encoding for this parser to +encoding+
- */
-static VALUE set_external_encoding(VALUE self, VALUE encoding)
-{
- yaml_parser_t * parser;
- VALUE exception;
-
- Data_Get_Struct(self, yaml_parser_t, parser);
-
- if(parser->encoding) {
- exception = rb_const_get_at(mPsych, rb_intern("Exception"));
- rb_raise(exception, "don't set the encoding twice!");
- }
-
- yaml_parser_set_encoding(parser, NUM2INT(encoding));
-
- return encoding;
-}
-
-/*
- * call-seq:
* parser.mark # => #<Psych::Parser::Mark>
*
* Returns a Psych::Parser::Mark object that contains line, column, and index
@@ -376,11 +555,11 @@ void Init_psych_parser()
/* UTF-16-BE Encoding with BOM */
rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
+ rb_require("psych/syntax_error");
ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
- rb_define_method(cPsychParser, "parse", parse, 1);
+ rb_define_method(cPsychParser, "parse", parse, -1);
rb_define_method(cPsychParser, "mark", mark, 0);
- rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1);
id_read = rb_intern("read");
id_path = rb_intern("path");