summaryrefslogtreecommitdiff
path: root/ext/psych/psych_parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/psych/psych_parser.c')
-rw-r--r--ext/psych/psych_parser.c548
1 files changed, 267 insertions, 281 deletions
diff --git a/ext/psych/psych_parser.c b/ext/psych/psych_parser.c
index fd550b671a..26e2d41e06 100644
--- a/ext/psych/psych_parser.c
+++ b/ext/psych/psych_parser.c
@@ -32,9 +32,9 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
*read = 0;
if(! NIL_P(string)) {
- void * str = (void *)StringValuePtr(string);
- *read = (size_t)RSTRING_LEN(string);
- memcpy(buf, str, *read);
+ void * str = (void *)StringValuePtr(string);
+ *read = (size_t)RSTRING_LEN(string);
+ memcpy(buf, str, *read);
}
return 1;
@@ -79,21 +79,25 @@ static VALUE allocate(VALUE klass)
static VALUE make_exception(yaml_parser_t * parser, VALUE path)
{
- size_t line, column;
- VALUE ePsychSyntaxError;
+ if (parser->error == YAML_MEMORY_ERROR) {
+ return rb_eNoMemError;
+ } else {
+ size_t line, column;
+ VALUE ePsychSyntaxError;
- line = parser->context_mark.line + 1;
- column = parser->context_mark.column + 1;
+ line = parser->context_mark.line + 1;
+ column = parser->context_mark.column + 1;
- ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError"));
+ ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError"));
- return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
- path,
- SIZET2NUM(line),
- SIZET2NUM(column),
- SIZET2NUM(parser->problem_offset),
- parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
- parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
+ return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
+ path,
+ SIZET2NUM(line),
+ SIZET2NUM(column),
+ SIZET2NUM(parser->problem_offset),
+ parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
+ parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
+ }
}
static VALUE transcode_string(VALUE src, int * parser_encoding)
@@ -104,18 +108,18 @@ static VALUE transcode_string(VALUE src, int * parser_encoding)
int source_encoding = rb_enc_get_index(src);
if (source_encoding == utf8) {
- *parser_encoding = YAML_UTF8_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
}
if (source_encoding == utf16le) {
- *parser_encoding = YAML_UTF16LE_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF16LE_ENCODING;
+ return src;
}
if (source_encoding == utf16be) {
- *parser_encoding = YAML_UTF16BE_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF16BE_ENCODING;
+ return src;
}
src = rb_str_export_to_enc(src, rb_utf8_encoding());
@@ -134,36 +138,36 @@ static VALUE transcode_io(VALUE src, int * parser_encoding)
/* if no encoding is returned, assume ascii8bit. */
if (NIL_P(io_external_encoding)) {
- io_external_enc_index = rb_ascii8bit_encindex();
+ io_external_enc_index = rb_ascii8bit_encindex();
} else {
- io_external_enc_index = rb_to_encoding_index(io_external_encoding);
+ io_external_enc_index = rb_to_encoding_index(io_external_encoding);
}
/* Treat US-ASCII as utf_8 */
if (io_external_enc_index == rb_usascii_encindex()) {
- *parser_encoding = YAML_UTF8_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
}
if (io_external_enc_index == rb_utf8_encindex()) {
- *parser_encoding = YAML_UTF8_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
}
if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
- *parser_encoding = YAML_UTF16LE_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF16LE_ENCODING;
+ return src;
}
if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
- *parser_encoding = YAML_UTF16BE_ENCODING;
- return src;
+ *parser_encoding = YAML_UTF16BE_ENCODING;
+ return src;
}
/* Just guess on ASCII-8BIT */
if (io_external_enc_index == rb_ascii8bit_encindex()) {
- *parser_encoding = YAML_ANY_ENCODING;
- return src;
+ *parser_encoding = YAML_ANY_ENCODING;
+ return src;
}
/* If the external encoding is something we don't know how to handle,
@@ -241,18 +245,8 @@ static VALUE protected_event_location(VALUE pointer)
return rb_funcall3(args[0], id_event_location, 4, args + 1);
}
-/*
- * call-seq:
- * parser.parse(yaml)
- *
- * Parse the YAML document contained in +yaml+. Events will be called on
- * the handler set on the parser instance.
- *
- * See Psych::Parser and Psych::Parser#handler
- */
-static VALUE parse(int argc, VALUE *argv, VALUE self)
+static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path)
{
- VALUE yaml, path;
yaml_parser_t * parser;
yaml_event_t event;
int done = 0;
@@ -260,14 +254,6 @@ static VALUE parse(int argc, VALUE *argv, VALUE self)
int parser_encoding = YAML_ANY_ENCODING;
int encoding = rb_utf8_encindex();
rb_encoding * internal_enc = rb_default_internal_encoding();
- VALUE handler = rb_iv_get(self, "@handler");
-
- if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
- if(rb_respond_to(yaml, id_path))
- path = rb_funcall(yaml, id_path, 0);
- else
- path = rb_str_new2("<unknown>");
- }
TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);
@@ -275,238 +261,238 @@ static VALUE parse(int argc, VALUE *argv, VALUE self)
yaml_parser_initialize(parser);
if (rb_respond_to(yaml, id_read)) {
- yaml = transcode_io(yaml, &parser_encoding);
- yaml_parser_set_encoding(parser, parser_encoding);
- yaml_parser_set_input(parser, io_reader, (void *)yaml);
+ yaml = transcode_io(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+ yaml_parser_set_input(parser, io_reader, (void *)yaml);
} else {
- StringValue(yaml);
- yaml = transcode_string(yaml, &parser_encoding);
- yaml_parser_set_encoding(parser, parser_encoding);
- yaml_parser_set_input_string(
- parser,
- (const unsigned char *)RSTRING_PTR(yaml),
- (size_t)RSTRING_LEN(yaml)
- );
+ StringValue(yaml);
+ yaml = transcode_string(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+ yaml_parser_set_input_string(
+ parser,
+ (const unsigned char *)RSTRING_PTR(yaml),
+ (size_t)RSTRING_LEN(yaml)
+ );
}
while(!done) {
- VALUE event_args[5];
- VALUE start_line, start_column, end_line, end_column;
-
- if(!yaml_parser_parse(parser, &event)) {
- VALUE exception;
-
- exception = make_exception(parser, path);
- yaml_parser_delete(parser);
- yaml_parser_initialize(parser);
-
- rb_exc_raise(exception);
- }
-
- start_line = SIZET2NUM(event.start_mark.line);
- start_column = SIZET2NUM(event.start_mark.column);
- end_line = SIZET2NUM(event.end_mark.line);
- end_column = SIZET2NUM(event.end_mark.column);
-
- event_args[0] = handler;
- event_args[1] = start_line;
- event_args[2] = start_column;
- event_args[3] = end_line;
- event_args[4] = end_column;
- rb_protect(protected_event_location, (VALUE)event_args, &state);
-
- switch(event.type) {
- case YAML_STREAM_START_EVENT:
- {
- VALUE args[2];
-
- args[0] = handler;
- args[1] = INT2NUM(event.data.stream_start.encoding);
- rb_protect(protected_start_stream, (VALUE)args, &state);
- }
- break;
- case YAML_DOCUMENT_START_EVENT:
- {
- VALUE args[4];
- /* Get a list of tag directives (if any) */
- VALUE tag_directives = rb_ary_new();
- /* Grab the document version */
- VALUE version = event.data.document_start.version_directive ?
- rb_ary_new3(
- (long)2,
- INT2NUM(event.data.document_start.version_directive->major),
- INT2NUM(event.data.document_start.version_directive->minor)
- ) : rb_ary_new();
-
- if(event.data.document_start.tag_directives.start) {
- yaml_tag_directive_t *start =
- event.data.document_start.tag_directives.start;
- yaml_tag_directive_t *end =
- event.data.document_start.tag_directives.end;
- for(; start != end; start++) {
- VALUE handle = Qnil;
- VALUE prefix = Qnil;
- if(start->handle) {
- handle = rb_str_new2((const char *)start->handle);
- PSYCH_TRANSCODE(handle, encoding, internal_enc);
- }
-
- if(start->prefix) {
- prefix = rb_str_new2((const char *)start->prefix);
- PSYCH_TRANSCODE(prefix, encoding, internal_enc);
- }
-
- rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
- }
- }
- args[0] = handler;
- args[1] = version;
- args[2] = tag_directives;
- args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
- rb_protect(protected_start_document, (VALUE)args, &state);
- }
- break;
- case YAML_DOCUMENT_END_EVENT:
- {
- VALUE args[2];
-
- args[0] = handler;
- args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
- rb_protect(protected_end_document, (VALUE)args, &state);
- }
- break;
- case YAML_ALIAS_EVENT:
- {
- VALUE args[2];
- VALUE alias = Qnil;
- if(event.data.alias.anchor) {
- alias = rb_str_new2((const char *)event.data.alias.anchor);
- PSYCH_TRANSCODE(alias, encoding, internal_enc);
- }
-
- args[0] = handler;
- args[1] = alias;
- rb_protect(protected_alias, (VALUE)args, &state);
- }
- break;
- case YAML_SCALAR_EVENT:
- {
- VALUE args[7];
- VALUE anchor = Qnil;
- VALUE tag = Qnil;
- VALUE plain_implicit, quoted_implicit, style;
- VALUE val = rb_str_new(
- (const char *)event.data.scalar.value,
- (long)event.data.scalar.length
- );
-
- PSYCH_TRANSCODE(val, encoding, internal_enc);
-
- if(event.data.scalar.anchor) {
- anchor = rb_str_new2((const char *)event.data.scalar.anchor);
- PSYCH_TRANSCODE(anchor, encoding, internal_enc);
- }
-
- if(event.data.scalar.tag) {
- tag = rb_str_new2((const char *)event.data.scalar.tag);
- PSYCH_TRANSCODE(tag, encoding, internal_enc);
- }
-
- plain_implicit =
- event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
-
- quoted_implicit =
- event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
-
- style = INT2NUM(event.data.scalar.style);
-
- args[0] = handler;
- args[1] = val;
- args[2] = anchor;
- args[3] = tag;
- args[4] = plain_implicit;
- args[5] = quoted_implicit;
- args[6] = style;
- rb_protect(protected_scalar, (VALUE)args, &state);
- }
- break;
- case YAML_SEQUENCE_START_EVENT:
- {
- VALUE args[5];
- VALUE anchor = Qnil;
- VALUE tag = Qnil;
- VALUE implicit, style;
- if(event.data.sequence_start.anchor) {
- anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
- PSYCH_TRANSCODE(anchor, encoding, internal_enc);
- }
-
- tag = Qnil;
- if(event.data.sequence_start.tag) {
- tag = rb_str_new2((const char *)event.data.sequence_start.tag);
- PSYCH_TRANSCODE(tag, encoding, internal_enc);
- }
-
- implicit =
- event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
-
- style = INT2NUM(event.data.sequence_start.style);
-
- args[0] = handler;
- args[1] = anchor;
- args[2] = tag;
- args[3] = implicit;
- args[4] = style;
-
- rb_protect(protected_start_sequence, (VALUE)args, &state);
- }
- break;
- case YAML_SEQUENCE_END_EVENT:
- rb_protect(protected_end_sequence, handler, &state);
- break;
- case YAML_MAPPING_START_EVENT:
- {
- VALUE args[5];
- VALUE anchor = Qnil;
- VALUE tag = Qnil;
- VALUE implicit, style;
- if(event.data.mapping_start.anchor) {
- anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
- PSYCH_TRANSCODE(anchor, encoding, internal_enc);
- }
-
- if(event.data.mapping_start.tag) {
- tag = rb_str_new2((const char *)event.data.mapping_start.tag);
- PSYCH_TRANSCODE(tag, encoding, internal_enc);
- }
-
- implicit =
- event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
-
- style = INT2NUM(event.data.mapping_start.style);
-
- args[0] = handler;
- args[1] = anchor;
- args[2] = tag;
- args[3] = implicit;
- args[4] = style;
-
- rb_protect(protected_start_mapping, (VALUE)args, &state);
- }
- break;
- case YAML_MAPPING_END_EVENT:
- rb_protect(protected_end_mapping, handler, &state);
- break;
- case YAML_NO_EVENT:
- rb_protect(protected_empty, handler, &state);
- break;
- case YAML_STREAM_END_EVENT:
- rb_protect(protected_end_stream, handler, &state);
- done = 1;
- break;
- }
- yaml_event_delete(&event);
- if (state) rb_jump_tag(state);
+ VALUE event_args[5];
+ VALUE start_line, start_column, end_line, end_column;
+
+ if(parser->error || !yaml_parser_parse(parser, &event)) {
+ VALUE exception;
+
+ exception = make_exception(parser, path);
+ yaml_parser_delete(parser);
+ yaml_parser_initialize(parser);
+
+ rb_exc_raise(exception);
+ }
+
+ start_line = SIZET2NUM(event.start_mark.line);
+ start_column = SIZET2NUM(event.start_mark.column);
+ end_line = SIZET2NUM(event.end_mark.line);
+ end_column = SIZET2NUM(event.end_mark.column);
+
+ event_args[0] = handler;
+ event_args[1] = start_line;
+ event_args[2] = start_column;
+ event_args[3] = end_line;
+ event_args[4] = end_column;
+ rb_protect(protected_event_location, (VALUE)event_args, &state);
+
+ switch(event.type) {
+ case YAML_STREAM_START_EVENT:
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = INT2NUM(event.data.stream_start.encoding);
+ rb_protect(protected_start_stream, (VALUE)args, &state);
+ }
+ break;
+ case YAML_DOCUMENT_START_EVENT:
+ {
+ VALUE args[4];
+ /* Get a list of tag directives (if any) */
+ VALUE tag_directives = rb_ary_new();
+ /* Grab the document version */
+ VALUE version = event.data.document_start.version_directive ?
+ rb_ary_new3(
+ (long)2,
+ INT2NUM(event.data.document_start.version_directive->major),
+ INT2NUM(event.data.document_start.version_directive->minor)
+ ) : rb_ary_new();
+
+ if(event.data.document_start.tag_directives.start) {
+ yaml_tag_directive_t *start =
+ event.data.document_start.tag_directives.start;
+ yaml_tag_directive_t *end =
+ event.data.document_start.tag_directives.end;
+ for(; start != end; start++) {
+ VALUE handle = Qnil;
+ VALUE prefix = Qnil;
+ if(start->handle) {
+ handle = rb_str_new2((const char *)start->handle);
+ PSYCH_TRANSCODE(handle, encoding, internal_enc);
+ }
+
+ if(start->prefix) {
+ prefix = rb_str_new2((const char *)start->prefix);
+ PSYCH_TRANSCODE(prefix, encoding, internal_enc);
+ }
+
+ rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
+ }
+ }
+ args[0] = handler;
+ args[1] = version;
+ args[2] = tag_directives;
+ args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_start_document, (VALUE)args, &state);
+ }
+ break;
+ case YAML_DOCUMENT_END_EVENT:
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_end_document, (VALUE)args, &state);
+ }
+ break;
+ case YAML_ALIAS_EVENT:
+ {
+ VALUE args[2];
+ VALUE alias = Qnil;
+ if(event.data.alias.anchor) {
+ alias = rb_str_new2((const char *)event.data.alias.anchor);
+ PSYCH_TRANSCODE(alias, encoding, internal_enc);
+ }
+
+ args[0] = handler;
+ args[1] = alias;
+ rb_protect(protected_alias, (VALUE)args, &state);
+ }
+ break;
+ case YAML_SCALAR_EVENT:
+ {
+ VALUE args[7];
+ VALUE anchor = Qnil;
+ VALUE tag = Qnil;
+ VALUE plain_implicit, quoted_implicit, style;
+ VALUE val = rb_str_new(
+ (const char *)event.data.scalar.value,
+ (long)event.data.scalar.length
+ );
+
+ PSYCH_TRANSCODE(val, encoding, internal_enc);
+
+ if(event.data.scalar.anchor) {
+ anchor = rb_str_new2((const char *)event.data.scalar.anchor);
+ PSYCH_TRANSCODE(anchor, encoding, internal_enc);
+ }
+
+ if(event.data.scalar.tag) {
+ tag = rb_str_new2((const char *)event.data.scalar.tag);
+ PSYCH_TRANSCODE(tag, encoding, internal_enc);
+ }
+
+ plain_implicit =
+ event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
+
+ quoted_implicit =
+ event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
+
+ style = INT2NUM(event.data.scalar.style);
+
+ args[0] = handler;
+ args[1] = val;
+ args[2] = anchor;
+ args[3] = tag;
+ args[4] = plain_implicit;
+ args[5] = quoted_implicit;
+ args[6] = style;
+ rb_protect(protected_scalar, (VALUE)args, &state);
+ }
+ break;
+ case YAML_SEQUENCE_START_EVENT:
+ {
+ VALUE args[5];
+ VALUE anchor = Qnil;
+ VALUE tag = Qnil;
+ VALUE implicit, style;
+ if(event.data.sequence_start.anchor) {
+ anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
+ PSYCH_TRANSCODE(anchor, encoding, internal_enc);
+ }
+
+ tag = Qnil;
+ if(event.data.sequence_start.tag) {
+ tag = rb_str_new2((const char *)event.data.sequence_start.tag);
+ PSYCH_TRANSCODE(tag, encoding, internal_enc);
+ }
+
+ implicit =
+ event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
+
+ style = INT2NUM(event.data.sequence_start.style);
+
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_sequence, (VALUE)args, &state);
+ }
+ break;
+ case YAML_SEQUENCE_END_EVENT:
+ rb_protect(protected_end_sequence, handler, &state);
+ break;
+ case YAML_MAPPING_START_EVENT:
+ {
+ VALUE args[5];
+ VALUE anchor = Qnil;
+ VALUE tag = Qnil;
+ VALUE implicit, style;
+ if(event.data.mapping_start.anchor) {
+ anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
+ PSYCH_TRANSCODE(anchor, encoding, internal_enc);
+ }
+
+ if(event.data.mapping_start.tag) {
+ tag = rb_str_new2((const char *)event.data.mapping_start.tag);
+ PSYCH_TRANSCODE(tag, encoding, internal_enc);
+ }
+
+ implicit =
+ event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
+
+ style = INT2NUM(event.data.mapping_start.style);
+
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_mapping, (VALUE)args, &state);
+ }
+ break;
+ case YAML_MAPPING_END_EVENT:
+ rb_protect(protected_end_mapping, handler, &state);
+ break;
+ case YAML_NO_EVENT:
+ rb_protect(protected_empty, handler, &state);
+ break;
+ case YAML_STREAM_END_EVENT:
+ rb_protect(protected_end_stream, handler, &state);
+ done = 1;
+ break;
+ }
+ yaml_event_delete(&event);
+ if (state) rb_jump_tag(state);
}
return self;
@@ -558,7 +544,7 @@ void Init_psych_parser(void)
rb_require("psych/syntax_error");
- rb_define_method(cPsychParser, "parse", parse, -1);
+ rb_define_private_method(cPsychParser, "_native_parse", parse, 3);
rb_define_method(cPsychParser, "mark", mark, 0);
id_read = rb_intern("read");