diff options
Diffstat (limited to 'ext/psych/psych_parser.c')
-rw-r--r-- | ext/psych/psych_parser.c | 548 |
1 files changed, 267 insertions, 281 deletions
diff --git a/ext/psych/psych_parser.c b/ext/psych/psych_parser.c index fd550b671a..26e2d41e06 100644 --- a/ext/psych/psych_parser.c +++ b/ext/psych/psych_parser.c @@ -32,9 +32,9 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) *read = 0; if(! NIL_P(string)) { - void * str = (void *)StringValuePtr(string); - *read = (size_t)RSTRING_LEN(string); - memcpy(buf, str, *read); + void * str = (void *)StringValuePtr(string); + *read = (size_t)RSTRING_LEN(string); + memcpy(buf, str, *read); } return 1; @@ -79,21 +79,25 @@ static VALUE allocate(VALUE klass) static VALUE make_exception(yaml_parser_t * parser, VALUE path) { - size_t line, column; - VALUE ePsychSyntaxError; + if (parser->error == YAML_MEMORY_ERROR) { + return rb_eNoMemError; + } else { + size_t line, column; + VALUE ePsychSyntaxError; - line = parser->context_mark.line + 1; - column = parser->context_mark.column + 1; + line = parser->context_mark.line + 1; + column = parser->context_mark.column + 1; - ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); + ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); - return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, - path, - SIZET2NUM(line), - SIZET2NUM(column), - SIZET2NUM(parser->problem_offset), - parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, - parser->context ? rb_usascii_str_new2(parser->context) : Qnil); + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + SIZET2NUM(line), + SIZET2NUM(column), + SIZET2NUM(parser->problem_offset), + parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, + parser->context ? rb_usascii_str_new2(parser->context) : Qnil); + } } static VALUE transcode_string(VALUE src, int * parser_encoding) @@ -104,18 +108,18 @@ static VALUE transcode_string(VALUE src, int * parser_encoding) int source_encoding = rb_enc_get_index(src); if (source_encoding == utf8) { - *parser_encoding = YAML_UTF8_ENCODING; - return src; + *parser_encoding = YAML_UTF8_ENCODING; + return src; } if (source_encoding == utf16le) { - *parser_encoding = YAML_UTF16LE_ENCODING; - return src; + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; } if (source_encoding == utf16be) { - *parser_encoding = YAML_UTF16BE_ENCODING; - return src; + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; } src = rb_str_export_to_enc(src, rb_utf8_encoding()); @@ -134,36 +138,36 @@ static VALUE transcode_io(VALUE src, int * parser_encoding) /* if no encoding is returned, assume ascii8bit. */ if (NIL_P(io_external_encoding)) { - io_external_enc_index = rb_ascii8bit_encindex(); + io_external_enc_index = rb_ascii8bit_encindex(); } else { - io_external_enc_index = rb_to_encoding_index(io_external_encoding); + io_external_enc_index = rb_to_encoding_index(io_external_encoding); } /* Treat US-ASCII as utf_8 */ if (io_external_enc_index == rb_usascii_encindex()) { - *parser_encoding = YAML_UTF8_ENCODING; - return src; + *parser_encoding = YAML_UTF8_ENCODING; + return src; } if (io_external_enc_index == rb_utf8_encindex()) { - *parser_encoding = YAML_UTF8_ENCODING; - return src; + *parser_encoding = YAML_UTF8_ENCODING; + return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { - *parser_encoding = YAML_UTF16LE_ENCODING; - return src; + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; } if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { - *parser_encoding = YAML_UTF16BE_ENCODING; - return src; + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; } /* Just guess on ASCII-8BIT */ if (io_external_enc_index == rb_ascii8bit_encindex()) { - *parser_encoding = YAML_ANY_ENCODING; - return src; + *parser_encoding = YAML_ANY_ENCODING; + return src; } /* If the external encoding is something we don't know how to handle, @@ -241,18 +245,8 @@ static VALUE protected_event_location(VALUE pointer) return rb_funcall3(args[0], id_event_location, 4, args + 1); } -/* - * call-seq: - * parser.parse(yaml) - * - * Parse the YAML document contained in +yaml+. Events will be called on - * the handler set on the parser instance. - * - * See Psych::Parser and Psych::Parser#handler - */ -static VALUE parse(int argc, VALUE *argv, VALUE self) +static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path) { - VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; @@ -260,14 +254,6 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) int parser_encoding = YAML_ANY_ENCODING; int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); - VALUE handler = rb_iv_get(self, "@handler"); - - if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { - if(rb_respond_to(yaml, id_path)) - path = rb_funcall(yaml, id_path, 0); - else - path = rb_str_new2("<unknown>"); - } TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser); @@ -275,238 +261,238 @@ static VALUE parse(int argc, VALUE *argv, VALUE self) yaml_parser_initialize(parser); if (rb_respond_to(yaml, id_read)) { - yaml = transcode_io(yaml, &parser_encoding); - yaml_parser_set_encoding(parser, parser_encoding); - yaml_parser_set_input(parser, io_reader, (void *)yaml); + yaml = transcode_io(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); + yaml_parser_set_input(parser, io_reader, (void *)yaml); } else { - StringValue(yaml); - yaml = transcode_string(yaml, &parser_encoding); - yaml_parser_set_encoding(parser, parser_encoding); - yaml_parser_set_input_string( - parser, - (const unsigned char *)RSTRING_PTR(yaml), - (size_t)RSTRING_LEN(yaml) - ); + StringValue(yaml); + yaml = transcode_string(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); + yaml_parser_set_input_string( + parser, + (const unsigned char *)RSTRING_PTR(yaml), + (size_t)RSTRING_LEN(yaml) + ); } while(!done) { - VALUE event_args[5]; - VALUE start_line, start_column, end_line, end_column; - - if(!yaml_parser_parse(parser, &event)) { - VALUE exception; - - exception = make_exception(parser, path); - yaml_parser_delete(parser); - yaml_parser_initialize(parser); - - rb_exc_raise(exception); - } - - start_line = SIZET2NUM(event.start_mark.line); - start_column = SIZET2NUM(event.start_mark.column); - end_line = SIZET2NUM(event.end_mark.line); - end_column = SIZET2NUM(event.end_mark.column); - - event_args[0] = handler; - event_args[1] = start_line; - event_args[2] = start_column; - event_args[3] = end_line; - event_args[4] = end_column; - rb_protect(protected_event_location, (VALUE)event_args, &state); - - switch(event.type) { - case YAML_STREAM_START_EVENT: - { - VALUE args[2]; - - args[0] = handler; - args[1] = INT2NUM(event.data.stream_start.encoding); - rb_protect(protected_start_stream, (VALUE)args, &state); - } - break; - case YAML_DOCUMENT_START_EVENT: - { - VALUE args[4]; - /* Get a list of tag directives (if any) */ - VALUE tag_directives = rb_ary_new(); - /* Grab the document version */ - VALUE version = event.data.document_start.version_directive ? - rb_ary_new3( - (long)2, - INT2NUM(event.data.document_start.version_directive->major), - INT2NUM(event.data.document_start.version_directive->minor) - ) : rb_ary_new(); - - if(event.data.document_start.tag_directives.start) { - yaml_tag_directive_t *start = - event.data.document_start.tag_directives.start; - yaml_tag_directive_t *end = - event.data.document_start.tag_directives.end; - for(; start != end; start++) { - VALUE handle = Qnil; - VALUE prefix = Qnil; - if(start->handle) { - handle = rb_str_new2((const char *)start->handle); - PSYCH_TRANSCODE(handle, encoding, internal_enc); - } - - if(start->prefix) { - prefix = rb_str_new2((const char *)start->prefix); - PSYCH_TRANSCODE(prefix, encoding, internal_enc); - } - - rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); - } - } - args[0] = handler; - args[1] = version; - args[2] = tag_directives; - args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; - rb_protect(protected_start_document, (VALUE)args, &state); - } - break; - case YAML_DOCUMENT_END_EVENT: - { - VALUE args[2]; - - args[0] = handler; - args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; - rb_protect(protected_end_document, (VALUE)args, &state); - } - break; - case YAML_ALIAS_EVENT: - { - VALUE args[2]; - VALUE alias = Qnil; - if(event.data.alias.anchor) { - alias = rb_str_new2((const char *)event.data.alias.anchor); - PSYCH_TRANSCODE(alias, encoding, internal_enc); - } - - args[0] = handler; - args[1] = alias; - rb_protect(protected_alias, (VALUE)args, &state); - } - break; - case YAML_SCALAR_EVENT: - { - VALUE args[7]; - VALUE anchor = Qnil; - VALUE tag = Qnil; - VALUE plain_implicit, quoted_implicit, style; - VALUE val = rb_str_new( - (const char *)event.data.scalar.value, - (long)event.data.scalar.length - ); - - PSYCH_TRANSCODE(val, encoding, internal_enc); - - if(event.data.scalar.anchor) { - anchor = rb_str_new2((const char *)event.data.scalar.anchor); - PSYCH_TRANSCODE(anchor, encoding, internal_enc); - } - - if(event.data.scalar.tag) { - tag = rb_str_new2((const char *)event.data.scalar.tag); - PSYCH_TRANSCODE(tag, encoding, internal_enc); - } - - plain_implicit = - event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; - - quoted_implicit = - event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; - - style = INT2NUM(event.data.scalar.style); - - args[0] = handler; - args[1] = val; - args[2] = anchor; - args[3] = tag; - args[4] = plain_implicit; - args[5] = quoted_implicit; - args[6] = style; - rb_protect(protected_scalar, (VALUE)args, &state); - } - break; - case YAML_SEQUENCE_START_EVENT: - { - VALUE args[5]; - VALUE anchor = Qnil; - VALUE tag = Qnil; - VALUE implicit, style; - if(event.data.sequence_start.anchor) { - anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); - PSYCH_TRANSCODE(anchor, encoding, internal_enc); - } - - tag = Qnil; - if(event.data.sequence_start.tag) { - tag = rb_str_new2((const char *)event.data.sequence_start.tag); - PSYCH_TRANSCODE(tag, encoding, internal_enc); - } - - implicit = - event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; - - style = INT2NUM(event.data.sequence_start.style); - - args[0] = handler; - args[1] = anchor; - args[2] = tag; - args[3] = implicit; - args[4] = style; - - rb_protect(protected_start_sequence, (VALUE)args, &state); - } - break; - case YAML_SEQUENCE_END_EVENT: - rb_protect(protected_end_sequence, handler, &state); - break; - case YAML_MAPPING_START_EVENT: - { - VALUE args[5]; - VALUE anchor = Qnil; - VALUE tag = Qnil; - VALUE implicit, style; - if(event.data.mapping_start.anchor) { - anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); - PSYCH_TRANSCODE(anchor, encoding, internal_enc); - } - - if(event.data.mapping_start.tag) { - tag = rb_str_new2((const char *)event.data.mapping_start.tag); - PSYCH_TRANSCODE(tag, encoding, internal_enc); - } - - implicit = - event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; - - style = INT2NUM(event.data.mapping_start.style); - - args[0] = handler; - args[1] = anchor; - args[2] = tag; - args[3] = implicit; - args[4] = style; - - rb_protect(protected_start_mapping, (VALUE)args, &state); - } - break; - case YAML_MAPPING_END_EVENT: - rb_protect(protected_end_mapping, handler, &state); - break; - case YAML_NO_EVENT: - rb_protect(protected_empty, handler, &state); - break; - case YAML_STREAM_END_EVENT: - rb_protect(protected_end_stream, handler, &state); - done = 1; - break; - } - yaml_event_delete(&event); - if (state) rb_jump_tag(state); + VALUE event_args[5]; + VALUE start_line, start_column, end_line, end_column; + + if(parser->error || !yaml_parser_parse(parser, &event)) { + VALUE exception; + + exception = make_exception(parser, path); + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + + rb_exc_raise(exception); + } + + start_line = SIZET2NUM(event.start_mark.line); + start_column = SIZET2NUM(event.start_mark.column); + end_line = SIZET2NUM(event.end_mark.line); + end_column = SIZET2NUM(event.end_mark.column); + + event_args[0] = handler; + event_args[1] = start_line; + event_args[2] = start_column; + event_args[3] = end_line; + event_args[4] = end_column; + rb_protect(protected_event_location, (VALUE)event_args, &state); + + switch(event.type) { + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM(event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; + case YAML_DOCUMENT_START_EVENT: + { + VALUE args[4]; + /* Get a list of tag directives (if any) */ + VALUE tag_directives = rb_ary_new(); + /* Grab the document version */ + VALUE version = event.data.document_start.version_directive ? + rb_ary_new3( + (long)2, + INT2NUM(event.data.document_start.version_directive->major), + INT2NUM(event.data.document_start.version_directive->minor) + ) : rb_ary_new(); + + if(event.data.document_start.tag_directives.start) { + yaml_tag_directive_t *start = + event.data.document_start.tag_directives.start; + yaml_tag_directive_t *end = + event.data.document_start.tag_directives.end; + for(; start != end; start++) { + VALUE handle = Qnil; + VALUE prefix = Qnil; + if(start->handle) { + handle = rb_str_new2((const char *)start->handle); + PSYCH_TRANSCODE(handle, encoding, internal_enc); + } + + if(start->prefix) { + prefix = rb_str_new2((const char *)start->prefix); + PSYCH_TRANSCODE(prefix, encoding, internal_enc); + } + + rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); + } + } + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); + } + break; + case YAML_DOCUMENT_END_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } + break; + case YAML_ALIAS_EVENT: + { + VALUE args[2]; + VALUE alias = Qnil; + if(event.data.alias.anchor) { + alias = rb_str_new2((const char *)event.data.alias.anchor); + PSYCH_TRANSCODE(alias, encoding, internal_enc); + } + + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); + } + break; + case YAML_SCALAR_EVENT: + { + VALUE args[7]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE plain_implicit, quoted_implicit, style; + VALUE val = rb_str_new( + (const char *)event.data.scalar.value, + (long)event.data.scalar.length + ); + + PSYCH_TRANSCODE(val, encoding, internal_enc); + + if(event.data.scalar.anchor) { + anchor = rb_str_new2((const char *)event.data.scalar.anchor); + PSYCH_TRANSCODE(anchor, encoding, internal_enc); + } + + if(event.data.scalar.tag) { + tag = rb_str_new2((const char *)event.data.scalar.tag); + PSYCH_TRANSCODE(tag, encoding, internal_enc); + } + + plain_implicit = + event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; + + quoted_implicit = + event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM(event.data.scalar.style); + + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); + } + break; + case YAML_SEQUENCE_START_EVENT: + { + VALUE args[5]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE implicit, style; + if(event.data.sequence_start.anchor) { + anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); + PSYCH_TRANSCODE(anchor, encoding, internal_enc); + } + + tag = Qnil; + if(event.data.sequence_start.tag) { + tag = rb_str_new2((const char *)event.data.sequence_start.tag); + PSYCH_TRANSCODE(tag, encoding, internal_enc); + } + + implicit = + event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM(event.data.sequence_start.style); + + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); + } + break; + case YAML_SEQUENCE_END_EVENT: + rb_protect(protected_end_sequence, handler, &state); + break; + case YAML_MAPPING_START_EVENT: + { + VALUE args[5]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE implicit, style; + if(event.data.mapping_start.anchor) { + anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); + PSYCH_TRANSCODE(anchor, encoding, internal_enc); + } + + if(event.data.mapping_start.tag) { + tag = rb_str_new2((const char *)event.data.mapping_start.tag); + PSYCH_TRANSCODE(tag, encoding, internal_enc); + } + + implicit = + event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM(event.data.mapping_start.style); + + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); + } + break; + case YAML_MAPPING_END_EVENT: + rb_protect(protected_end_mapping, handler, &state); + break; + case YAML_NO_EVENT: + rb_protect(protected_empty, handler, &state); + break; + case YAML_STREAM_END_EVENT: + rb_protect(protected_end_stream, handler, &state); + done = 1; + break; + } + yaml_event_delete(&event); + if (state) rb_jump_tag(state); } return self; @@ -558,7 +544,7 @@ void Init_psych_parser(void) rb_require("psych/syntax_error"); - rb_define_method(cPsychParser, "parse", parse, -1); + rb_define_private_method(cPsychParser, "_native_parse", parse, 3); rb_define_method(cPsychParser, "mark", mark, 0); id_read = rb_intern("read"); |