From 14099149f403f4cfbaa4cd673f04bf20cc3c5ac5 Mon Sep 17 00:00:00 2001 From: tenderlove Date: Wed, 28 Nov 2012 04:34:41 +0000 Subject: * ext/psych/extconf.rb: use embedded libyaml if no system libyaml is found. [ruby-core:49463] * ext/psych/lib/psych.rb: updating to psych 2.0.0 * ext/psych/lib/psych/deprecated.rb: updated docs * ext/psych/psych.gemspec: updated to psych 2.0.0 * ext/psych/psych.h: fixing header file include for rename * ext/psych/psych_emitter.c: renamed to avoid libyaml conflict. * ext/psych/psych_emitter.h: ditto * ext/psych/psych_parser.c: ditto * ext/psych/psych_parser.h: ditto * ext/psych/psych_to_ruby.c: ditto * ext/psych/psych_to_ruby.h: ditto * ext/psych/psych_yaml_tree.c: ditto * ext/psych/psych_yaml_tree.h: ditto * ext/psych/yaml/LICENSE: embedding libyaml 0.1.4 * ext/psych/yaml/api.c: ditto * ext/psych/yaml/config.h: ditto * ext/psych/yaml/dumper.c: ditto * ext/psych/yaml/emitter.c: ditto * ext/psych/yaml/loader.c: ditto * ext/psych/yaml/parser.c: ditto * ext/psych/yaml/reader.c: ditto * ext/psych/yaml/scanner.c: ditto * ext/psych/yaml/writer.c: ditto * ext/psych/yaml/yaml.h: ditto * ext/psych/yaml/yaml_private.h: ditto git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37919 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/psych/psych_parser.c | 579 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 579 insertions(+) create mode 100644 ext/psych/psych_parser.c (limited to 'ext/psych/psych_parser.c') diff --git a/ext/psych/psych_parser.c b/ext/psych/psych_parser.c new file mode 100644 index 0000000000..8c65ce1307 --- /dev/null +++ b/ext/psych/psych_parser.c @@ -0,0 +1,579 @@ +#include + +VALUE cPsychParser; +VALUE ePsychSyntaxError; + +static ID id_read; +static ID id_path; +static ID id_empty; +static ID id_start_stream; +static ID id_end_stream; +static ID id_start_document; +static ID id_end_document; +static ID id_alias; +static ID id_scalar; +static ID id_start_sequence; +static ID id_end_sequence; +static ID id_start_mapping; +static ID id_end_mapping; + +#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \ + do { \ + rb_enc_associate_index((_str), (_yaml_enc)); \ + if(_internal_enc) \ + (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \ + } while (0) + +static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read) +{ + VALUE io = (VALUE)data; + VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size)); + + *read = 0; + + if(! NIL_P(string)) { + void * str = (void *)StringValuePtr(string); + *read = (size_t)RSTRING_LEN(string); + memcpy(buf, str, *read); + } + + return 1; +} + +static void dealloc(void * ptr) +{ + yaml_parser_t * parser; + + parser = (yaml_parser_t *)ptr; + yaml_parser_delete(parser); + xfree(parser); +} + +static VALUE allocate(VALUE klass) +{ + yaml_parser_t * parser; + + parser = xmalloc(sizeof(yaml_parser_t)); + yaml_parser_initialize(parser); + + return Data_Wrap_Struct(klass, 0, dealloc, parser); +} + +static VALUE make_exception(yaml_parser_t * parser, VALUE path) +{ + size_t line, column; + + line = parser->context_mark.line + 1; + column = parser->context_mark.column + 1; + + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + INT2NUM(line), + INT2NUM(column), + INT2NUM(parser->problem_offset), + parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, + parser->context ? rb_usascii_str_new2(parser->context) : Qnil); +} + +#ifdef HAVE_RUBY_ENCODING_H +static VALUE transcode_string(VALUE src, int * parser_encoding) +{ + int utf8 = rb_utf8_encindex(); + int utf16le = rb_enc_find_index("UTF-16LE"); + int utf16be = rb_enc_find_index("UTF-16BE"); + int source_encoding = rb_enc_get_index(src); + + if (source_encoding == utf8) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (source_encoding == utf16le) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (source_encoding == utf16be) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + src = rb_str_export_to_enc(src, rb_utf8_encoding()); + RB_GC_GUARD(src); + + *parser_encoding = YAML_UTF8_ENCODING; + return src; +} + +static VALUE transcode_io(VALUE src, int * parser_encoding) +{ + VALUE io_external_encoding; + int io_external_enc_index; + + io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); + + /* if no encoding is returned, assume ascii8bit. */ + if (NIL_P(io_external_encoding)) { + io_external_enc_index = rb_ascii8bit_encindex(); + } else { + io_external_enc_index = rb_to_encoding_index(io_external_encoding); + } + + /* Treat US-ASCII as utf_8 */ + if (io_external_enc_index == rb_usascii_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_utf8_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + /* Just guess on ASCII-8BIT */ + if (io_external_enc_index == rb_ascii8bit_encindex()) { + *parser_encoding = YAML_ANY_ENCODING; + return src; + } + + /* If the external encoding is something we don't know how to handle, + * fall back to YAML_ANY_ENCODING. */ + *parser_encoding = YAML_ANY_ENCODING; + + return src; +} + +#endif + +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static VALUE protected_end_sequence(VALUE handler) +{ + return rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + +/* + * call-seq: + * parser.parse(yaml) + * + * Parse the YAML document contained in +yaml+. Events will be called on + * the handler set on the parser instance. + * + * See Psych::Parser and Psych::Parser#handler + */ +static VALUE parse(int argc, VALUE *argv, VALUE self) +{ + VALUE yaml, path; + yaml_parser_t * parser; + yaml_event_t event; + int done = 0; + int tainted = 0; + int state = 0; + int parser_encoding = YAML_ANY_ENCODING; +#ifdef HAVE_RUBY_ENCODING_H + int encoding = rb_utf8_encindex(); + rb_encoding * internal_enc = rb_default_internal_encoding(); +#endif + VALUE handler = rb_iv_get(self, "@handler"); + + if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { + if(rb_respond_to(yaml, id_path)) + path = rb_funcall(yaml, id_path, 0); + else + path = rb_str_new2(""); + } + + Data_Get_Struct(self, yaml_parser_t, parser); + + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + + if (OBJ_TAINTED(yaml)) tainted = 1; + + if (rb_respond_to(yaml, id_read)) { +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_io(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif + yaml_parser_set_input(parser, io_reader, (void *)yaml); + if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; + } else { + StringValue(yaml); +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_string(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif + yaml_parser_set_input_string( + parser, + (const unsigned char *)RSTRING_PTR(yaml), + (size_t)RSTRING_LEN(yaml) + ); + } + + while(!done) { + if(!yaml_parser_parse(parser, &event)) { + VALUE exception; + + exception = make_exception(parser, path); + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + + rb_exc_raise(exception); + } + + switch(event.type) { + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM((long)event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; + case YAML_DOCUMENT_START_EVENT: + { + VALUE args[4]; + /* Get a list of tag directives (if any) */ + VALUE tag_directives = rb_ary_new(); + /* Grab the document version */ + VALUE version = event.data.document_start.version_directive ? + rb_ary_new3( + (long)2, + INT2NUM((long)event.data.document_start.version_directive->major), + INT2NUM((long)event.data.document_start.version_directive->minor) + ) : rb_ary_new(); + + if(event.data.document_start.tag_directives.start) { + yaml_tag_directive_t *start = + event.data.document_start.tag_directives.start; + yaml_tag_directive_t *end = + event.data.document_start.tag_directives.end; + for(; start != end; start++) { + VALUE handle = Qnil; + VALUE prefix = Qnil; + if(start->handle) { + handle = rb_str_new2((const char *)start->handle); + if (tainted) OBJ_TAINT(handle); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(handle, encoding, internal_enc); +#endif + } + + if(start->prefix) { + prefix = rb_str_new2((const char *)start->prefix); + if (tainted) OBJ_TAINT(prefix); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(prefix, encoding, internal_enc); +#endif + } + + rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); + } + } + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); + } + break; + case YAML_DOCUMENT_END_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } + break; + case YAML_ALIAS_EVENT: + { + VALUE args[2]; + VALUE alias = Qnil; + if(event.data.alias.anchor) { + alias = rb_str_new2((const char *)event.data.alias.anchor); + if (tainted) OBJ_TAINT(alias); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(alias, encoding, internal_enc); +#endif + } + + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); + } + break; + case YAML_SCALAR_EVENT: + { + VALUE args[7]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE plain_implicit, quoted_implicit, style; + VALUE val = rb_str_new( + (const char *)event.data.scalar.value, + (long)event.data.scalar.length + ); + if (tainted) OBJ_TAINT(val); + +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(val, encoding, internal_enc); +#endif + + if(event.data.scalar.anchor) { + anchor = rb_str_new2((const char *)event.data.scalar.anchor); + if (tainted) OBJ_TAINT(anchor); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(anchor, encoding, internal_enc); +#endif + } + + if(event.data.scalar.tag) { + tag = rb_str_new2((const char *)event.data.scalar.tag); + if (tainted) OBJ_TAINT(tag); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(tag, encoding, internal_enc); +#endif + } + + plain_implicit = + event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue; + + quoted_implicit = + event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM((long)event.data.scalar.style); + + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); + } + break; + case YAML_SEQUENCE_START_EVENT: + { + VALUE args[5]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE implicit, style; + if(event.data.sequence_start.anchor) { + anchor = rb_str_new2((const char *)event.data.sequence_start.anchor); + if (tainted) OBJ_TAINT(anchor); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(anchor, encoding, internal_enc); +#endif + } + + tag = Qnil; + if(event.data.sequence_start.tag) { + tag = rb_str_new2((const char *)event.data.sequence_start.tag); + if (tainted) OBJ_TAINT(tag); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(tag, encoding, internal_enc); +#endif + } + + implicit = + event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM((long)event.data.sequence_start.style); + + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); + } + break; + case YAML_SEQUENCE_END_EVENT: + rb_protect(protected_end_sequence, handler, &state); + break; + case YAML_MAPPING_START_EVENT: + { + VALUE args[5]; + VALUE anchor = Qnil; + VALUE tag = Qnil; + VALUE implicit, style; + if(event.data.mapping_start.anchor) { + anchor = rb_str_new2((const char *)event.data.mapping_start.anchor); + if (tainted) OBJ_TAINT(anchor); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(anchor, encoding, internal_enc); +#endif + } + + if(event.data.mapping_start.tag) { + tag = rb_str_new2((const char *)event.data.mapping_start.tag); + if (tainted) OBJ_TAINT(tag); +#ifdef HAVE_RUBY_ENCODING_H + PSYCH_TRANSCODE(tag, encoding, internal_enc); +#endif + } + + implicit = + event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue; + + style = INT2NUM((long)event.data.mapping_start.style); + + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); + } + break; + case YAML_MAPPING_END_EVENT: + rb_protect(protected_end_mapping, handler, &state); + break; + case YAML_NO_EVENT: + rb_protect(protected_empty, handler, &state); + break; + case YAML_STREAM_END_EVENT: + rb_protect(protected_end_stream, handler, &state); + done = 1; + break; + } + yaml_event_delete(&event); + if (state) rb_jump_tag(state); + } + + return self; +} + +/* + * call-seq: + * parser.mark # => # + * + * Returns a Psych::Parser::Mark object that contains line, column, and index + * information. + */ +static VALUE mark(VALUE self) +{ + VALUE mark_klass; + VALUE args[3]; + yaml_parser_t * parser; + + Data_Get_Struct(self, yaml_parser_t, parser); + mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark")); + args[0] = INT2NUM(parser->mark.index); + args[1] = INT2NUM(parser->mark.line); + args[2] = INT2NUM(parser->mark.column); + + return rb_class_new_instance(3, args, mark_klass); +} + +void Init_psych_parser() +{ +#if 0 + mPsych = rb_define_module("Psych"); +#endif + + cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject); + rb_define_alloc_func(cPsychParser, allocate); + + /* Any encoding: Let the parser choose the encoding */ + rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING)); + + /* UTF-8 Encoding */ + rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING)); + + /* UTF-16-LE Encoding with BOM */ + rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING)); + + /* UTF-16-BE Encoding with BOM */ + rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING)); + + rb_require("psych/syntax_error"); + ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); + + rb_define_method(cPsychParser, "parse", parse, -1); + rb_define_method(cPsychParser, "mark", mark, 0); + + id_read = rb_intern("read"); + id_path = rb_intern("path"); + id_empty = rb_intern("empty"); + id_start_stream = rb_intern("start_stream"); + id_end_stream = rb_intern("end_stream"); + id_start_document = rb_intern("start_document"); + id_end_document = rb_intern("end_document"); + id_alias = rb_intern("alias"); + id_scalar = rb_intern("scalar"); + id_start_sequence = rb_intern("start_sequence"); + id_end_sequence = rb_intern("end_sequence"); + id_start_mapping = rb_intern("start_mapping"); + id_end_mapping = rb_intern("end_mapping"); +} +/* vim: set noet sws=4 sw=4: */ -- cgit v1.2.3