summaryrefslogtreecommitdiff
path: root/ext/psych/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/psych/parser.c')
-rw-r--r--ext/psych/parser.c314
1 files changed, 314 insertions, 0 deletions
diff --git a/ext/psych/parser.c b/ext/psych/parser.c
new file mode 100644
index 0000000000..2bad49fc70
--- /dev/null
+++ b/ext/psych/parser.c
@@ -0,0 +1,314 @@
+#include <psych.h>
+
+VALUE cPsychParser; /* The Parser class; assigned in Init_psych_parser(). */
+VALUE ePsychSyntaxError; /* Exception class raised by parse() when libyaml reports a failure. */
+/* Method IDs, interned once in Init_psych_parser(): id_read is sent to IO-like input, the others are sent to the handler. */
+static ID id_read;
+static ID id_empty;
+static ID id_start_stream;
+static ID id_end_stream;
+static ID id_start_document;
+static ID id_end_document;
+static ID id_alias;
+static ID id_scalar;
+static ID id_start_sequence;
+static ID id_end_sequence;
+static ID id_start_mapping;
+static ID id_end_mapping;
+
+/*
+ * libyaml read handler that pulls input from a Ruby object responding to
+ * +read+.
+ *
+ * data - the Ruby object (a VALUE passed through as an opaque pointer)
+ * buf  - destination buffer supplied by libyaml
+ * size - capacity of +buf+ in bytes; also the length requested from +read+
+ * read - out parameter: number of bytes actually stored in +buf+
+ *
+ * Returns 1 on success (with *read == 0 when +read+ returned nil, which
+ * signals end of input) and 0 when the object handed back more bytes than
+ * were requested.
+ */
+static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
+{
+    VALUE io = (VALUE)data;
+    VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
+
+    *read = 0;
+
+    if(! NIL_P(string)) {
+        void * str = (void *)StringValuePtr(string);
+        size_t len = (size_t)RSTRING_LEN(string);
+
+        /*
+         * A duck-typed +read+ is free to ignore the requested length.
+         * Copying more than +size+ bytes would overrun libyaml's buffer,
+         * so treat over-delivery as a read failure instead.
+         */
+        if(len > size) {
+            return 0;
+        }
+
+        memcpy(buf, str, len);
+        *read = len;
+    }
+
+    return 1;
+}
+
+/*
+ * call-seq:
+ *    parser.parse(yaml)
+ *
+ * Parse the YAML document contained in +yaml+.  +yaml+ may be a String or
+ * any object that responds to +read+ (it is then read incrementally).
+ * Events will be called on the handler set on the parser instance.
+ *
+ * Raises Psych::SyntaxError when the underlying parser reports a failure.
+ *
+ * See Psych::Parser and Psych::Parser#handler
+ */
+static VALUE parse(VALUE self, VALUE yaml)
+{
+    yaml_parser_t parser;
+    yaml_event_t event;
+
+    int from_io = rb_respond_to(yaml, id_read);
+
+    /*
+     * Coerce to a String before the parser allocates anything: the
+     * conversion can raise TypeError, and raising after
+     * yaml_parser_initialize() would leak the parser's buffers.
+     */
+    if(!from_io) {
+        StringValue(yaml);
+    }
+
+    yaml_parser_initialize(&parser);
+
+    if(from_io) {
+        yaml_parser_set_input(&parser, io_reader, (void *)yaml);
+    } else {
+        yaml_parser_set_input_string(
+            &parser,
+            (const unsigned char *)StringValuePtr(yaml),
+            (size_t)RSTRING_LEN(yaml)
+        );
+    }
+
+    int done = 0;
+#ifdef HAVE_RUBY_ENCODING_H
+    /* Encoding applied to every string handed to the handler; replaced
+     * once the stream start event reports the detected encoding. */
+    int encoding = rb_enc_find_index("ASCII-8BIT");
+#endif
+
+    VALUE handler = rb_iv_get(self, "@handler");
+
+    /*
+     * NOTE: if a handler callback raises, control leaves this function
+     * through rb_funcall() without reaching the cleanup calls below, so
+     * the parser and the current event are not released on that path.
+     */
+    while(!done) {
+        if(!yaml_parser_parse(&parser, &event)) {
+            size_t line = parser.mark.line;
+            size_t column = parser.mark.column;
+
+            /* Release the parser first: rb_raise() does not return. */
+            yaml_parser_delete(&parser);
+            rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
+                (int)line, (int)column);
+        }
+
+        switch(event.type) {
+          case YAML_STREAM_START_EVENT:
+
+#ifdef HAVE_RUBY_ENCODING_H
+            switch(event.data.stream_start.encoding) {
+              case YAML_ANY_ENCODING:
+                break;
+              case YAML_UTF8_ENCODING:
+                encoding = rb_enc_find_index("UTF-8");
+                break;
+              case YAML_UTF16LE_ENCODING:
+                encoding = rb_enc_find_index("UTF-16LE");
+                break;
+              case YAML_UTF16BE_ENCODING:
+                encoding = rb_enc_find_index("UTF-16BE");
+                break;
+              default:
+                break;
+            }
+#endif
+
+            rb_funcall(handler, id_start_stream, 1,
+                INT2NUM((long)event.data.stream_start.encoding)
+            );
+            break;
+          case YAML_DOCUMENT_START_EVENT:
+            {
+                // Grab the document version ([major, minor], or [] if absent)
+                VALUE version = event.data.document_start.version_directive ?
+                    rb_ary_new3(
+                        (long)2,
+                        INT2NUM((long)event.data.document_start.version_directive->major),
+                        INT2NUM((long)event.data.document_start.version_directive->minor)
+                    ) : rb_ary_new();
+
+                // Get a list of tag directives (if any) as [handle, prefix] pairs
+                VALUE tag_directives = rb_ary_new();
+                if(event.data.document_start.tag_directives.start) {
+                    yaml_tag_directive_t *start =
+                        event.data.document_start.tag_directives.start;
+                    yaml_tag_directive_t *end =
+                        event.data.document_start.tag_directives.end;
+                    for(; start != end; start++) {
+                        VALUE handle = Qnil;
+                        if(start->handle) {
+                            handle = rb_str_new2((const char *)start->handle);
+#ifdef HAVE_RUBY_ENCODING_H
+                            rb_enc_associate_index(handle, encoding);
+#endif
+                        }
+
+                        VALUE prefix = Qnil;
+                        if(start->prefix) {
+                            prefix = rb_str_new2((const char *)start->prefix);
+#ifdef HAVE_RUBY_ENCODING_H
+                            rb_enc_associate_index(prefix, encoding);
+#endif
+                        }
+
+                        VALUE pair = rb_ary_new3((long)2, handle, prefix);
+                        rb_ary_push(tag_directives, pair);
+                    }
+                }
+                rb_funcall(handler, id_start_document, 3,
+                    version, tag_directives,
+                    event.data.document_start.implicit == 1 ? Qtrue : Qfalse
+                );
+            }
+            break;
+          case YAML_DOCUMENT_END_EVENT:
+            rb_funcall(handler, id_end_document, 1,
+                event.data.document_end.implicit == 1 ? Qtrue : Qfalse
+            );
+            break;
+          case YAML_ALIAS_EVENT:
+            {
+                VALUE alias = Qnil;
+                if(event.data.alias.anchor) {
+                    alias = rb_str_new2((const char *)event.data.alias.anchor);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(alias, encoding);
+#endif
+                }
+
+                rb_funcall(handler, id_alias, 1, alias);
+            }
+            break;
+          case YAML_SCALAR_EVENT:
+            {
+                /* Length-based copy: the value is taken by explicit
+                 * length rather than up to the first NUL byte. */
+                VALUE val = rb_str_new(
+                    (const char *)event.data.scalar.value,
+                    (long)event.data.scalar.length
+                );
+
+#ifdef HAVE_RUBY_ENCODING_H
+                rb_enc_associate_index(val, encoding);
+#endif
+
+                VALUE anchor = Qnil;
+                if(event.data.scalar.anchor) {
+                    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(anchor, encoding);
+#endif
+                }
+
+                VALUE tag = Qnil;
+                if(event.data.scalar.tag) {
+                    tag = rb_str_new2((const char *)event.data.scalar.tag);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(tag, encoding);
+#endif
+                }
+
+                VALUE plain_implicit =
+                    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
+
+                VALUE quoted_implicit =
+                    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
+
+                VALUE style = INT2NUM((long)event.data.scalar.style);
+
+                rb_funcall(handler, id_scalar, 6,
+                    val, anchor, tag, plain_implicit, quoted_implicit, style);
+            }
+            break;
+          case YAML_SEQUENCE_START_EVENT:
+            {
+                VALUE anchor = Qnil;
+                if(event.data.sequence_start.anchor) {
+                    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(anchor, encoding);
+#endif
+                }
+
+                VALUE tag = Qnil;
+                if(event.data.sequence_start.tag) {
+                    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(tag, encoding);
+#endif
+                }
+
+                VALUE implicit =
+                    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
+
+                VALUE style = INT2NUM((long)event.data.sequence_start.style);
+
+                rb_funcall(handler, id_start_sequence, 4,
+                    anchor, tag, implicit, style);
+            }
+            break;
+          case YAML_SEQUENCE_END_EVENT:
+            rb_funcall(handler, id_end_sequence, 0);
+            break;
+          case YAML_MAPPING_START_EVENT:
+            {
+                VALUE anchor = Qnil;
+                if(event.data.mapping_start.anchor) {
+                    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(anchor, encoding);
+#endif
+                }
+
+                VALUE tag = Qnil;
+                if(event.data.mapping_start.tag) {
+                    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
+#ifdef HAVE_RUBY_ENCODING_H
+                    rb_enc_associate_index(tag, encoding);
+#endif
+                }
+
+                VALUE implicit =
+                    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
+
+                VALUE style = INT2NUM((long)event.data.mapping_start.style);
+
+                rb_funcall(handler, id_start_mapping, 4,
+                    anchor, tag, implicit, style);
+            }
+            break;
+          case YAML_MAPPING_END_EVENT:
+            rb_funcall(handler, id_end_mapping, 0);
+            break;
+          case YAML_NO_EVENT:
+            rb_funcall(handler, id_empty, 0);
+            break;
+          case YAML_STREAM_END_EVENT:
+            rb_funcall(handler, id_end_stream, 0);
+            done = 1;
+            break;
+        }
+
+        /*
+         * Every successfully parsed event owns heap data (anchor, tag,
+         * value, directives).  All of it has been copied into Ruby
+         * objects above, so release it before fetching the next event.
+         */
+        yaml_event_delete(&event);
+    }
+
+    /* Stream fully consumed: free the parser's internal buffers. */
+    yaml_parser_delete(&parser);
+
+    return self;
+}
+
+/*
+ * Extension setup for the parser: interns the method IDs used by this
+ * file, then defines Parser, its encoding constants, SyntaxError and the
+ * Parser#parse method, all under mPsych.
+ */
+void Init_psych_parser()
+{
+    /* Method IDs: +read+ is sent to IO-like input, the rest to the handler. */
+    id_read           = rb_intern("read");
+    id_empty          = rb_intern("empty");
+    id_start_stream   = rb_intern("start_stream");
+    id_end_stream     = rb_intern("end_stream");
+    id_start_document = rb_intern("start_document");
+    id_end_document   = rb_intern("end_document");
+    id_alias          = rb_intern("alias");
+    id_scalar         = rb_intern("scalar");
+    id_start_sequence = rb_intern("start_sequence");
+    id_end_sequence   = rb_intern("end_sequence");
+    id_start_mapping  = rb_intern("start_mapping");
+    id_end_mapping    = rb_intern("end_mapping");
+
+    /* Never compiled; presumably kept so documentation tooling can see
+     * where the Psych module comes from. */
+#if 0
+    mPsych = rb_define_module("Psych");
+#endif
+
+    cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
+
+    /* Any encoding: Let the parser choose the encoding */
+    rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
+
+    /* UTF-8 Encoding */
+    rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
+
+    /* UTF-16-LE Encoding with BOM */
+    rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
+
+    /* UTF-16-BE Encoding with BOM */
+    rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
+
+    /* Raised by Parser#parse when the YAML cannot be parsed. */
+    ePsychSyntaxError =
+        rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
+
+    rb_define_method(cPsychParser, "parse", parse, 1);
+}
+/* vim: set noet sws=4 sw=4: */