summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/psych/emitter.c12
-rw-r--r--ext/psych/lib/psych/nodes/stream.rb2
-rw-r--r--ext/psych/parser.c76
-rw-r--r--test/psych/test_parser.rb19
4 files changed, 75 insertions, 34 deletions
diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c
index befa98e821..a06304aba6 100644
--- a/ext/psych/emitter.c
+++ b/ext/psych/emitter.c
@@ -17,15 +17,21 @@ static int writer(void *ctx, unsigned char *buffer, size_t size)
return (int)NUM2INT(wrote);
}
-static void dealloc(yaml_emitter_t * emitter)
+static void dealloc(void * ptr)
{
+ yaml_emitter_t * emitter;
+
+ emitter = (yaml_emitter_t *)ptr;
yaml_emitter_delete(emitter);
- free(emitter);
+ xfree(emitter);
}
static VALUE allocate(VALUE klass)
{
- yaml_emitter_t * emitter = malloc(sizeof(yaml_emitter_t));
+ yaml_emitter_t * emitter;
+
+ emitter = xmalloc(sizeof(yaml_emitter_t));
+
yaml_emitter_initialize(emitter);
yaml_emitter_set_unicode(emitter, 1);
yaml_emitter_set_indent(emitter, 2);
diff --git a/ext/psych/lib/psych/nodes/stream.rb b/ext/psych/lib/psych/nodes/stream.rb
index f4aab5a7dc..7cf5e033ec 100644
--- a/ext/psych/lib/psych/nodes/stream.rb
+++ b/ext/psych/lib/psych/nodes/stream.rb
@@ -21,7 +21,7 @@ module Psych
UTF16BE = Psych::Parser::UTF16BE
# The encoding used for this stream
- attr_reader :encoding
+ attr_accessor :encoding
###
# Create a new Psych::Nodes::Stream node with an +encoding+ that
diff --git a/ext/psych/parser.c b/ext/psych/parser.c
index eefb8e67bc..41260ab6f5 100644
--- a/ext/psych/parser.c
+++ b/ext/psych/parser.c
@@ -39,6 +39,25 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
return 1;
}
+static void dealloc(void * ptr)
+{
+ yaml_parser_t * parser;
+
+ parser = (yaml_parser_t *)ptr;
+ yaml_parser_delete(parser);
+ xfree(parser);
+}
+
+static VALUE allocate(VALUE klass)
+{
+ yaml_parser_t * parser;
+
+ parser = xmalloc(sizeof(yaml_parser_t));
+ yaml_parser_initialize(parser);
+
+ return Data_Wrap_Struct(klass, 0, dealloc, parser);
+}
+
/*
* call-seq:
* parser.parse(yaml)
@@ -50,35 +69,33 @@ static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
*/
static VALUE parse(VALUE self, VALUE yaml)
{
- yaml_parser_t parser;
+ yaml_parser_t * parser;
yaml_event_t event;
int done = 0;
#ifdef HAVE_RUBY_ENCODING_H
- int encoding = rb_enc_find_index("ASCII-8BIT");
- rb_encoding * internal_enc = 0;
+ int encoding = rb_utf8_encindex();
+ rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
VALUE handler = rb_iv_get(self, "@handler");
-
- yaml_parser_initialize(&parser);
+ Data_Get_Struct(self, yaml_parser_t, parser);
if(rb_respond_to(yaml, id_read)) {
- yaml_parser_set_input(&parser, io_reader, (void *)yaml);
+ yaml_parser_set_input(parser, io_reader, (void *)yaml);
} else {
StringValue(yaml);
yaml_parser_set_input_string(
- &parser,
+ parser,
(const unsigned char *)RSTRING_PTR(yaml),
(size_t)RSTRING_LEN(yaml)
);
}
while(!done) {
- if(!yaml_parser_parse(&parser, &event)) {
- size_t line = parser.mark.line;
- size_t column = parser.mark.column;
+ if(!yaml_parser_parse(parser, &event)) {
+ size_t line = parser->mark.line;
+ size_t column = parser->mark.column;
- yaml_parser_delete(&parser);
rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
(int)line, (int)column);
}
@@ -86,25 +103,6 @@ static VALUE parse(VALUE self, VALUE yaml)
switch(event.type) {
case YAML_STREAM_START_EVENT:
-#ifdef HAVE_RUBY_ENCODING_H
- switch(event.data.stream_start.encoding) {
- case YAML_ANY_ENCODING:
- break;
- case YAML_UTF8_ENCODING:
- encoding = rb_enc_find_index("UTF-8");
- break;
- case YAML_UTF16LE_ENCODING:
- encoding = rb_enc_find_index("UTF-16LE");
- break;
- case YAML_UTF16BE_ENCODING:
- encoding = rb_enc_find_index("UTF-16BE");
- break;
- default:
- break;
- }
- internal_enc = rb_default_internal_encoding();
-#endif
-
rb_funcall(handler, id_start_stream, 1,
INT2NUM((long)event.data.stream_start.encoding)
);
@@ -286,6 +284,22 @@ static VALUE parse(VALUE self, VALUE yaml)
return self;
}
+/*
+ * call-seq:
+ * parser.external_encoding=(encoding)
+ *
+ * Set the encoding for this parser to +encoding+
+ */
+static VALUE set_external_encoding(VALUE self, VALUE encoding)
+{
+ yaml_parser_t * parser;
+
+ Data_Get_Struct(self, yaml_parser_t, parser);
+ yaml_parser_set_encoding(parser, NUM2INT(encoding));
+
+ return encoding;
+}
+
void Init_psych_parser()
{
#if 0
@@ -293,6 +307,7 @@ void Init_psych_parser()
#endif
cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
+ rb_define_alloc_func(cPsychParser, allocate);
/* Any encoding: Let the parser choose the encoding */
rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
@@ -309,6 +324,7 @@ void Init_psych_parser()
ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
rb_define_method(cPsychParser, "parse", parse, 1);
+ rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1);
id_read = rb_intern("read");
id_empty = rb_intern("empty");
diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb
index 9ef2a41e1d..8c4483309b 100644
--- a/test/psych/test_parser.rb
+++ b/test/psych/test_parser.rb
@@ -1,3 +1,5 @@
+# coding: utf-8
+
require_relative 'helper'
module Psych
@@ -24,6 +26,23 @@ module Psych
@parser = Psych::Parser.new EventCatcher.new
end
+ def test_bom
+ tadpole = 'おたまじゃくし'
+
+ # BOM + text
+ yml = "\uFEFF#{tadpole}".encode('UTF-16LE')
+ @parser.parse yml
+ assert_equal tadpole, @parser.handler.calls[2][1].first
+ end
+
+ def test_external_encoding
+ tadpole = 'おたまじゃくし'
+
+ @parser.external_encoding = Psych::Parser::UTF16LE
+ @parser.parse tadpole.encode 'UTF-16LE'
+ assert_equal tadpole, @parser.handler.calls[2][1].first
+ end
+
def test_bogus_io
o = Object.new
def o.read len; self end