summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author tenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-05-10 16:22:52 +0000
committer tenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-05-10 16:22:52 +0000
commit 0de068d03e373cbc95da824c4b5a07910ea20e3f (patch)
tree e48e5829ca8b32b8dcd860cb70c87a393fbd3e2b
parent 9d3b304b0bc0f2e36b61579971dafd83798ab56b (diff)
* ext/psych/parser.c (parse): Return strings encoded as
  Encoding.default_internal if set.
* test/psych/test_encoding.rb: Tests for encoding change.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27723 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- ext/psych/parser.c 30
-rw-r--r-- test/psych/test_encoding.rb 16
3 files changed, 41 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 6a907ce45f..540d4c90ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Tue May 11 01:20:43 2010 Aaron Patterson <aaron@tenderlovemaking.com>
+
+ * ext/psych/parser.c (parse): Return strings encoded as
+ Encoding.default_internal if set.
+
Mon May 10 23:50:19 2010 Tanaka Akira <akr@fsij.org>
* pack.c (pack_unpack): add a missing break.
diff --git a/ext/psych/parser.c b/ext/psych/parser.c
index f814091502..edfaf5e0d1 100644
--- a/ext/psych/parser.c
+++ b/ext/psych/parser.c
@@ -16,6 +16,14 @@ static ID id_end_sequence;
static ID id_start_mapping;
static ID id_end_mapping;
+/* Tag _str with encoding index _yaml_enc; if _internal_enc is non-NULL, rebind
+ * _str to rb_str_export_to_enc's result. Evaluates to _str. Plain comma/?:
+ * expression (no GCC `({ })`); arguments are evaluated more than once. */
+#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \
+  (rb_enc_associate_index((_str), (_yaml_enc)), \
+   ((_internal_enc) ? ((_str) = rb_str_export_to_enc((_str), (_internal_enc)), (_str)) \
+                    : (_str)))
+
static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
{
VALUE io = (VALUE)data;
@@ -48,6 +56,7 @@ static VALUE parse(VALUE self, VALUE yaml)
int done = 0;
#ifdef HAVE_RUBY_ENCODING_H
int encoding = rb_enc_find_index("ASCII-8BIT");
+ rb_encoding * internal_enc;
#endif
VALUE handler = rb_iv_get(self, "@handler");
@@ -94,6 +103,7 @@ static VALUE parse(VALUE self, VALUE yaml)
default:
break;
}
+ internal_enc = rb_default_internal_encoding();
#endif
rb_funcall(handler, id_start_stream, 1,
@@ -123,14 +133,14 @@ static VALUE parse(VALUE self, VALUE yaml)
if(start->handle) {
handle = rb_str_new2((const char *)start->handle);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(handle, encoding);
+ handle = PSYCH_TRANSCODE(handle, encoding, internal_enc);
#endif
}
if(start->prefix) {
prefix = rb_str_new2((const char *)start->prefix);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(prefix, encoding);
+ prefix = PSYCH_TRANSCODE(prefix, encoding, internal_enc);
#endif
}
@@ -154,7 +164,7 @@ static VALUE parse(VALUE self, VALUE yaml)
if(event.data.alias.anchor) {
alias = rb_str_new2((const char *)event.data.alias.anchor);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(alias, encoding);
+ alias = PSYCH_TRANSCODE(alias, encoding, internal_enc);
#endif
}
@@ -172,20 +182,20 @@ static VALUE parse(VALUE self, VALUE yaml)
);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(val, encoding);
+ val = PSYCH_TRANSCODE(val, encoding, internal_enc);
#endif
if(event.data.scalar.anchor) {
anchor = rb_str_new2((const char *)event.data.scalar.anchor);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(anchor, encoding);
+ anchor = PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
}
if(event.data.scalar.tag) {
tag = rb_str_new2((const char *)event.data.scalar.tag);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(tag, encoding);
+ tag = PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
}
@@ -209,7 +219,7 @@ static VALUE parse(VALUE self, VALUE yaml)
if(event.data.sequence_start.anchor) {
anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(anchor, encoding);
+ anchor = PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
}
@@ -217,7 +227,7 @@ static VALUE parse(VALUE self, VALUE yaml)
if(event.data.sequence_start.tag) {
tag = rb_str_new2((const char *)event.data.sequence_start.tag);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(tag, encoding);
+ tag = PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
}
@@ -241,14 +251,14 @@ static VALUE parse(VALUE self, VALUE yaml)
if(event.data.mapping_start.anchor) {
anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(anchor, encoding);
+ anchor = PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
}
if(event.data.mapping_start.tag) {
tag = rb_str_new2((const char *)event.data.mapping_start.tag);
#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_associate_index(tag, encoding);
+ tag = PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
}
diff --git a/test/psych/test_encoding.rb b/test/psych/test_encoding.rb
index 9d290f1c0a..bf8318a774 100644
--- a/test/psych/test_encoding.rb
+++ b/test/psych/test_encoding.rb
@@ -29,6 +29,22 @@ module Psych
@utf8 = Encoding.find('UTF-8')
end
+ def test_default_internal
+ before = Encoding.default_internal
+
+ Encoding.default_internal = 'EUC-JP'
+
+ str = "壁に耳あり、障子に目あり"
+ yaml = "--- #{str}"
+ assert_equal @utf8, str.encoding
+
+ @parser.parse str
+ assert_encodings Encoding.find('EUC-JP'), @handler.strings
+ assert_equal str, @handler.strings.first.encode('UTF-8')
+ ensure
+ Encoding.default_internal = before
+ end
+
def test_scalar
@parser.parse("--- a")
assert_encodings @utf8, @handler.strings