summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- enc/trans/escape.trans 59
-rw-r--r-- include/ruby/encoding.h 5
-rw-r--r-- io.c 62
-rw-r--r-- test/ruby/test_econv.rb 36
-rw-r--r-- test/ruby/test_io_m17n.rb 13
-rw-r--r-- transcode.c 28
7 files changed, 150 insertions, 70 deletions
diff --git a/ChangeLog b/ChangeLog
index 7ec025f570..24396be89f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+Sun Sep 7 12:09:29 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
+ (ECONV_STATEFUL_ENCODER_MASK): defined.
+ (ECONV_XML_ATTR_QUOTE_ENCODER): defined.
+ (ECONV_XML_ATTR_ENCODER): removed.
+
+ * enc/trans/escape.trans (rb_escape_xml_attr_content): defined.
+ (rb_escape_xml_attr_quote): defined.
+ (rb_escape_xml_attr): removed.
+
+ * io.c (NEED_WRITECONV): writeconv is required if supplemental
+ converter is used.
+ (make_writeconv): apply stateful encoder in writeconv.
+
+ * transcode.c: follow the constant change.
+
Sun Sep 7 07:24:09 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* misc/*.el: merged the following patches from Nathan Weizenbaum
diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans
index 0641c6e251..a64114f533 100644
--- a/enc/trans/escape.trans
+++ b/enc/trans/escape.trans
@@ -52,9 +52,18 @@ fun_so_escape_xml_chref(void *statep, const unsigned char *s, size_t l, unsigned
map_xml_text["3E"] = :func_so
transcode_generate_node(ActionMap.parse(map_xml_text), "escape_xml_text")
- map_xml_attr = {}
- map_xml_attr["{00-FF}"] = :func_so
- transcode_generate_node(ActionMap.parse(map_xml_attr), "escape_xml_attr")
+ map_xml_attr_content = {}
+ map_xml_attr_content["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap
+ map_xml_attr_content["22"] = :func_so
+ map_xml_attr_content["26"] = :func_so
+ map_xml_attr_content["3C"] = :func_so
+ map_xml_attr_content["3E"] = :func_so
+ transcode_generate_node(ActionMap.parse(map_xml_attr_content), "escape_xml_attr_content")
+
+ map_xml_attr_quote = {}
+ map_xml_attr_quote["{00-FF}"] = :func_so
+ transcode_generate_node(ActionMap.parse(map_xml_attr_quote), "escape_xml_attr_quote")
+
%>
<%= transcode_generated_code %>
@@ -83,11 +92,23 @@ rb_escape_xml_text = {
NULL, NULL, NULL, &fun_so_escape_xml_chref
};
+static const rb_transcoder
+rb_escape_xml_attr_content = {
+ "", "xml-attr-content-escaped", escape_xml_attr_content,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 6, /* max_output */
+ stateless_converter, /* stateful_type */
+ 0, NULL, NULL,
+ NULL, NULL, NULL, &fun_so_escape_xml_chref
+};
+
#define END 0
#define NORMAL 1
static int
-escape_xml_attr_init(void *statep)
+escape_xml_attr_quote_init(void *statep)
{
unsigned char *sp = statep;
*sp = END;
@@ -95,7 +116,7 @@ escape_xml_attr_init(void *statep)
}
static int
-fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned char *o)
+fun_so_escape_xml_attr_quote(void *statep, const unsigned char *s, size_t l, unsigned char *o)
{
unsigned char *sp = statep;
int n = 0;
@@ -103,23 +124,12 @@ fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned
*sp = NORMAL;
o[n++] = '"';
}
- switch (s[0]) {
- case '&':
- case '<':
- case '>':
- case '"':
- n += fun_so_escape_xml_chref(statep, s, l, o+n);
- break;
-
- default:
- o[n++] = s[0];
- break;
- }
+ o[n++] = s[0];
return n;
}
static int
-escape_xml_attr_finish(void *statep, unsigned char *o)
+escape_xml_attr_quote_finish(void *statep, unsigned char *o)
{
unsigned char *sp = statep;
int n = 0;
@@ -135,16 +145,16 @@ escape_xml_attr_finish(void *statep, unsigned char *o)
}
static const rb_transcoder
-rb_escape_xml_attr = {
- "", "xml-attr-escaped", escape_xml_attr,
+rb_escape_xml_attr_quote = {
+ "", "xml-attr-quoted", escape_xml_attr_quote,
TRANSCODE_TABLE_INFO,
1, /* input_unit_length */
1, /* max_input */
7, /* max_output */
stateful_encoder, /* stateful_type */
- 1, escape_xml_attr_init, escape_xml_attr_init,
- NULL, NULL, NULL, fun_so_escape_xml_attr,
- escape_xml_attr_finish
+ 1, escape_xml_attr_quote_init, escape_xml_attr_quote_init,
+ NULL, NULL, NULL, fun_so_escape_xml_attr_quote,
+ escape_xml_attr_quote_finish
};
void
@@ -152,6 +162,7 @@ Init_escape(void)
{
rb_register_transcoder(&rb_escape_amp_as_chref);
rb_register_transcoder(&rb_escape_xml_text);
- rb_register_transcoder(&rb_escape_xml_attr);
+ rb_register_transcoder(&rb_escape_xml_attr_content);
+ rb_register_transcoder(&rb_escape_xml_attr_quote);
}
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index d78ef11dc1..eac7326a6d 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -269,7 +269,10 @@ void rb_econv_binmode(rb_econv_t *ec);
#define ECONV_CRLF_NEWLINE_ENCODER 0x00001000
#define ECONV_CR_NEWLINE_ENCODER 0x00002000
#define ECONV_XML_TEXT_ENCODER 0x00004000
-#define ECONV_XML_ATTR_ENCODER 0x00008000
+#define ECONV_XML_ATTR_CONTENT_ENCODER 0x00008000
+
+#define ECONV_STATEFUL_ENCODER_MASK 0x00f00000
+#define ECONV_XML_ATTR_QUOTE_ENCODER 0x00100000
/* end of flags for rb_econv_open */
diff --git a/io.c b/io.c
index ac7c1f16b5..5c43a062de 100644
--- a/io.c
+++ b/io.c
@@ -682,7 +682,7 @@ rb_io_wait_writable(int f)
# define NEED_NEWLINE_ENCODER(fptr) 0
#endif
#define NEED_READCONV(fptr) (fptr->encs.enc2 != NULL || NEED_NEWLINE_DECODER(fptr))
-#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr))
+#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr) || (fptr->encs.ecflags & (ECONV_DECODER_MASK|ECONV_ENCODER_MASK|ECONV_STATEFUL_ENCODER_MASK)))
static void
make_writeconv(rb_io_t *fptr)
@@ -695,42 +695,50 @@ make_writeconv(rb_io_t *fptr)
fptr->writeconv_initialized = 1;
- /* ECONV_INVALID_XXX and ECONV_UNDEF_XXX should be set both.
- * But ECONV_CRLF_NEWLINE_ENCODER should be set only for the first. */
- fptr->writeconv_pre_ecflags = fptr->encs.ecflags;
- fptr->writeconv_pre_ecopts = fptr->encs.ecopts;
ecflags = fptr->encs.ecflags;
ecopts = fptr->encs.ecopts;
-
#ifdef TEXTMODE_NEWLINE_ENCODER
+ if (NEED_NEWLINE_ENCODER(fptr))
+ ecflags |= TEXTMODE_NEWLINE_ENCODER;
+#endif
+
if (!fptr->encs.enc) {
- if (NEED_NEWLINE_ENCODER(fptr))
- ecflags |= TEXTMODE_NEWLINE_ENCODER;
+ /* no encoding conversion */
+ fptr->writeconv_pre_ecflags = 0;
+ fptr->writeconv_pre_ecopts = Qnil;
fptr->writeconv = rb_econv_open_opts("", "", ecflags, ecopts);
if (!fptr->writeconv)
rb_exc_raise(rb_econv_open_exc("", "", ecflags));
fptr->writeconv_stateless = Qnil;
- return;
- }
-
- if (NEED_NEWLINE_ENCODER(fptr))
- fptr->writeconv_pre_ecflags |= TEXTMODE_NEWLINE_ENCODER;
-#endif
- ecflags &= ECONV_ERROR_HANDLER_MASK;
-
- enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
- senc = rb_econv_stateless_encoding(enc->name);
- if (senc) {
- denc = enc->name;
- fptr->writeconv_stateless = rb_str_new2(senc);
- fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
- if (!fptr->writeconv)
- rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
}
else {
- denc = NULL;
- fptr->writeconv_stateless = Qnil;
- fptr->writeconv = NULL;
+ enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
+ senc = rb_econv_stateless_encoding(enc->name);
+ if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) {
+ /* single conversion */
+ fptr->writeconv_pre_ecflags = ecflags;
+ fptr->writeconv_pre_ecopts = ecopts;
+ fptr->writeconv = NULL;
+ fptr->writeconv_stateless = Qnil;
+ }
+ else {
+ /* double conversion */
+ fptr->writeconv_pre_ecflags = ecflags & ~ECONV_STATEFUL_ENCODER_MASK;
+ fptr->writeconv_pre_ecopts = ecopts;
+ if (senc) {
+ denc = enc->name;
+ fptr->writeconv_stateless = rb_str_new2(senc);
+ }
+ else {
+ senc = denc = "";
+ fptr->writeconv_stateless = rb_str_new2(enc->name);
+ }
+ ecflags = fptr->encs.ecflags & (ECONV_ERROR_HANDLER_MASK|ECONV_STATEFUL_ENCODER_MASK);
+ ecopts = fptr->encs.ecopts;
+ fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
+ if (!fptr->writeconv)
+ rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
+ }
}
}
}
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 5c9fc143b1..ef87fff4cc 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -738,20 +738,37 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal('', ec.finish)
end
- def test_xml_escape_attr
- ec = Encoding::Converter.new("", "xml-attr-escaped")
+ def test_xml_escape_attr_content
+ ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+ assert_equal('', ec.finish)
+
+ ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+ assert_equal('', ec.convert(""))
+ assert_equal('', ec.finish)
+
+ ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+ assert_equal('&quot;', ec.convert('"'))
+ assert_equal('', ec.finish)
+
+ ec = Encoding::Converter.new("", "xml-attr-content-escaped")
+ assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
+ assert_equal('', ec.finish)
+ end
+
+ def test_xml_escape_attr_quote
+ ec = Encoding::Converter.new("", "xml-attr-quoted")
assert_equal('""', ec.finish)
- ec = Encoding::Converter.new("", "xml-attr-escaped")
+ ec = Encoding::Converter.new("", "xml-attr-quoted")
assert_equal('', ec.convert(""))
assert_equal('""', ec.finish)
- ec = Encoding::Converter.new("", "xml-attr-escaped")
- assert_equal('"&quot;', ec.convert('"'))
+ ec = Encoding::Converter.new("", "xml-attr-quoted")
+ assert_equal('""', ec.convert('"'))
assert_equal('"', ec.finish)
- ec = Encoding::Converter.new("", "xml-attr-escaped")
- assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
+ ec = Encoding::Converter.new("", "xml-attr-quoted")
+ assert_equal('"&<>"', ec.convert("&<>\""))
assert_equal('"', ec.finish)
end
@@ -760,7 +777,10 @@ class TestEncodingConverter < Test::Unit::TestCase
assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\""))
assert_equal('', ec.finish)
- ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::XML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF)
+ ec = Encoding::Converter.new("utf-8", "euc-jp",
+ Encoding::Converter::XML_ATTR_CONTENT_ENCODER|
+ Encoding::Converter::XML_ATTR_QUOTE_ENCODER|
+ Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\""))
assert_equal('"', ec.finish)
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 9d999be59e..57943df25f 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -1461,6 +1461,18 @@ EOT
def test_w_xml_attr
with_tmpdir {
+ open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+ content = File.read("raw.txt", :mode=>"rb:ascii-8bit")
+ assert_equal("\"&amp;&lt;&gt;&quot;'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content)
+
+ open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+ content = File.read("ascii.txt", :mode=>"rb:ascii-8bit")
+ assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;&#x3042;\n\"".force_encoding("ascii-8bit"), content)
+
+ open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
+ content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit")
+ assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content)
+
open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f|
f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
}
@@ -1480,6 +1492,5 @@ EOT
assert_equal("\"&#x4E02;\"".force_encoding("ascii-8bit"), content)
}
end
-
end
diff --git a/transcode.c b/transcode.c
index acfe688ca4..1fdd27d7de 100644
--- a/transcode.c
+++ b/transcode.c
@@ -896,7 +896,7 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
return NULL;
if ((ecflags & ECONV_XML_TEXT_ENCODER) &&
- (ecflags & ECONV_XML_ATTR_ENCODER))
+ (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER))
return NULL;
num_encoders = 0;
@@ -909,8 +909,11 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
if (ecflags & ECONV_XML_TEXT_ENCODER)
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-text-escaped")))
return NULL;
- if (ecflags & ECONV_XML_ATTR_ENCODER)
- if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-escaped")))
+ if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)
+ if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-content-escaped")))
+ return NULL;
+ if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER)
+ if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-quoted")))
return NULL;
num_decoders = 0;
@@ -1792,7 +1795,8 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
ECONV_CRLF_NEWLINE_ENCODER|
ECONV_CR_NEWLINE_ENCODER|
ECONV_XML_TEXT_ENCODER|
- ECONV_XML_ATTR_ENCODER)) {
+ ECONV_XML_ATTR_CONTENT_ENCODER|
+ ECONV_XML_ATTR_QUOTE_ENCODER)) {
const char *pre = "";
if (has_description)
rb_str_cat2(mesg, " with ");
@@ -1812,9 +1816,13 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "XML-text");
}
- if (ecflags & ECONV_XML_ATTR_ENCODER) {
+ if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER) {
+ rb_str_cat2(mesg, pre); pre = ",";
+ rb_str_cat2(mesg, "XML-attr-content");
+ }
+ if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER) {
rb_str_cat2(mesg, pre); pre = ",";
- rb_str_cat2(mesg, "XML-attr");
+ rb_str_cat2(mesg, "XML-attr-quote");
}
has_description = 1;
}
@@ -2173,7 +2181,7 @@ econv_opts(VALUE opt)
ecflags |= ECONV_XML_TEXT_ENCODER|ECONV_UNDEF_HEX_CHARREF;
}
else if (v==sym_attr) {
- ecflags |= ECONV_XML_ATTR_ENCODER|ECONV_UNDEF_HEX_CHARREF;
+ ecflags |= ECONV_XML_ATTR_CONTENT_ENCODER|ECONV_XML_ATTR_QUOTE_ENCODER|ECONV_UNDEF_HEX_CHARREF;
}
else {
rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
@@ -2329,7 +2337,8 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
ECONV_CRLF_NEWLINE_ENCODER|
ECONV_CR_NEWLINE_ENCODER|
ECONV_XML_TEXT_ENCODER|
- ECONV_XML_ATTR_ENCODER)) == 0) {
+ ECONV_XML_ATTR_CONTENT_ENCODER|
+ ECONV_XML_ATTR_QUOTE_ENCODER)) == 0) {
if (senc && senc == denc) {
return -1;
}
@@ -3573,7 +3582,8 @@ Init_transcode(void)
rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER));
rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER));
rb_define_const(rb_cEncodingConverter, "XML_TEXT_ENCODER", INT2FIX(ECONV_XML_TEXT_ENCODER));
- rb_define_const(rb_cEncodingConverter, "XML_ATTR_ENCODER", INT2FIX(ECONV_XML_ATTR_ENCODER));
+ rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_ENCODER", INT2FIX(ECONV_XML_ATTR_CONTENT_ENCODER));
+ rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_ENCODER", INT2FIX(ECONV_XML_ATTR_QUOTE_ENCODER));
rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0);
rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0);