summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-05 21:29:12 +0000
committer akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-09-05 21:29:12 +0000
commit a10a5ddaacb05368cee3beec82af47043aec6667 (patch)
tree 6b90181b6b7e94ffa5b641134af629a41080d8e9
parent fb9c53dea50fbbf7d789f0cac004a00c6d125022 (diff)
* enc/trans/escape.trans: new file.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19165 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 4
-rw-r--r-- enc/trans/escape.trans 109
-rw-r--r-- test/ruby/test_econv.rb 14
3 files changed, 127 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 27c1f9878b..32ad9b45e0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Sat Sep 6 06:28:46 2008 Tanaka Akira <akr@fsij.org>
+
+ * enc/trans/escape.trans: new file.
+
Sat Sep 6 06:23:27 2008 Tanaka Akira <akr@fsij.org>
* tool/transcode-tblgen.rb (StrSet.parse): accept upper case
diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans
new file mode 100644
index 0000000000..d1612187aa
--- /dev/null
+++ b/enc/trans/escape.trans
@@ -0,0 +1,109 @@
+#include "transcode_data.h"
+/*
+ * Output callback referenced by the three rb_transcoder definitions in this
+ * file. It inspects the single input byte *s and writes the corresponding
+ * HTML character reference into o:
+ *
+ *   '&' -> "&amp;"   (5 bytes)
+ *   '<' -> "&lt;"    (4 bytes)
+ *   '>' -> "&gt;"    (4 bytes)
+ *   '"' -> "&quot;"  (6 bytes)
+ *
+ * Returns the number of bytes stored in o. The statep and l arguments are
+ * not used. No bounds check is performed on o; presumably the caller
+ * supplies at least max_output bytes -- TODO confirm against the transcoder
+ * core. Any byte other than the four above is reported through rb_bug();
+ * the action maps in this file assign :func_so only to 22, 26, 3C and 3E.
+ */
+static int
+fun_so_escape_html_chref(void *statep, const unsigned char *s, size_t l, unsigned char *o)
+{
+ switch (*s) {
+ case '&': /* writes "&amp;" */
+ o[0] = '&';
+ o[1] = 'a';
+ o[2] = 'm';
+ o[3] = 'p';
+ o[4] = ';';
+ return 5;
+
+ case '<': /* writes "&lt;" */
+ o[0] = '&';
+ o[1] = 'l';
+ o[2] = 't';
+ o[3] = ';';
+ return 4;
+
+ case '>': /* writes "&gt;" */
+ o[0] = '&';
+ o[1] = 'g';
+ o[2] = 't';
+ o[3] = ';';
+ return 4;
+
+ case '"': /* writes "&quot;" */
+ o[0] = '&';
+ o[1] = 'q';
+ o[2] = 'u';
+ o[3] = 'o';
+ o[4] = 't';
+ o[5] = ';';
+ return 6;
+
+ default:
+ rb_bug("unexpected char"); /* no return statement follows; presumably rb_bug() does not return -- TODO confirm */
+ }
+}
+<%
+ map_amp = {} # action map for "escape_amp_as_chref": only byte 26 ('&') is sent to the callback
+ map_amp["{00-25,27-FF}"] = :nomap # every other byte value; presumably left unchanged -- TODO confirm :nomap semantics
+ map_amp["26"] = :func_so
+ transcode_generate_node(ActionMap.parse(map_amp), "escape_amp_as_chref")
+
+ map_html_text = {} # action map for "escape_html_text": bytes 26 ('&'), 3C ('<') and 3E ('>') are sent to the callback
+ map_html_text["{00-25,27-3B,3D,3F-FF}"] = :nomap # all bytes except 26, 3C and 3E (22, the double quote, is included here)
+ map_html_text["26"] = :func_so
+ map_html_text["3C"] = :func_so
+ map_html_text["3E"] = :func_so
+ transcode_generate_node(ActionMap.parse(map_html_text), "escape_html_text")
+
+ map_html_attr = {} # action map for "escape_html_attr": same as the text map plus byte 22 ('"')
+ map_html_attr["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap # all bytes except 22, 26, 3C and 3E
+ map_html_attr["22"] = :func_so
+ map_html_attr["26"] = :func_so
+ map_html_attr["3C"] = :func_so
+ map_html_attr["3E"] = :func_so
+ transcode_generate_node(ActionMap.parse(map_html_attr), "escape_html_attr")
+%>
+
+<%= transcode_generated_code %>
+
+static const rb_transcoder
+rb_escape_amp_as_chref = { /* transcoder registered under the names "" -> "amp-escaped" */
+ "", "amp-escaped", escape_amp_as_chref, /* escape_amp_as_chref is the name given to transcode_generate_node in the template block */
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 5, /* max_output: the only sequence produced for this map is "&amp;", 5 bytes */
+ stateless_converter, /* stateful_type */
+ 0, NULL, NULL,
+ NULL, NULL, NULL, &fun_so_escape_html_chref /* callback for the bytes mapped to :func_so */
+};
+
+static const rb_transcoder
+rb_escape_html_text = { /* transcoder registered under the names "" -> "html-text-escaped" */
+ "", "html-text-escaped", escape_html_text, /* escape_html_text is the name given to transcode_generate_node in the template block */
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 5, /* max_output: longest of "&amp;" (5), "&lt;" (4), "&gt;" (4) */
+ stateless_converter, /* stateful_type */
+ 0, NULL, NULL,
+ NULL, NULL, NULL, &fun_so_escape_html_chref /* callback for the bytes mapped to :func_so */
+};
+
+static const rb_transcoder
+rb_escape_html_attr = { /* transcoder registered under the names "" -> "html-attr-escaped" */
+ "", "html-attr-escaped", escape_html_attr, /* escape_html_attr is the name given to transcode_generate_node in the template block */
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 6, /* max_output: longest sequence is "&quot;", 6 bytes */
+ stateless_converter, /* stateful_type */
+ 0, NULL, NULL,
+ NULL, NULL, NULL, &fun_so_escape_html_chref /* callback for the bytes mapped to :func_so */
+};
+/*
+ * Initialization entry point for this file: passes each of the three
+ * transcoders defined here (amp-escaped, html-text-escaped,
+ * html-attr-escaped) to rb_register_transcoder().
+ */
+void
+Init_escape(void)
+{
+ rb_register_transcoder(&rb_escape_amp_as_chref);
+ rb_register_transcoder(&rb_escape_html_text);
+ rb_register_transcoder(&rb_escape_html_attr);
+}
+
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 0a774c4a7a..be4917ddf7 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -692,5 +692,19 @@ class TestEncodingConverter < Test::Unit::TestCase
ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1", Encoding::Converter::UNDEF_HEX_CHARREF)
assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1"),
ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))
+
+ ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
+ assert_equal("&", ec.convert("&"))
+ end
+
+ def test_html_escape # feeds the same four characters & < > " to each of the three escape converters
+ ec = Encoding::Converter.new("", "amp-escaped") # expected to rewrite only "&"
+ assert_equal('&amp;<>"', ec.convert("&<>\""))
+
+ ec = Encoding::Converter.new("", "html-text-escaped") # expected to rewrite "&", "<" and ">" but keep the double quote
+ assert_equal('&amp;&lt;&gt;"', ec.convert("&<>\""))
+
+ ec = Encoding::Converter.new("", "html-attr-escaped") # expected to rewrite all four characters
+ assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
end
end