summary | refs | log | tree | commit | diff
path: root/transcode.c
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-03-05 08:45:51 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-03-05 08:45:51 +0000
commit: 08631278ada7a6fd2bafb3ab0f0447b1f6d58790 (patch)
tree: 05eda73587c46a0d7cbedad281da8c6a07fa2b53 /transcode.c
parent: 39787ea14db33dd4265d7f6271cd2d59ccaeff37 (diff)
Wed Mar 5 17:43:43 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_loop): Adjusted detection of invalid (ill-formed) UTF-8 sequences. Fixing potential security issue, see http://www.unicode.org/versions/Unicode5.1.0/#Notable_Changes. * test/ruby/test_transcode.rb: Added two tests for above fix. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15692 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- transcode.c 24
1 files changed, 15 insertions, 9 deletions
diff --git a/transcode.c b/transcode.c
index ed01374f5b..a4c066a5ca 100644
--- a/transcode.c
+++ b/transcode.c
@@ -177,8 +177,10 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
if (from_utf8) {
if ((next_byte&0xC0) == 0x80)
next_byte -= 0x80;
- else
+ else {
+ in_p--; /* may need to add more code later to revert other things */
goto invalid;
+ }
}
next_table = (const BYTE_LOOKUP *)next_info;
goto follow_byte;
@@ -390,13 +392,15 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
/*
* call-seq:
- * str.encode!(encoding) => str
- * str.encode!(to_encoding, from_encoding) => str
+ * str.encode!(encoding [, options] ) => str
+ * str.encode!(to_encoding, from_encoding [, options] ) => str
*
- * With one argument, transcodes the contents of <i>str</i> from
+ * The first form transcodes the contents of <i>str</i> from
* str.encoding to +encoding+.
- * With two arguments, transcodes the contents of <i>str</i> from
+ * The second form transcodes the contents of <i>str</i> from
* from_encoding to to_encoding.
+ * The options Hash gives details for conversion. See String#encode
+ * for details.
* Returns the string even if no changes were made.
*/
@@ -414,13 +418,15 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
/*
* call-seq:
- * str.encode(encoding) => str
- * str.encode(to_encoding, from_encoding) => str
+ * str.encode(encoding [, options] ) => str
+ * str.encode(to_encoding, from_encoding [, options] ) => str
*
- * With one argument, returns a copy of <i>str</i> transcoded
+ * The first form returns a copy of <i>str</i> transcoded
* to encoding +encoding+.
- * With two arguments, returns a copy of <i>str</i> transcoded
+ * The second form returns a copy of <i>str</i> transcoded
* from from_encoding to to_encoding.
+ * The options Hash gives details for conversion. Details
+ * to be added.
*/
static VALUE