summary | refs | log | tree | commit | diff
path: root/transcode.c
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-21 08:42:10 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-21 08:42:10 +0000
commit: 6d5ef97a32f74917efaa53049815873c4ae00aa1 (patch)
tree: 580ec0cb8820acc1f4ac67c47b1dfc735e2ae802 /transcode.c
parent: ca22f3e168dc16cda7b88cb257ce4298782e46b2 (diff)
Thu Feb 21 17:15:15 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c: Added basic support for passing options to String#encode via a hash. Currently only one option, with one value, is supported: invalid: :ignore (dropping invalid byte sequences instead of producing an error). Option naming is not yet stable!

* test/ruby/test_transcode.rb: Added a single test for the invalid: :ignore option. No further tests were added, because most data does not yet distinguish between INVALID and UNKNOWN.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15565 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- transcode.c 33
1 files changed, 29 insertions, 4 deletions
diff --git a/transcode.c b/transcode.c
index 4173df9dc1..ed01374f5b 100644
--- a/transcode.c
+++ b/transcode.c
@@ -15,6 +15,9 @@
#include "transcode_data.h"
#include <ctype.h>
+static VALUE sym_invalid, sym_ignore;
+#define INVALID_IGNORE 0x1
+
/*
* Dispatch data and logic
*/
@@ -132,7 +135,8 @@ static void
transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
unsigned char *in_stop, unsigned char *out_stop,
const rb_transcoder *my_transcoder,
- rb_transcoding *my_transcoding)
+ rb_transcoding *my_transcoding,
+ const int opt)
{
unsigned char *in_p = *in_pos, *out_p = *out_pos;
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
@@ -211,14 +215,17 @@ transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
case INVALID:
goto invalid;
case UNDEF:
- /* todo: add code for alternative behaviors */
+ /* todo: add code for alternate behaviors */
rb_raise(rb_eRuntimeError /*@@@change exception*/, "conversion undefined for byte sequence (maybe invalid byte sequence)");
continue;
}
continue;
invalid:
/* deal with invalid byte sequence */
- /* todo: add code for alternative behaviors */
+ /* todo: add more alternative behaviors */
+ if (opt&INVALID_IGNORE) {
+ continue;
+ }
rb_raise(rb_eRuntimeError /*change exception*/, "invalid byte sequence");
continue;
}
@@ -254,7 +261,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
const rb_transcoder *my_transcoder;
rb_transcoding my_transcoding;
int final_encoding = 0;
+ VALUE opt;
+ int options = 0;
+
+ opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
+ if (!NIL_P(opt)) {
+ VALUE v;
+ argc--;
+ v = rb_hash_aref(opt, sym_invalid);
+ if (NIL_P(v)) {
+ rb_raise(rb_eArgError, "unknown value for invalid: setting");
+ }
+ else if (v==sym_ignore) {
+ options |= INVALID_IGNORE;
+ }
+ }
if (argc < 1 || argc > 2) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
}
@@ -325,7 +347,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
my_transcoding.ruby_string_dest = dest;
my_transcoding.flush_func = str_transcoding_resize;
- transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding);
+ transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), my_transcoder, &my_transcoding, options);
if (fromp != sp+slen) {
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
}
@@ -426,6 +448,9 @@ Init_transcode(void)
transcoder_lib_table = st_init_strcasetable();
init_transcoder_table();
+ sym_invalid = ID2SYM(rb_intern("invalid"));
+ sym_ignore = ID2SYM(rb_intern("ignore"));
+
rb_define_method(rb_cString, "encode", rb_str_transcode, -1);
rb_define_method(rb_cString, "encode!", rb_str_transcode_bang, -1);
}