summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-04-21 10:14:46 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-04-21 10:14:46 +0000
commit: c71019f8887f3f524f5ef3c0a7491ff14bef0b16 (patch)
tree: d5f46f244d7b7717abede6e99034263ba2c5883f
parent: 8377e9e435b4f1434a154a1e4360d522311f720e (diff)
* ext/-test-/string/coderange.c: add Bug::String.new#coderange_scan
to explicitly scan coderange.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@45649 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 5
-rw-r--r-- ext/-test-/string/coderange.c 21
-rw-r--r-- test/-ext-/string/test_coderange.rb 59
3 files changed, 83 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index cafd1e7..82290f3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Mon Apr 21 18:55:21 2014 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * ext/-test-/string/coderange.c: add Bug::String.new#coderange_scan
+ to explicitly scan coderange.
+
Mon Apr 21 18:19:35 2014 NARUSE, Yui <naruse@ruby-lang.org>
* string.c (coderange_scan): remove unused logic.
diff --git a/ext/-test-/string/coderange.c b/ext/-test-/string/coderange.c
index 2f1e6a0..b93172d 100644
--- a/ext/-test-/string/coderange.c
+++ b/ext/-test-/string/coderange.c
@@ -2,10 +2,11 @@
#include "ruby/encoding.h"
static VALUE sym_7bit, sym_valid, sym_unknown, sym_broken;
+
static VALUE
-str_coderange(VALUE str)
+coderange_int2sym(int coderange)
{
- switch (ENC_CODERANGE(str)) {
+ switch (coderange) {
case ENC_CODERANGE_7BIT:
return sym_7bit;
case ENC_CODERANGE_VALID:
@@ -19,6 +20,21 @@ str_coderange(VALUE str)
UNREACHABLE;
}
+/* Return the coderange currently recorded on str, as a symbol, without scanning the string. */
+static VALUE
+str_coderange(VALUE str)
+{
+ return coderange_int2sym(ENC_CODERANGE(str));
+}
+
+/* Reset str's recorded coderange to unknown, scan it with rb_enc_str_coderange(), and return the result as a symbol. */
+static VALUE
+str_coderange_scan(VALUE str)
+{
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+ return coderange_int2sym(rb_enc_str_coderange(str));
+}
+
void
Init_coderange(VALUE klass)
{
@@ -27,4 +43,5 @@ Init_coderange(VALUE klass)
sym_unknown = ID2SYM(rb_intern("unknown"));
sym_broken = ID2SYM(rb_intern("broken"));
rb_define_method(klass, "coderange", str_coderange, 0);
+ rb_define_method(klass, "coderange_scan", str_coderange_scan, 0);
}
diff --git a/test/-ext-/string/test_coderange.rb b/test/-ext-/string/test_coderange.rb
new file mode 100644
index 0000000..83cebf1
--- /dev/null
+++ b/test/-ext-/string/test_coderange.rb
@@ -0,0 +1,59 @@
+# coding: ascii-8bit
+require 'test/unit'
+require "-test-/string/string"
+require "rbconfig/sizeof"
+
+class Test_StringCoderange < Test::Unit::TestCase # checks Bug::String#coderange_scan results for ASCII-8BIT, US-ASCII and UTF-8
+ def setup
+ @sizeof_voidp = RbConfig::SIZEOF["void*"] # used below to size the runs of "a" padding; presumably chosen to cross pointer-sized scan boundaries -- TODO confirm against string.c
+ @a8 = Encoding::ASCII_8BIT
+ @a7 = Encoding::US_ASCII
+ @u8 = Encoding::UTF_8
+ end
+
+ def test_ascii8bit
+ enc = @a8
+ str = "a"
+ str.force_encoding(enc)
+ assert_equal :"7bit", Bug::String.new(str).coderange_scan
+
+ str = "a\xBE".force_encoding(enc) # a byte >= 0x80 in ASCII-8BIT is expected to scan as :valid
+ assert_equal :valid, Bug::String.new(str).coderange_scan
+ end
+
+ def test_usascii
+ enc = @a7
+ str = "a"
+ str.force_encoding(enc)
+ assert_equal :"7bit", Bug::String.new(str).coderange_scan
+
+ str = "a" * (@sizeof_voidp * 2)
+ str << "\xBE" # a byte >= 0x80 after the ASCII run; expected to scan as :broken in US-ASCII
+ str.force_encoding(enc)
+ assert_equal :broken, Bug::String.new(str).coderange_scan
+ end
+
+ def test_utf8
+ enc = @u8
+ str = "a"
+ str.force_encoding(enc)
+ assert_equal :"7bit", Bug::String.new(str).coderange_scan
+
+ str = "a" * (@sizeof_voidp * 3)
+ str << "aa\xC2\x80" # two-byte sequence at the very end of the string; expected :valid
+ str.force_encoding(enc)
+ assert_equal :valid, Bug::String.new(str).coderange_scan
+
+ str = "a" * (@sizeof_voidp * 2)
+ str << "\xC2\x80" # same two-byte sequence, now surrounded by ASCII on both sides; expected :valid
+ str << "a" * (@sizeof_voidp * 2)
+ str.force_encoding(enc)
+ assert_equal :valid, Bug::String.new(str).coderange_scan
+
+ str = "a" * (@sizeof_voidp * 2)
+ str << "\xC1\x80" # byte sequence the scan is expected to reject as :broken in UTF-8
+ str << "a" * (@sizeof_voidp * 2)
+ str.force_encoding(enc)
+ assert_equal :broken, Bug::String.new(str).coderange_scan
+ end
+end