summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--NEWS4
-rw-r--r--intern.h2
-rw-r--r--pack.c7
-rw-r--r--re.c44
-rw-r--r--test/ruby/test_string.rb44
6 files changed, 104 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 72c8e51d25..f8a5cd5317 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Wed Dec 3 16:25:55 2008 Akinori MUSHA <knu@iDaemons.org>
+
+ * pack.c (rb_utf8_to_uv), intern.h: Rename utf8_to_uv to
+ rb_utf8_to_uv and export.
+
+ * re.c (str_ord): Backport String#ord from 1.9 (sort of);
+ requested by Shinichiro Hamaji in [ruby-dev:37247].
+
Wed Dec 3 16:17:36 2008 Akinori MUSHA <knu@iDaemons.org>
* string.c (rb_str_getbyte, rb_str_setbyte): Add String#getbyte
diff --git a/NEWS b/NEWS
index d503c9becb..481f0413da 100644
--- a/NEWS
+++ b/NEWS
@@ -45,9 +45,11 @@ with all sufficient information, see the ChangeLog file.
* String#getbyte
* String#setbyte
+ * String#ord
New methods for the forward compatibility with 1.9, in which the
- behavior of String#[] and String#[]= have changed.
+ behavior of String#[] and String#[]= have changed. String#ord is
+ $KCODE aware.
* dbm
diff --git a/intern.h b/intern.h
index ae676946c2..aa7ac6e4ac 100644
--- a/intern.h
+++ b/intern.h
@@ -344,6 +344,8 @@ VALUE rb_String _((VALUE));
VALUE rb_Array _((VALUE));
double rb_cstr_to_dbl _((const char*, int));
double rb_str_to_dbl _((VALUE, int));
+/* pack.c */
+unsigned long rb_utf8_to_uv _((char *, long *));
/* parse.y */
RUBY_EXTERN int ruby_sourceline;
RUBY_EXTERN char *ruby_sourcefile;
diff --git a/pack.c b/pack.c
index 610cb30a0f..bd6546cd76 100644
--- a/pack.c
+++ b/pack.c
@@ -370,7 +370,6 @@ static void encodes _((VALUE,const char*,long,int));
static void qpencode _((VALUE,VALUE,long));
static int uv_to_utf8 _((char*,unsigned long));
-static unsigned long utf8_to_uv _((char*,long*));
/*
* call-seq:
@@ -1722,7 +1721,7 @@ pack_unpack(str, fmt)
long alen = send - s;
unsigned long l;
- l = utf8_to_uv(s, &alen);
+ l = rb_utf8_to_uv(s, &alen);
s += alen; len--;
rb_ary_push(ary, ULONG2NUM(l));
}
@@ -2054,8 +2053,8 @@ static const unsigned long utf8_limits[] = {
0x80000000, /* 7 */
};
-static unsigned long
-utf8_to_uv(p, lenp)
+unsigned long
+rb_utf8_to_uv(p, lenp)
char *p;
long *lenp;
{
diff --git a/re.c b/re.c
index a0ac6a84c8..1e5a553814 100644
--- a/re.c
+++ b/re.c
@@ -2281,6 +2281,48 @@ rb_reg_s_last_match(argc, argv)
return match_getter();
}
+/*
+ * call-seq:
+ * str.ord => integer
+ *
+ * Return the <code>Integer</code> ordinal of the first character of
+ * <i>str</i>. How the leading bytes are interpreted depends on the
+ * current $KCODE setting:
+ *
+ * - none: the value of the first byte.
+ * - UTF-8: the value decoded by rb_utf8_to_uv().
+ * - any other setting: the bytes of the first multibyte character,
+ * folded most-significant-byte first into a single integer.
+ *
+ * Raises <code>ArgumentError</code> if <i>str</i> is empty, or, in the
+ * non-UTF-8 multibyte case, if <i>str</i> holds fewer bytes than its
+ * leading byte calls for.
+ *
+ * "a".ord #=> 97
+ */
+
+static VALUE
+str_ord(str)
+ VALUE str;
+{
+ char *p = RSTRING(str)->ptr;
+ long len = RSTRING(str)->len;
+ unsigned long cp;
+ int charlen;
+
+ /* There is no first character to report for an empty string. */
+ if (len <= 0)
+ rb_raise(rb_eArgError, "empty string");
+
+ switch (reg_kcode) {
+ case KCODE_NONE:
+ /* Single-byte mode: the ordinal is just the first byte, unsigned. */
+ return INT2FIX((unsigned char)p[0]);
+ case KCODE_UTF8:
+ /*
+ * len is passed by address; pack_unpack uses the same parameter as
+ * "bytes available" in and "bytes consumed" out. The updated value
+ * is not used here.
+ * NOTE(review): truncated sequences are presumably rejected inside
+ * rb_utf8_to_uv (the tests expect ArgumentError) -- confirm in pack.c.
+ */
+ cp = rb_utf8_to_uv(p, &len);
+ break;
+ default:
+ /* Remaining kcodes: mbclen() gives the byte count expected for a
+ * character starting with this leading byte. */
+ charlen = mbclen(p[0]);
+ if (len < charlen)
+ rb_raise(rb_eArgError,
+ "malformed %s character (expected %d bytes, given %ld bytes)",
+ rb_get_kcode(), charlen, len);
+ /* Fold the character's bytes into cp, first byte most significant
+ * (e.g. 0x82 0xA0 becomes 0x82A0). */
+ for (cp = 0; charlen--; ) {
+ cp <<= 8;
+ cp |= (unsigned char)*p++;
+ }
+ break;
+ }
+
+ return ULONG2NUM(cp);
+}
/*
* Document-class: Regexp
@@ -2373,4 +2415,6 @@ Init_Regexp()
rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
rb_define_method(rb_cMatch, "string", match_string, 0);
+
+ rb_define_method(rb_cString, "ord", str_ord, 0);
}
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 8d37479de4..25786fe9c5 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -96,4 +96,48 @@ class TestString < Test::Unit::TestCase
s.setbyte(-4, 0x84)
assert_equal("\xE3\x81\x84\xE3\x81\x84", s)
end
+
+ # Exercises String#ord under each $KCODE setting ('n', 'u', 's', 'e'),
+ # checking the ordinal of the first character and that truncated
+ # multibyte characters raise ArgumentError.
+ def test_ord
+ # $KCODE is global state; remember it so the ensure clause can restore it.
+ original_kcode = $KCODE
+
+ # An empty string raises ArgumentError.
+ assert_raise(ArgumentError) { "".ord }
+
+ # Fixtures: the _U / _E / _S suffixes pair with the 'u' / 'e' / 's'
+ # $KCODE sections below (presumably UTF-8, EUC-JP and Shift_JIS
+ # encodings of the same two-character strings).
+ str_abc = "ABC"
+ str_a_i_U = "\xE3\x81\x82\xE3\x81\x84"
+ str_a_i_E = "\xA4\xA2\xA4\xA4"
+ str_a_i_S = "\x82\xA0\x82\xA2"
+ str_ai_U = "\xEF\xBD\xB1\xEF\xBD\xB2"
+ str_ai_E = "\x8E\xB1\x8E\xB2"
+ str_ai_S = "\xB1\xB2"
+
+ # 'n': no multibyte handling, so ord is always the first byte.
+ $KCODE = 'n'
+ assert_equal(0x41, str_abc.ord)
+ assert_equal(0xE3, str_a_i_U.ord)
+ assert_equal(0xA4, str_a_i_E.ord)
+ assert_equal(0x82, str_a_i_S.ord)
+ assert_equal(0xEF, str_ai_U.ord)
+ assert_equal(0x8E, str_ai_E.ord)
+ assert_equal(0xB1, str_ai_S.ord)
+
+ # 'u': ord is the decoded UTF-8 code point; a 3-byte sequence cut to
+ # 1 or 2 bytes must raise.
+ $KCODE = 'u'
+ assert_equal(0x41, str_abc.ord)
+ assert_equal(0x3042, str_a_i_U.ord)
+ assert_raise(ArgumentError) { str_a_i_U[0..0].ord }
+ assert_raise(ArgumentError) { str_a_i_U[0..1].ord }
+ assert_equal(0xFF71, str_ai_U.ord)
+
+ # 's': a two-byte character yields both bytes joined (0x82A0), while
+ # 0xB1 counts as a complete one-byte character.
+ $KCODE = 's'
+ assert_equal(0x41, str_abc.ord)
+ assert_equal(0x82A0, str_a_i_S.ord)
+ assert_raise(ArgumentError) { str_a_i_S[0..0].ord }
+ assert_equal(0xB1, str_ai_S.ord)
+
+ # 'e': two-byte characters yield both bytes joined, including the
+ # 0x8E-prefixed form (0x8EB1).
+ $KCODE = 'e'
+ assert_equal(0x41, str_abc.ord)
+ assert_equal(0xA4A2, str_a_i_E.ord)
+ assert_raise(ArgumentError) { str_a_i_E[0..0].ord }
+ assert_equal(0x8EB1, str_ai_E.ord)
+ ensure
+ # Restore the global even when an assertion above fails.
+ $KCODE = original_kcode
+ end
end