diff options
| -rw-r--r-- | ChangeLog | 8 | ||||
| -rw-r--r-- | NEWS | 4 | ||||
| -rw-r--r-- | intern.h | 2 | ||||
| -rw-r--r-- | pack.c | 7 | ||||
| -rw-r--r-- | re.c | 44 | ||||
| -rw-r--r-- | test/ruby/test_string.rb | 44 |
6 files changed, 104 insertions, 5 deletions
@@ -1,3 +1,11 @@ +Wed Dec 3 16:25:55 2008 Akinori MUSHA <knu@iDaemons.org> + + * pack.c (rb_utf8_to_uv), intern.h: Rename utf8_to_uv to + rb_utf8_to_uv and export. + + * re.c (str_ord): Backport String#ord from 1.9 (sort of); + requested by Shinichiro Hamaji in [ruby-dev:37247]. + Wed Dec 3 16:17:36 2008 Akinori MUSHA <knu@iDaemons.org> * string.c (rb_str_getbyte, rb_str_setbyte): Add String#getbyte @@ -45,9 +45,11 @@ with all sufficient information, see the ChangeLog file. * String#getbyte * String#setbyte + * String#ord New methods for the forward compatibility with 1.9, in which the - behavior of String#[] and String#[]= have changed. + behavior of String#[] and String#[]= have changed. String#ord is + $KCODE aware. * dbm @@ -344,6 +344,8 @@ VALUE rb_String _((VALUE)); VALUE rb_Array _((VALUE)); double rb_cstr_to_dbl _((const char*, int)); double rb_str_to_dbl _((VALUE, int)); +/* pack.c */ +unsigned long rb_utf8_to_uv _((char *, long *)); /* parse.y */ RUBY_EXTERN int ruby_sourceline; RUBY_EXTERN char *ruby_sourcefile; @@ -370,7 +370,6 @@ static void encodes _((VALUE,const char*,long,int)); static void qpencode _((VALUE,VALUE,long)); static int uv_to_utf8 _((char*,unsigned long)); -static unsigned long utf8_to_uv _((char*,long*)); /* * call-seq: @@ -1722,7 +1721,7 @@ pack_unpack(str, fmt) long alen = send - s; unsigned long l; - l = utf8_to_uv(s, &alen); + l = rb_utf8_to_uv(s, &alen); s += alen; len--; rb_ary_push(ary, ULONG2NUM(l)); } @@ -2054,8 +2053,8 @@ static const unsigned long utf8_limits[] = { 0x80000000, /* 7 */ }; -static unsigned long -utf8_to_uv(p, lenp) +unsigned long +rb_utf8_to_uv(p, lenp) char *p; long *lenp; { @@ -2281,6 +2281,48 @@ rb_reg_s_last_match(argc, argv) return match_getter(); } +/* + * call-seq: + * str.ord => integer + * + * Return the <code>Integer</code> ordinal of a one-character string. + * + * "a".ord #=> 97 + */ + +static VALUE +str_ord(str) + VALUE str; +{ + char *p = RSTRING(str)->ptr; + long len = RSTRING(str)->len; + unsigned long cp; + int charlen; + + if (len <= 0) + rb_raise(rb_eArgError, "empty string"); + + switch (reg_kcode) { + case KCODE_NONE: + return INT2FIX((unsigned char)p[0]); + case KCODE_UTF8: + cp = rb_utf8_to_uv(p, &len); + break; + default: + charlen = mbclen(p[0]); + if (len < charlen) + rb_raise(rb_eArgError, + "malformed %s character (expected %d bytes, given %ld bytes)", + rb_get_kcode(), charlen, len); + for (cp = 0; charlen--; ) { + cp <<= 8; + cp |= (unsigned char)*p++; + } + break; + } + + return ULONG2NUM(cp); +} /* * Document-class: Regexp @@ -2373,4 +2415,6 @@ Init_Regexp() rb_define_method(rb_cMatch, "to_s", match_to_s, 0); rb_define_method(rb_cMatch, "inspect", match_inspect, 0); rb_define_method(rb_cMatch, "string", match_string, 0); + + rb_define_method(rb_cString, "ord", str_ord, 0); } diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 8d37479de4..25786fe9c5 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -96,4 +96,48 @@ class TestString < Test::Unit::TestCase s.setbyte(-4, 0x84) assert_equal("\xE3\x81\x84\xE3\x81\x84", s) end + + def test_ord + original_kcode = $KCODE + + assert_raise(ArgumentError) { "".ord } + + str_abc = "ABC" + str_a_i_U = "\xE3\x81\x82\xE3\x81\x84" + str_a_i_E = "\xA4\xA2\xA4\xA4" + str_a_i_S = "\x82\xA0\x82\xA2" + str_ai_U = "\xEF\xBD\xB1\xEF\xBD\xB2" + str_ai_E = "\x8E\xB1\x8E\xB2" + str_ai_S = "\xB1\xB2" + + $KCODE = 'n' + assert_equal(0x41, str_abc.ord) + assert_equal(0xE3, str_a_i_U.ord) + assert_equal(0xA4, str_a_i_E.ord) + assert_equal(0x82, str_a_i_S.ord) + assert_equal(0xEF, str_ai_U.ord) + assert_equal(0x8E, str_ai_E.ord) + assert_equal(0xB1, str_ai_S.ord) + + $KCODE = 'u' + assert_equal(0x41, str_abc.ord) + assert_equal(0x3042, str_a_i_U.ord) + assert_raise(ArgumentError) { str_a_i_U[0..0].ord } + assert_raise(ArgumentError) { str_a_i_U[0..1].ord } + assert_equal(0xFF71, str_ai_U.ord) + + $KCODE = 's' + assert_equal(0x41, str_abc.ord) + assert_equal(0x82A0, str_a_i_S.ord) + assert_raise(ArgumentError) { str_a_i_S[0..0].ord } + assert_equal(0xB1, str_ai_S.ord) + + $KCODE = 'e' + assert_equal(0x41, str_abc.ord) + assert_equal(0xA4A2, str_a_i_E.ord) + assert_raise(ArgumentError) { str_a_i_E[0..0].ord } + assert_equal(0x8EB1, str_ai_E.ord) + ensure + $KCODE = original_kcode + end end |
