6 files changed, 104 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 72c8e51d25..f8a5cd5317 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Wed Dec  3 16:25:55 2008  Akinori MUSHA  <knu@iDaemons.org>
+
+	* pack.c (rb_utf8_to_uv), intern.h: Rename utf8_to_uv to
+	  rb_utf8_to_uv and export.
+
+	* re.c (str_ord): Backport String#ord from 1.9 (sort of);
+	  requested by Shinichiro Hamaji in [ruby-dev:37247].
+
 Wed Dec  3 16:17:36 2008  Akinori MUSHA  <knu@iDaemons.org>
 
 	* string.c (rb_str_getbyte, rb_str_setbyte): Add String#getbyte
diff --git a/NEWS b/NEWS
index d503c9becb..481f0413da 100644
--- a/NEWS
+++ b/NEWS
@@ -45,9 +45,11 @@ with all sufficient information, see the ChangeLog file.
 
   * String#getbyte
   * String#setbyte
+  * String#ord
 
     New methods for the forward compatibility with 1.9, in which the
-    behavior of String#[] and String#[]= have changed.
+    behavior of String#[] and String#[]= has changed.  String#ord is
+    $KCODE aware.
 
 * dbm
 
diff --git a/intern.h b/intern.h
index ae676946c2..aa7ac6e4ac 100644
--- a/intern.h
+++ b/intern.h
@@ -344,6 +344,8 @@ VALUE rb_String _((VALUE));
 VALUE rb_Array _((VALUE));
 double rb_cstr_to_dbl _((const char*, int));
 double rb_str_to_dbl _((VALUE, int));
+/* pack.c */
+unsigned long rb_utf8_to_uv _((char *, long *));
 /* parse.y */
 RUBY_EXTERN int   ruby_sourceline;
 RUBY_EXTERN char *ruby_sourcefile;
diff --git a/pack.c b/pack.c
index 610cb30a0f..bd6546cd76 100644
--- a/pack.c
+++ b/pack.c
@@ -370,7 +370,6 @@ static void encodes _((VALUE,const char*,long,int));
 static void qpencode _((VALUE,VALUE,long));
 
 static int uv_to_utf8 _((char*,unsigned long));
-static unsigned long utf8_to_uv _((char*,long*));
 
 /*
  *  call-seq:
@@ -1722,7 +1721,7 @@ pack_unpack(str, fmt)
 		long alen = send - s;
 		unsigned long l;
 
-		l = utf8_to_uv(s, &alen);
+		l = rb_utf8_to_uv(s, &alen);
 		s += alen; len--;
 		rb_ary_push(ary, ULONG2NUM(l));
 	    }
@@ -2054,8 +2053,8 @@ static const unsigned long utf8_limits[] = {
     0x80000000,			/* 7 */
 };
 
-static unsigned long
-utf8_to_uv(p, lenp)
+unsigned long
+rb_utf8_to_uv(p, lenp)
     char *p;
     long *lenp;
 {
diff --git a/re.c b/re.c
index a0ac6a84c8..1e5a553814 100644
--- a/re.c
+++ b/re.c
@@ -2281,6 +2281,48 @@ rb_reg_s_last_match(argc, argv)
     return match_getter();
 }
 
+/*
+ *  call-seq:
+ *     str.ord   => integer
+ *
+ *  Return the $KCODE-dependent <code>Integer</code> ordinal of the first character.
+ *
+ *     "a".ord         #=> 97
+ */
+
+static VALUE
+str_ord(str)
+    VALUE str;
+{
+    char *p = RSTRING(str)->ptr;
+    long len = RSTRING(str)->len;
+    unsigned long cp;  /* ordinal being assembled */
+    int charlen;  /* expected byte count of the first character (default branch only) */
+
+    if (len <= 0)  /* an empty string has no first character */
+	rb_raise(rb_eArgError, "empty string");
+
+    switch (reg_kcode) {
+      case KCODE_NONE:  /* the first byte itself is the ordinal */
+	return INT2FIX((unsigned char)p[0]);
+      case KCODE_UTF8:  /* decoding is delegated to pack.c */
+	cp = rb_utf8_to_uv(p, &len);  /* len goes in as the number of bytes available */
+	break;
+      default:  /* any other kcode: combine the character's bytes, first byte most significant */
+	charlen = mbclen(p[0]);  /* presumably derived from the lead byte -- see mbclen */
+	if (len < charlen)  /* string ends before the character is complete */
+	    rb_raise(rb_eArgError,
+		     "malformed %s character (expected %d bytes, given %ld bytes)",
+		     rb_get_kcode(), charlen, len);
+	for (cp = 0; charlen--; ) {  /* runs once per byte of the character */
+	    cp <<= 8;
+	    cp |= (unsigned char)*p++;  /* cast keeps bytes >= 0x80 from sign-extending */
+	}
+	break;
+    }
+
+    return ULONG2NUM(cp);
+}
 
 /*
  *  Document-class: Regexp
@@ -2373,4 +2415,6 @@ Init_Regexp()
     rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
     rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
     rb_define_method(rb_cMatch, "string", match_string, 0);
+
+    rb_define_method(rb_cString, "ord", str_ord, 0);
 }
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 8d37479de4..25786fe9c5 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -96,4 +96,48 @@ class TestString < Test::Unit::TestCase
     s.setbyte(-4, 0x84)
     assert_equal("\xE3\x81\x84\xE3\x81\x84", s)
   end
+
+  def test_ord  # String#ord under each $KCODE setting
+    original_kcode = $KCODE  # saved so the ensure clause can restore it
+
+    assert_raise(ArgumentError) { "".ord }
+
+    str_abc = "ABC"
+    str_a_i_U = "\xE3\x81\x82\xE3\x81\x84"  # hiragana "a", "i" in UTF-8
+    str_a_i_E = "\xA4\xA2\xA4\xA4"  # same two characters in EUC-JP
+    str_a_i_S = "\x82\xA0\x82\xA2"  # same two characters in Shift_JIS
+    str_ai_U = "\xEF\xBD\xB1\xEF\xBD\xB2"  # half-width katakana "a", "i" in UTF-8
+    str_ai_E = "\x8E\xB1\x8E\xB2"  # same two characters in EUC-JP
+    str_ai_S = "\xB1\xB2"  # same two characters in Shift_JIS (one byte each)
+
+    $KCODE = 'n'  # every string yields its first byte
+    assert_equal(0x41, str_abc.ord)
+    assert_equal(0xE3, str_a_i_U.ord)
+    assert_equal(0xA4, str_a_i_E.ord)
+    assert_equal(0x82, str_a_i_S.ord)
+    assert_equal(0xEF, str_ai_U.ord)
+    assert_equal(0x8E, str_ai_E.ord)
+    assert_equal(0xB1, str_ai_S.ord)
+
+    $KCODE = 'u'  # UTF-8 strings yield the code point of their first character
+    assert_equal(0x41, str_abc.ord)
+    assert_equal(0x3042, str_a_i_U.ord)
+    assert_raise(ArgumentError) { str_a_i_U[0..0].ord }  # only 1 of the 3 bytes
+    assert_raise(ArgumentError) { str_a_i_U[0..1].ord }  # only 2 of the 3 bytes
+    assert_equal(0xFF71, str_ai_U.ord)
+
+    $KCODE = 's'  # Shift_JIS strings yield the bytes of their first character as one integer
+    assert_equal(0x41, str_abc.ord)
+    assert_equal(0x82A0, str_a_i_S.ord)
+    assert_raise(ArgumentError) { str_a_i_S[0..0].ord }  # lead byte without its trail byte
+    assert_equal(0xB1, str_ai_S.ord)  # single-byte character
+
+    $KCODE = 'e'  # EUC-JP strings yield the bytes of their first character as one integer
+    assert_equal(0x41, str_abc.ord)
+    assert_equal(0xA4A2, str_a_i_E.ord)
+    assert_raise(ArgumentError) { str_a_i_E[0..0].ord }  # lead byte without its trail byte
+    assert_equal(0x8EB1, str_ai_E.ord)
+  ensure  # runs whether or not an assertion failed
+    $KCODE = original_kcode
+  end
 end