summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--ext/stringio/stringio.c33
-rw-r--r--io.c102
-rw-r--r--test/stringio/test_stringio.rb5
4 files changed, 146 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 6014cbcd62..1d6962b999 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Mon Jun 22 17:15:38 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * io.c (rb_io_each_codepoint): new method.
+ [ruby-core:23949]
+
+ * ext/stringio/stringio.c (strio_each_codepoint): ditto.
+
Mon Jun 22 16:26:11 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
* ruby.c (ruby_init_loadpath_safe): removed "." from load_path.
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c
index 9bb5ed8a2a..7514264f6a 100644
--- a/ext/stringio/stringio.c
+++ b/ext/stringio/stringio.c
@@ -824,6 +824,37 @@ strio_each_char(VALUE self)
return self;
}
+/*
+ * call-seq:
+ *   strio.each_codepoint {|c| block }  -> strio
+ *
+ * Yields the codepoint of each character from the current position up to
+ * the end of the underlying string, advancing the position as it goes.
+ *
+ * See IO#each_codepoint.
+ */
+static VALUE
+strio_each_codepoint(VALUE self)
+{
+    struct StringIO *sio;
+    rb_encoding *encoding;
+    unsigned int codepoint;
+    int width;
+
+    RETURN_ENUMERATOR(self, 0, 0);
+
+    sio = readable(StringIO(self));
+    encoding = rb_enc_get(sio->string);
+
+    /* string pointer and length are re-read on every pass */
+    while (sio->pos < RSTRING_LEN(sio->string)) {
+        codepoint = rb_enc_codepoint_len(RSTRING_PTR(sio->string) + sio->pos,
+                                         RSTRING_END(sio->string),
+                                         &width, encoding);
+        rb_yield(UINT2NUM(codepoint));
+        /* step over the bytes of the character just yielded */
+        sio->pos += width;
+    }
+    return self;
+}
+
/* Boyer-Moore search: copied from regex.c */
static void
bm_init_skip(long *skip, const char *pat, long m)
@@ -1359,6 +1390,8 @@ Init_stringio()
rb_define_method(StringIO, "bytes", strio_each_byte, 0);
rb_define_method(StringIO, "each_char", strio_each_char, 0);
rb_define_method(StringIO, "chars", strio_each_char, 0);
+ rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0);
+ rb_define_method(StringIO, "codepoints", strio_each_codepoint, 0);
rb_define_method(StringIO, "getc", strio_getc, 0);
rb_define_method(StringIO, "ungetc", strio_ungetc, 1);
rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1);
diff --git a/io.c b/io.c
index 660fb14cc0..9d67bff15e 100644
--- a/io.c
+++ b/io.c
@@ -2641,7 +2641,7 @@ rb_io_each_byte(VALUE io)
fptr->rbuf_len--;
rb_yield(INT2FIX(*p & 0xff));
p++;
- errno = 0;
+ errno = 0;
}
rb_io_check_readable(fptr);
READ_CHECK(fptr);
@@ -2776,6 +2776,89 @@ rb_io_each_char(VALUE io)
/*
* call-seq:
+ * ios.each_codepoint {|c| block } => ios
+ *
+ * Calls the block once for each character in <i>ios</i>, passing the
+ * character's <code>Integer</code> codepoint as the argument. The stream
+ * must be opened for reading or an <code>IOError</code> will be raised.
+ */
+
+static VALUE
+rb_io_each_codepoint(VALUE io)
+{
+    /*
+     * Two paths are taken depending on NEED_READCONV(fptr):
+     *  - with read conversion, characters are decoded from the conversion
+     *    buffer (cbuf) using fptr->encs.enc;
+     *  - otherwise they are decoded from the read buffer (rbuf) using
+     *    io_input_encoding(fptr).
+     * Each decoded codepoint is yielded as an Integer.  io is returned as
+     * soon as more_char() / io_fillbuf() report that nothing more can be
+     * read.
+     */
+    rb_io_t *fptr;
+    rb_encoding *enc;
+    unsigned int c;
+    int r, n;
+
+    RETURN_ENUMERATOR(io, 0, 0);
+    GetOpenFile(io, fptr);
+    rb_io_check_readable(fptr);
+
+    READ_CHECK(fptr);
+    if (NEED_READCONV(fptr)) {
+        for (;;) {
+            make_readconv(fptr, 0);
+            /* pull converted bytes until cbuf holds one whole character */
+            for (;;) {
+                if (fptr->cbuf_len) {
+                    if (fptr->encs.enc)
+                        r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off,
+                                                  fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
+                                                  fptr->encs.enc);
+                    else
+                        r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
+                    if (!MBCLEN_NEEDMORE_P(r))
+                        break;
+                    if (fptr->cbuf_len == fptr->cbuf_capa) {
+                        rb_raise(rb_eIOError, "too long character");
+                    }
+                }
+                if (more_char(fptr) == -1) {
+                    /* ignore an incomplete character before EOF */
+                    return io;
+                }
+            }
+            if (MBCLEN_INVALID_P(r)) {
+                /*
+                 * r can only be invalid when it came from
+                 * rb_enc_precise_mbclen(), i.e. when encs.enc is non-NULL.
+                 * (The local `enc` is not assigned on this path.)
+                 */
+                rb_raise(rb_eArgError, "invalid byte sequence in %s",
+                         rb_enc_name(fptr->encs.enc));
+            }
+            n = MBCLEN_CHARFOUND_LEN(r);
+            if (fptr->encs.enc) {
+                c = rb_enc_codepoint(fptr->cbuf+fptr->cbuf_off,
+                                     fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
+                                     fptr->encs.enc);
+            }
+            else {
+                /* no encoding: the character is the single byte found above */
+                c = (unsigned char)fptr->cbuf[fptr->cbuf_off];
+            }
+            /* consume from cbuf, the buffer the character was decoded from */
+            fptr->cbuf_off += n;
+            fptr->cbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+    }
+    enc = io_input_encoding(fptr);
+    for (;;) {
+        if (io_fillbuf(fptr) < 0) {
+            return io;
+        }
+        r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off,
+                                  fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+        if (MBCLEN_CHARFOUND_P(r) &&
+            (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
+            c = rb_enc_codepoint(fptr->rbuf+fptr->rbuf_off,
+                                 fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+            fptr->rbuf_off += n;
+            fptr->rbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+        else if (MBCLEN_INVALID_P(r)) {
+            rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
+        }
+        else {
+            /*
+             * NOTE(review): incomplete character; this relies on the next
+             * io_fillbuf() call supplying more bytes even though rbuf is
+             * not empty -- confirm against io_fillbuf().
+             */
+            continue;
+        }
+    }
+    return io;
+}
+
+
+
+/*
+ * call-seq:
* ios.lines(sep=$/) => anEnumerator
* ios.lines(limit) => anEnumerator
* ios.lines(sep, limit) => anEnumerator
@@ -2838,6 +2921,21 @@ rb_io_chars(VALUE io)
/*
* call-seq:
+ * ios.codepoints => anEnumerator
+ *
+ * Returns an enumerator that gives each codepoint in <em>ios</em>.
+ * The stream must be opened for reading or an <code>IOError</code>
+ * will be raised.
+ */
+
+static VALUE
+rb_io_codepoints(VALUE io)
+{
+    /* name of the iterator method the enumerator is built on */
+    VALUE iter_name = ID2SYM(rb_intern("each_codepoint"));
+
+    /* no extra arguments are forwarded to each_codepoint */
+    return rb_enumeratorize(io, iter_name, 0, 0);
+}
+
+/*
+ * call-seq:
* ios.getc => string or nil
*
* Reads a one-character string from <em>ios</em>. Returns
@@ -8797,9 +8895,11 @@ Init_IO(void)
rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1);
rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0);
rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0);
+ rb_define_method(rb_cIO, "each_codepoint", rb_io_each_codepoint, 0);
rb_define_method(rb_cIO, "lines", rb_io_lines, -1);
rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0);
rb_define_method(rb_cIO, "chars", rb_io_chars, 0);
+ rb_define_method(rb_cIO, "codepoints", rb_io_codepoints, 0);
rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1);
rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1);
diff --git a/test/stringio/test_stringio.rb b/test/stringio/test_stringio.rb
index 8c72803b45..570f180fc7 100644
--- a/test/stringio/test_stringio.rb
+++ b/test/stringio/test_stringio.rb
@@ -340,6 +340,11 @@ class TestStringIO < Test::Unit::TestCase
assert_equal(%w(1 2 3 4), f.each_char.to_a)
end
+ def test_each_codepoint
+ f = StringIO.new("1234")
+ assert_equal([49, 50, 51, 52], f.each_codepoint.to_a)
+ end
+
def test_gets2
f = StringIO.new("foo\nbar\nbaz\n")
assert_equal("fo", f.gets(2))