From fa31dda1f839909a96cdcc6339d4674e80173ac2 Mon Sep 17 00:00:00 2001
From: matz
Date: Mon, 22 Jun 2009 08:23:30 +0000
Subject: * io.c (rb_io_each_codepoint, rb_io_codepoints): new methods. [ruby-core:23949]

* ext/stringio/stringio.c (strio_each_codepoint): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23818 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ChangeLog                      |   7 +++
 ext/stringio/stringio.c        |  33 +++++++++++++
 io.c                           | 102 ++++++++++++++++++++++++++++++++++++++++-
 test/stringio/test_stringio.rb |   5 ++
 4 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 6014cbcd62..1d6962b999 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Mon Jun 22 17:15:38 2009  Yukihiro Matsumoto
+
+        * io.c (rb_io_each_codepoint, rb_io_codepoints): new methods.
+          [ruby-core:23949]
+
+        * ext/stringio/stringio.c (strio_each_codepoint): ditto.
+
 Mon Jun 22 16:26:11 2009  Nobuyoshi Nakada
 
         * ruby.c (ruby_init_loadpath_safe): removed "." from load_path.
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c
index 9bb5ed8a2a..7514264f6a 100644
--- a/ext/stringio/stringio.c
+++ b/ext/stringio/stringio.c
@@ -824,6 +824,37 @@ strio_each_char(VALUE self)
     return self;
 }
 
+/*
+ * call-seq:
+ *   strio.each_codepoint {|c| block }  -> strio
+ *
+ * See IO#each_codepoint.
+ */
+static VALUE
+strio_each_codepoint(VALUE self)
+{
+    struct StringIO *ptr;
+    rb_encoding *enc;
+    unsigned int c;
+    int n;
+
+    RETURN_ENUMERATOR(self, 0, 0); /* no block given: return an Enumerator instead */
+
+    ptr = readable(StringIO(self));
+    enc = rb_enc_get(ptr->string);
+    for (;;) {
+        if (ptr->pos >= RSTRING_LEN(ptr->string)) { /* read position is at or past the end */
+            return self;
+        }
+
+        c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos,
+                                 RSTRING_END(ptr->string), &n, enc);
+        rb_yield(UINT2NUM(c));
+        ptr->pos += n; /* advance by n, which was filled in through &n above */
+    }
+    return self; /* not reached: the loop above only exits through its return */
+}
+
 /* Boyer-Moore search: copied from regex.c */
 static void
 bm_init_skip(long *skip, const char *pat, long m)
@@ -1359,6 +1390,8 @@ Init_stringio()
     rb_define_method(StringIO, "bytes", strio_each_byte, 0);
     rb_define_method(StringIO, "each_char", strio_each_char, 0);
     rb_define_method(StringIO, "chars", strio_each_char, 0);
+    rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0);
+    rb_define_method(StringIO, "codepoints", strio_each_codepoint, 0);
     rb_define_method(StringIO, "getc", strio_getc, 0);
     rb_define_method(StringIO, "ungetc", strio_ungetc, 1);
     rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1);
diff --git a/io.c b/io.c
index 660fb14cc0..9d67bff15e 100644
--- a/io.c
+++ b/io.c
@@ -2641,7 +2641,7 @@ rb_io_each_byte(VALUE io)
             fptr->rbuf_len--;
             rb_yield(INT2FIX(*p & 0xff));
             p++;
-            errno = 0;
+            errno = 0;
         }
         rb_io_check_readable(fptr);
         READ_CHECK(fptr);
@@ -2774,6 +2774,89 @@ rb_io_each_char(VALUE io)
 
 
 
+/*
+ *  call-seq:
+ *     ios.each_codepoint {|c| block }  => ios
+ *
+ *  Calls the block once for each character in ios, passing the
+ *  character's Integer codepoint as the argument. The stream must be
+ *  opened for reading or an IOError will be raised.
+ */
+
+static VALUE
+rb_io_each_codepoint(VALUE io)
+{
+    rb_io_t *fptr;
+    rb_encoding *enc;
+    unsigned int c;
+    int r, n;
+
+    RETURN_ENUMERATOR(io, 0, 0);
+    GetOpenFile(io, fptr);
+    rb_io_check_readable(fptr);
+
+    READ_CHECK(fptr);
+    if (NEED_READCONV(fptr)) { /* characters are taken from the conversion buffer (cbuf) */
+        for (;;) {
+            make_readconv(fptr, 0);
+            for (;;) {
+                if (fptr->cbuf_len) {
+                    if (fptr->encs.enc)
+                        r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off,
+                                                  fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
+                                                  fptr->encs.enc);
+                    else
+                        r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
+                    if (!MBCLEN_NEEDMORE_P(r))
+                        break;
+                    if (fptr->cbuf_len == fptr->cbuf_capa) {
+                        rb_raise(rb_eIOError, "too long character");
+                    }
+                }
+                if (more_char(fptr) == -1) {
+                    /* ignore an incomplete character before EOF */
+                    return io;
+                }
+            }
+            if (MBCLEN_INVALID_P(r)) { /* only reachable with a non-NULL encs.enc; enc is not assigned yet here */
+                rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(fptr->encs.enc));
+            }
+            n = MBCLEN_CHARFOUND_LEN(r);
+            c = !fptr->encs.enc ? (unsigned char)fptr->cbuf[fptr->cbuf_off] : /* NULL enc: one byte per character */
+                rb_enc_codepoint(fptr->cbuf+fptr->cbuf_off,
+                                 fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len, fptr->encs.enc);
+            fptr->cbuf_off += n; /* consume from cbuf, the buffer the character was decoded from */
+            fptr->cbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+    }
+    enc = io_input_encoding(fptr); /* no read conversion: decode directly from rbuf */
+    for (;;) {
+        if (io_fillbuf(fptr) < 0) {
+            return io;
+        }
+        r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off,
+                                  fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+        if (MBCLEN_CHARFOUND_P(r) &&
+            (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
+            c = rb_enc_codepoint(fptr->rbuf+fptr->rbuf_off,
+                                 fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+            fptr->rbuf_off += n;
+            fptr->rbuf_len -= n;
+            rb_yield(UINT2NUM(c));
+        }
+        else if (MBCLEN_INVALID_P(r)) {
+            rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
+        }
+        else {
+            continue; /* NOTE(review): incomplete character; confirm io_fillbuf() reads more while rbuf is non-empty */
+        }
+    }
+    return io;
+}
+
+
+
 /*
  *  call-seq:
  *     ios.lines(sep=$/)     => anEnumerator
@@ -2836,6 +2919,21 @@ rb_io_chars(VALUE io)
     return rb_enumeratorize(io, ID2SYM(rb_intern("each_char")), 0, 0);
 }
 
+/*
+ *  call-seq:
+ *     ios.codepoints   => anEnumerator
+ *
+ *  Returns an enumerator that gives each codepoint in ios.
+ *  The stream must be opened for reading or an IOError
+ *  will be raised.
+ */
+
+static VALUE
+rb_io_codepoints(VALUE io)
+{
+    return rb_enumeratorize(io, ID2SYM(rb_intern("each_codepoint")), 0, 0);
+}
+
 /*
  *  call-seq:
  *     ios.getc   => string or nil
@@ -8797,9 +8895,11 @@ Init_IO(void)
     rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1);
     rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0);
     rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0);
+    rb_define_method(rb_cIO, "each_codepoint", rb_io_each_codepoint, 0);
     rb_define_method(rb_cIO, "lines", rb_io_lines, -1);
     rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0);
     rb_define_method(rb_cIO, "chars", rb_io_chars, 0);
+    rb_define_method(rb_cIO, "codepoints", rb_io_codepoints, 0);
 
     rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1);
     rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1);
diff --git a/test/stringio/test_stringio.rb b/test/stringio/test_stringio.rb
index 8c72803b45..570f180fc7 100644
--- a/test/stringio/test_stringio.rb
+++ b/test/stringio/test_stringio.rb
@@ -340,6 +340,11 @@ class TestStringIO < Test::Unit::TestCase
     assert_equal(%w(1 2 3 4), f.each_char.to_a)
   end
 
+  def test_each_codepoint
+    f = StringIO.new("1234")
+    assert_equal([49, 50, 51, 52], f.each_codepoint.to_a)
+  end
+
   def test_gets2
     f = StringIO.new("foo\nbar\nbaz\n")
     assert_equal("fo", f.gets(2))
-- 
cgit v1.2.3