summary refs log tree commit diff
path: root/io.c
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-24 16:36:14 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-24 16:36:14 +0000
commit a04a812ed0aa7424ea1f40756402d1f199a43308 (patch)
tree 9092b223c6ff47b94e637088e56e6ab27f7c2229 /io.c
parent 13e239a0120deefd6f2e60345796e282a78ace29 (diff)
* include/ruby/encoding.h (rb_enc_left_char_head): new utility macro.
* include/ruby/encoding.h (rb_enc_right_char_head): ditto.
* io.c (appendline): does multibyte RS search in the function.
* io.c (prepare_getline_args): RS may be nil.
* io.c (rb_io_getc): should process character based on external encoding, when transcoding required.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14619 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'io.c')
-rw-r--r-- io.c 79
1 files changed, 54 insertions, 25 deletions
diff --git a/io.c b/io.c
index faf163f1b8..56ddbcc040 100644
--- a/io.c
+++ b/io.c
@@ -363,6 +363,15 @@ io_read_encoding(rb_io_t *fptr)
: rb_default_external_encoding();
}
+static rb_encoding*
+io_input_encoding(rb_io_t *fptr)
+{
+ if (fptr->enc2) {
+ return fptr->enc2;
+ }
+ return io_read_encoding(fptr);
+}
+
void
rb_io_check_writable(rb_io_t *fptr)
{
@@ -1646,31 +1655,51 @@ io_read(int argc, VALUE *argv, VALUE io)
return str;
}
+static void
+rscheck(const char *rsptr, long rslen, VALUE rs)
+{
+ if (!rs) return;
+ if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen)
+ rb_raise(rb_eRuntimeError, "rs modified");
+}
+
static int
-appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp, int mb)
+appendline(rb_io_t *fptr, int delim, const char *rsptr, int rslen, VALUE rs, VALUE *strp, long *lp)
{
VALUE str = *strp;
int c = EOF;
long limit = *lp;
- rb_encoding *enc = io_read_encoding(fptr);
+ rb_encoding *enc = io_input_encoding(fptr);
do {
long pending = READ_DATA_PENDING_COUNT(fptr);
if (pending > 0) {
const char *s = READ_DATA_PENDING_PTR(fptr);
- const char *p, *e;
+ const char *p, *e, *pp;
long last = 0, len = (c != EOF);
if (limit > 0 && pending > limit) pending = limit;
- p = s;
+ pp = p = s;
again:
e = memchr(p, delim, pending);
if (e) {
- if (mb &&
- ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,(UChar*)s,(UChar*)e) != (UChar*)e) {
+ const char *p0 = e - rslen + 1;
+ if (p0 < s) {
+ p = e + 1;
+ goto again;
+ }
+ pp = rb_enc_left_char_head(pp, p0, enc);
+ if (pp != p0) {
p = e + 1;
goto again;
}
+ if (rsptr) {
+ rscheck(rsptr, rslen, rs);
+ if (memcmp(p0, rsptr, rslen) != 0) {
+ p = e + 1;
+ goto again;
+ }
+ }
pending = e - s + 1;
}
len += pending;
@@ -1752,7 +1781,7 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
int c, nolimit = 0;
for (;;) {
- c = appendline(fptr, delim, &str, &limit, 0);
+ c = appendline(fptr, delim, 0, 0, 0, &str, &limit);
if (c == EOF || c == delim) break;
if (limit == 0) {
nolimit = 1;
@@ -1770,14 +1799,6 @@ rb_io_getline_fast(rb_io_t *fptr, unsigned char delim, long limit)
return str;
}
-static int
-rscheck(const char *rsptr, long rslen, VALUE rs)
-{
- if (RSTRING_PTR(rs) != rsptr && RSTRING_LEN(rs) != rslen)
- rb_raise(rb_eRuntimeError, "rs modified");
- return 0;
-}
-
static void
prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
{
@@ -1803,10 +1824,20 @@ prepare_getline_args(int argc, VALUE *argv, VALUE *rsp, long *limit, VALUE io)
}
}
GetOpenFile(io, fptr);
- if (fptr->enc2) {
- rs = rb_funcall(rs, id_encode, 2,
- rb_enc_from_encoding(fptr->enc2),
- rb_enc_from_encoding(fptr->enc));
+ if (!NIL_P(rs)) {
+ rb_encoding *enc_rs = rb_enc_get(rs);
+ rb_encoding *enc_io = io_read_encoding(fptr);
+
+ if (enc_io != enc_rs &&
+ (rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
+ !rb_enc_asciicompat(enc_io))) {
+ rb_raise(rb_eArgError, "IO and RS encodings differ");
+ }
+ if (fptr->enc2) {
+ rs = rb_funcall(rs, id_encode, 2,
+ rb_enc_from_encoding(fptr->enc2),
+ rb_enc_from_encoding(fptr->enc));
+ }
}
*rsp = rs;
*limit = NIL_P(lim) ? -1L : NUM2LONG(lim);
@@ -1843,6 +1874,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
rslen = 2;
rspara = 1;
swallow(fptr, '\n');
+ rs = 0;
}
else if (rslen == 1) {
return rb_io_getline_fast(fptr, (unsigned char)RSTRING_PTR(rs)[0], limit);
@@ -1852,12 +1884,9 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
}
newline = rsptr[rslen - 1];
- while ((c = appendline(fptr, newline, &str, &limit, 1)) != EOF) {
+ while ((c = appendline(fptr, newline, rsptr, rslen, rs, &str, &limit)) != EOF) {
if (c == newline) {
- if (RSTRING_LEN(str) < rslen) continue;
- if (!rspara) rscheck(rsptr, rslen, rs);
- if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - rslen,
- rsptr, rslen) == 0) break;
+ break;
}
if (limit == 0) {
nolimit = 1;
@@ -2201,7 +2230,7 @@ rb_io_getc(VALUE io)
GetOpenFile(io, fptr);
rb_io_check_readable(fptr);
- enc = io_read_encoding(fptr);
+ enc = io_input_encoding(fptr);
READ_CHECK(fptr);
if (io_fillbuf(fptr) < 0) {
return Qnil;