summary | refs | log | tree | commit | diff
path: root/ext/stringio
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-08-25 03:29:39 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-08-25 03:29:39 +0000
commit: a25fbe3b3e531bbe479f344af24eaf9d2eeae6ea (patch)
tree: 055e58ed569fb28012fadade94f518e0a888e47d /ext/stringio
parent: 0ada813abfe3a049da29bd423ba34606a00777bd (diff)
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters. for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion. downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete, squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on characters.
* string.c (rb_str_strip_bang): encoding aware whitespace stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust, rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based on characters.
* io.c (rb_io_getc): getc to return one-character string. for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/stringio')
-rw-r--r-- ext/stringio/stringio.c 79
1 files changed, 49 insertions, 30 deletions
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c
index 480c9ed378..08a6341b70 100644
--- a/ext/stringio/stringio.c
+++ b/ext/stringio/stringio.c
@@ -13,7 +13,7 @@
**********************************************************************/
#include "ruby.h"
-#include "rubyio.h"
+#include "ruby/io.h"
#if defined(HAVE_FCNTL_H) || defined(_WIN32)
#include <fcntl.h>
#elif defined(HAVE_SYS_FCNTL_H)
@@ -84,6 +84,18 @@ get_strio(VALUE self)
return ptr;
}
+static VALUE
+strio_substr(struct StringIO *ptr, int pos, int len)
+{
+ VALUE str = ptr->string;
+ rb_encoding *enc = rb_enc_get(str);
+ int rlen = RSTRING_LEN(str) - pos;
+
+ if (len > rlen) len = rlen;
+ if (len < 0) len = 0;
+ return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc);
+}
+
#define StringIO(obj) get_strio(obj)
#define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE))
@@ -603,7 +615,7 @@ strio_each_byte(VALUE self)
/*
* call-seq:
- * strio.getc -> fixnum or nil
+ * strio.getc -> string or nil
*
* See IO#getc.
*/
@@ -611,15 +623,17 @@ static VALUE
strio_getc(VALUE self)
{
struct StringIO *ptr = readable(StringIO(self));
- int c;
- char ch;
+ rb_encoding *enc = rb_enc_get(ptr->string);
+ int len;
+ char *p;
if (ptr->pos >= RSTRING_LEN(ptr->string)) {
return Qnil;
}
- c = RSTRING_PTR(ptr->string)[ptr->pos++];
- ch = c & 0xff;
- return rb_str_new(&ch, 1);
+ p = RSTRING_PTR(ptr->string)+ptr->pos;
+ len = rb_enc_mbclen(p, enc);
+ ptr->pos += len;
+ return rb_enc_str_new(p, len, rb_enc_get(ptr->string));
}
/*
@@ -671,30 +685,34 @@ static VALUE
strio_ungetc(VALUE self, VALUE c)
{
struct StringIO *ptr = readable(StringIO(self));
- int cc;
- long len, pos = ptr->pos;
+ long lpos, clen;
+ char *p, *pend;
+ rb_encoding *enc;
if (NIL_P(c)) return Qnil;
if (FIXNUM_P(c)) {
- cc = FIX2INT(c);
+ int cc = FIX2INT(c);
+ char buf[16];
+
+ enc = rb_enc_get(ptr->string);
+ rb_enc_mbcput(cc, buf, enc);
+ c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc);
}
else {
SafeStringValue(c);
- if (RSTRING_LEN(c) > 1) {
- rb_warn("IO#ungetc pushes back only one byte");
- }
- cc = (unsigned char)RSTRING_PTR(c)[0];
+ enc = rb_enc_check(ptr->string, c);
}
- if (cc != EOF && pos > 0) {
- if ((len = RSTRING_LEN(ptr->string)) < pos-- ||
- (unsigned char)RSTRING_PTR(ptr->string)[pos] !=
- (unsigned char)cc) {
- strio_extend(ptr, pos, 1);
- RSTRING_PTR(ptr->string)[pos] = cc;
- OBJ_INFECT(ptr->string, self);
- }
- --ptr->pos;
+ /* get logical position */
+ lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos - 1;
+ for (;;) {
+ clen = rb_enc_mbclen(p, enc);
+ if (p+clen >= pend) break;
+ p += clen;
+ lpos++;
}
+ rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c);
+ ptr->pos = p - RSTRING_PTR(ptr->string);
+
return Qnil;
}
@@ -800,7 +818,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
e = s + limit;
}
if (NIL_P(str)) {
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
else if ((n = RSTRING_LEN(str)) == 0) {
p = s;
@@ -816,13 +834,13 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
break;
}
}
- str = rb_str_substr(ptr->string, s - RSTRING_PTR(ptr->string), e - s);
+ str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s);
}
else if (n == 1) {
if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) {
e = p + 1;
}
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
else {
if (n < e - s) {
@@ -843,7 +861,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr)
}
}
}
- str = rb_str_substr(ptr->string, ptr->pos, e - s);
+ str = strio_substr(ptr, ptr->pos, e - s);
}
ptr->pos = e - RSTRING_PTR(ptr->string);
ptr->lineno++;
@@ -944,7 +962,7 @@ strio_write(VALUE self, VALUE str)
if (TYPE(str) != T_STRING)
str = rb_obj_as_string(str);
len = RSTRING_LEN(str);
- if (!len) return INT2FIX(0);
+ if (len == 0) return INT2FIX(0);
check_modifiable(ptr);
olen = RSTRING_LEN(ptr->string);
if (ptr->flags & FMODE_APPEND) {
@@ -955,7 +973,8 @@ strio_write(VALUE self, VALUE str)
}
else {
strio_extend(ptr, ptr->pos, len);
- rb_str_update(ptr->string, ptr->pos, len, str);
+ memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len);
+ OBJ_INFECT(ptr->string, str);
}
OBJ_INFECT(ptr->string, self);
ptr->pos += len;
@@ -1070,7 +1089,7 @@ strio_read(int argc, VALUE *argv, VALUE self)
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
}
if (NIL_P(str)) {
- str = rb_str_substr(ptr->string, ptr->pos, len);
+ str = strio_substr(ptr, ptr->pos, len);
}
else {
long rest = RSTRING_LEN(ptr->string) - ptr->pos;