summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog19
-rw-r--r--encoding.c46
-rw-r--r--include/ruby/encoding.h30
-rw-r--r--marshal.c2
-rw-r--r--parse.y2
-rw-r--r--re.c2
-rw-r--r--string.c2
-rw-r--r--test/ruby/test_m17n.rb9
8 files changed, 88 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index d1ae0255df..cecb8e953d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+Mon Jan 7 11:44:45 2008 Tanaka Akira <akr@fsij.org>
+
+ * encoding.c (rb_enc_internal_get_index): extracted from
+ rb_enc_get_index.
+ (rb_enc_internal_set_index): extracted from rb_enc_associate_index
+
+ * include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX.
+ (ENCODING_GET): ditto.
+ (ENCODING_IS_ASCII8BIT): defined.
+ (ENCODING_CODERANGE_SET): defined.
+
+ * re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT.
+
+ * string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT.
+
+ * parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT.
+
+ * marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT.
+
Mon Jan 7 02:14:07 2008 Tanaka Akira <akr@fsij.org>
* string.c (coderange_scan): avoid rb_enc_to_index.
diff --git a/encoding.c b/encoding.c
index 2d1813fe91..65609763af 100644
--- a/encoding.c
+++ b/encoding.c
@@ -436,6 +436,33 @@ rb_id_encoding(void)
return id_encoding;
}
+int
+rb_enc_internal_get_index(VALUE obj)
+{
+ int i;
+
+ i = ENCODING_GET_INLINED(obj);
+ if (i == ENCODING_INLINE_MAX) {
+ VALUE iv;
+
+ iv = rb_ivar_get(obj, rb_id_encoding());
+ i = NUM2INT(iv);
+ }
+ return i;
+}
+
+void
+rb_enc_internal_set_index(VALUE obj, int idx)
+{
+ if (idx < ENCODING_INLINE_MAX) {
+ ENCODING_SET_INLINED(obj, idx);
+ return;
+ }
+ ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
+ rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
+ return;
+}
+
void
rb_enc_associate_index(VALUE obj, int idx)
{
@@ -444,13 +471,7 @@ rb_enc_associate_index(VALUE obj, int idx)
!rb_enc_asciicompat(rb_enc_from_index(idx))) {
ENC_CODERANGE_CLEAR(obj);
}
- if (idx < ENCODING_INLINE_MAX) {
- ENCODING_SET(obj, idx);
- return;
- }
- ENCODING_SET(obj, ENCODING_INLINE_MAX);
- rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
- return;
+ rb_enc_internal_set_index(obj, idx);
}
int
@@ -476,17 +497,8 @@ rb_enc_associate(VALUE obj, rb_encoding *enc)
int
rb_enc_get_index(VALUE obj)
{
- int i;
-
if (!enc_capable(obj)) return -1;
- i = ENCODING_GET(obj);
- if (i == ENCODING_INLINE_MAX) {
- VALUE iv;
-
- iv = rb_ivar_get(obj, rb_id_encoding());
- i = NUM2INT(iv);
- }
- return i;
+ return rb_enc_internal_get_index(obj);
}
rb_encoding*
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 0ccded8b0f..3810d86b39 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -22,11 +22,27 @@
#define ENCODING_INLINE_MAX 1023
#define ENCODING_SHIFT (FL_USHIFT+10)
#define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT)
-#define ENCODING_SET(obj,i) do {\
+
+#define ENCODING_SET_INLINED(obj,i) do {\
RBASIC(obj)->flags &= ~ENCODING_MASK;\
- RBASIC(obj)->flags |= i << ENCODING_SHIFT;\
+ RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\
+} while (0)
+#define ENCODING_SET(obj,i) do {\
+ VALUE rb_encoding_set_obj = (obj); \
+ int encoding_set_enc_index = (i); \
+ if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
+ ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
+ else \
+ rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
} while (0)
-#define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+
+#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+#define ENCODING_GET(obj) \
+ (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
+ ENCODING_GET_INLINED(obj) : \
+ rb_enc_internal_get_index(obj))
+
+#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
#define ENC_CODERANGE_MASK (FL_USER8|FL_USER9)
#define ENC_CODERANGE_UNKNOWN 0
@@ -39,6 +55,12 @@
(RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
+#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
+ do { \
+ VALUE rb_encoding_coderange_obj = (obj); \
+ ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
+ ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
+ } while (0)
typedef OnigEncodingType rb_encoding;
@@ -56,6 +78,8 @@ rb_encoding* rb_enc_check(VALUE,VALUE);
void rb_enc_associate_index(VALUE, int);
void rb_enc_associate(VALUE, rb_encoding*);
void rb_enc_copy(VALUE dst, VALUE src);
+int rb_enc_internal_get_index(VALUE obj);
+void rb_enc_internal_set_index(VALUE obj, int encindex);
VALUE rb_enc_str_new(const char*, long, rb_encoding*);
VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int);
diff --git a/marshal.c b/marshal.c
index 55ef25b31b..29f26d0b2f 100644
--- a/marshal.c
+++ b/marshal.c
@@ -526,7 +526,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
st_data_t num;
int hasiv = 0;
#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
- (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj)))
+ (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
if (limit == 0) {
rb_raise(rb_eArgError, "exceed depth limit");
diff --git a/parse.y b/parse.y
index bd9f1b1286..75459c354d 100644
--- a/parse.y
+++ b/parse.y
@@ -8470,7 +8470,7 @@ reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options)
if (c) {
int opt, idx;
rb_char_to_option_kcode(c, &opt, &idx);
- if (idx != ENCODING_GET(str) && ENCODING_GET(str) &&
+ if (idx != ENCODING_GET(str) && !ENCODING_IS_ASCII8BIT(str) &&
rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
compile_error(PARSER_ARG
"regexp encoding option '%c' differs from source encoding '%s'",
diff --git a/re.c b/re.c
index fb05e2e892..fc9e830131 100644
--- a/re.c
+++ b/re.c
@@ -937,7 +937,7 @@ rb_match_busy(VALUE match)
static VALUE
rb_reg_fixed_encoding_p(VALUE re)
{
- if (ENCODING_GET(re) != 0 || FL_TEST(re, KCODE_FIXED))
+ if (!ENCODING_IS_ASCII8BIT(re) || FL_TEST(re, KCODE_FIXED))
return Qtrue;
else
return Qfalse;
diff --git a/string.c b/string.c
index 752a2e5353..524f9bd9ea 100644
--- a/string.c
+++ b/string.c
@@ -1063,7 +1063,7 @@ rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
rb_encoding *str_enc = rb_enc_get(str);
rb_encoding *res_enc;
int str_cr, ptr_cr, res_cr;
- int str_a8 = ENCODING_GET(str) == 0;
+ int str_a8 = ENCODING_IS_ASCII8BIT(str);
int ptr_a8 = ptr_enc == rb_ascii8bit_encoding();
str_cr = ENC_CODERANGE(str);
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index b15bcfdf10..32415683b6 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -370,6 +370,15 @@ class TestM17N < Test::Unit::TestCase
assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
end
+ def test_regexp_windows_31j
+ begin
+ Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp")
+ rescue ArgumentError
+ err = $!
+ end
+ assert_match(/windows-31j/i, err.message)
+ end
+
def test_regexp_embed
r = eval(e("/\xc2\xa1/"))
assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }