* Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f

v5.13.3 [Bug#7972] [Bug#7974]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@39547 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-03-01 16:36:37 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2013-03-01 16:36:37 +0000
commit: 78dbaa16481c6ccda491d40695abe3d1c1d8c9b1 (patch)
tree: e0b1fe1fd2db4939c23bbf7a835be5665009860f
parent: 76855753d717a7c5bcb5c8d644d7bbc5ff256125 (diff)
8 files changed, 80 insertions, 56 deletions
diff --git a/ChangeLog b/ChangeLog
index 329a78be99..8b38879b0e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sat Mar  2 01:33:17 2013  NARUSE, Yui  <naruse@ruby-lang.org>
+
+	* Merge Onigmo 0fe387da2fee089254f6b04990541c731a26757f
+	  v5.13.3 [Bug#7972] [Bug#7974]
+
 Fri Mar  1 11:09:06 2013  Eric Hodel  <drbrain@segment7.net>
 
 	* lib/fileutils.rb:  Revert r34669 which altered the way
diff --git a/enc/shift_jis.c b/enc/shift_jis.c
index e2bcaec189..5f5a802874 100644
--- a/enc/shift_jis.c
+++ b/enc/shift_jis.c
@@ -231,7 +231,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
   }
   else if (code <= 0xffff) {
     int low = code & 0xff;
-    if (low < 0x40 || low == 0x7f || 0xfc < low)
+    if (! SJIS_ISMB_TRAIL(low))
       return ONIGERR_INVALID_CODE_POINT_VALUE;
     return 2;
   }
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index c01b6f7bb4..e3763adc59 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -40,7 +40,7 @@ extern "C" {
 #define ONIGURUMA
 #define ONIGURUMA_VERSION_MAJOR   5
 #define ONIGURUMA_VERSION_MINOR   13
-#define ONIGURUMA_VERSION_TEENY   1
+#define ONIGURUMA_VERSION_TEENY   3
 
 #ifdef __cplusplus
 # ifndef  HAVE_PROTOTYPES
diff --git a/regcomp.c b/regcomp.c
index 3b69786a94..ff74e292ed 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3,7 +3,7 @@
 **********************************************************************/
 /*-
  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2012  K.Takata  <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2013  K.Takata  <kentkt AT csc DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -118,7 +118,7 @@ static int
 bitset_is_empty(BitSetRef bs)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) {
+  for (i = 0; i < BITSET_SIZE; i++) {
     if (bs[i] != 0) return 0;
   }
   return 1;
@@ -4311,7 +4311,7 @@ typedef struct {
   OptAncInfo anc;
 
   int   reach_end;
-  int   ignore_case;
+  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
   int   len;
   UChar s[OPT_EXACT_MAXLEN];
 } OptExactInfo;
@@ -4548,7 +4548,7 @@ clear_opt_exact_info(OptExactInfo* ex)
   clear_mml(&ex->mmd);
   clear_opt_anc_info(&ex->anc);
   ex->reach_end   = 0;
-  ex->ignore_case = 0;
+  ex->ignore_case = -1;   /* unset */
   ex->len         = 0;
   ex->s[0]        = '\0';
 }
@@ -4566,11 +4566,10 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
   UChar *p, *end;
   OptAncInfo tanc;
 
-  if (! to->ignore_case && add->ignore_case) {
-    if (to->len >= add->len) return ;  /* avoid */
-
-    to->ignore_case = 1;
-  }
+  if (to->ignore_case < 0)
+    to->ignore_case = add->ignore_case;
+  else if (to->ignore_case != add->ignore_case)
+    return ;  /* avoid */
 
   p = add->s;
   end = p + add->len;
@@ -4636,7 +4635,10 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
     to->reach_end = 0;
   }
   to->len = i;
-  to->ignore_case |= add->ignore_case;
+  if (to->ignore_case < 0)
+    to->ignore_case = add->ignore_case;
+  else if (add->ignore_case >= 0)
+    to->ignore_case |= add->ignore_case;
 
   alt_merge_opt_anc_info(&to->anc, &add->anc);
   if (! to->reach_end) to->anc.right_anchor = 0;
@@ -4666,8 +4668,8 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
     if (alt->len > 1) v2 += 5;
   }
 
-  if (now->ignore_case == 0) v1 *= 2;
-  if (alt->ignore_case == 0) v2 *= 2;
+  if (now->ignore_case <= 0) v1 *= 2;
+  if (alt->ignore_case <= 0) v2 *= 2;
 
   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
     copy_opt_exact_info(now, alt);
@@ -4765,7 +4767,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
 
   if (m->value <= 0) return -1;
 
-  ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
+  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
   vm = COMP_EM_BASE * 5 * 2 / m->value;
   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
 }
@@ -4947,7 +4949,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
 
       if (! NSTRING_IS_AMBIG(node)) {
 	concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
-				  NSTRING_IS_RAW(node), env->enc);
+				  is_raw, env->enc);
+	opt->exb.ignore_case = 0;
 	if (slen > 0) {
 	  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
 	}
@@ -5260,7 +5263,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
   allow_reverse =
 	ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
 
-  if (e->ignore_case) {
+  if (e->ignore_case > 0) {
     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
       r = set_bm_skip(reg->exact, reg->exact_end, reg,
 		      reg->map, &(reg->int_map), 1);
diff --git a/regexec.c b/regexec.c
index 743b2f3cf8..553186135b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2559,7 +2559,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
 		  (int )mem, (intptr_t )s, s);
 #endif
 	  if (isnull == -1) goto fail;
-	  goto 	null_check_found;
+	  goto null_check_found;
 	}
       }
       MOP_OUT;
@@ -2585,7 +2585,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
 		  (int )mem, (intptr_t )s, s);
 #endif
 	  if (isnull == -1) goto fail;
-	  goto 	null_check_found;
+	  goto null_check_found;
 	}
 	else {
 	  STACK_PUSH_NULL_CHECK_END(mem);
diff --git a/regint.h b/regint.h
index 1a2519bc1b..3b55d8e9f9 100644
--- a/regint.h
+++ b/regint.h
@@ -390,7 +390,7 @@ typedef unsigned int  BitStatusType;
 /* bitset */
 #define BITS_PER_BYTE      8
 #define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
-#define BITS_IN_ROOM       ((int)sizeof(Bits) * BITS_PER_BYTE)
+#define BITS_IN_ROOM       ((int )sizeof(Bits) * BITS_PER_BYTE)
 #define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
 
 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -405,11 +405,11 @@ typedef Bits*          BitSetRef;
 
 #define BITSET_CLEAR(bs) do {\
   int i;\
-  for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; }	\
+  for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }	\
 } while (0)
 
-#define BS_ROOM(bs,pos)            (bs)[(int)(pos) / BITS_IN_ROOM]
-#define BS_BIT(pos)                (1 << ((int)(pos) % BITS_IN_ROOM))
+#define BS_ROOM(bs,pos)            (bs)[(int )(pos) / BITS_IN_ROOM]
+#define BS_BIT(pos)                (1 << ((int )(pos) % BITS_IN_ROOM))
 
 #define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
 #define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
@@ -457,7 +457,7 @@ typedef struct _BBuf {
 #define BBUF_WRITE1(buf,pos,byte) do{\
   int used = (pos) + 1;\
   if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
-  (buf)->p[(pos)] = (byte);\
+  (buf)->p[(pos)] = (UChar )(byte);\
   if ((buf)->used < (unsigned int )used) (buf)->used = used;\
 } while (0)
 
diff --git a/regparse.c b/regparse.c
index 74863829bc..82af0a8fc3 100644
--- a/regparse.c
+++ b/regparse.c
@@ -3,7 +3,7 @@
 **********************************************************************/
 /*-
  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2012  K.Takata  <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2013  K.Takata  <kentkt AT csc DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -156,7 +156,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
 #define BITSET_IS_EMPTY(bs,empty) do {\
   int i;\
   empty = 1;\
-  for (i = 0; i < (int )BITSET_SIZE; i++) {\
+  for (i = 0; i < BITSET_SIZE; i++) {\
     if ((bs)[i] != 0) {\
       empty = 0; break;\
     }\
@@ -185,35 +185,35 @@ static void
 bitset_invert(BitSetRef bs)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
+  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
 }
 
 static void
 bitset_invert_to(BitSetRef from, BitSetRef to)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
+  for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
 }
 
 static void
 bitset_and(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
+  for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
 }
 
 static void
 bitset_or(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
+  for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
 }
 
 static void
 bitset_copy(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
+  for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
 }
 
 extern int
@@ -425,9 +425,6 @@ typedef struct {
 typedef st_table  NameTable;
 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
 
-#define NAMEBUF_SIZE    24
-#define NAMEBUF_SIZE_1  25
-
 #ifdef ONIG_DEBUG
 static int
 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
@@ -589,7 +586,7 @@ onig_number_of_names(regex_t* reg)
   NameTable* t = (NameTable* )reg->name_table;
 
   if (IS_NOT_NULL(t))
-    return (int)t->num_entries;
+    return (int )t->num_entries;
   else
     return 0;
 }
@@ -2627,7 +2624,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
     name_end = p;
     PFETCH(c);
     if (c == end_code || c == ')' || c == '+' || c == '-') {
-      if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
+      if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
       break;
     }
 
@@ -2752,7 +2749,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
       name_end = p;
       PFETCH(c);
       if (c == end_code || c == ')') {
-	if (is_num == 2) 	r = ONIGERR_INVALID_GROUP_NAME;
+	if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
 	break;
       }
 
@@ -4124,24 +4121,36 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
 
   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
   if (r == 0) {
-    r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
-    if ((r == 0) && ascii_range) {
-      if (not != 0) {
-	r = add_code_range_to_buf0(&(cc->mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
-      }
-      else {
-	CClassNode ccascii;
-	initialize_cclass(&ccascii);
-	if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
-	  add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
+    if (ascii_range) {
+      CClassNode ccwork;
+      initialize_cclass(&ccwork);
+      r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
+				   ranges);
+      if (r == 0) {
+	if (not) {
+	  r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
 	}
 	else {
-	  bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
+	  CClassNode ccascii;
+	  initialize_cclass(&ccascii);
+	  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
+	    add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
+	  }
+	  else {
+	    bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
+	  }
+	  r = and_cclass(&ccwork, &ccascii, env);
+	  if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
+	}
+	if (r == 0) {
+	  r = or_cclass(cc, &ccwork, env);
 	}
-	r = and_cclass(cc, &ccascii, env);
-	if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
+	if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
       }
     }
+    else {
+      r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
+    }
     return r;
   }
   else if (r != ONIG_NO_SUPPORT_CONFIG) {
@@ -4562,7 +4571,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 	UChar* psave = p;
 	int i, base = tok->base;
 
-	buf[0] = tok->u.c;
+	buf[0] = (UChar )tok->u.c;
 	for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
 	  r = fetch_token_in_cc(tok, &p, end, env);
 	  if (r < 0) goto err;
@@ -4570,7 +4579,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 	    fetched = 1;
 	    break;
 	  }
-	  buf[i] = tok->u.c;
+	  buf[i] = (UChar )tok->u.c;
 	}
 
 	if (i < ONIGENC_MBC_MINLEN(env->enc)) {
@@ -4706,7 +4715,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 
 	if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
 	  CC_ESC_WARN(env, (UChar* )"-");
-	  goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
+	  goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */
 	}
 	r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
 	goto err;
@@ -5684,7 +5693,7 @@ countbits(unsigned int bits)
 static int
 is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
 {
-  const OnigCodePoint not_found = (OnigCodePoint)-1;
+  const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
   OnigCodePoint c = not_found;
   int i;
   BBuf *bbuf = cc->mbuf;
@@ -5710,7 +5719,7 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
   }
 
   /* check bitset */
-  for (i = 0; i < (int )BITSET_SIZE; i++) {
+  for (i = 0; i < BITSET_SIZE; i++) {
     Bits b1 = cc->bs[i];
     if (b1 != 0) {
       if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index 7584f6e030..255d9c5175 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -225,13 +225,20 @@ def parse_block(data)
   blocks << constname
 end
 
+# shim for Ruby 1.8
+unless {}.respond_to?(:key)
+  class Hash
+    alias key index
+  end
+end
+
 $const_cache = {}
 # make_const(property, pairs, name): Prints a 'static const' structure for a
 # given property, group of paired codepoints, and a human-friendly name for
 # the group
 def make_const(prop, data, name)
   puts "\n/* '#{prop}': #{name} */"
-  if origprop = $const_cache.index(data) # don't use Hash#key because it is 1.9 feature
+  if origprop = $const_cache.key(data)
     puts "#define CR_#{prop} CR_#{origprop}"
   else
     $const_cache[prop] = data
author	naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2013-03-01 16:36:37 +0000
committer	naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2013-03-01 16:36:37 +0000
commit	78dbaa16481c6ccda491d40695abe3d1c1d8c9b1 (patch)
tree	e0b1fe1fd2db4939c23bbf7a835be5665009860f
parent	76855753d717a7c5bcb5c8d644d7bbc5ff256125 (diff)