summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--enc/unicode/name2ctype.h193
-rw-r--r--enc/unicode/name2ctype.h.blt193
-rw-r--r--enc/unicode/name2ctype.kwd193
-rw-r--r--enc/unicode/name2ctype.src193
-rw-r--r--test/ruby/test_regexp.rb1
-rwxr-xr-xtool/enc-unicode.rb20
7 files changed, 272 insertions, 529 deletions
diff --git a/ChangeLog b/ChangeLog
index 7143bb582b..2154e62c8a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Thu Oct 8 05:45:14 2009 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * tool/enc-unicode.rb: parse range notation of UnicodeData.txt.
+
+ * enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
+ enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
+ follow above change. [ruby-dev:39444]
+
Thu Oct 8 02:46:24 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_inspect): copy by chunks.
diff --git a/enc/unicode/name2ctype.h b/enc/unicode/name2ctype.h
index e0926f8969..bbc19caf4d 100644
--- a/enc/unicode/name2ctype.h
+++ b/enc/unicode/name2ctype.h
@@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
- 482,
+ 477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
- 492,
+ 487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
- 494,
+ 489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
- 508,
+ 503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
- 502,
+ 497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
- 495,
+ 484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xfa2d,
+ 0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
+ 0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
- 26,
+ 20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xf8ff,
+ 0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10ffff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
- 495,
+ 484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
- 0x3401, 0x4db4,
0x4db6, 0x4dbf,
- 0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
- 0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
- 0xd801, 0xdb7e,
- 0xdb81, 0xdbfe,
- 0xdc01, 0xdffe,
- 0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
- 0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
- 0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
- 0xf0001, 0xffffc,
- 0xffffe, 0xfffff,
- 0x100001, 0x10ffff,
+ 0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
- 6,
- 0xe000, 0xe000,
- 0xf8ff, 0xf8ff,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10fffd,
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
- 4,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xdfff,
+ 1,
+ 0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
- 427,
+ 422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
- 316,
+ 311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */
diff --git a/enc/unicode/name2ctype.h.blt b/enc/unicode/name2ctype.h.blt
index e0926f8969..bbc19caf4d 100644
--- a/enc/unicode/name2ctype.h.blt
+++ b/enc/unicode/name2ctype.h.blt
@@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
- 482,
+ 477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
- 492,
+ 487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
- 494,
+ 489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
- 508,
+ 503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
- 502,
+ 497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
- 495,
+ 484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xfa2d,
+ 0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
+ 0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
- 26,
+ 20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xf8ff,
+ 0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10ffff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
- 495,
+ 484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
- 0x3401, 0x4db4,
0x4db6, 0x4dbf,
- 0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
- 0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
- 0xd801, 0xdb7e,
- 0xdb81, 0xdbfe,
- 0xdc01, 0xdffe,
- 0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
- 0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
- 0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
- 0xf0001, 0xffffc,
- 0xffffe, 0xfffff,
- 0x100001, 0x10ffff,
+ 0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
- 6,
- 0xe000, 0xe000,
- 0xf8ff, 0xf8ff,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10fffd,
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
- 4,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xdfff,
+ 1,
+ 0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
- 427,
+ 422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
- 316,
+ 311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */
diff --git a/enc/unicode/name2ctype.kwd b/enc/unicode/name2ctype.kwd
index d1304671f0..46058a8341 100644
--- a/enc/unicode/name2ctype.kwd
+++ b/enc/unicode/name2ctype.kwd
@@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
- 482,
+ 477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
- 492,
+ 487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
- 494,
+ 489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
- 508,
+ 503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
- 502,
+ 497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
- 495,
+ 484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xfa2d,
+ 0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
+ 0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
- 26,
+ 20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xf8ff,
+ 0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10ffff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
- 495,
+ 484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
- 0x3401, 0x4db4,
0x4db6, 0x4dbf,
- 0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
- 0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
- 0xd801, 0xdb7e,
- 0xdb81, 0xdbfe,
- 0xdc01, 0xdffe,
- 0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
- 0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
- 0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
- 0xf0001, 0xffffc,
- 0xffffe, 0xfffff,
- 0x100001, 0x10ffff,
+ 0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
- 6,
- 0xe000, 0xe000,
- 0xf8ff, 0xf8ff,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10fffd,
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
- 4,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xdfff,
+ 1,
+ 0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
- 427,
+ 422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
- 316,
+ 311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */
diff --git a/enc/unicode/name2ctype.src b/enc/unicode/name2ctype.src
index d1304671f0..46058a8341 100644
--- a/enc/unicode/name2ctype.src
+++ b/enc/unicode/name2ctype.src
@@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = {
/* 'Alpha': [[:Alpha:]] */
static const OnigCodePoint CR_Alpha[] = {
- 482,
+ 477,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = {
0xaadb, 0xaadd,
0xabc0, 0xabea,
0xabec, 0xabed,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alpha */
@@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = {
/* 'Graph': [[:Graph:]] */
static const OnigCodePoint CR_Graph[] = {
- 492,
+ 487,
0x0021, 0x007e,
0x00a1, 0x00ac,
0x00ae, 0x0377,
@@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Graph */
@@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = {
/* 'Print': [[:Print:]] */
static const OnigCodePoint CR_Print[] = {
- 494,
+ 489,
0x0009, 0x000d,
0x0020, 0x007e,
0x0085, 0x0085,
@@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Print */
@@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = {
/* 'Word': [[:Word:]] */
static const OnigCodePoint CR_Word[] = {
- 508,
+ 503,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Word */
/* 'Alnum': [[:Alnum:]] */
static const OnigCodePoint CR_Alnum[] = {
- 502,
+ 497,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = {
0xabc0, 0xabea,
0xabec, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = {
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
0x1d7ce, 0x1d7ff,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0100, 0xe01ef,
}; /* CR_Alnum */
@@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = {
/* 'Assigned': - */
static const OnigCodePoint CR_Assigned[] = {
- 495,
+ 484,
0x0000, 0x0377,
0x037a, 0x037e,
0x0384, 0x038a,
@@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = {
0x31c0, 0x31e3,
0x31f0, 0x321e,
0x3220, 0x32fe,
- 0x3300, 0x3400,
- 0x4db5, 0x4db5,
- 0x4dc0, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
0xa000, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = {
0xaadb, 0xaadf,
0xabc0, 0xabed,
0xabf0, 0xabf9,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xfa2d,
+ 0xd800, 0xfa2d,
0xfa30, 0xfa6d,
0xfa70, 0xfad9,
0xfb00, 0xfb06,
@@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = {
0x1f200, 0x1f200,
0x1f210, 0x1f231,
0x1f240, 0x1f248,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
0xe0100, 0xe01ef,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
+ 0xf0000, 0xffffd,
}; /* CR_Assigned */
/* 'C': Major Category */
static const OnigCodePoint CR_C[] = {
- 26,
+ 20,
0x0000, 0x001f,
0x007f, 0x009f,
0x00ad, 0x00ad,
@@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = {
0x202a, 0x202e,
0x2060, 0x2064,
0x206a, 0x206f,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xe000,
- 0xf8ff, 0xf8ff,
+ 0xd800, 0xf8ff,
0xfeff, 0xfeff,
0xfff9, 0xfffb,
0x110bd, 0x110bd,
0x1d173, 0x1d17a,
0xe0001, 0xe0001,
0xe0020, 0xe007f,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10ffff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10ffff,
}; /* CR_C */
/* 'Cc': General Category */
@@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = {
/* 'Cn': General Category */
static const OnigCodePoint CR_Cn[] = {
- 495,
+ 484,
0x0378, 0x0379,
0x037f, 0x0383,
0x038b, 0x038b,
@@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = {
0x31e4, 0x31ef,
0x321f, 0x321f,
0x32ff, 0x32ff,
- 0x3401, 0x4db4,
0x4db6, 0x4dbf,
- 0x4e01, 0x9fca,
0x9fcc, 0x9fff,
0xa48d, 0xa48f,
0xa4c7, 0xa4cf,
@@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = {
0xaae0, 0xabbf,
0xabee, 0xabef,
0xabfa, 0xabff,
- 0xac01, 0xd7a2,
0xd7a4, 0xd7af,
0xd7c7, 0xd7ca,
0xd7fc, 0xd7ff,
- 0xd801, 0xdb7e,
- 0xdb81, 0xdbfe,
- 0xdc01, 0xdffe,
- 0xe001, 0xf8fe,
0xfa2e, 0xfa2f,
0xfa6e, 0xfa6f,
0xfada, 0xfaff,
@@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = {
0x1f201, 0x1f20f,
0x1f232, 0x1f23f,
0x1f249, 0x1ffff,
- 0x20001, 0x2a6d5,
0x2a6d7, 0x2a6ff,
- 0x2a701, 0x2b733,
0x2b735, 0x2f7ff,
0x2fa1e, 0xe0000,
0xe0002, 0xe001f,
0xe0080, 0xe00ff,
0xe01f0, 0xeffff,
- 0xf0001, 0xffffc,
- 0xffffe, 0xfffff,
- 0x100001, 0x10ffff,
+ 0xffffe, 0x10ffff,
}; /* CR_Cn */
/* 'Co': General Category */
static const OnigCodePoint CR_Co[] = {
- 6,
- 0xe000, 0xe000,
- 0xf8ff, 0xf8ff,
- 0xf0000, 0xf0000,
- 0xffffd, 0xffffd,
- 0x100000, 0x100000,
- 0x10fffd, 0x10fffd,
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
}; /* CR_Co */
/* 'Cs': General Category */
static const OnigCodePoint CR_Cs[] = {
- 4,
- 0xd800, 0xd800,
- 0xdb7f, 0xdb80,
- 0xdbff, 0xdc00,
- 0xdfff, 0xdfff,
+ 1,
+ 0xd800, 0xdfff,
}; /* CR_Cs */
/* 'L': Major Category */
static const OnigCodePoint CR_L[] = {
- 427,
+ 422,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa48c,
0xa4d0, 0xa4fd,
0xa500, 0xa60c,
@@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadd,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = {
0x1d78a, 0x1d7a8,
0x1d7aa, 0x1d7c2,
0x1d7c4, 0x1d7cb,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_L */
@@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = {
/* 'Lo': General Category */
static const OnigCodePoint CR_Lo[] = {
- 316,
+ 311,
0x01bb, 0x01bb,
0x01c0, 0x01c3,
0x0294, 0x0294,
@@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = {
0x3131, 0x318e,
0x31a0, 0x31b7,
0x31f0, 0x31ff,
- 0x3400, 0x3400,
- 0x4db5, 0x4db5,
- 0x4e00, 0x4e00,
- 0x9fcb, 0x9fcb,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
0xa000, 0xa014,
0xa016, 0xa48c,
0xa4d0, 0xa4f7,
@@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = {
0xaac2, 0xaac2,
0xaadb, 0xaadc,
0xabc0, 0xabe2,
- 0xac00, 0xac00,
- 0xd7a3, 0xd7a3,
+ 0xac00, 0xd7a3,
0xd7b0, 0xd7c6,
0xd7cb, 0xd7fb,
0xf900, 0xfa2d,
@@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = {
0x11083, 0x110af,
0x12000, 0x1236e,
0x13000, 0x1342e,
- 0x20000, 0x20000,
- 0x2a6d6, 0x2a6d6,
- 0x2a700, 0x2a700,
- 0x2b734, 0x2b734,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
0x2f800, 0x2fa1d,
}; /* CR_Lo */
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index cdbe277ba8..1ad202652d 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -745,6 +745,7 @@ class TestRegexp < Test::Unit::TestCase
assert_match(/^\u3042{0}\p{Any}$/, "a")
assert_match(/^\u3042{0}\p{Any}$/, "\u3041")
assert_match(/^\u3042{0}\p{Any}$/, "\0")
+ assert_match(/^\p{Lo}{4}$/u, "\u3401\u4E01\u{20001}\u{2A701}")
assert_no_match(/^\u3042{0}\p{Any}$/, "\0\0")
assert_no_match(/^\u3042{0}\p{Any}$/, "")
assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + "\u3042" + '}$/') }
diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index 8429bcc178..57edb3b3e5 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -40,26 +40,36 @@ end
def parse_unicode_data(file)
last_cp = 0
data = {'Cn' => []}
+ beg_cp = nil
IO.foreach(file) do |line|
fields = line.split(';')
cp = fields[0].to_i(16)
+ case fields[1]
+ when /\A<(.*),\s*First>\z/
+ beg_cp = cp
+ next
+ when /\A<(.*),\s*Last>\z/
+ cps = (beg_cp..cp).to_a
+ else
+ beg_cp = cp
+ cps = [cp]
+ end
+
# The Cn category represents unassigned characters. These are not listed in
# UnicodeData.txt so we must derive them by looking for 'holes' in the range
# of listed codepoints. We increment the last codepoint seen and compare it
# with the current codepoint. If the current codepoint is less than
# last_cp.next we have found a hole, so we add the missing codepoint to the
# Cn category.
- while ((last_cp = last_cp.next) < cp)
- data['Cn'] << last_cp
- end
+ data['Cn'].concat((last_cp.next...beg_cp).to_a)
# The third field denotes the 'General' category, e.g. Lu
- (data[fields[2]] ||= []) << cp
+ (data[fields[2]] ||= []).concat(cps)
# The 'Major' category is the first letter of the 'General' category, e.g.
# 'Lu' -> 'L'
- (data[fields[2][0,1]] ||= []) << cp
+ (data[fields[2][0,1]] ||= []).concat(cps)
last_cp = cp
end