summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: aycabta <aycabta@gmail.com> 2021-12-23 23:08:37 +0900
committer: git <svn-admin@ruby-lang.org> 2021-12-24 00:51:47 +0900
commit: 726cc8122e5abb17a2af156e83ac88c1e4e9a42e (patch)
tree: 4d19869e9c2abf8631e56e0f10195c649180fdcc /lib
parent: daf4a8884b36f44127fca9d0502080b32eda7f03 (diff)
[ruby/reline] "Halfwidth char + halfwidth (han)dakuten" is a single grapheme.
When a halfwidth character is followed by a halfwidth dakuten or a halfwidth handakuten character, it should be treated as a single grapheme. https://github.com/ruby/reline/commit/9f20b9ec28
Diffstat (limited to 'lib')
-rw-r--r-- lib/reline/unicode.rb 10
1 files changed, 9 insertions, 1 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index 80cc54a05e..6000c9f82a 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -79,6 +79,8 @@ class Reline::Unicode
require 'reline/unicode/east_asian_width'
+ HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
+
MBCharWidthRE = /
(?<width_2_1>
[#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
@@ -93,6 +95,12 @@ class Reline::Unicode
#{ EastAsianWidth::TYPE_H }
| #{ EastAsianWidth::TYPE_NA }
| #{ EastAsianWidth::TYPE_N }
+ )(?!#{ HalfwidthDakutenHandakuten })
+ | (?<width_2_3>
+ (?: #{ EastAsianWidth::TYPE_H }
+ | #{ EastAsianWidth::TYPE_NA }
+ | #{ EastAsianWidth::TYPE_N })
+ #{ HalfwidthDakutenHandakuten }
)
| (?<ambiguous_width>
#{EastAsianWidth::TYPE_A}
@@ -109,7 +117,7 @@ class Reline::Unicode
m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
case
when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
- when m[:width_2_1], m[:width_2_2] then 2
+ when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
when m[:width_3] then 3
when m[:width_0] then 0
when m[:width_1] then 1