diff options
Diffstat (limited to 'lib/reline/unicode.rb')
-rw-r--r-- | lib/reline/unicode.rb | 93 |
1 files changed, 39 insertions, 54 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb index 80cc54a05e..26ef207ba6 100644 --- a/lib/reline/unicode.rb +++ b/lib/reline/unicode.rb @@ -38,33 +38,8 @@ class Reline::Unicode NON_PRINTING_START = "\1" NON_PRINTING_END = "\2" CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/ - OSC_REGEXP = /\e\]\d+(?:;[^;]+)*\a/ + OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/ WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o - NON_PRINTING_START_INDEX = 0 - NON_PRINTING_END_INDEX = 1 - CSI_REGEXP_INDEX = 2 - OSC_REGEXP_INDEX = 3 - GRAPHEME_CLUSTER_INDEX = 4 - - def self.get_mbchar_byte_size_by_first_char(c) - # Checks UTF-8 character byte size - case c.ord - # 0b0xxxxxxx - when ->(code) { (code ^ 0b10000000).allbits?(0b10000000) } then 1 - # 0b110xxxxx - when ->(code) { (code ^ 0b00100000).allbits?(0b11100000) } then 2 - # 0b1110xxxx - when ->(code) { (code ^ 0b00010000).allbits?(0b11110000) } then 3 - # 0b11110xxx - when ->(code) { (code ^ 0b00001000).allbits?(0b11111000) } then 4 - # 0b111110xx - when ->(code) { (code ^ 0b00000100).allbits?(0b11111100) } then 5 - # 0b1111110x - when ->(code) { (code ^ 0b00000010).allbits?(0b11111110) } then 6 - # successor of mbchar - else 0 - end - end def self.escape_for_print(str) str.chars.map! { |gr| @@ -79,6 +54,8 @@ class Reline::Unicode require 'reline/unicode/east_asian_width' + HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/ + MBCharWidthRE = / (?<width_2_1> [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...) @@ -93,6 +70,12 @@ class Reline::Unicode #{ EastAsianWidth::TYPE_H } | #{ EastAsianWidth::TYPE_NA } | #{ EastAsianWidth::TYPE_N } + )(?!#{ HalfwidthDakutenHandakuten }) + | (?<width_2_3> + (?: #{ EastAsianWidth::TYPE_H } + | #{ EastAsianWidth::TYPE_NA } + | #{ EastAsianWidth::TYPE_N }) + #{ HalfwidthDakutenHandakuten } ) | (?<ambiguous_width> #{EastAsianWidth::TYPE_A} @@ -109,7 +92,7 @@ class Reline::Unicode m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE) case when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER - when m[:width_2_1], m[:width_2_2] then 2 + when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2 when m[:width_3] then 3 when m[:width_0] then 0 when m[:width_1] then 1 @@ -124,15 +107,14 @@ class Reline::Unicode width = 0 rest = str.encode(Encoding::UTF_8) in_zero_width = false - rest.scan(WIDTH_SCANNER) do |gc| + rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc| case - when gc[NON_PRINTING_START_INDEX] + when non_printing_start in_zero_width = true - when gc[NON_PRINTING_END_INDEX] + when non_printing_end in_zero_width = false - when gc[CSI_REGEXP_INDEX], gc[OSC_REGEXP_INDEX] - when gc[GRAPHEME_CLUSTER_INDEX] - gc = gc[GRAPHEME_CLUSTER_INDEX] + when csi, osc + when gc unless in_zero_width width += get_mbchar_width(gc) end @@ -152,24 +134,28 @@ class Reline::Unicode width = 0 rest = str.encode(Encoding::UTF_8) in_zero_width = false - rest.scan(WIDTH_SCANNER) do |gc| + seq = String.new(encoding: encoding) + rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc| case - when gc[NON_PRINTING_START_INDEX] + when non_printing_start in_zero_width = true - when gc[NON_PRINTING_END_INDEX] + lines.last << NON_PRINTING_START + when non_printing_end in_zero_width = false - when gc[CSI_REGEXP_INDEX] - lines.last << gc[CSI_REGEXP_INDEX] - when gc[OSC_REGEXP_INDEX] - lines.last << gc[OSC_REGEXP_INDEX] - when gc[GRAPHEME_CLUSTER_INDEX] - gc = gc[GRAPHEME_CLUSTER_INDEX] + lines.last << NON_PRINTING_END + when csi + lines.last << csi + seq << csi + when osc + lines.last << osc + seq << osc + when gc unless in_zero_width mbchar_width = get_mbchar_width(gc) if (width += mbchar_width) > max_width width = mbchar_width lines << nil - lines << String.new(encoding: encoding) + lines << seq.dup height += 1 end end @@ -186,23 +172,22 @@ class Reline::Unicode end # Take a chunk of a String cut by width with escape sequences. - def self.take_range(str, start_col, max_width, encoding = str.encoding) - chunk = String.new(encoding: encoding) + def self.take_range(str, start_col, max_width) + chunk = String.new(encoding: str.encoding) total_width = 0 rest = str.encode(Encoding::UTF_8) in_zero_width = false - rest.scan(WIDTH_SCANNER) do |gc| + rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc| case - when gc[NON_PRINTING_START_INDEX] + when non_printing_start in_zero_width = true - when gc[NON_PRINTING_END_INDEX] + when non_printing_end in_zero_width = false - when gc[CSI_REGEXP_INDEX] - chunk << gc[CSI_REGEXP_INDEX] - when gc[OSC_REGEXP_INDEX] - chunk << gc[OSC_REGEXP_INDEX] - when gc[GRAPHEME_CLUSTER_INDEX] - gc = gc[GRAPHEME_CLUSTER_INDEX] + when csi + chunk << csi + when osc + chunk << osc + when gc if in_zero_width chunk << gc else |