summary | refs | log | tree | commit | diff
path: root/lib/reline/unicode.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/reline/unicode.rb')
-rw-r--r-- lib/reline/unicode.rb 119
1 files changed, 71 insertions, 48 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index 7f94e95287..ef239d5e9e 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -43,62 +43,39 @@ class Reline::Unicode
def self.escape_for_print(str)
str.chars.map! { |gr|
- escaped = EscapedPairs[gr.ord]
- if escaped && gr != -"\n" && gr != -"\t"
- escaped
- else
+ case gr
+ when -"\n"
gr
+ when -"\t"
+ -' '
+ else
+ EscapedPairs[gr.ord] || gr
end
}.join
end
require 'reline/unicode/east_asian_width'
- HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
-
- MBCharWidthRE = /
- (?<width_2_1>
- [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
- )
- | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
- | (?<width_0>^\p{M})
- | (?<width_2_2>
- #{ EastAsianWidth::TYPE_F }
- | #{ EastAsianWidth::TYPE_W }
- )
- | (?<width_1>
- #{ EastAsianWidth::TYPE_H }
- | #{ EastAsianWidth::TYPE_NA }
- | #{ EastAsianWidth::TYPE_N }
- )(?!#{ HalfwidthDakutenHandakuten })
- | (?<width_2_3>
- (?: #{ EastAsianWidth::TYPE_H }
- | #{ EastAsianWidth::TYPE_NA }
- | #{ EastAsianWidth::TYPE_N })
- #{ HalfwidthDakutenHandakuten }
- )
- | (?<ambiguous_width>
- #{EastAsianWidth::TYPE_A}
- )
- /x
-
def self.get_mbchar_width(mbchar)
ord = mbchar.ord
- if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
+ if ord <= 0x1F # in EscapedPairs
return 2
- elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
+ elsif ord <= 0x7E # printable ASCII chars
return 1
end
- m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
- case
- when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
- when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
- when m[:width_3] then 3
- when m[:width_0] then 0
- when m[:width_1] then 1
- when m[:ambiguous_width] then Reline.ambiguous_width
+ utf8_mbchar = mbchar.encode(Encoding::UTF_8)
+ ord = utf8_mbchar.ord
+ chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
+ size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
+ if size == -1
+ Reline.ambiguous_width
+ elsif size == 1 && utf8_mbchar.size >= 2
+ second_char_ord = utf8_mbchar[1].ord
+ # Halfwidth Dakuten Handakuten
+ # Only these two characters have the Letter Modifier category and can be combined in a single grapheme cluster
+ (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
else
- nil
+ size
end
end
@@ -179,32 +156,78 @@ class Reline::Unicode
# Take a chunk of a String cut by width with escape sequences.
def self.take_range(str, start_col, max_width)
+ take_mbchar_range(str, start_col, max_width).first
+ end
+
+ def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
chunk = String.new(encoding: str.encoding)
+
+ end_col = start_col + width
total_width = 0
rest = str.encode(Encoding::UTF_8)
in_zero_width = false
+ chunk_start_col = nil
+ chunk_end_col = nil
+ has_csi = false
rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
case
when non_printing_start
in_zero_width = true
+ chunk << NON_PRINTING_START
when non_printing_end
in_zero_width = false
+ chunk << NON_PRINTING_END
when csi
+ has_csi = true
chunk << csi
when osc
chunk << osc
when gc
if in_zero_width
chunk << gc
+ next
+ end
+
+ mbchar_width = get_mbchar_width(gc)
+ prev_width = total_width
+ total_width += mbchar_width
+
+ if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
+ # Current character hasn't reached start_col yet
+ next
+ elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
+ # Add preceding padding. This padding might have background color.
+ chunk << ' '
+ chunk_start_col ||= start_col
+ chunk_end_col = total_width
+ next
+ elsif (cover_end ? prev_width < end_col : total_width <= end_col)
+ # Current character is in the range
+ chunk << gc
+ chunk_start_col ||= prev_width
+ chunk_end_col = total_width
+ break if total_width >= end_col
else
- mbchar_width = get_mbchar_width(gc)
- total_width += mbchar_width
- break if (start_col + max_width) < total_width
- chunk << gc if start_col < total_width
+ # Current character exceeds end_col
+ if padding && end_col < total_width
+ # Add succeeding padding. This padding might have background color.
+ chunk << ' '
+ chunk_start_col ||= prev_width
+ chunk_end_col = end_col
+ end
+ break
end
end
end
- chunk
+ chunk_start_col ||= start_col
+ chunk_end_col ||= start_col
+ if padding && chunk_end_col < end_col
+ # Append padding. This padding should not include background color.
+ chunk << "\e[0m" if has_csi
+ chunk << ' ' * (end_col - chunk_end_col)
+ chunk_end_col = end_col
+ end
+ [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
end
def self.get_next_mbchar_size(line, byte_pointer)