1 files changed, 80 insertions, 71 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index 0a7f59cf06..ef239d5e9e 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -41,84 +41,41 @@ class Reline::Unicode
   OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/
   WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
 
-  def self.get_mbchar_byte_size_by_first_char(c)
-    # Checks UTF-8 character byte size
-    case c.ord
-    # 0b0xxxxxxx
-    when ->(code) { (code ^ 0b10000000).allbits?(0b10000000) } then 1
-    # 0b110xxxxx
-    when ->(code) { (code ^ 0b00100000).allbits?(0b11100000) } then 2
-    # 0b1110xxxx
-    when ->(code) { (code ^ 0b00010000).allbits?(0b11110000) } then 3
-    # 0b11110xxx
-    when ->(code) { (code ^ 0b00001000).allbits?(0b11111000) } then 4
-    # 0b111110xx
-    when ->(code) { (code ^ 0b00000100).allbits?(0b11111100) } then 5
-    # 0b1111110x
-    when ->(code) { (code ^ 0b00000010).allbits?(0b11111110) } then 6
-    # successor of mbchar
-    else 0
-    end
-  end
-
   def self.escape_for_print(str)
     str.chars.map! { |gr|
-      escaped = EscapedPairs[gr.ord]
-      if escaped && gr != -"\n" && gr != -"\t"
-        escaped
-      else
+      case gr
+      when -"\n"
         gr
+      when -"\t"
+        -'  '
+      else
+        EscapedPairs[gr.ord] || gr
       end
     }.join
   end
 
   require 'reline/unicode/east_asian_width'
 
-  HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
-
-  MBCharWidthRE = /
-    (?<width_2_1>
-      [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
-    )
-  | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
-  | (?<width_0>^\p{M})
-  | (?<width_2_2>
-      #{ EastAsianWidth::TYPE_F }
-    | #{ EastAsianWidth::TYPE_W }
-    )
-  | (?<width_1>
-      #{ EastAsianWidth::TYPE_H }
-    | #{ EastAsianWidth::TYPE_NA }
-    | #{ EastAsianWidth::TYPE_N }
-    )(?!#{ HalfwidthDakutenHandakuten })
-  | (?<width_2_3>
-      (?: #{ EastAsianWidth::TYPE_H }
-        | #{ EastAsianWidth::TYPE_NA }
-        | #{ EastAsianWidth::TYPE_N })
-      #{ HalfwidthDakutenHandakuten }
-    )
-  | (?<ambiguous_width>
-      #{EastAsianWidth::TYPE_A}
-    )
-  /x
-
   def self.get_mbchar_width(mbchar)
     ord = mbchar.ord
-    if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
+    if ord <= 0x1F # in EscapedPairs
       return 2
-    elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
+    elsif ord <= 0x7E # printable ASCII chars
       return 1
     end
-    m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
-    case
-    when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
-    when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
-    when m[:width_3] then 3
-    when m[:width_0] then 0
-    when m[:width_1] then 1
-    when m[:ambiguous_width] then Reline.ambiguous_width
+    utf8_mbchar = mbchar.encode(Encoding::UTF_8)
+    ord = utf8_mbchar.ord
+    chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
+    size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
+    if size == -1
+      Reline.ambiguous_width
+    elsif size == 1 && utf8_mbchar.size >= 2
+      second_char_ord = utf8_mbchar[1].ord
+      # Halfwidth Dakuten Handakuten
+      # Only these two character has Letter Modifier category and can be combined in a single grapheme cluster
+      (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
     else
-      nil
+      size
     end
   end
 
@@ -148,10 +105,10 @@ class Reline::Unicode
     end
   end
 
-  def self.split_by_width(str, max_width, encoding = str.encoding)
+  def self.split_by_width(str, max_width, encoding = str.encoding, offset: 0)
     lines = [String.new(encoding: encoding)]
     height = 1
-    width = 0
+    width = offset
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
     seq = String.new(encoding: encoding)
@@ -165,7 +122,13 @@ class Reline::Unicode
         lines.last << NON_PRINTING_END
       when csi
         lines.last << csi
-        seq << csi
+        unless in_zero_width
+          if csi == -"\e[m" || csi == -"\e[0m"
+            seq.clear
+          else
+            seq << csi
+          end
+        end
       when osc
         lines.last << osc
         seq << osc
@@ -193,32 +156,78 @@ class Reline::Unicode
 
   # Take a chunk of a String cut by width with escape sequences.
   def self.take_range(str, start_col, max_width)
+    take_mbchar_range(str, start_col, max_width).first
+  end
+
+  def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
     chunk = String.new(encoding: str.encoding)
+
+    end_col = start_col + width
     total_width = 0
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
+    chunk_start_col = nil
+    chunk_end_col = nil
+    has_csi = false
     rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
       case
       when non_printing_start
         in_zero_width = true
+        chunk << NON_PRINTING_START
       when non_printing_end
         in_zero_width = false
+        chunk << NON_PRINTING_END
       when csi
+        has_csi = true
         chunk << csi
       when osc
         chunk << osc
       when gc
         if in_zero_width
           chunk << gc
+          next
+        end
+
+        mbchar_width = get_mbchar_width(gc)
+        prev_width = total_width
+        total_width += mbchar_width
+
+        if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
+          # Current character haven't reached start_col yet
+          next
+        elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
+          # Add preceding padding. This padding might have background color.
+          chunk << ' '
+          chunk_start_col ||= start_col
+          chunk_end_col = total_width
+          next
+        elsif (cover_end ? prev_width < end_col : total_width <= end_col)
+          # Current character is in the range
+          chunk << gc
+          chunk_start_col ||= prev_width
+          chunk_end_col = total_width
+          break if total_width >= end_col
         else
-          mbchar_width = get_mbchar_width(gc)
-          total_width += mbchar_width
-          break if (start_col + max_width) < total_width
-          chunk << gc if start_col < total_width
+          # Current character exceeds end_col
+          if padding && end_col < total_width
+            # Add succeeding padding. This padding might have background color.
+            chunk << ' '
+            chunk_start_col ||= prev_width
+            chunk_end_col = end_col
+          end
+          break
         end
       end
     end
-    chunk
+    chunk_start_col ||= start_col
+    chunk_end_col ||= start_col
+    if padding && chunk_end_col < end_col
+      # Append padding. This padding should not include background color.
+      chunk << "\e[0m" if has_csi
+      chunk << ' ' * (end_col - chunk_end_col)
+      chunk_end_col = end_col
+    end
+    [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
   end
 
   def self.get_next_mbchar_size(line, byte_pointer)