diff options
Diffstat (limited to 'lib/reline/unicode.rb')
-rw-r--r-- | lib/reline/unicode.rb | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb new file mode 100644 index 0000000000..bdb182f59c --- /dev/null +++ b/lib/reline/unicode.rb @@ -0,0 +1,415 @@ +class Reline::Unicode + EscapedPairs = { + 0x00 => '^@', + 0x01 => '^A', # C-a + 0x02 => '^B', + 0x03 => '^C', + 0x04 => '^D', + 0x05 => '^E', + 0x06 => '^F', + 0x07 => '^G', + 0x08 => '^H', # Backspace + 0x09 => '^I', + 0x0A => '^J', + 0x0B => '^K', + 0x0C => '^L', + 0x0D => '^M', # Enter + 0x0E => '^N', + 0x0F => '^O', + 0x10 => '^P', + 0x11 => '^Q', + 0x12 => '^R', + 0x13 => '^S', + 0x14 => '^T', + 0x15 => '^U', + 0x16 => '^V', + 0x17 => '^W', + 0x18 => '^X', + 0x19 => '^Y', + 0x1A => '^Z', # C-z + 0x1B => '^[', # C-[ C-3 + 0x1D => '^]', # C-] + 0x1E => '^^', # C-~ C-6 + 0x1F => '^_', # C-_ C-7 + 0x7F => '^?', # C-? C-8 + } + EscapedChars = EscapedPairs.keys.map(&:chr) + + def self.get_mbchar_byte_size_by_first_char(c) + # Checks UTF-8 character byte size + case c.ord + # 0b0xxxxxxx + when ->(code) { (code ^ 0b10000000).allbits?(0b10000000) } then 1 + # 0b110xxxxx + when ->(code) { (code ^ 0b00100000).allbits?(0b11100000) } then 2 + # 0b1110xxxx + when ->(code) { (code ^ 0b00010000).allbits?(0b11110000) } then 3 + # 0b11110xxx + when ->(code) { (code ^ 0b00001000).allbits?(0b11111000) } then 4 + # 0b111110xx + when ->(code) { (code ^ 0b00000100).allbits?(0b11111100) } then 5 + # 0b1111110x + when ->(code) { (code ^ 0b00000010).allbits?(0b11111110) } then 6 + # successor of mbchar + else 0 + end + end + + def self.get_mbchar_width(mbchar) + case mbchar.encode(Encoding::UTF_8) + when *EscapedChars # ^ + char, such as ^M, ^H, ^[, ... + 2 + when /^\u{2E3B}/ # THREE-EM DASH + 3 + when /^\p{M}/ + 0 + when EastAsianWidth::TYPE_A + Reline.ambiguous_width + when EastAsianWidth::TYPE_F, EastAsianWidth::TYPE_W + 2 + when EastAsianWidth::TYPE_H, EastAsianWidth::TYPE_NA, EastAsianWidth::TYPE_N + 1 + else + nil + end + end + + def self.get_next_mbchar_size(line, byte_pointer) + grapheme = line.byteslice(byte_pointer..-1).grapheme_clusters.first + grapheme ? grapheme.bytesize : 0 + end + + def self.get_prev_mbchar_size(line, byte_pointer) + if byte_pointer.zero? + 0 + else + grapheme = line.byteslice(0..(byte_pointer - 1)).grapheme_clusters.last + grapheme ? grapheme.bytesize : 0 + end + end + + def self.em_forward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ + width += get_mbchar_width(mbchar) + byte_size += size + end + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.em_forward_word_with_capitalization(line, byte_pointer) + width = 0 + byte_size = 0 + new_str = String.new + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ + new_str += mbchar + width += get_mbchar_width(mbchar) + byte_size += size + end + first = true + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ + if first + new_str += mbchar.upcase + first = false + else + new_str += mbchar.downcase + end + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width, new_str] + end + + def self.em_backward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/ + width += get_mbchar_width(mbchar) + byte_size += size + end + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.em_big_backward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar =~ /\S/ + width += get_mbchar_width(mbchar) + byte_size += size + end + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar =~ /\s/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.vi_big_forward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar =~ /\s/ + width += get_mbchar_width(mbchar) + byte_size += size + end + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar =~ /\S/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.vi_big_forward_end_word(line, byte_pointer) + if (line.bytesize - 1) > byte_pointer + size = get_next_mbchar_size(line, byte_pointer) + mbchar = line.byteslice(byte_pointer, size) + width = get_mbchar_width(mbchar) + byte_size = size + else + return [0, 0] + end + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar =~ /\S/ + width += get_mbchar_width(mbchar) + byte_size += size + end + prev_width = width + prev_byte_size = byte_size + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar =~ /\s/ + prev_width = width + prev_byte_size = byte_size + width += get_mbchar_width(mbchar) + byte_size += size + end + [prev_byte_size, prev_width] + end + + def self.vi_big_backward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar =~ /\S/ + width += get_mbchar_width(mbchar) + byte_size += size + end + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + break if mbchar =~ /\s/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.vi_forward_word(line, byte_pointer) + if (line.bytesize - 1) > byte_pointer + size = get_next_mbchar_size(line, byte_pointer) + mbchar = line.byteslice(byte_pointer, size) + if mbchar =~ /\w/ + started_by = :word + elsif mbchar =~ /\s/ + started_by = :space + else + started_by = :non_word_printable + end + width = get_mbchar_width(mbchar) + byte_size = size + else + return [0, 0] + end + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + case started_by + when :word + break if mbchar =~ /\W/ + when :space + break if mbchar =~ /\S/ + when :non_word_printable + break if mbchar =~ /\w|\s/ + end + width += get_mbchar_width(mbchar) + byte_size += size + end + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + break if mbchar =~ /\S/ + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.vi_forward_end_word(line, byte_pointer) + if (line.bytesize - 1) > byte_pointer + size = get_next_mbchar_size(line, byte_pointer) + mbchar = line.byteslice(byte_pointer, size) + if mbchar =~ /\w/ + started_by = :word + elsif mbchar =~ /\s/ + started_by = :space + else + started_by = :non_word_printable + end + width = get_mbchar_width(mbchar) + byte_size = size + else + return [0, 0] + end + if (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + if mbchar =~ /\w/ + second = :word + elsif mbchar =~ /\s/ + second = :space + else + second = :non_word_printable + end + second_width = get_mbchar_width(mbchar) + second_byte_size = size + else + return [byte_size, width] + end + if second == :space + width += second_width + byte_size += second_byte_size + while (line.bytesize - 1) > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + if mbchar =~ /\S/ + if mbchar =~ /\w/ + started_by = :word + else + started_by = :non_word_printable + end + break + end + width += get_mbchar_width(mbchar) + byte_size += size + end + else + case [started_by, second] + when [:word, :non_word_printable], [:non_word_printable, :word] + started_by = second + else + width += second_width + byte_size += second_byte_size + started_by = second + end + end + prev_width = width + prev_byte_size = byte_size + while line.bytesize > (byte_pointer + byte_size) + size = get_next_mbchar_size(line, byte_pointer + byte_size) + mbchar = line.byteslice(byte_pointer + byte_size, size) + case started_by + when :word + break if mbchar =~ /\W/ + when :non_word_printable + break if mbchar =~ /[\w\s]/ + end + prev_width = width + prev_byte_size = byte_size + width += get_mbchar_width(mbchar) + byte_size += size + end + [prev_byte_size, prev_width] + end + + def self.vi_backward_word(line, byte_pointer) + width = 0 + byte_size = 0 + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + if mbchar =~ /\S/ + if mbchar =~ /\w/ + started_by = :word + else + started_by = :non_word_printable + end + break + end + width += get_mbchar_width(mbchar) + byte_size += size + end + while 0 < (byte_pointer - byte_size) + size = get_prev_mbchar_size(line, byte_pointer - byte_size) + mbchar = line.byteslice(byte_pointer - byte_size - size, size) + case started_by + when :word + break if mbchar =~ /\W/ + when :non_word_printable + break if mbchar =~ /[\w\s]/ + end + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end + + def self.ed_move_to_begin(line) + width = 0 + byte_size = 0 + while (line.bytesize - 1) > byte_size + size = get_next_mbchar_size(line, byte_size) + mbchar = line.byteslice(byte_size, size) + if mbchar =~ /\S/ + break + end + width += get_mbchar_width(mbchar) + byte_size += size + end + [byte_size, width] + end +end + +require 'reline/unicode/east_asian_width' |