require 'test/unit'
require 'tmpdir'
require 'timeout'
require_relative 'envutil'

# Tests for IO encoding handling (external/internal encodings, transcoding on
# read and write) and for text-mode / binary-mode newline conversion.
class TestIO_M17N < Test::Unit::TestCase
  ENCS = [
    Encoding::ASCII_8BIT,
    Encoding::EUC_JP,
    Encoding::Shift_JIS,
    Encoding::UTF_8
  ].freeze

  # Creates a temporary directory, makes it the current directory for the
  # duration of the block, and yields its path.
  def with_tmpdir
    Dir.mktmpdir {|dir|
      Dir.chdir(dir) {
        yield dir
      }
    }
  end

  # Creates a pipe with IO.pipe(enc), yields the read and write ends, and
  # closes whichever end the block left open.
  def with_pipe(enc=nil)
    r, w = IO.pipe(enc)
    begin
      yield r, w
    ensure
      r.close unless r.closed?
      w.close unless w.closed?
    end
  end

  # Writes +content+ to +path+ in binary mode ("wb").
  def generate_file(path, content)
    File.open(path, "wb") {|f| f.write content }
  end

  # Returns a string of the form <dump>.force_encoding(<encoding name dump>)
  # built from +str+, for use in failure messages.
  def encdump(str)
    "#{str.dump}.force_encoding(#{str.encoding.name.dump})"
  end

  # Asserts that +expected+ == +actual+, reporting both via encdump on failure.
  #
  # NOTE(review): in the text available for review this method was cut off
  # right after "build_message(message, <". The heredoc and the assertion
  # below are reconstructed from that prefix and from the otherwise-unused
  # encdump helper -- confirm against version control.
  def assert_str_equal(expected, actual, message=nil)
    full_message = build_message(message, <<EOT)
#{encdump expected} expected but not equal to
#{encdump actual}.
EOT
    assert_block(full_message) { expected == actual }
  end

  # NOTE(review): the reviewed text was damaged between the start of
  # assert_str_equal's message and the end of a later test method. Any tests
  # that lived in that gap are not reproduced here and must be restored from
  # version control. The only surviving fragment of the last affected test is:
  #
  #   "euc-jp") {|line| line.inspect } } } end

  # Changing the reader's encodings with an "ext:int" string after data has
  # already been read.
  def test_set_encoding
    with_pipe("utf-8:euc-jp") {|r, w|
      s = "\u3042".force_encoding("ascii-8bit")
      s << "\x82\xa0".force_encoding("ascii-8bit")
      w << s
      w.close
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis:euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    }
  end

  # Same as test_set_encoding, but passing the two encodings as separate
  # arguments.
  def test_set_encoding2
    with_pipe("utf-8:euc-jp") {|r, w|
      s = "\u3042".force_encoding("ascii-8bit")
      s << "\x82\xa0".force_encoding("ascii-8bit")
      w << s
      w.close
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis", "euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    }
  end

  # set_encoding(nil): the remaining bytes are expected to come back tagged
  # with Encoding.default_external.
  def test_set_encoding_nil
    with_pipe("utf-8:euc-jp") {|r, w|
      s = "\u3042".force_encoding("ascii-8bit")
      s << "\x82\xa0".force_encoding("ascii-8bit")
      w << s
      w.close
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(nil)
      assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read)
    }
  end

  # set_encoding with a single Encoding object: the remaining bytes are
  # expected to come back as Shift_JIS.
  def test_set_encoding_enc
    with_pipe("utf-8:euc-jp") {|r, w|
      s = "\u3042".force_encoding("ascii-8bit")
      s << "\x82\xa0".force_encoding("ascii-8bit")
      w << s
      w.close
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(Encoding::Shift_JIS)
      assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc)
    }
  end

  # Writer configured with "iso-2022-jp:utf-8"; two UTF-8 strings are written
  # and the bytes arriving on the read end are compared.
  def test_write_conversion_fixenc
    with_pipe {|r, w|
      w.set_encoding("iso-2022-jp:utf-8")
      # Read in a separate thread so the writes below cannot block on the pipe.
      t = Thread.new { r.read.force_encoding("ascii-8bit") }
      w << "\u3042"
      w << "\u3044"
      w.close
      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
    }
  end

  # Writer configured with only "iso-2022-jp"; strings in two different source
  # encodings are written.
  def test_write_conversion_anyenc_stateful
    with_pipe {|r, w|
      w.set_encoding("iso-2022-jp")
      t = Thread.new { r.read.force_encoding("ascii-8bit") }
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
    }
  end

  # Writer configured with only "euc-jp"; strings in two different source
  # encodings are written.
  def test_write_conversion_anyenc_stateless
    with_pipe {|r, w|
      w.set_encoding("euc-jp")
      t = Thread.new { r.read.force_encoding("ascii-8bit") }
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
      assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), t.value)
    }
  end

  # Same as test_write_conversion_anyenc_stateful, with sync turned off on the
  # writer.
  def test_write_conversion_anyenc_stateful_nosync
    with_pipe {|r, w|
      w.sync = false
      w.set_encoding("iso-2022-jp")
      t = Thread.new { r.read.force_encoding("ascii-8bit") }
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
      assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), t.value)
    }
  end

  # A child process reopens STDIN onto fd 10 (the file "tst"), calls
  # external_encoding, then writes; the file must contain the bytes unchanged.
  def test_stdin_external_encoding_with_reopen
    with_tmpdir {
      open("tst", "w+") {|f|
        pid = spawn(EnvUtil.rubybin, '-e', <<-'End', 10=>f)
          io = IO.new(10, "r+")
          STDIN.reopen(io)
          STDIN.external_encoding
          STDIN.write "\u3042"
          STDIN.flush
        End
        Process.wait pid
        f.rewind
        result = f.read.force_encoding("ascii-8bit")
        assert_equal("\u3042".force_encoding("ascii-8bit"), result)
      }
    }
  end

  # IO.popen with the encoding given in the mode string.
  def test_popen_r_enc
    IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r:ascii-8bit") {|f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_nil(f.internal_encoding)
      s = f.read
      assert_equal(Encoding::ASCII_8BIT, s.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), s)
    }
  end

  # IO.popen with the encoding given through the encoding: option.
  def test_popen_r_enc_in_opt
    IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r", encoding: "ascii-8bit") {|f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_nil(f.internal_encoding)
      s = f.read
      assert_equal(Encoding::ASCII_8BIT, s.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), s)
    }
  end

  # IO.popen with the encoding given through the external_encoding: option.
  def test_popen_r_enc_in_opt2
    IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r", external_encoding: "ascii-8bit") {|f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_nil(f.internal_encoding)
      s = f.read
      assert_equal(Encoding::ASCII_8BIT, s.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), s)
    }
  end

  # IO.popen with external and internal encodings in the mode string.
  def test_popen_r_enc_enc
    IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r:shift_jis:euc-jp") {|f|
      assert_equal(Encoding::Shift_JIS, f.external_encoding)
      assert_equal(Encoding::EUC_JP, f.internal_encoding)
      s = f.read
      assert_equal(Encoding::EUC_JP, s.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
    }
  end

  # IO.popen with "ext:int" given through the encoding: option.
  def test_popen_r_enc_enc_in_opt
    IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", encoding: "shift_jis:euc-jp") {|f|
      assert_equal(Encoding::Shift_JIS, f.external_encoding)
      assert_equal(Encoding::EUC_JP, f.internal_encoding)
      s = f.read
      assert_equal(Encoding::EUC_JP, s.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
    }
  end

  # IO.popen with separate external_encoding: and internal_encoding: options.
  def test_popen_r_enc_enc_in_opt2
    IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f|
      assert_equal(Encoding::Shift_JIS, f.external_encoding)
      assert_equal(Encoding::EUC_JP, f.internal_encoding)
      s = f.read
      assert_equal(Encoding::EUC_JP, s.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
    }
  end

  # Same as test_popen_r_enc_enc_in_opt2, with the command given as an array.
  def test_popenv_r_enc_enc_in_opt2
    IO.popen([EnvUtil.rubybin, "-e", "putc 0xa1"], "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f|
      assert_equal(Encoding::Shift_JIS, f.external_encoding)
      assert_equal(Encoding::EUC_JP, f.internal_encoding)
      s = f.read
      assert_equal(Encoding::EUC_JP, s.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
    }
  end

  # Kernel#open with a "|command" path and the encoding in the mode string.
  def test_open_pipe_r_enc
    open("|#{EnvUtil.rubybin} -e 'putc 255'", "r:ascii-8bit") {|f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_nil(f.internal_encoding)
      s = f.read
      assert_equal(Encoding::ASCII_8BIT, s.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), s)
    }
  end

  # IO.foreach with the encoding given in :mode.
  def test_s_foreach_enc
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :mode => "r:ascii-8bit") {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with the encoding given in :encoding.
  def test_s_foreach_enc_in_opt
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :encoding => "ascii-8bit") {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with the encoding given in :external_encoding.
  def test_s_foreach_enc_in_opt2
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :external_encoding => "ascii-8bit") {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with external and internal encodings in :mode.
  def test_s_foreach_enc_enc
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :mode => "r:utf-8:euc-jp") {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # IO.foreach with "ext:int" given in :encoding.
  def test_s_foreach_enc_enc_in_opt
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :mode => "r", :encoding => "utf-8:euc-jp") {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # IO.foreach with separate :external_encoding and :internal_encoding.
  def test_s_foreach_enc_enc_in_opt2
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :mode => "r", :external_encoding => "utf-8", :internal_encoding => "euc-jp") {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # IO.foreach with the mode string passed through :open_args.
  def test_s_foreach_open_args_enc
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :open_args => ["r:ascii-8bit"]) {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with an encoding: option inside :open_args.
  def test_s_foreach_open_args_enc_in_opt
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :open_args => ["r", encoding: "ascii-8bit"]) {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with an external_encoding: option inside :open_args.
  def test_s_foreach_open_args_enc_in_opt2
    with_tmpdir {
      generate_file("t", "\xff")
      IO.foreach("t", :open_args => ["r", external_encoding: "ascii-8bit"]) {|s|
        assert_equal(Encoding::ASCII_8BIT, s.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), s)
      }
    }
  end

  # IO.foreach with an "r:ext:int" mode string inside :open_args.
  def test_s_foreach_open_args_enc_enc
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :open_args => ["r:utf-8:euc-jp"]) {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # IO.foreach with encoding: "ext:int" inside :open_args.
  def test_s_foreach_open_args_enc_enc_in_opt
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :open_args => ["r", encoding: "utf-8:euc-jp"]) {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # IO.foreach with separate external/internal encoding options inside
  # :open_args.
  def test_s_foreach_open_args_enc_enc_in_opt2
    with_tmpdir {
      generate_file("t", "\u3042")
      IO.foreach("t", :open_args => ["r", external_encoding: "utf-8", internal_encoding: "euc-jp"]) {|s|
        assert_equal(Encoding::EUC_JP, s.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
      }
    }
  end

  # Requesting :textmode and :binmode together must raise ArgumentError.
  def test_both_textmode_binmode
    assert_raise(ArgumentError) {
      open("not-exist", "r", :textmode=>true, :binmode=>true)
    }
  end

  # Whole-file reads in "t" mode: CRLF, CR and LF input must all read back
  # as LF.
  def test_textmode_decode_universal_newline_read
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
      open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) }

      generate_file("t.cr", "a\rb\rc\r")
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

      generate_file("t.lf", "a\nb\nc\n")
      # Read the LF fixture just generated. These two assertions previously
      # re-read "t.cr", which left the LF case untested.
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
    }
  end

  # Character-by-character reads in "rt" mode for CRLF, CR and LF input.
  def test_textmode_decode_universal_newline_getc
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      open("t.crlf", "rt") {|f|
        assert_equal("a", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("b", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("c", f.getc)
        assert_equal("\n", f.getc)
        assert_nil(f.getc)
      }

      generate_file("t.cr", "a\rb\rc\r")
      open("t.cr", "rt") {|f|
        assert_equal("a", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("b", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("c", f.getc)
        assert_equal("\n", f.getc)
        assert_nil(f.getc)
      }

      generate_file("t.lf", "a\nb\nc\n")
      open("t.lf", "rt") {|f|
        assert_equal("a", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("b", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("c", f.getc)
        assert_equal("\n", f.getc)
        assert_nil(f.getc)
      }
    }
  end

  # Line-by-line reads in "rt" mode for CRLF, CR and LF input.
  def test_textmode_decode_universal_newline_gets
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      open("t.crlf", "rt") {|f|
        assert_equal("a\n", f.gets)
        assert_equal("b\n", f.gets)
        assert_equal("c\n", f.gets)
        assert_nil(f.gets)
      }

      generate_file("t.cr", "a\rb\rc\r")
      open("t.cr", "rt") {|f|
        assert_equal("a\n", f.gets)
        assert_equal("b\n", f.gets)
        assert_equal("c\n", f.gets)
        assert_nil(f.gets)
      }

      generate_file("t.lf", "a\nb\nc\n")
      open("t.lf", "rt") {|f|
        assert_equal("a\n", f.gets)
        assert_equal("b\n", f.gets)
        assert_equal("c\n", f.gets)
        assert_nil(f.gets)
      }
    }
  end

  # "rt" reads of UTF-16BE/LE files transcoded to UTF-8, for CRLF, CR and LF
  # input.
  def test_textmode_decode_universal_newline_utf16
    with_tmpdir {
      generate_file("t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n")
      assert_equal("a\nb\nc\n", File.read("t.utf16be.crlf", mode:"rt:utf-16be:utf-8"))

      generate_file("t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0")
      assert_equal("a\nb\nc\n", File.read("t.utf16le.crlf", mode:"rt:utf-16le:utf-8"))

      generate_file("t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r")
      assert_equal("a\nb\nc\n", File.read("t.utf16be.cr", mode:"rt:utf-16be:utf-8"))

      generate_file("t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0")
      assert_equal("a\nb\nc\n", File.read("t.utf16le.cr", mode:"rt:utf-16le:utf-8"))

      generate_file("t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n")
      assert_equal("a\nb\nc\n", File.read("t.utf16be.lf", mode:"rt:utf-16be:utf-8"))

      generate_file("t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0")
      assert_equal("a\nb\nc\n", File.read("t.utf16le.lf", mode:"rt:utf-16le:utf-8"))
    }
  end

  # Returns "\n" when File::BINARY is 0 and "\r\n" otherwise.
  def system_newline
    File::BINARY == 0 ? "\n" : "\r\n"
  end

  # Lines written with puts in "wt" mode must end with system_newline on disk.
  def test_textmode_encode_newline
    with_tmpdir {
      open("t.txt", "wt") {|f|
        f.puts "abc"
        f.puts "def"
      }
      content = File.read("t.txt", :mode=>"rb")
      nl = system_newline
      assert_equal("abc#{nl}def#{nl}", content)
    }
  end

  # Binary-mode reads must return the bytes unchanged.
  def test_binary
    with_tmpdir {
      src = "a\nb\rc\r\nd\n"
      generate_file("t.txt", src)
      open("t.txt", "rb") {|f|
        assert_equal(src, f.read)
      }
      open("t.txt", "r", :binmode=>true) {|f|
        assert_equal(src, f.read)
      }
      # A plain "r" read is only checked where File::BINARY is 0.
      if File::BINARY == 0
        open("t.txt", "r") {|f|
          assert_equal(src, f.read)
        }
      end
    }
  end

  # Switching a text-mode reader to binmode part-way through the file.
  def test_binmode
    with_tmpdir {
      src = "a\r\nb\r\nc\r\n"
      generate_file("t.txt", src)
      open("t.txt", "rt") {|f|
        assert_equal("a", f.getc)
        assert_equal("\n", f.getc)
        f.binmode
        # NOTE(review): the extra "\n" right after binmode is kept exactly as
        # the original asserts it -- confirm it matches the targeted Ruby.
        assert_equal("\n", f.getc)
        assert_equal("b", f.getc)
        assert_equal("\r", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("c", f.getc)
        assert_equal("\r", f.getc)
        assert_equal("\n", f.getc)
        assert_nil(f.getc)
      }
    }
  end

  # Same as test_binmode, with the file opened as "rt:euc-jp:utf-8".
  def test_binmode2
    with_tmpdir {
      src = "a\r\nb\r\nc\r\n"
      generate_file("t.txt", src)
      open("t.txt", "rt:euc-jp:utf-8") {|f|
        assert_equal("a", f.getc)
        assert_equal("\n", f.getc)
        f.binmode
        # NOTE(review): see the matching note in test_binmode.
        assert_equal("\n", f.getc)
        assert_equal("b", f.getc)
        assert_equal("\r", f.getc)
        assert_equal("\n", f.getc)
        assert_equal("c", f.getc)
        assert_equal("\r", f.getc)
        assert_equal("\n", f.getc)
        assert_nil(f.getc)
      }
    }
  end
end