summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/csv/parser.rb85
-rw-r--r--test/csv/parse/test_inputs_scanner.rb37
2 files changed, 88 insertions, 34 deletions
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index ef33a69478..e1fe559a41 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -166,6 +166,7 @@ class CSV
end
def keep_start
+ adjust_last_keep
@keeps.push([@scanner.pos, nil])
end
@@ -196,7 +197,17 @@ class CSV
end
def keep_drop
- @keeps.pop
+ _, buffer = @keeps.pop
+ return unless buffer
+
+ last_keep = @keeps.last
+ return unless last_keep
+
+ if last_keep[1]
+ last_keep[1] << buffer
+ else
+ last_keep[1] = buffer
+ end
end
def rest
@@ -204,24 +215,30 @@ class CSV
end
private
+ def adjust_last_keep
+ keep = @keeps.last
+ return if keep.nil?
+
+ keep_start = keep[0]
+ return if @scanner.pos == keep_start
+
+ string = @scanner.string
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
+ if keep_data
+ keep_buffer = keep[1]
+ if keep_buffer
+ keep_buffer << keep_data
+ else
+ keep[1] = keep_data.dup
+ end
+ end
+ keep[0] = 0
+ end
+
def read_chunk
return false if @last_scanner
- unless @keeps.empty?
- keep = @keeps.last
- keep_start = keep[0]
- string = @scanner.string
- keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
- if keep_data
- keep_buffer = keep[1]
- if keep_buffer
- keep_buffer << keep_data
- else
- keep[1] = keep_data.dup
- end
- end
- keep[0] = 0
- end
+ adjust_last_keep
input = @inputs.first
case input
@@ -728,28 +745,26 @@ class CSV
sample[0, 128].index(@quote_character)
end
- SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
- if SCANNER_TEST
- class UnoptimizedStringIO
- def initialize(string)
- @io = StringIO.new(string, "rb:#{string.encoding}")
- end
+ class UnoptimizedStringIO # :nodoc:
+ def initialize(string)
+ @io = StringIO.new(string, "rb:#{string.encoding}")
+ end
- def gets(*args)
- @io.gets(*args)
- end
+ def gets(*args)
+ @io.gets(*args)
+ end
- def each_line(*args, &block)
- @io.each_line(*args, &block)
- end
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
- def eof?
- @io.eof?
- end
+ def eof?
+ @io.eof?
end
+ end
- SCANNER_TEST_CHUNK_SIZE =
- Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
+ SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
+ if SCANNER_TEST
def build_scanner
inputs = @samples.collect do |sample|
UnoptimizedStringIO.new(sample)
@@ -759,9 +774,11 @@ class CSV
else
inputs << @input
end
+ chunk_size =
+ Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
InputsScanner.new(inputs,
@encoding,
- chunk_size: SCANNER_TEST_CHUNK_SIZE)
+ chunk_size: chunk_size)
end
else
def build_scanner
diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb
new file mode 100644
index 0000000000..dd0a64cc45
--- /dev/null
+++ b/test/csv/parse/test_inputs_scanner.rb
@@ -0,0 +1,37 @@
+require_relative "../helper"
+
+class TestCSVParseInputsScanner < Test::Unit::TestCase
+ include Helper
+
+ def test_keep_over_chunks_nested_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 2)
+ scanner.keep_start
+ assert_equal("abc", scanner.scan_all(/[a-c]+/))
+ scanner.keep_start
+ assert_equal("def", scanner.scan_all(/[d-f]+/))
+ scanner.keep_back
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+
+
+ def test_keep_over_chunks_nested_drop_back
+ input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
+ scanner = CSV::Parser::InputsScanner.new([input],
+ Encoding::UTF_8,
+ nil,
+ chunk_size: 3)
+ scanner.keep_start
+ assert_equal("ab", scanner.scan(/../))
+ scanner.keep_start
+ assert_equal("c", scanner.scan(/./))
+ assert_equal("d", scanner.scan(/./))
+ scanner.keep_drop
+ scanner.keep_back
+ assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
+ end
+end