summaryrefslogtreecommitdiff
path: root/lib/csv
diff options
context:
space:
mode:
authorSutou Kouhei <kou@clear-code.com>2021-12-24 10:44:11 +0900
committerSutou Kouhei <kou@cozmixng.org>2021-12-24 14:35:33 +0900
commit22ef4f6445376b992b2725124594dad1c77a185e (patch)
tree31c624339ba0f33258c6843e7b6cbc7d5bb00c76 /lib/csv
parent002ce9f515d984aa0fe43c51448b36ea3651a395 (diff)
[ruby/csv] Revert "parser: fix a keep bug that some texts may be dropped unexpectedly"
This reverts commit https://github.com/ruby/csv/commit/5c6523da0a61. This introduces another problem. We should try again later. https://github.com/ruby/csv/commit/43a1d6fff1
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/5336
Diffstat (limited to 'lib/csv')
-rw-r--r--lib/csv/parser.rb85
1 files changed, 34 insertions, 51 deletions
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 78ff363d69..7e943acf21 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -167,7 +167,6 @@ class CSV
end
def keep_start
- adjust_last_keep
@keeps.push([@scanner.pos, nil])
end
@@ -198,17 +197,7 @@ class CSV
end
def keep_drop
- _, buffer = @keeps.pop
- return unless buffer
-
- last_keep = @keeps.last
- return unless last_keep
-
- if last_keep[1]
- last_keep[1] << buffer
- else
- last_keep[1] = buffer
- end
+ @keeps.pop
end
def rest
@@ -216,30 +205,24 @@ class CSV
end
private
- def adjust_last_keep
- keep = @keeps.last
- return if keep.nil?
-
- keep_start = keep[0]
- return if @scanner.pos == keep_start
-
- string = @scanner.string
- keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
- if keep_data
- keep_buffer = keep[1]
- if keep_buffer
- keep_buffer << keep_data
- else
- keep[1] = keep_data.dup
- end
- end
- keep[0] = 0
- end
-
def read_chunk
return false if @last_scanner
- adjust_last_keep
+ unless @keeps.empty?
+ keep = @keeps.last
+ keep_start = keep[0]
+ string = @scanner.string
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
+ if keep_data
+ keep_buffer = keep[1]
+ if keep_buffer
+ keep_buffer << keep_data
+ else
+ keep[1] = keep_data.dup
+ end
+ end
+ keep[0] = 0
+ end
input = @inputs.first
case input
@@ -746,26 +729,28 @@ class CSV
sample[0, 128].index(@quote_character)
end
- class UnoptimizedStringIO # :nodoc:
- def initialize(string)
- @io = StringIO.new(string, "rb:#{string.encoding}")
- end
+ SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
+ if SCANNER_TEST
+ class UnoptimizedStringIO
+ def initialize(string)
+ @io = StringIO.new(string, "rb:#{string.encoding}")
+ end
- def gets(*args)
- @io.gets(*args)
- end
+ def gets(*args)
+ @io.gets(*args)
+ end
- def each_line(*args, &block)
- @io.each_line(*args, &block)
- end
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
- def eof?
- @io.eof?
+ def eof?
+ @io.eof?
+ end
end
- end
- SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
- if SCANNER_TEST
+ SCANNER_TEST_CHUNK_SIZE =
+ Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
def build_scanner
inputs = @samples.collect do |sample|
UnoptimizedStringIO.new(sample)
@@ -775,12 +760,10 @@ class CSV
else
inputs << @input
end
- chunk_size =
- Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
InputsScanner.new(inputs,
@encoding,
@row_separator,
- chunk_size: chunk_size)
+ chunk_size: SCANNER_TEST_CHUNK_SIZE)
end
else
def build_scanner