diff options
Diffstat (limited to 'lib/csv/parser.rb')
| -rw-r--r-- | lib/csv/parser.rb | 1142 |
1 files changed, 1142 insertions, 0 deletions
# frozen_string_literal: true

require "strscan"

require_relative "delete_suffix"
require_relative "match_p"
require_relative "row"
require_relative "table"

using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)

class CSV
  # Note: Don't use this class directly. This is an internal class.
  class Parser
    #
    # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
    # or String object being read from or written to. Your data is never transcoded
    # (unless you ask Ruby to transcode it for you) and will literally be parsed in
    # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
    # Encoding of your data. This is accomplished by transcoding the parser itself
    # into your Encoding.
    #

    # Raised when encoding is invalid.
    class InvalidEncoding < StandardError
    end

    #
    # CSV::Scanner receives CSV text as a single String, scans it and returns
    # the matched content. It also tracks "keeps" (remembered scan positions)
    # through its methods +keep_start+, +keep_end+, +keep_back+ and +keep_drop+.
    #
    # Uses StringScanner (the official strscan gem). Strscan provides lexical
    # scanning operations on a String. We inherit from it and take advantage
    # of its methods.
    # For more information, please visit:
    # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
    #
    class Scanner < StringScanner
      # The whole input is in one String here, so +scan_all+ is plain +scan+.
      alias_method :scan_all, :scan

      # Accepts the same arguments as StringScanner.new and starts with an
      # empty stack of kept positions.
      def initialize(*args)
        super
        @keeps = []
      end

      # Yields each +row_separator+ delimited line of the unscanned rest.
      # The scan pointer is moved past the line (by its byte size) before the
      # line is yielded.
      def each_line(row_separator)
        position = pos
        rest.each_line(row_separator) do |line|
          position += line.bytesize
          self.pos = position
          yield(line)
        end
      end

      # Remembers the current scan position on the keep stack.
      def keep_start
        @keeps.push(pos)
      end

      # Pops the most recent kept position and returns the bytes between it
      # and the current scan position.
      def keep_end
        start = @keeps.pop
        string.byteslice(start, pos - start)
      end

      # Pops the most recent kept position and rewinds the scan pointer to it.
      def keep_back
        self.pos = @keeps.pop
      end

      # Pops the most recent kept position without using it.
      def keep_drop
        @keeps.pop
      end
    end

    #
    # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
    # It reads the inputs chunk by chunk, wrapping each chunk in a
    # StringScanner, and offers the same keep API as CSV::Scanner:
    # +keep_start+, +keep_end+, +keep_back+, +keep_drop+.
    #
    # CSV::InputsScanner.scan() tries to match with pattern at the current position.
    # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
    # Otherwise, the scanner returns nil.
    #
    # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
    # If there is no more data (eos? = true), it returns "".
    #
    class InputsScanner
      # +inputs+ is an Array of input objects; it is duplicated because this
      # scanner shifts/unshifts entries as they are consumed. StringIO inputs
      # are read whole, any other input is read with gets(nil, chunk_size)
      # (see #read_chunk). The first chunk is read immediately.
      def initialize(inputs, encoding, chunk_size: 8192)
        @inputs = inputs.dup
        @encoding = encoding
        @chunk_size = chunk_size
        @last_scanner = @inputs.empty?
        @keeps = []
        read_chunk
      end

      # Yields each +row_separator+ terminated line, reading further chunks
      # as needed. A line that is cut by a chunk boundary is held in +buffer+
      # and completed from the next chunk. Trailing data without a separator
      # is yielded last.
      def each_line(row_separator)
        # Incomplete line carried over from the previous chunk, if any.
        buffer = nil
        input = @scanner.rest
        position = @scanner.pos
        # Negative byte size of the carried-over buffer: those bytes belong to
        # an earlier chunk and must not be counted in the current scanner.
        offset = 0
        n_row_separator_chars = row_separator.size
        while true
          input.each_line(row_separator) do |line|
            @scanner.pos += line.bytesize
            if buffer
              # A two character separator may be split across two chunks:
              # first character ends the buffer, second one starts this line.
              if n_row_separator_chars == 2 and
                buffer.end_with?(row_separator[0]) and
                line.start_with?(row_separator[1])
                buffer << line[0]
                line = line[1..-1]
                position += buffer.bytesize + offset
                @scanner.pos = position
                offset = 0
                yield(buffer)
                buffer = nil
                next if line.empty?
              else
                # Ordinary continuation: glue the carried-over part in front.
                buffer << line
                line = buffer
                buffer = nil
              end
            end
            if line.end_with?(row_separator)
              position += line.bytesize + offset
              @scanner.pos = position
              offset = 0
              yield(line)
            else
              # No separator yet: keep it until more data arrives.
              buffer = line
            end
          end
          break unless read_chunk
          input = @scanner.rest
          position = @scanner.pos
          offset = -buffer.bytesize if buffer
        end
        yield(buffer) if buffer
      end

      # Matches +pattern+ at the current position of the current chunk.
      # Returns the matched String or nil. After a successful match that
      # exhausts the chunk, the next chunk is loaded (unless this is the last
      # scanner).
      def scan(pattern)
        value = @scanner.scan(pattern)
        return value if @last_scanner

        if value
          read_chunk if @scanner.eos?
          return value
        else
          nil
        end
      end

      # Like #scan, but when a match reaches the end of the chunk it keeps
      # matching +pattern+ against the following chunks and appends those
      # matches to the returned value.
      def scan_all(pattern)
        value = @scanner.scan(pattern)
        return value if @last_scanner

        return nil if value.nil?
        while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
          value << sub_value
        end
        value
      end

      # True when the current chunk's scanner is at its end.
      def eos?
        @scanner.eos?
      end

      # Pushes a keep: [start position in the current chunk, buffer]. The
      # buffer is nil until #read_chunk has to save kept bytes of a chunk that
      # is being replaced.
      def keep_start
        @keeps.push([@scanner.pos, nil])
      end

      # Pops the most recent keep and returns everything scanned since it was
      # started: the saved buffer from earlier chunks (if any) followed by the
      # bytes from the start position to the current position.
      def keep_end
        start, buffer = @keeps.pop
        keep = @scanner.string.byteslice(start, @scanner.pos - start)
        if buffer
          buffer << keep
          keep = buffer
        end
        keep
      end

      # Pops the most recent keep and rewinds to where it was started.
      # If the keep has a buffer (it was started in an earlier chunk), the
      # current chunk from the keep's start position is pushed back onto the
      # inputs as a StringIO and scanning restarts on the buffer. Otherwise
      # only the scan position is reset.
      def keep_back
        start, buffer = @keeps.pop
        if buffer
          string = @scanner.string
          keep = string.byteslice(start, string.bytesize - start)
          if keep and not keep.empty?
            @inputs.unshift(StringIO.new(keep))
            @last_scanner = false
          end
          @scanner = StringScanner.new(buffer)
        else
          @scanner.pos = start
        end
        read_chunk if @scanner.eos?
      end

      # Pops the most recent keep without using it.
      def keep_drop
        @keeps.pop
      end

      # Returns the unscanned rest of the current chunk only.
      def rest
        @scanner.rest
      end

      private
      # Replaces @scanner with a scanner on the next chunk of input.
      # Returns true when a new chunk was loaded, false when there is nothing
      # more to read. Raises InvalidEncoding when the chunk is not valid in
      # its encoding.
      def read_chunk
        return false if @last_scanner

        # The current chunk is about to be discarded: save the bytes covered
        # by the most recent keep into its buffer and restart it at 0 for the
        # next chunk. Only @keeps.last is updated here.
        unless @keeps.empty?
          keep = @keeps.last
          keep_start = keep[0]
          string = @scanner.string
          keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
          if keep_data
            keep_buffer = keep[1]
            if keep_buffer
              keep_buffer << keep_data
            else
              keep[1] = keep_data.dup
            end
          end
          keep[0] = 0
        end

        input = @inputs.first
        case input
        when StringIO
          # In-memory input: take everything that is left in one go.
          string = input.read
          raise InvalidEncoding unless string.valid_encoding?
          @scanner = StringScanner.new(string)
          @inputs.shift
          @last_scanner = @inputs.empty?
          true
        else
          chunk = input.gets(nil, @chunk_size)
          if chunk
            raise InvalidEncoding unless chunk.valid_encoding?
            @scanner = StringScanner.new(chunk)
            # Drop the input as soon as it reports EOF so that @last_scanner
            # is accurate for the chunk just read.
            if input.respond_to?(:eof?) and input.eof?
              @inputs.shift
              @last_scanner = @inputs.empty?
            end
            true
          else
            # This input is exhausted: continue with the next one, if any.
            @scanner = StringScanner.new("".encode(@encoding))
            @inputs.shift
            @last_scanner = @inputs.empty?
            if @last_scanner
              false
            else
              read_chunk
            end
          end
        end
      end
    end

    # +input+ is the object to parse (the code handles StringIO specially and
    # otherwise relies on +gets+). +options+ is a Hash read by the prepare_*
    # methods (:encoding, :column_separator, :row_separator, ...).
    # All derived state is computed here via #prepare.
    def initialize(input, options)
      @input = input
      @options = options
      # Chunks consumed from @input while auto-detecting the row separator;
      # they are replayed by #build_scanner.
      @samples = []

      prepare
    end

    # The column separator, encoded in the parser's encoding.
    def column_separator
      @column_separator
    end

    # The resolved row separator, encoded in the parser's encoding.
    def row_separator
      @row_separator
    end

    # The quote character (a one character String) or nil when quoting is
    # disabled.
    def quote_character
      @quote_character
    end

    # The :field_size_limit option as given.
    def field_size_limit
      @field_size_limit
    end

    # The normalized :skip_lines option (see #prepare_skip_lines).
    def skip_lines
      @skip_lines
    end

    # The :unconverted_fields option as given.
    def unconverted_fields?
      @unconverted_fields
    end

    # The converted headers, or nil while they are not known yet.
    def headers
      @headers
    end

    # True when headers are in use but have not been read yet, i.e. the next
    # row will be taken as the header row.
    def header_row?
      @use_headers and @headers.nil?
    end

    # The :return_headers option as given.
    def return_headers?
      @return_headers
    end

    # The :skip_blanks option as given.
    def skip_blanks?
      @skip_blanks
    end

    # True when the :liberal_parsing option is enabled.
    def liberal_parsing?
      @liberal_parsing
    end

    # Number of the last line processed (incremented per emitted, skipped or
    # broken line).
    def lineno
      @lineno
    end

    # The raw text of the last row read.
    def line
      last_line
    end

    # Parses the input and yields each row to the block. Without a block an
    # Enumerator is returned.
    #
    # When headers were supplied through options and :return_headers is set,
    # a header Row is yielded first. The parsing strategy is chosen from the
    # prepared state: no quote character, robust parsing, or loose parsing.
    #
    # An InvalidEncoding raised while scanning is converted into a
    # MalformedCSVError carrying the line number.
    def parse(&block)
      return to_enum(__method__) unless block_given?

      if @return_headers and @headers and @raw_headers
        headers = Row.new(@headers, @raw_headers, true)
        if @unconverted_fields
          headers = add_unconverted_fields(headers, [])
        end
        yield headers
      end

      begin
        @scanner ||= build_scanner
        if quote_character.nil?
          parse_no_quote(&block)
        elsif @need_robust_parsing
          parse_quotable_robust(&block)
        else
          parse_quotable_loose(&block)
        end
      rescue InvalidEncoding
        if @scanner
          # Skip the rest of the offending line; this also bumps @lineno.
          ignore_broken_line
          lineno = @lineno
        else
          # The scanner could not even be built (first chunk was invalid).
          lineno = @lineno + 1
        end
        message = "Invalid byte sequence in #{@encoding}"
        raise MalformedCSVError.new(message, lineno)
      end
    end

    # True when rows are returned with headers.
    def use_headers?
      @use_headers
    end

    private
    # Derives all parser state from @options. The order matters: later steps
    # read instance variables set by earlier ones (e.g. prepare_quoted uses
    # values from prepare_quote_character, prepare_strip and
    # prepare_separators).
    def prepare
      prepare_variable
      prepare_quote_character
      prepare_backslash
      prepare_skip_lines
      prepare_strip
      prepare_separators
      prepare_quoted
      prepare_unquoted
      prepare_line
      prepare_header
      prepare_parser
    end

    # Copies plain options into instance variables and decodes
    # :liberal_parsing, which may be a truthy flag or a Hash with the
    # :double_quote_outside_quote and :backslash_quote sub-options.
    # Liberal parsing always requires the robust parser.
    def prepare_variable
      @need_robust_parsing = false
      @encoding = @options[:encoding]
      liberal_parsing = @options[:liberal_parsing]
      if liberal_parsing
        @liberal_parsing = true
        if liberal_parsing.is_a?(Hash)
          @double_quote_outside_quote =
            liberal_parsing[:double_quote_outside_quote]
          @backslash_quote = liberal_parsing[:backslash_quote]
        else
          @double_quote_outside_quote = false
          @backslash_quote = false
        end
        @need_robust_parsing = true
      else
        @liberal_parsing = false
        @backslash_quote = false
      end
      @unconverted_fields = @options[:unconverted_fields]
      @field_size_limit = @options[:field_size_limit]
      @skip_blanks = @options[:skip_blanks]
      @fields_converter = @options[:fields_converter]
      @header_fields_converter = @options[:header_fields_converter]
    end

    # Validates the quote character (nil or exactly one character) and
    # builds the regexp-escaped form and the Regexp matching one quote.
    # Raises ArgumentError for a quote of any other length.
    def prepare_quote_character
      @quote_character = @options[:quote_character]
      if @quote_character.nil?
        @escaped_quote_character = nil
        @escaped_quote = nil
      else
        @quote_character = @quote_character.to_s.encode(@encoding)
        if @quote_character.length != 1
          message = ":quote_char has to be nil or a single character String"
          raise ArgumentError, message
        end
        @double_quote_character = @quote_character * 2
        @escaped_quote_character = Regexp.escape(@quote_character)
        @escaped_quote = Regexp.new(@escaped_quote_character)
      end
    end

    # Sets up backslash handling; a no-op unless the :backslash_quote
    # liberal parsing sub-option is enabled.
    def prepare_backslash
      return unless @backslash_quote

      @backslash_character = "\\".encode(@encoding)

      @escaped_backslash_character = Regexp.escape(@backslash_character)
      @escaped_backslash = Regexp.new(@escaped_backslash_character)
      if @quote_character.nil?
        @backslash_quote_character = nil
      else
        # Backslash followed by the (regexp-escaped) quote character.
        @backslash_quote_character =
          @backslash_character + @escaped_quote_character
      end
    end

    # Normalizes :skip_lines. A String is encoded to the parser's encoding,
    # a Regexp or nil is used as is, anything else must respond to #match
    # (ArgumentError otherwise).
    def prepare_skip_lines
      skip_lines = @options[:skip_lines]
      case skip_lines
      when String
        @skip_lines = skip_lines.encode(@encoding)
      when Regexp, nil
        @skip_lines = skip_lines
      else
        unless skip_lines.respond_to?(:match)
          message =
            ":skip_lines has to respond to \#match: #{skip_lines.inspect}"
          raise ArgumentError, message
        end
        @skip_lines = skip_lines
      end
    end

    # Prepares the :strip option. A String must be exactly one character
    # (ArgumentError otherwise) and is the character to strip; any other
    # truthy value strips space, tab, form feed and vertical tab.
    # Stripping always requires the robust parser.
    def prepare_strip
      @strip = @options[:strip]
      @escaped_strip = nil
      @strip_value = nil
      @rstrip_value = nil
      if @strip.is_a?(String)
        case @strip.length
        when 0
          raise ArgumentError, ":strip must not be an empty String"
        when 1
          # ok
        else
          raise ArgumentError, ":strip doesn't support 2 or more characters yet"
        end
        @strip = @strip.encode(@encoding)
        @escaped_strip = Regexp.escape(@strip)
        if @quote_character
          @strip_value = Regexp.new(@escaped_strip +
                                    "+".encode(@encoding))
          @rstrip_value = Regexp.new(@escaped_strip +
                                     "+\\z".encode(@encoding))
        end
        @need_robust_parsing = true
      elsif @strip
        strip_values = " \t\f\v"
        # NOTE(review): unlike the String branch this is the literal four
        # character sequence, not a character class, and prepare_quoted
        # appends "*" to it when building @split_column_separator.
        # Confirm this is intended.
        @escaped_strip = strip_values.encode(@encoding)
        if @quote_character
          @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
          @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
        end
        @need_robust_parsing = true
      end
    end

    # Feature detection, run once when the class body is evaluated:
    # does StringScanner#scan accept a String pattern, or does it raise
    # TypeError and require a Regexp?
    begin
      StringScanner.new("x").scan("x")
    rescue TypeError
      @@string_scanner_scan_accept_string = false
    else
      @@string_scanner_scan_accept_string = true
    end

    # Prepares column and row separators and the patterns derived from them.
    # Raises ArgumentError when the column separator is empty.
    #
    # For multi character separators, @column_ends / @row_ends hold one
    # Regexp per separator character so the separator can be matched
    # piecewise (see #parse_column_end and #parse_row_end).
    def prepare_separators
      column_separator = @options[:column_separator]
      @column_separator = column_separator.to_s.encode(@encoding)
      if @column_separator.size < 1
        message = ":col_sep must be 1 or more characters: "
        message += column_separator.inspect
        raise ArgumentError, message
      end
      @row_separator =
        resolve_row_separator(@options[:row_separator]).encode(@encoding)

      @escaped_column_separator = Regexp.escape(@column_separator)
      @escaped_first_column_separator = Regexp.escape(@column_separator[0])
      if @column_separator.size > 1
        @column_end = Regexp.new(@escaped_column_separator)
        @column_ends = @column_separator.each_char.collect do |char|
          Regexp.new(Regexp.escape(char))
        end
        @first_column_separators = Regexp.new(@escaped_first_column_separator +
                                              "+".encode(@encoding))
      else
        # Single character separator: scan with the plain String when the
        # installed StringScanner supports it.
        if @@string_scanner_scan_accept_string
          @column_end = @column_separator
        else
          @column_end = Regexp.new(@escaped_column_separator)
        end
        @column_ends = nil
        @first_column_separators = nil
      end

      escaped_row_separator = Regexp.escape(@row_separator)
      @row_end = Regexp.new(escaped_row_separator)
      if @row_separator.size > 1
        @row_ends = @row_separator.each_char.collect do |char|
          Regexp.new(Regexp.escape(char))
        end
      else
        @row_ends = nil
      end

      @cr = "\r".encode(@encoding)
      @lf = "\n".encode(@encoding)
      @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
      @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
    end

    # Builds the patterns used for quoted values and for splitting a line
    # into columns:
    # * @quotes matches a run of quote characters,
    # * @quoted_value matches a run of characters that are neither a quote
    #   nor (with :backslash_quote) a backslash,
    # * @split_column_separator is what String#split is called with by the
    #   line based parsers.
    def prepare_quoted
      if @quote_character
        @quotes = Regexp.new(@escaped_quote_character +
                             "+".encode(@encoding))
        no_quoted_values = @escaped_quote_character.dup
        if @backslash_quote
          no_quoted_values << @escaped_backslash_character
        end
        @quoted_value = Regexp.new("[^".encode(@encoding) +
                                   no_quoted_values +
                                   "]+".encode(@encoding))
      end
      if @escaped_strip
        @split_column_separator = Regexp.new(@escaped_strip +
                                             "*".encode(@encoding) +
                                             @escaped_column_separator +
                                             @escaped_strip +
                                             "*".encode(@encoding))
      else
        # A single space String makes String#split split on whitespace runs,
        # so a Regexp is used for that separator.
        if @column_separator == " ".encode(@encoding)
          @split_column_separator = Regexp.new(@escaped_column_separator)
        else
          @split_column_separator = @column_separator
        end
      end
    end

    # Builds @unquoted_value: a run of characters that are not CR, LF, the
    # first character of the column separator or (unless liberal parsing is
    # enabled) the quote character. A no-op when quoting is disabled.
    def prepare_unquoted
      return if @quote_character.nil?

      no_unquoted_values = "\r\n".encode(@encoding)
      no_unquoted_values << @escaped_first_column_separator
      unless @liberal_parsing
        no_unquoted_values << @escaped_quote_character
      end
      @unquoted_value = Regexp.new("[^".encode(@encoding) +
                                   no_unquoted_values +
                                   "]+".encode(@encoding))
    end

    # Returns the row separator to use, encoded in the parser's encoding.
    # For :auto the input is inspected: a StringIO is read completely and
    # rewound; other inputs responding to +gets+ are sampled chunk by chunk
    # and every sample is remembered in @samples so it can be parsed later.
    def resolve_row_separator(separator)
      if separator == :auto
        cr = "\r".encode(@encoding)
        lf = "\n".encode(@encoding)
        if @input.is_a?(StringIO)
          pos = @input.pos
          separator = detect_row_separator(@input.read, cr, lf)
          @input.seek(pos)
        elsif @input.respond_to?(:gets)
          if @input.is_a?(File)
            chunk_size = 32 * 1024
          else
            chunk_size = 1024
          end
          begin
            while separator == :auto
              #
              # if we run out of data, it's probably a single line
              # (the fallback after this block sets the default value)
              #
              break unless sample = @input.gets(nil, chunk_size)

              # extend sample if we're unsure of the line ending
              if sample.end_with?(cr)
                sample << (@input.gets(nil, 1) || "")
              end

              @samples << sample

              separator = detect_row_separator(sample, cr, lf)
            end
          rescue IOError
            # do nothing: the fallback below sets the default
          end
        end
        # Nothing detected: fall back to the global record separator.
        # NOTE(review): $INPUT_RECORD_SEPARATOR needs the English library,
        # which is not required in this file; presumably it is loaded
        # elsewhere. Confirm.
        separator = $INPUT_RECORD_SEPARATOR if separator == :auto
      end
      separator.to_s.encode(@encoding)
    end

    # Guesses the row separator from +sample+ by looking at the first LF and
    # at the first CR before it (or the first CR anywhere when there is no
    # LF). Returns cr + lf, cr or lf, or :auto when the sample contains
    # neither.
    def detect_row_separator(sample, cr, lf)
      lf_index = sample.index(lf)
      if lf_index
        cr_index = sample[0, lf_index].index(cr)
      else
        cr_index = sample.index(cr)
      end
      if cr_index and lf_index
        if cr_index + 1 == lf_index
          cr + lf
        elsif cr_index < lf_index
          cr
        else
          # NOTE(review): cr_index is only searched before lf_index here, so
          # this branch looks unreachable.
          lf
        end
      elsif cr_index
        cr
      elsif lf_index
        lf
      else
        :auto
      end
    end

    # Resets the line bookkeeping; the scanner is built lazily by #parse.
    def prepare_line
      @lineno = 0
      @last_line = nil
      @scanner = nil
    end

    # Returns the raw text of the last row. With a scanner the value is
    # memoized from keep_end, which pops the current keep.
    def last_line
      if @scanner
        @last_line ||= @scanner.keep_end
      else
        @last_line
      end
    end

    # Decodes the :headers option:
    # * Array: used as the headers,
    # * String: parsed as one CSV line to get the headers,
    # * nil/false: no headers,
    # * anything else: headers are taken from the first row.
    def prepare_header
      @return_headers = @options[:return_headers]

      headers = @options[:headers]
      case headers
      when Array
        @raw_headers = headers
        @use_headers = true
      when String
        @raw_headers = parse_headers(headers)
        @use_headers = true
      when nil, false
        @raw_headers = nil
        @use_headers = false
      else
        @raw_headers = nil
        @use_headers = true
      end
      if @raw_headers
        @headers = adjust_headers(@raw_headers)
      else
        @headers = nil
      end
    end

    # Parses a header String with this parser's separators and quote
    # character.
    def parse_headers(row)
      CSV.parse_line(row,
                     col_sep: @column_separator,
                     row_sep: @row_separator,
                     quote_char: @quote_character)
    end

    # Runs the header converters over +headers+ and freezes every String
    # header. Returns the converted headers.
    def adjust_headers(headers)
      adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
      adjusted_headers.each {|h| h.freeze if h.is_a? String}
      adjusted_headers
    end

    # Caches the quoting heuristic used by #parse_column_value.
    def prepare_parser
      @may_quoted = may_quoted?
    end

    # Heuristic: is a quote character present in the first 128 characters of
    # the input (StringIO) or of the first sample? Returns a truthy index or
    # nil/false. Inputs that are neither a StringIO nor sampled count as
    # "not quoted".
    def may_quoted?
      return false if @quote_character.nil?

      if @input.is_a?(StringIO)
        pos = @input.pos
        sample = @input.read
        @input.seek(pos)
      else
        return false if @samples.empty?
        sample = @samples.first
      end
      sample[0, 128].index(@quote_character)
    end

    # Test hook: with CSV_PARSER_SCANNER_TEST=yes in the environment the
    # chunked InputsScanner is always used, by default with a chunk size of
    # 1 (override with CSV_PARSER_SCANNER_TEST_CHUNK_SIZE).
    SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
    if SCANNER_TEST
      # Wraps a StringIO without being one, so that InputsScanner reads it
      # chunk by chunk through +gets+ instead of taking its StringIO branch.
      class UnoptimizedStringIO
        def initialize(string)
          @io = StringIO.new(string, "rb:#{string.encoding}")
        end

        def gets(*args)
          @io.gets(*args)
        end

        def each_line(*args, &block)
          @io.each_line(*args, &block)
        end

        def eof?
          @io.eof?
        end
      end

      # Test variant: always builds an InputsScanner over the samples
      # followed by the input itself.
      def build_scanner
        inputs = @samples.collect do |sample|
          UnoptimizedStringIO.new(sample)
        end
        if @input.is_a?(StringIO)
          inputs << UnoptimizedStringIO.new(@input.read)
        else
          inputs << @input
        end
        chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
        InputsScanner.new(inputs,
                          @encoding,
                          chunk_size: Integer(chunk_size, 10))
      end
    else
      # Builds the scanner for #parse. When the whole input is available as
      # one String (an unsampled StringIO, or a single sample of an input
      # already at EOF) the simple Scanner is used; otherwise an
      # InputsScanner over the samples followed by the input.
      def build_scanner
        string = nil
        if @samples.empty? and @input.is_a?(StringIO)
          string = @input.read
        elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
          string = @samples[0]
        end
        if string
          unless string.valid_encoding?
            # Find the first invalid line so the error can name it.
            index = string.lines(@row_separator).index do |line|
              !line.valid_encoding?
            end
            if index
              message = "Invalid byte sequence in #{@encoding}"
              raise MalformedCSVError.new(message, @lineno + index + 1)
            end
          end
          Scanner.new(string)
        else
          inputs = @samples.collect do |sample|
            StringIO.new(sample)
          end
          inputs << @input
          InputsScanner.new(inputs, @encoding)
        end
      end
    end

    # Consumes leading lines that match :skip_lines, counting each in
    # @lineno. Stops at, and rewinds to the start of, the first line that
    # must not be skipped. A no-op without :skip_lines.
    def skip_needless_lines
      return unless @skip_lines

      until @scanner.eos?
        @scanner.keep_start
        line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
        line << @row_separator if parse_row_end
        if skip_line?(line)
          @lineno += 1
          @scanner.keep_drop
        else
          @scanner.keep_back
          return
        end
      end
    end

    # Tests +line+ (without its trailing row separator) against
    # @skip_lines: substring test for a String, match? for a Regexp,
    # #match for any other object.
    def skip_line?(line)
      line = line.delete_suffix(@row_separator)
      case @skip_lines
      when String
        line.include?(@skip_lines)
      when Regexp
        @skip_lines.match?(line)
      else
        @skip_lines.match(line)
      end
    end

    # Parser used when quoting is disabled: every line is one row and is
    # simply split on the column separator. Empty fields become nil.
    def parse_no_quote(&block)
      @scanner.each_line(@row_separator) do |line|
        next if @skip_lines and skip_line?(line)
        original_line = line
        line = line.delete_suffix(@row_separator)

        if line.empty?
          next if @skip_blanks
          row = []
        else
          line = strip_value(line)
          # -1 keeps trailing empty fields.
          row = line.split(@split_column_separator, -1)
          n_columns = row.size
          i = 0
          while i < n_columns
            row[i] = nil if row[i].empty?
            i += 1
          end
        end
        @last_line = original_line
        emit_row(row, &block)
      end
    end

    # Fast path for quoted CSV: splits line by line and only accepts fields
    # that are unquoted or consist of exactly one opening and one closing
    # quote. Anything else (embedded CR/LF, other quote usage) rewinds to
    # the start of the current line and hands over to
    # #parse_quotable_robust for the rest of the input.
    #
    # A keep is held open from the start of each line so the fallback can
    # rewind to it.
    def parse_quotable_loose(&block)
      @scanner.keep_start
      @scanner.each_line(@row_separator) do |line|
        if @skip_lines and skip_line?(line)
          @scanner.keep_drop
          @scanner.keep_start
          next
        end
        original_line = line
        line = line.delete_suffix(@row_separator)

        if line.empty?
          if @skip_blanks
            @scanner.keep_drop
            @scanner.keep_start
            next
          end
          row = []
        elsif line.include?(@cr) or line.include?(@lf)
          @scanner.keep_back
          @need_robust_parsing = true
          return parse_quotable_robust(&block)
        else
          row = line.split(@split_column_separator, -1)
          n_columns = row.size
          i = 0
          while i < n_columns
            column = row[i]
            if column.empty?
              row[i] = nil
            else
              n_quotes = column.count(@quote_character)
              if n_quotes.zero?
                # no quote
              elsif n_quotes == 2 and
                   column.start_with?(@quote_character) and
                   column.end_with?(@quote_character)
                # Simple "quoted" field: drop the surrounding quotes.
                row[i] = column[1..-2]
              else
                @scanner.keep_back
                @need_robust_parsing = true
                return parse_quotable_robust(&block)
              end
            end
            i += 1
          end
        end
        @scanner.keep_drop
        @scanner.keep_start
        @last_line = original_line
        emit_row(row, &block)
      end
      @scanner.keep_drop
    end

    # Full parser: scans field by field and handles quoted fields (including
    # embedded separators and line breaks), stripping, liberal parsing and
    # multi character separators. Raises MalformedCSVError for broken input
    # after skipping the rest of the broken line.
    def parse_quotable_robust(&block)
      row = []
      skip_needless_lines
      start_row
      while true
        # Set by the parse_*_column_value helpers; used below to pick the
        # error message.
        @quoted_column_value = false
        @unquoted_column_value = false
        @scanner.scan_all(@strip_value) if @strip_value
        value = parse_column_value
        if value
          @scanner.scan_all(@strip_value) if @strip_value
          # NOTE(review): a field whose size equals the limit is rejected
          # too (>=). Confirm this matches the documented meaning of
          # :field_size_limit.
          if @field_size_limit and value.size >= @field_size_limit
            ignore_broken_line
            raise MalformedCSVError.new("Field size exceeded", @lineno)
          end
        end
        if parse_column_end
          row << value
        elsif parse_row_end
          if row.empty? and value.nil?
            # Blank line.
            emit_row([], &block) unless @skip_blanks
          else
            row << value
            emit_row(row, &block)
            row = []
          end
          skip_needless_lines
          start_row
        elsif @scanner.eos?
          # Last row without a trailing row separator.
          break if row.empty? and value.nil?
          row << value
          emit_row(row, &block)
          break
        else
          # Neither a separator nor the end of input follows the field:
          # the input is malformed.
          if @quoted_column_value
            ignore_broken_line
            message = "Any value after quoted field isn't allowed"
            raise MalformedCSVError.new(message, @lineno)
          elsif @unquoted_column_value and
                (new_line = @scanner.scan(@cr_or_lf))
            ignore_broken_line
            message = "Unquoted fields do not allow new line " +
                      "<#{new_line.inspect}>"
            raise MalformedCSVError.new(message, @lineno)
          elsif @scanner.rest.start_with?(@quote_character)
            ignore_broken_line
            message = "Illegal quoting"
            raise MalformedCSVError.new(message, @lineno)
          elsif (new_line = @scanner.scan(@cr_or_lf))
            ignore_broken_line
            message = "New line must be <#{@row_separator.inspect}> " +
                      "not <#{new_line.inspect}>"
            raise MalformedCSVError.new(message, @lineno)
          else
            ignore_broken_line
            raise MalformedCSVError.new("TODO: Meaningful message",
                                        @lineno)
          end
        end
      end
    end

    # Parses one field at the current position and returns its value, or
    # nil when there is no field content.
    #
    # With liberal parsing, text following a quoted part is accepted and the
    # field is returned with its quotes kept. Otherwise quoted and unquoted
    # parsing are tried in the order suggested by the @may_quoted heuristic.
    def parse_column_value
      if @liberal_parsing
        quoted_value = parse_quoted_column_value
        if quoted_value
          @scanner.scan_all(@strip_value) if @strip_value
          unquoted_value = parse_unquoted_column_value
          if unquoted_value
            if @double_quote_outside_quote
              unquoted_value = unquoted_value.gsub(@quote_character * 2,
                                                   @quote_character)
              if quoted_value.empty? # %Q{""...} case
                return @quote_character + unquoted_value
              end
            end
            @quote_character + quoted_value + @quote_character + unquoted_value
          else
            quoted_value
          end
        else
          parse_unquoted_column_value
        end
      elsif @may_quoted
        parse_quoted_column_value ||
          parse_unquoted_column_value
      else
        parse_unquoted_column_value ||
          parse_quoted_column_value
      end
    end

    # Scans an unquoted field. Returns the value or nil when nothing
    # matches; sets @unquoted_column_value on success.
    #
    # @unquoted_value stops at the first character of the column separator.
    # For a multi character separator that character may still belong to
    # the value, so the loop peeks for the complete separator and otherwise
    # appends the separator-start characters and keeps scanning.
    def parse_unquoted_column_value
      value = @scanner.scan_all(@unquoted_value)
      return nil unless value

      @unquoted_column_value = true
      if @first_column_separators
        while true
          # Peek: does the whole column separator follow? The position is
          # always restored.
          @scanner.keep_start
          is_column_end = @column_ends.all? do |column_end|
            @scanner.scan(column_end)
          end
          @scanner.keep_back
          break if is_column_end
          sub_separator = @scanner.scan_all(@first_column_separators)
          break if sub_separator.nil?
          value << sub_separator
          sub_value = @scanner.scan_all(@unquoted_value)
          break if sub_value.nil?
          value << sub_value
        end
      end
      value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
      if @rstrip_value
        value.gsub!(@rstrip_value, "")
      end
      value
    end

    # Scans a quoted field. Returns the unquoted value, or nil when the
    # field does not start with a quote; sets @quoted_column_value on
    # success. Raises MalformedCSVError when the closing quote is missing.
    #
    # Quotes are consumed in runs. In the leading run one quote opens the
    # field; inside the field a doubled quote stands for one literal quote.
    def parse_quoted_column_value
      quotes = @scanner.scan_all(@quotes)
      return nil unless quotes

      @quoted_column_value = true
      n_quotes = quotes.size
      if (n_quotes % 2).zero?
        # Even run: opening quote, (n - 2) / 2 escaped quotes, closing
        # quote. The field is complete.
        quotes[0, (n_quotes - 2) / 2]
      else
        # Odd run: opening quote followed by (n - 1) / 2 escaped quotes.
        value = quotes[0, (n_quotes - 1) / 2]
        while true
          quoted_value = @scanner.scan_all(@quoted_value)
          value << quoted_value if quoted_value
          if @backslash_quote
            if @scanner.scan(@escaped_backslash)
              # Backslash-quote yields a quote; a lone backslash is kept.
              if @scanner.scan(@escaped_quote)
                value << @quote_character
              else
                value << @backslash_character
              end
              next
            end
          end

          quotes = @scanner.scan_all(@quotes)
          unless quotes
            ignore_broken_line
            message = "Unclosed quoted field"
            raise MalformedCSVError.new(message, @lineno)
          end
          n_quotes = quotes.size
          if n_quotes == 1
            # Closing quote.
            break
          elsif (n_quotes % 2) == 1
            # Escaped quotes followed by the closing quote.
            value << quotes[0, (n_quotes - 1) / 2]
            break
          else
            # Escaped quotes only: the field continues.
            value << quotes[0, n_quotes / 2]
          end
        end
        value
      end
    end

    # Consumes a column separator at the current position. Returns true when
    # one was consumed. For multi character separators the separator is also
    # tried character by character, rewinding when only a part matches.
    def parse_column_end
      return true if @scanner.scan(@column_end)
      return false unless @column_ends

      @scanner.keep_start
      if @column_ends.all? {|column_end| @scanner.scan(column_end)}
        @scanner.keep_drop
        true
      else
        @scanner.keep_back
        false
      end
    end

    # Consumes a row separator at the current position. Returns true when
    # one was consumed. For multi character separators the separator is also
    # tried character by character, rewinding when only a part matches.
    def parse_row_end
      return true if @scanner.scan(@row_end)
      return false unless @row_ends
      @scanner.keep_start
      if @row_ends.all? {|row_end| @scanner.scan(row_end)}
        @scanner.keep_drop
        true
      else
        @scanner.keep_back
        false
      end
    end

    # Strips +value+ according to :strip and returns the result. With a
    # String :strip, leading and trailing occurrences of that character are
    # removed; otherwise String#strip! is applied (mutating +value+).
    # Returns +value+ untouched when stripping is disabled.
    def strip_value(value)
      return value unless @strip
      return nil if value.nil?

      case @strip
      when String
        size = value.size
        while value.start_with?(@strip)
          size -= 1
          value = value[1, size]
        end
        while value.end_with?(@strip)
          size -= 1
          value = value[0, size]
        end
      else
        value.strip!
      end
      value
    end

    # Skips the remainder of the current line (up to and including a run of
    # CR/LF characters) and counts it as a line. Called before raising
    # MalformedCSVError.
    def ignore_broken_line
      @scanner.scan_all(@not_line_end)
      @scanner.scan_all(@cr_or_lf)
      @lineno += 1
    end

    # Marks the start of a new row for #last_line: discards the previous
    # row's keep, unless @last_line is already set (it is then only
    # cleared), and starts a new keep at the current position.
    def start_row
      if @last_line
        @last_line = nil
      else
        @scanner.keep_drop
      end
      @scanner.keep_start
    end

    # Counts the row, applies header handling and field converters and
    # yields the result.
    #
    # With headers in use, the first row seen while @headers is nil becomes
    # the headers; it is only yielded (as a header Row) when
    # :return_headers is set. Later rows are yielded as Row objects;
    # without headers the converted field Array is yielded.
    def emit_row(row, &block)
      @lineno += 1

      raw_row = row
      if @use_headers
        if @headers.nil?
          @headers = adjust_headers(row)
          return unless @return_headers
          row = Row.new(@headers, row, true)
        else
          row = Row.new(@headers,
                        @fields_converter.convert(raw_row, @headers, @lineno))
        end
      else
        # convert fields, if needed...
        row = @fields_converter.convert(raw_row, nil, @lineno)
      end

      # inject unconverted fields and accessor, if requested...
      if @unconverted_fields and not row.respond_to?(:unconverted_fields)
        add_unconverted_fields(row, raw_row)
      end

      yield(row)
    end

    # This method injects an instance variable <tt>unconverted_fields</tt> into
    # +row+ and an accessor method for +row+ called unconverted_fields(). The
    # variable is set to the contents of +fields+. Returns +row+.
    def add_unconverted_fields(row, fields)
      class << row
        attr_reader :unconverted_fields
      end
      row.instance_variable_set(:@unconverted_fields, fields)
      row
    end
  end
end
