diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/csv.rb | 13 | ||||
-rw-r--r-- | lib/csv/parser.rb | 23 |
2 files changed, 30 insertions, 6 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index 42e99435cb..06a490f34c 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -330,6 +330,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
 # liberal_parsing: false,
 # nil_value: nil,
 # empty_value: "",
+# strip: false,
 # # For generating.
 # write_headers: nil,
 # quote_empty: true,
@@ -337,7 +338,6 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
 # write_converters: nil,
 # write_nil_value: nil,
 # write_empty_value: "",
-# strip: false,
 # }
 #
 # ==== Options for Parsing
@@ -355,8 +355,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
 # - +header_converters+: Specifies the header converters to be used.
 # - +skip_blanks+: Specifies whether blanks lines are to be ignored.
 # - +skip_lines+: Specifies how comments lines are to be recognized.
-# - +strip+: Specifies whether leading and trailing whitespace are
-# to be stripped from fields..
+# - +strip+: Specifies whether leading and trailing whitespace are to be
+# stripped from fields. This must be compatible with +col_sep+; if it is not,
+# then an +ArgumentError+ exception will be raised.
 # - +liberal_parsing+: Specifies whether \CSV should attempt to parse
 # non-compliant data.
 # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
@@ -935,6 +936,7 @@ class CSV
     liberal_parsing: false,
     nil_value: nil,
     empty_value: "",
+    strip: false,
     # For generating.
     write_headers: nil,
     quote_empty: true,
@@ -942,7 +944,6 @@ class CSV
     write_converters: nil,
     write_nil_value: nil,
     write_empty_value: "",
-    strip: false,
   }.freeze

   class << self
@@ -1760,11 +1761,11 @@ class CSV
                  encoding: nil,
                  nil_value: nil,
                  empty_value: "",
+                 strip: false,
                  quote_empty: true,
                  write_converters: nil,
                  write_nil_value: nil,
-                 write_empty_value: "",
-                 strip: false)
+                 write_empty_value: "")
     raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?

     if data.is_a?(String)
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 3334acfbdd..f87db3bb12 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -361,6 +361,7 @@ class CSV
       prepare_skip_lines
       prepare_strip
       prepare_separators
+      validate_strip_and_col_sep_options
       prepare_quoted
       prepare_unquoted
       prepare_line
@@ -531,6 +532,28 @@ class CSV
       @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
     end

+    # This method verifies that there are no (obvious) ambiguities with the
+    # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+
+    # and +strip+ were both equal to +\t+, then there would be no clear way to
+    # parse the input.
+    def validate_strip_and_col_sep_options
+      return unless @strip
+
+      if @strip.is_a?(String)
+        if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip)
+          raise ArgumentError,
+                "The provided strip (#{@escaped_strip}) and " \
+                "col_sep (#{@escaped_column_separator}) options are incompatible."
+        end
+      else
+        if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator)
+          raise ArgumentError,
+                "The provided strip (true) and " \
+                "col_sep (#{@escaped_column_separator}) options are incompatible."
+        end
+      end
+    end
+
     def prepare_quoted
       if @quote_character
         @quotes = Regexp.new(@escaped_quote_character +