diff options
Diffstat (limited to 'doc/csv/options/parsing')
-rw-r--r-- | doc/csv/options/parsing/converters.rdoc | 45 | ||||
-rw-r--r-- | doc/csv/options/parsing/empty_value.rdoc | 13 | ||||
-rw-r--r-- | doc/csv/options/parsing/field_size_limit.rdoc | 39 | ||||
-rw-r--r-- | doc/csv/options/parsing/header_converters.rdoc | 31 | ||||
-rw-r--r-- | doc/csv/options/parsing/headers.rdoc | 63 | ||||
-rw-r--r-- | doc/csv/options/parsing/liberal_parsing.rdoc | 19 | ||||
-rw-r--r-- | doc/csv/options/parsing/nil_value.rdoc | 12 | ||||
-rw-r--r-- | doc/csv/options/parsing/return_headers.rdoc | 22 | ||||
-rw-r--r-- | doc/csv/options/parsing/skip_blanks.rdoc | 31 | ||||
-rw-r--r-- | doc/csv/options/parsing/skip_lines.rdoc | 37 | ||||
-rw-r--r-- | doc/csv/options/parsing/strip.rdoc | 15 | ||||
-rw-r--r-- | doc/csv/options/parsing/unconverted_fields.rdoc | 27 |
12 files changed, 354 insertions, 0 deletions
diff --git a/doc/csv/options/parsing/converters.rdoc b/doc/csv/options/parsing/converters.rdoc new file mode 100644 index 0000000000..993803c5d0 --- /dev/null +++ b/doc/csv/options/parsing/converters.rdoc @@ -0,0 +1,45 @@ +====== Option +converters+ + +Specifies a single field converter name or \Proc, +or an \Array of field converter names and Procs. + +See {Field Converters}[#class-CSV-label-Field+Converters] + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:converters) # => nil + +The value may be a single field converter name: + str = '1,2,3' + # Without a converter + ary = CSV.parse_line(str) + ary # => ["1", "2", "3"] + # With built-in converter :integer + ary = CSV.parse_line(str, converters: :integer) + ary # => [1, 2, 3] + +The value may be an \Array of field converter names: + str = '1,3.14159' + # Without converters + ary = CSV.parse_line(str) + ary # => ["1", "3.14159"] + # With built-in converters + ary = CSV.parse_line(str, converters: [:integer, :float]) + ary # => [1, 3.14159] + +The value may be a \Proc custom converter: + str = ' foo , bar , baz ' + # Without a converter + ary = CSV.parse_line(str) + ary # => [" foo ", " bar ", " baz "] + # With a custom converter + ary = CSV.parse_line(str, converters: proc {|field| field.strip }) + ary # => ["foo", "bar", "baz"] + +See also {Custom Converters}[#class-CSV-label-Custom+Converters] + +--- + +Raises an exception if the converter is not a converter name or a \Proc: + str = 'foo,0' + # Raises NoMethodError (undefined method `arity' for nil:NilClass) + CSV.parse(str, converters: :foo) diff --git a/doc/csv/options/parsing/empty_value.rdoc b/doc/csv/options/parsing/empty_value.rdoc new file mode 100644 index 0000000000..7d3bcc078c --- /dev/null +++ b/doc/csv/options/parsing/empty_value.rdoc @@ -0,0 +1,13 @@ +====== Option +empty_value+ + +Specifies the object that is to be substituted +for each field that has an empty \String. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:empty_value) # => "" (empty string) + +With the default, <tt>""</tt>: + CSV.parse_line('a,"",b,"",c') # => ["a", "", "b", "", "c"] + +With a different object: + CSV.parse_line('a,"",b,"",c', empty_value: 'x') # => ["a", "x", "b", "x", "c"] diff --git a/doc/csv/options/parsing/field_size_limit.rdoc b/doc/csv/options/parsing/field_size_limit.rdoc new file mode 100644 index 0000000000..797c5776fc --- /dev/null +++ b/doc/csv/options/parsing/field_size_limit.rdoc @@ -0,0 +1,39 @@ +====== Option +field_size_limit+ + +Specifies the \Integer field size limit. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:field_size_limit) # => nil + +This is a maximum size CSV will read ahead looking for the closing quote for a field. +(In truth, it reads to the first line ending beyond this size.) +If a quote cannot be found within the limit CSV will raise a MalformedCSVError, +assuming the data is faulty. +You can use this limit to prevent what are effectively DoS attacks on the parser. +However, this limit can cause a legitimate parse to fail; +therefore the default value is +nil+ (no limit). + +For the examples in this section: + str = <<~EOT + "a","b" + " + 2345 + ","" + EOT + str # => "\"a\",\"b\"\n\"\n2345\n\",\"\"\n" + +Using the default +nil+: + ary = CSV.parse(str) + ary # => [["a", "b"], ["\n2345\n", ""]] + +Using <tt>50</tt>: + field_size_limit = 50 + ary = CSV.parse(str, field_size_limit: field_size_limit) + ary # => [["a", "b"], ["\n2345\n", ""]] + +--- + +Raises an exception if a field is too long: + big_str = "123456789\n" * 1024 + # Raises CSV::MalformedCSVError (Field size exceeded in line 1.) + CSV.parse('valid,fields,"' + big_str + '"', field_size_limit: 2048) diff --git a/doc/csv/options/parsing/header_converters.rdoc b/doc/csv/options/parsing/header_converters.rdoc new file mode 100644 index 0000000000..329d96a897 --- /dev/null +++ b/doc/csv/options/parsing/header_converters.rdoc @@ -0,0 +1,31 @@ +====== Option +header_converters+ + +Specifies a \String converter name or an \Array of converter names. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:header_converters) # => nil + +Identical in functionality to option {converters}[#class-CSV-label-Option+converters] +except that: +- The converters apply only to the header row. +- The built-in header converters are +:downcase+ and +:symbol+. + +Examples: + str = <<-EOT + foo,0 + bar,1 + baz,2 + EOT + headers = ['Name', 'Value'] + # With no header converter + csv = CSV.parse(str, headers: headers) + csv.headers # => ["Name", "Value"] + # With header converter :downcase + csv = CSV.parse(str, headers: headers, header_converters: :downcase) + csv.headers # => ["name", "value"] + # With header converter :symbol + csv = CSV.parse(str, headers: headers, header_converters: :symbol) + csv.headers # => [:name, :value] + # With both + csv = CSV.parse(str, headers: headers, header_converters: [:downcase, :symbol]) + csv.headers # => [:name, :value] diff --git a/doc/csv/options/parsing/headers.rdoc b/doc/csv/options/parsing/headers.rdoc new file mode 100644 index 0000000000..0ea151f24b --- /dev/null +++ b/doc/csv/options/parsing/headers.rdoc @@ -0,0 +1,63 @@ +====== Option +headers+ + +Specifies a boolean, \Symbol, \Array, or \String to be used +to define column headers. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:headers) # => false + +--- + +Without +headers+: + str = <<-EOT + Name,Count + foo,0 + bar,1 + bax,2 + EOT + csv = CSV.new(str) + csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + csv.headers # => nil + csv.shift # => ["Name", "Count"] + +--- + +If set to +true+ or the \Symbol +:first_row+, +the first row of the data is treated as a row of headers: + str = <<-EOT + Name,Count + foo,0 + bar,1 + bax,2 + EOT + csv = CSV.new(str, headers: true) + csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:2 col_sep:"," row_sep:"\n" quote_char:"\"" headers:["Name", "Count"]> + csv.headers # => ["Name", "Count"] + csv.shift # => #<CSV::Row "Name":"bar" "Count":"1"> + +--- + +If set to an \Array, the \Array elements are treated as headers: + str = <<-EOT + foo,0 + bar,1 + bax,2 + EOT + csv = CSV.new(str, headers: ['Name', 'Count']) + csv + csv.headers # => ["Name", "Count"] + csv.shift # => #<CSV::Row "Name":"bar" "Count":"1"> + +--- + +If set to a \String +str+, method <tt>CSV::parse_line(str, options)</tt> is called +with the current +options+, and the returned \Array is treated as headers: + str = <<-EOT + foo,0 + bar,1 + bax,2 + EOT + csv = CSV.new(str, headers: 'Name,Count') + csv + csv.headers # => ["Name", "Count"] + csv.shift # => #<CSV::Row "Name":"bar" "Count":"1"> diff --git a/doc/csv/options/parsing/liberal_parsing.rdoc b/doc/csv/options/parsing/liberal_parsing.rdoc new file mode 100644 index 0000000000..b8b9b00c98 --- /dev/null +++ b/doc/csv/options/parsing/liberal_parsing.rdoc @@ -0,0 +1,19 @@ +====== Option +liberal_parsing+ + +Specifies the boolean value that determines whether +CSV will attempt to parse input not conformant with RFC 4180, +such as double quotes in unquoted fields. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:liberal_parsing) # => false + +For examples in this section: + str = 'is,this "three, or four",fields' + +Without +liberal_parsing+: + # Raises CSV::MalformedCSVError (Illegal quoting in str 1.) + CSV.parse_line(str) + +With +liberal_parsing+: + ary = CSV.parse_line(str, liberal_parsing: true) + ary # => ["is", "this \"three", " or four\"", "fields"] diff --git a/doc/csv/options/parsing/nil_value.rdoc b/doc/csv/options/parsing/nil_value.rdoc new file mode 100644 index 0000000000..412e8795e8 --- /dev/null +++ b/doc/csv/options/parsing/nil_value.rdoc @@ -0,0 +1,12 @@ +====== Option +nil_value+ + +Specifies the object that is to be substituted for each null (no-text) field. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:nil_value) # => nil + +With the default, +nil+: + CSV.parse_line('a,,b,,c') # => ["a", nil, "b", nil, "c"] + +With a different object: + CSV.parse_line('a,,b,,c', nil_value: 0) # => ["a", 0, "b", 0, "c"] diff --git a/doc/csv/options/parsing/return_headers.rdoc b/doc/csv/options/parsing/return_headers.rdoc new file mode 100644 index 0000000000..45d2e3f3de --- /dev/null +++ b/doc/csv/options/parsing/return_headers.rdoc @@ -0,0 +1,22 @@ +====== Option +return_headers+ + +Specifies the boolean that determines whether method #shift +returns or ignores the header row. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:return_headers) # => false + +Examples: + str = <<-EOT + Name,Count + foo,0 + bar,1 + bax,2 + EOT + # Without return_headers first row is str. + csv = CSV.new(str, headers: true) + csv.shift # => #<CSV::Row "Name":"foo" "Count":"0"> + # With return_headers first row is headers. + csv = CSV.new(str, headers: true, return_headers: true) + csv.shift # => #<CSV::Row "Name":"Name" "Count":"Count"> + diff --git a/doc/csv/options/parsing/skip_blanks.rdoc b/doc/csv/options/parsing/skip_blanks.rdoc new file mode 100644 index 0000000000..2c8f7b7bb8 --- /dev/null +++ b/doc/csv/options/parsing/skip_blanks.rdoc @@ -0,0 +1,31 @@ +====== Option +skip_blanks+ + +Specifies a boolean that determines whether blank lines in the input will be ignored; +a line that contains a column separator is not considered to be blank. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:skip_blanks) # => false + +See also option {skiplines}[#class-CSV-label-Option+skip_lines]. + +For examples in this section: + str = <<-EOT + foo,0 + + bar,1 + baz,2 + + , + EOT + +Using the default, +false+: + ary = CSV.parse(str) + ary # => [["foo", "0"], [], ["bar", "1"], ["baz", "2"], [], [nil, nil]] + +Using +true+: + ary = CSV.parse(str, skip_blanks: true) + ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] + +Using a truthy value: + ary = CSV.parse(str, skip_blanks: :foo) + ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] diff --git a/doc/csv/options/parsing/skip_lines.rdoc b/doc/csv/options/parsing/skip_lines.rdoc new file mode 100644 index 0000000000..1481c40a5f --- /dev/null +++ b/doc/csv/options/parsing/skip_lines.rdoc @@ -0,0 +1,37 @@ +====== Option +skip_lines+ + +Specifies an object to use in identifying comment lines in the input that are to be ignored: +* If a \Regexp, ignores lines that match it. +* If a \String, converts it to a \Regexp, ignores lines that match it. +* If +nil+, no lines are considered to be comments. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:skip_lines) # => nil + +For examples in this section: + str = <<-EOT + # Comment + foo,0 + bar,1 + baz,2 + # Another comment + EOT + str # => "# Comment\nfoo,0\nbar,1\nbaz,2\n# Another comment\n" + +Using the default, +nil+: + ary = CSV.parse(str) + ary # => [["# Comment"], ["foo", "0"], ["bar", "1"], ["baz", "2"], ["# Another comment"]] + +Using a \Regexp: + ary = CSV.parse(str, skip_lines: /^#/) + ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + +Using a \String: + ary = CSV.parse(str, skip_lines: '#') + ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + +--- + +Raises an exception if given an object that is not a \Regexp, a \String, or +nil+: + # Raises ArgumentError (:skip_lines has to respond to #match: 0) + CSV.parse(str, skip_lines: 0) diff --git a/doc/csv/options/parsing/strip.rdoc b/doc/csv/options/parsing/strip.rdoc new file mode 100644 index 0000000000..56ae4310c3 --- /dev/null +++ b/doc/csv/options/parsing/strip.rdoc @@ -0,0 +1,15 @@ +====== Option +strip+ + +Specifies the boolean value that determines whether +whitespace is stripped from each input field. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:strip) # => false + +With default value +false+: + ary = CSV.parse_line(' a , b ') + ary # => [" a ", " b "] + +With value +true+: + ary = CSV.parse_line(' a , b ', strip: true) + ary # => ["a", "b"] diff --git a/doc/csv/options/parsing/unconverted_fields.rdoc b/doc/csv/options/parsing/unconverted_fields.rdoc new file mode 100644 index 0000000000..3e7f839d49 --- /dev/null +++ b/doc/csv/options/parsing/unconverted_fields.rdoc @@ -0,0 +1,27 @@ +====== Option +unconverted_fields+ + +Specifies the boolean that determines whether unconverted field values are to be available. + +Default value: + CSV::DEFAULT_OPTIONS.fetch(:unconverted_fields) # => nil + +The unconverted field values are those found in the source data, +prior to any conversions performed via option +converters+. + +When option +unconverted_fields+ is +true+, +each returned row (\Array or \CSV::Row) has an added method, ++unconverted_fields+, that returns the unconverted field values: + str = <<-EOT + foo,0 + bar,1 + baz,2 + EOT + # Without unconverted_fields + csv = CSV.parse(str, converters: :integer) + csv # => [["foo", 0], ["bar", 1], ["baz", 2]] + csv.first.respond_to?(:unconverted_fields) # => false + # With unconverted_fields + csv = CSV.parse(str, converters: :integer, unconverted_fields: true) + csv # => [["foo", 0], ["bar", 1], ["baz", 2]] + csv.first.respond_to?(:unconverted_fields) # => true + csv.first.unconverted_fields # => ["foo", "0"] |