summaryrefslogtreecommitdiff
path: root/lib/csv.rb
diff options
context:
space:
mode:
authorwatson1978 <watson1978@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-03-18 10:28:58 +0000
committerwatson1978 <watson1978@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2018-03-18 10:28:58 +0000
commitdce4a3f58c991aaa6186d1d1436482f66e10cd4b (patch)
treecb08f6780017e64139e5e2c13fe04f387b1ad7a0 /lib/csv.rb
parent01e9d9ac7ee272339a3c1c4a227f416c72db50a3 (diff)
Improve CSV performance
When the special match variables (such as $1, $&, $`, ...) are not needed, performance can be improved by using Regexp#match? or String#match? instead of Regexp#=~ or String#=~, because match? does not set those variables.

This patch applies the same idea as https://github.com/ruby/ruby/pull/1836

[Fix GH-1842]

## Environment

* OS : Ubuntu 17.10
* Compiler : gcc version 7.2.0
* CPU : Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz
* Memory : 16 GB

## TL;DR

Results are in iterations per second (higher is better).

Methods     | Before | After  | Speed up
----------- | ------ | ------ | --------
CSV.foreach | 44.825 | 48.201 | 7.5%
CSV#shift   | 45.200 | 49.584 | 9.7%
CSV.read    | 42.968 | 46.853 | 9.0%
CSV.table   | 10.933 | 11.277 | 3.1%

## Before

```
Calculating -------------------------------------
CSV.foreach 44.825 (± 0.0%) i/s - 228.000 in 5.086576s
CSV#shift 45.200 (± 0.0%) i/s - 228.000 in 5.044297s
CSV.read 42.968 (± 0.0%) i/s - 216.000 in 5.027504s
CSV.table 10.933 (± 0.0%) i/s - 55.000 in 5.031098s
```

## After

```
Calculating -------------------------------------
CSV.foreach 48.201 (± 0.0%) i/s - 244.000 in 5.062256s
CSV#shift 49.584 (± 0.0%) i/s - 248.000 in 5.001652s
CSV.read 46.853 (± 0.0%) i/s - 236.000 in 5.037044s
CSV.table 11.277 (± 0.0%) i/s - 57.000 in 5.054694s
```

## Benchmark code

```ruby
require 'csv'
require 'benchmark/ips'

CSV.open("/tmp/file.csv", "w") do |csv|
  csv << ["player", "gameA", "gameB"]
  1000.times do
    csv << ['"Alice"', "84.0", "79.5"]
    csv << ['"Bob"', "20.0", "56.5"]
  end
end

Benchmark.ips do |x|
  x.report "CSV.foreach" do
    CSV.foreach("/tmp/file.csv") do |row|
    end
  end

  x.report "CSV#shift" do
    CSV.open("/tmp/file.csv") do |csv|
      while line = csv.shift
      end
    end
  end

  x.report "CSV.read" do
    CSV.read("/tmp/file.csv")
  end

  x.report "CSV.table" do
    CSV.table("/tmp/file.csv")
  end
end
```

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62806 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/csv.rb')
-rwxr-xr-x[-rw-r--r--]lib/csv.rb14
1 files changed, 7 insertions, 7 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index 1f616803aa..732d4f1bd6 100644..100755
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -970,7 +970,7 @@ class CSV
date: lambda { |f|
begin
e = f.encode(ConverterEncoding)
- e =~ DateMatcher ? Date.parse(e) : f
+ e.match?(DateMatcher) ? Date.parse(e) : f
rescue # encoding conversion or date parse errors
f
end
@@ -978,7 +978,7 @@ class CSV
date_time: lambda { |f|
begin
e = f.encode(ConverterEncoding)
- e =~ DateTimeMatcher ? DateTime.parse(e) : f
+ e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
rescue # encoding conversion or date parse errors
f
end
@@ -1271,7 +1271,7 @@ class CSV
begin
f = File.open(filename, mode, file_opts)
rescue ArgumentError => e
- raise unless /needs binmode/ =~ e.message and mode == "r"
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
mode = "rb"
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
retry
@@ -1870,7 +1870,7 @@ class CSV
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
# extended column ends
csv.last << part[0..-2]
- if csv.last =~ @parsers[:stray_quote]
+ if csv.last.match?(@parsers[:stray_quote])
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
@@ -1888,7 +1888,7 @@ class CSV
elsif part.end_with?(@quote_char)
# regular quoted column
csv << part[1..-2]
- if csv.last =~ @parsers[:stray_quote]
+ if csv.last.match?(@parsers[:stray_quote])
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
@@ -1899,9 +1899,9 @@ class CSV
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
- elsif part =~ @parsers[:quote_or_nl]
+ elsif part.match?(@parsers[:quote_or_nl])
# Unquoted field with bad characters.
- if part =~ @parsers[:nl_or_lf]
+ if part.match?(@parsers[:nl_or_lf])
raise MalformedCSVError, "Unquoted fields do not allow " +
"\\r or \\n (line #{lineno + 1})."
else