summary | refs | log | tree | commit | diff
path: root/lib/shellwords.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/shellwords.rb')
-rw-r--r-- lib/shellwords.rb 191
1 files changed, 125 insertions, 66 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb
index a4ec552a7e..eb5fa2d226 100644
--- a/lib/shellwords.rb
+++ b/lib/shellwords.rb
@@ -1,61 +1,99 @@
-#
-# shellwords.rb: Manipulates strings a la UNIX Bourne shell
-#
-
+# frozen-string-literal: true
+##
+# == Manipulates strings like the UNIX Bourne shell
#
# This module manipulates strings according to the word parsing rules
# of the UNIX Bourne shell.
#
# The shellwords() function was originally a port of shellwords.pl,
-# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001).
+# but modified to conform to the Shell & Utilities volume of the IEEE
+# Std 1003.1-2008, 2016 Edition [1].
+#
+# === Usage
+#
+# You can use Shellwords to parse a string into a Bourne shell friendly Array.
+#
+# require 'shellwords'
+#
+# argv = Shellwords.split('three blind "mice"')
+# argv #=> ["three", "blind", "mice"]
+#
+# Once you've required Shellwords, you can also use String#shellsplit,
+# which is a shortcut for Shellwords.split.
+#
+# argv = "see how they run".shellsplit
+# argv #=> ["see", "how", "they", "run"]
+#
+# Be careful you don't leave a quote unmatched.
#
-# Authors:
-# - Wakou Aoyama
-# - Akinori MUSHA <knu@iDaemons.org>
+# argv = "they all ran after the farmer's wife".shellsplit
+# #=> ArgumentError: Unmatched double quote: ...
#
-# Contact:
-# - Akinori MUSHA <knu@iDaemons.org> (current maintainer)
+# In this case, you might want to use Shellwords.escape, or its alias
+# String#shellescape.
#
+# This method will escape the String for you to safely use with a Bourne shell.
+#
+# argv = Shellwords.escape("special's.txt")
+# argv #=> "special\\'s.txt"
+# system("cat " + argv)
+#
+# Shellwords also comes with a core extension for Array, Array#shelljoin.
+#
+# argv = %w{ls -lta lib}
+# system(argv.shelljoin)
+#
+# You can use this method to create an escaped string out of an array of
+# tokens, with the tokens separated by a space. In this example we used the
+# %w literal as a shortcut for building the Array of Strings.
+#
+# === Authors
+# * Wakou Aoyama
+# * Akinori MUSHA <knu@iDaemons.org>
+#
+# === Contact
+# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
+#
+# === Resources
+#
+# 1: {IEEE Std 1003.1-2008, 2016 Edition, the Shell & Utilities volume}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
+
module Shellwords
- #
# Splits a string into an array of tokens in the same way the UNIX
# Bourne shell does.
#
# argv = Shellwords.split('here are "two words"')
# argv #=> ["here", "are", "two words"]
#
- # +String#shellsplit+ is a shorthand for this function.
+ # Note, however, that this is not a command line parser. Shell
+ # metacharacters except for the single and double quotes and
+ # backslash are not treated as such.
+ #
+ # argv = Shellwords.split('ruby my_prog.rb | less')
+ # argv #=> ["ruby", "my_prog.rb", "|", "less"]
+ #
+ # String#shellsplit is a shortcut for this function.
#
# argv = 'here are "two words"'.shellsplit
# argv #=> ["here", "are", "two words"]
- #
def shellsplit(line)
- line = String.new(line) rescue
- raise(ArgumentError, "Argument must be a string")
- line.lstrip!
words = []
- until line.empty?
- field = ''
- loop do
- if line.sub!(/\A"(([^"\\]|\\.)*)"/, '') then
- snippet = $1.gsub(/\\(.)/, '\1')
- elsif line =~ /\A"/ then
- raise ArgumentError, "Unmatched double quote: #{line}"
- elsif line.sub!(/\A'([^']*)'/, '') then
- snippet = $1
- elsif line =~ /\A'/ then
- raise ArgumentError, "Unmatched single quote: #{line}"
- elsif line.sub!(/\A\\(.)?/, '') then
- snippet = $1 || '\\'
- elsif line.sub!(/\A([^\s\\'"]+)/, '') then
- snippet = $1
- else
- line.lstrip!
- break
- end
- field.concat(snippet)
+ field = String.new
+ line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do
+ |word, sq, dq, esc, garbage, sep|
+ raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage
+ # 2.2.3 Double-Quotes:
+ #
+ # The <backslash> shall retain its special meaning as an
+ # escape character only when followed by one of the following
+ # characters when considered special:
+ #
+ # $ ` " \ <newline>
+ field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))
+ if sep
+ words << field
+ field = String.new
end
- words.push(field)
end
words
end
@@ -68,35 +106,51 @@ module Shellwords
alias split shellsplit
end
- #
# Escapes a string so that it can be safely used in a Bourne shell
- # command line.
+ # command line. +str+ can be a non-string object that responds to
+ # +to_s+.
#
# Note that the resulting string should be used unquoted and is not
# intended for use in double quotes or in single quotes.
#
- # open("| grep #{Shellwords.escape(pattern)} file") { |pipe|
- # # ...
- # }
+ # argv = Shellwords.escape("It's better to give than to receive")
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
+ #
+ # String#shellescape is a shorthand for this function.
#
- # +String#shellescape+ is a shorthand for this function.
+ # argv = "It's better to give than to receive".shellescape
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
#
- # open("| grep #{pattern.shellescape} file") { |pipe|
- # # ...
+ # # Search files in lib for method definitions
+ # pattern = "^[ \t]*def "
+ # open("| grep -Ern #{pattern.shellescape} lib") { |grep|
+ # grep.each_line { |line|
+ # file, lineno, matched_line = line.split(':', 3)
+ # # ...
+ # }
# }
#
+ # It is the caller's responsibility to encode the string in the right
+ # encoding for the shell environment where this string is used.
+ #
+ # Multibyte characters are treated as multibyte characters, not as bytes.
+ #
+ # Returns an empty quoted String if +str+ has a length of zero.
def shellescape(str)
+ str = str.to_s
+
# An empty argument will be skipped, so return empty quotes.
- return "''" if str.empty?
+ return "''".dup if str.empty?
str = str.dup
- # Process as a single byte sequence because not all shell
- # implementations are multibyte aware.
- str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1")
+ # Treat multibyte characters as is. It is the caller's responsibility
+ # to encode the string in the right encoding for the shell
+ # environment.
+ str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1")
# A LF cannot be escaped with a backslash because a backslash + LF
- # combo is regarded as line continuation and simply ignored.
+ # combo is regarded as a line continuation and simply ignored.
str.gsub!(/\n/, "'\n'")
return str
@@ -108,19 +162,25 @@ module Shellwords
alias escape shellescape
end
+ # Builds a command line string from an argument list, +array+.
#
- # Builds a command line string from an argument list +array+ joining
- # all elements escaped for Bourne shell and separated by a space.
+ # All elements are joined into a single string with fields separated by a
+ # space, where each element is escaped for the Bourne shell and stringified
+ # using +to_s+.
#
- # open('|' + Shellwords.join(['grep', pattern, *files])) { |pipe|
- # # ...
- # }
+ # ary = ["There's", "a", "time", "and", "place", "for", "everything"]
+ # argv = Shellwords.join(ary)
+ # argv #=> "There\\'s a time and place for everything"
#
- # +Array#shelljoin+ is a shorthand for this function.
+ # Array#shelljoin is a shortcut for this function.
#
- # open('|' + ['grep', pattern, *files].shelljoin) { |pipe|
- # # ...
- # }
+ # ary = ["Don't", "rock", "the", "boat"]
+ # argv = ary.shelljoin
+ # argv #=> "Don\\'t rock the boat"
+ #
+ # You can also mix non-string objects in the elements as allowed in Array#join.
+ #
+ # output = `#{['ps', '-p', $$].shelljoin}`
#
def shelljoin(array)
array.map { |arg| shellescape(arg) }.join(' ')
@@ -134,38 +194,37 @@ module Shellwords
end
class String
- #
# call-seq:
# str.shellsplit => array
#
# Splits +str+ into an array of tokens in the same way the UNIX
- # Bourne shell does. See +Shellwords::shellsplit+ for details.
+ # Bourne shell does.
#
+ # See Shellwords.shellsplit for details.
def shellsplit
Shellwords.split(self)
end
- #
# call-seq:
# str.shellescape => string
#
# Escapes +str+ so that it can be safely used in a Bourne shell
- # command line. See +Shellwords::shellescape+ for details.
+ # command line.
#
+ # See Shellwords.shellescape for details.
def shellescape
Shellwords.escape(self)
end
end
class Array
- #
# call-seq:
# array.shelljoin => string
#
# Builds a command line string from an argument list +array+ joining
- # all elements escaped for Bourne shell and separated by a space.
- # See +Shellwords::shelljoin+ for details.
+ # all elements escaped for the Bourne shell and separated by a space.
#
+ # See Shellwords.shelljoin for details.
def shelljoin
Shellwords.join(self)
end