summaryrefslogtreecommitdiff
path: root/lib/shellwords.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/shellwords.rb')
-rw-r--r--lib/shellwords.rb216
1 files changed, 149 insertions, 67 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb
index a4ec552a7e..20a85ed9d2 100644
--- a/lib/shellwords.rb
+++ b/lib/shellwords.rb
@@ -1,61 +1,115 @@
-#
-# shellwords.rb: Manipulates strings a la UNIX Bourne shell
-#
-
+# frozen-string-literal: true
+##
+# == Manipulates strings like the UNIX Bourne shell
#
# This module manipulates strings according to the word parsing rules
# of the UNIX Bourne shell.
#
-# The shellwords() function was originally a port of shellwords.pl,
-# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001).
+# The <tt>shellwords()</tt> function was originally a port of shellwords.pl, but
+# modified to conform to {the Shell & Utilities volume of the IEEE Std 1003.1-2008, 2016
+# Edition}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
+#
+# === Usage
+#
+# You can use Shellwords to parse a string into a Bourne shell friendly Array.
+#
+# require 'shellwords'
+#
+# argv = Shellwords.split('three blind "mice"')
+# argv #=> ["three", "blind", "mice"]
+#
+# Once you've required Shellwords, you can use the #split alias
+# String#shellsplit.
+#
+# argv = "see how they run".shellsplit
+# argv #=> ["see", "how", "they", "run"]
+#
+# They treat quotes as special characters, so an unmatched quote will
+# cause an ArgumentError.
+#
+# argv = "they all ran after the farmer's wife".shellsplit
+# #=> ArgumentError: Unmatched quote: ...
#
-# Authors:
-# - Wakou Aoyama
-# - Akinori MUSHA <knu@iDaemons.org>
+# Shellwords also provides methods that do the opposite.
+# Shellwords.escape, or its alias, String#shellescape, escapes
+# shell metacharacters in a string for use in a command line.
#
-# Contact:
-# - Akinori MUSHA <knu@iDaemons.org> (current maintainer)
+# filename = "special's.txt"
#
+# system("cat -- #{filename.shellescape}")
+# # runs "cat -- special\\'s.txt"
+#
+# Note the '--'. Without it, cat(1) will treat the following argument
+# as a command line option if it starts with '-'. It is guaranteed
+# that Shellwords.escape converts a string to a form that a Bourne
+# shell will parse back to the original string, but it is the
+# programmer's responsibility to make sure that passing an arbitrary
+# argument to a command does no harm.
+#
+# Shellwords also comes with a core extension for Array, Array#shelljoin.
+#
+# dir = "Funny GIFs"
+# argv = %W[ls -lta -- #{dir}]
+# system(argv.shelljoin + " | less")
+# # runs "ls -lta -- Funny\\ GIFs | less"
+#
+# You can use this method to build a complete command line out of an
+# array of arguments.
+#
+# === Authors
+# * Wakou Aoyama
+# * Akinori MUSHA <knu@iDaemons.org>
+#
+# === Contact
+# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
+
module Shellwords
- #
+ # The version number string.
+ VERSION = "0.2.2"
+
# Splits a string into an array of tokens in the same way the UNIX
# Bourne shell does.
#
# argv = Shellwords.split('here are "two words"')
# argv #=> ["here", "are", "two words"]
#
- # +String#shellsplit+ is a shorthand for this function.
+ # +line+ must not contain NUL characters because of nature of
+ # +exec+ system call.
+ #
+ # Note, however, that this is not a command line parser. Shell
+ # metacharacters except for the single and double quotes and
+ # backslash are not treated as such.
+ #
+ # argv = Shellwords.split('ruby my_prog.rb | less')
+ # argv #=> ["ruby", "my_prog.rb", "|", "less"]
+ #
+ # String#shellsplit is a shortcut for this function.
#
# argv = 'here are "two words"'.shellsplit
# argv #=> ["here", "are", "two words"]
- #
def shellsplit(line)
- line = String.new(line) rescue
- raise(ArgumentError, "Argument must be a string")
- line.lstrip!
words = []
- until line.empty?
- field = ''
- loop do
- if line.sub!(/\A"(([^"\\]|\\.)*)"/, '') then
- snippet = $1.gsub(/\\(.)/, '\1')
- elsif line =~ /\A"/ then
- raise ArgumentError, "Unmatched double quote: #{line}"
- elsif line.sub!(/\A'([^']*)'/, '') then
- snippet = $1
- elsif line =~ /\A'/ then
- raise ArgumentError, "Unmatched single quote: #{line}"
- elsif line.sub!(/\A\\(.)?/, '') then
- snippet = $1 || '\\'
- elsif line.sub!(/\A([^\s\\'"]+)/, '') then
- snippet = $1
- else
- line.lstrip!
- break
- end
- field.concat(snippet)
+ field = String.new
+ line.scan(/\G\s*(?>([^\0\s\\\'\"]+)|'([^\0\']*)'|"((?:[^\0\"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do
+ |word, sq, dq, esc, garbage, sep|
+ if garbage
+ b = $~.begin(0)
+ line = $~[0]
+ line = "..." + line if b > 0
+ raise ArgumentError, "#{garbage == "\0" ? 'Nul character' : 'Unmatched quote'} at #{b}: #{line}"
+ end
+ # 2.2.3 Double-Quotes:
+ #
+ # The <backslash> shall retain its special meaning as an
+ # escape character only when followed by one of the following
+ # characters when considered special:
+ #
+ # $ ` " \ <newline>
+ field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))
+ if sep
+ words << field
+ field = String.new
end
- words.push(field)
end
words
end
@@ -68,35 +122,57 @@ module Shellwords
alias split shellsplit
end
- #
# Escapes a string so that it can be safely used in a Bourne shell
- # command line.
+ # command line. +str+ can be a non-string object that responds to
+ # +to_s+.
+ #
+ # +str+ must not contain NUL characters because of nature of +exec+
+ # system call.
#
# Note that a resulted string should be used unquoted and is not
# intended for use in double quotes nor in single quotes.
#
- # open("| grep #{Shellwords.escape(pattern)} file") { |pipe|
- # # ...
- # }
+ # argv = Shellwords.escape("It's better to give than to receive")
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
+ #
+ # String#shellescape is a shorthand for this function.
#
- # +String#shellescape+ is a shorthand for this function.
+ # argv = "It's better to give than to receive".shellescape
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
#
- # open("| grep #{pattern.shellescape} file") { |pipe|
- # # ...
+ # # Search files in lib for method definitions
+ # pattern = "^[ \t]*def "
+ # open("| grep -Ern -e #{pattern.shellescape} lib") { |grep|
+ # grep.each_line { |line|
+ # file, lineno, matched_line = line.split(':', 3)
+ # # ...
+ # }
# }
#
+ # It is the caller's responsibility to encode the string in the right
+ # encoding for the shell environment where this string is used.
+ #
+ # Multibyte characters are treated as multibyte characters, not as bytes.
+ #
+ # Returns an empty quoted String if +str+ has a length of zero.
def shellescape(str)
+ str = str.to_s
+
# An empty argument will be skipped, so return empty quotes.
- return "''" if str.empty?
+ return "''".dup if str.empty?
+
+ # Shellwords cannot contain NUL characters.
+ raise ArgumentError, "NUL character" if str.index("\0")
str = str.dup
- # Process as a single byte sequence because not all shell
- # implementations are multibyte aware.
- str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1")
+ # Treat multibyte characters as is. It is the caller's responsibility
+ # to encode the string in the right encoding for the shell
+ # environment.
+ str.gsub!(/[^A-Za-z0-9_\-.,:+\/@\n]/, "\\\\\\&")
# A LF cannot be escaped with a backslash because a backslash + LF
- # combo is regarded as line continuation and simply ignored.
+ # combo is regarded as a line continuation and simply ignored.
str.gsub!(/\n/, "'\n'")
return str
@@ -108,19 +184,26 @@ module Shellwords
alias escape shellescape
end
+ # Builds a command line string from an argument list, +array+.
#
- # Builds a command line string from an argument list +array+ joining
- # all elements escaped for Bourne shell and separated by a space.
+ # All elements are joined into a single string with fields separated by a
+ # space, where each element is escaped for the Bourne shell and stringified
+ # using +to_s+.
+ # See also Shellwords.shellescape.
#
- # open('|' + Shellwords.join(['grep', pattern, *files])) { |pipe|
- # # ...
- # }
+ # ary = ["There's", "a", "time", "and", "place", "for", "everything"]
+ # argv = Shellwords.join(ary)
+ # argv #=> "There\\'s a time and place for everything"
#
- # +Array#shelljoin+ is a shorthand for this function.
+ # Array#shelljoin is a shortcut for this function.
#
- # open('|' + ['grep', pattern, *files].shelljoin) { |pipe|
- # # ...
- # }
+ # ary = ["Don't", "rock", "the", "boat"]
+ # argv = ary.shelljoin
+ # argv #=> "Don\\'t rock the boat"
+ #
+ # You can also mix non-string objects in the elements as allowed in Array#join.
+ #
+ # output = `#{['ps', '-p', $$].shelljoin}`
#
def shelljoin(array)
array.map { |arg| shellescape(arg) }.join(' ')
@@ -134,38 +217,37 @@ module Shellwords
end
class String
- #
# call-seq:
# str.shellsplit => array
#
# Splits +str+ into an array of tokens in the same way the UNIX
- # Bourne shell does. See +Shellwords::shellsplit+ for details.
+ # Bourne shell does.
#
+ # See Shellwords.shellsplit for details.
def shellsplit
Shellwords.split(self)
end
- #
# call-seq:
# str.shellescape => string
#
# Escapes +str+ so that it can be safely used in a Bourne shell
- # command line. See +Shellwords::shellescape+ for details.
+ # command line.
#
+ # See Shellwords.shellescape for details.
def shellescape
Shellwords.escape(self)
end
end
class Array
- #
# call-seq:
# array.shelljoin => string
#
# Builds a command line string from an argument list +array+ joining
- # all elements escaped for Bourne shell and separated by a space.
- # See +Shellwords::shelljoin+ for details.
+ # all elements escaped for the Bourne shell and separated by a space.
#
+ # See Shellwords.shelljoin for details.
def shelljoin
Shellwords.join(self)
end