summaryrefslogtreecommitdiff
path: root/lib/shellwords.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/shellwords.rb')
-rw-r--r--lib/shellwords.rb71
1 files changed, 47 insertions, 24 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb
index eb5fa2d226..20a85ed9d2 100644
--- a/lib/shellwords.rb
+++ b/lib/shellwords.rb
@@ -5,9 +5,9 @@
# This module manipulates strings according to the word parsing rules
# of the UNIX Bourne shell.
#
-# The shellwords() function was originally a port of shellwords.pl,
-# but modified to conform to the Shell & Utilities volume of the IEEE
-# Std 1003.1-2008, 2016 Edition [1].
+# The <tt>shellwords()</tt> function was originally a port of shellwords.pl, but
+# modified to conform to {the Shell & Utilities volume of the IEEE Std 1003.1-2008, 2016
+# Edition}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
#
# === Usage
#
@@ -24,28 +24,37 @@
# argv = "see how they run".shellsplit
# argv #=> ["see", "how", "they", "run"]
#
-# Be careful you don't leave a quote unmatched.
+# They treat quotes as special characters, so an unmatched quote will
+# cause an ArgumentError.
#
# argv = "they all ran after the farmer's wife".shellsplit
-# #=> ArgumentError: Unmatched double quote: ...
+# #=> ArgumentError: Unmatched quote: ...
#
-# In this case, you might want to use Shellwords.escape, or its alias
-# String#shellescape.
+# Shellwords also provides methods that do the opposite.
+# Shellwords.escape, or its alias, String#shellescape, escapes
+# shell metacharacters in a string for use in a command line.
#
-# This method will escape the String for you to safely use with a Bourne shell.
+# filename = "special's.txt"
#
-# argv = Shellwords.escape("special's.txt")
-# argv #=> "special\\'s.txt"
-# system("cat " + argv)
+# system("cat -- #{filename.shellescape}")
+# # runs "cat -- special\\'s.txt"
+#
+# Note the '--'. Without it, cat(1) will treat the following argument
+# as a command line option if it starts with '-'. It is guaranteed
+# that Shellwords.escape converts a string to a form that a Bourne
+# shell will parse back to the original string, but it is the
+# programmer's responsibility to make sure that passing an arbitrary
+# argument to a command does no harm.
#
# Shellwords also comes with a core extension for Array, Array#shelljoin.
#
-# argv = %w{ls -lta lib}
-# system(argv.shelljoin)
+# dir = "Funny GIFs"
+# argv = %W[ls -lta -- #{dir}]
+# system(argv.shelljoin + " | less")
+# # runs "ls -lta -- Funny\\ GIFs | less"
#
-# You can use this method to create an escaped string out of an array of tokens
-# separated by a space. In this example we used the literal shortcut for
-# Array.new.
+# You can use this method to build a complete command line out of an
+# array of arguments.
#
# === Authors
# * Wakou Aoyama
@@ -53,18 +62,20 @@
#
# === Contact
# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
-#
-# === Resources
-#
-# 1: {IEEE Std 1003.1-2008, 2016 Edition, the Shell & Utilities volume}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
module Shellwords
+ # The version number string.
+ VERSION = "0.2.2"
+
# Splits a string into an array of tokens in the same way the UNIX
# Bourne shell does.
#
# argv = Shellwords.split('here are "two words"')
# argv #=> ["here", "are", "two words"]
#
+ # +line+ must not contain NUL characters because of nature of
+ # +exec+ system call.
+ #
# Note, however, that this is not a command line parser. Shell
# metacharacters except for the single and double quotes and
# backslash are not treated as such.
@@ -79,9 +90,14 @@ module Shellwords
def shellsplit(line)
words = []
field = String.new
- line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do
+ line.scan(/\G\s*(?>([^\0\s\\\'\"]+)|'([^\0\']*)'|"((?:[^\0\"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do
|word, sq, dq, esc, garbage, sep|
- raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage
+ if garbage
+ b = $~.begin(0)
+ line = $~[0]
+ line = "..." + line if b > 0
+ raise ArgumentError, "#{garbage == "\0" ? 'Nul character' : 'Unmatched quote'} at #{b}: #{line}"
+ end
# 2.2.3 Double-Quotes:
#
# The <backslash> shall retain its special meaning as an
@@ -110,6 +126,9 @@ module Shellwords
# command line. +str+ can be a non-string object that responds to
# +to_s+.
#
+ # +str+ must not contain NUL characters because of nature of +exec+
+ # system call.
+ #
# Note that a resulted string should be used unquoted and is not
# intended for use in double quotes nor in single quotes.
#
@@ -123,7 +142,7 @@ module Shellwords
#
# # Search files in lib for method definitions
# pattern = "^[ \t]*def "
- # open("| grep -Ern #{pattern.shellescape} lib") { |grep|
+ # open("| grep -Ern -e #{pattern.shellescape} lib") { |grep|
# grep.each_line { |line|
# file, lineno, matched_line = line.split(':', 3)
# # ...
@@ -142,12 +161,15 @@ module Shellwords
# An empty argument will be skipped, so return empty quotes.
return "''".dup if str.empty?
+ # Shellwords cannot contain NUL characters.
+ raise ArgumentError, "NUL character" if str.index("\0")
+
str = str.dup
# Treat multibyte characters as is. It is the caller's responsibility
# to encode the string in the right encoding for the shell
# environment.
- str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1")
+ str.gsub!(/[^A-Za-z0-9_\-.,:+\/@\n]/, "\\\\\\&")
# A LF cannot be escaped with a backslash because a backslash + LF
# combo is regarded as a line continuation and simply ignored.
@@ -167,6 +189,7 @@ module Shellwords
# All elements are joined into a single string with fields separated by a
# space, where each element is escaped for the Bourne shell and stringified
# using +to_s+.
+ # See also Shellwords.shellescape.
#
# ary = ["There's", "a", "time", "and", "place", "for", "everything"]
# argv = Shellwords.join(ary)