summaryrefslogtreecommitdiff
path: root/lib/shellwords.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/shellwords.rb')
-rw-r--r--lib/shellwords.rb107
1 files changed, 73 insertions, 34 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb
index c3586d29fa..20a85ed9d2 100644
--- a/lib/shellwords.rb
+++ b/lib/shellwords.rb
@@ -1,15 +1,17 @@
+# frozen-string-literal: true
##
# == Manipulates strings like the UNIX Bourne shell
#
# This module manipulates strings according to the word parsing rules
# of the UNIX Bourne shell.
#
-# The shellwords() function was originally a port of shellwords.pl,
-# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001 [1]).
+# The <tt>shellwords()</tt> function was originally a port of shellwords.pl, but
+# modified to conform to {the Shell & Utilities volume of the IEEE Std 1003.1-2008, 2016
+# Edition}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
#
# === Usage
#
-# You can use shellwords to parse a string into a Bourne shell friendly Array.
+# You can use Shellwords to parse a string into a Bourne shell friendly Array.
#
# require 'shellwords'
#
@@ -22,28 +24,37 @@
# argv = "see how they run".shellsplit
# argv #=> ["see", "how", "they", "run"]
#
-# Be careful you don't leave a quote unmatched.
+# They treat quotes as special characters, so an unmatched quote will
+# cause an ArgumentError.
#
# argv = "they all ran after the farmer's wife".shellsplit
-# #=> ArgumentError: Unmatched double quote: ...
+# #=> ArgumentError: Unmatched quote: ...
#
-# In this case, you might want to use Shellwords.escape, or it's alias
-# String#shellescape.
+# Shellwords also provides methods that do the opposite.
+# Shellwords.escape, or its alias, String#shellescape, escapes
+# shell metacharacters in a string for use in a command line.
#
-# This method will escape the String for you to safely use with a Bourne shell.
+# filename = "special's.txt"
#
-# argv = Shellwords.escape("special's.txt")
-# argv #=> "special\\s.txt"
-# system("cat " + argv)
+# system("cat -- #{filename.shellescape}")
+# # runs "cat -- special\\'s.txt"
+#
+# Note the '--'. Without it, cat(1) will treat the following argument
+# as a command line option if it starts with '-'. It is guaranteed
+# that Shellwords.escape converts a string to a form that a Bourne
+# shell will parse back to the original string, but it is the
+# programmer's responsibility to make sure that passing an arbitrary
+# argument to a command does no harm.
#
# Shellwords also comes with a core extension for Array, Array#shelljoin.
#
-# argv = %w{ls -lta lib}
-# system(argv.shelljoin)
+# dir = "Funny GIFs"
+# argv = %W[ls -lta -- #{dir}]
+# system(argv.shelljoin + " | less")
+# # runs "ls -lta -- Funny\\ GIFs | less"
#
-# You can use this method to create an escaped string out of an array of tokens
-# separated by a space. In this example we'll use the literal shortcut for
-# Array.new.
+# You can use this method to build a complete command line out of an
+# array of arguments.
#
# === Authors
# * Wakou Aoyama
@@ -51,32 +62,53 @@
#
# === Contact
# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
-#
-# === Resources
-#
-# 1: {IEEE Std 1003.1-2004}[http://pubs.opengroup.org/onlinepubs/009695399/toc.htm]
module Shellwords
+ # The version number string.
+ VERSION = "0.2.2"
+
# Splits a string into an array of tokens in the same way the UNIX
# Bourne shell does.
#
# argv = Shellwords.split('here are "two words"')
# argv #=> ["here", "are", "two words"]
#
+ # +line+ must not contain NUL characters because of nature of
+ # +exec+ system call.
+ #
+ # Note, however, that this is not a command line parser. Shell
+ # metacharacters except for the single and double quotes and
+ # backslash are not treated as such.
+ #
+ # argv = Shellwords.split('ruby my_prog.rb | less')
+ # argv #=> ["ruby", "my_prog.rb", "|", "less"]
+ #
# String#shellsplit is a shortcut for this function.
#
# argv = 'here are "two words"'.shellsplit
# argv #=> ["here", "are", "two words"]
def shellsplit(line)
words = []
- field = ''
- line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do
+ field = String.new
+ line.scan(/\G\s*(?>([^\0\s\\\'\"]+)|'([^\0\']*)'|"((?:[^\0\"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do
|word, sq, dq, esc, garbage, sep|
- raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage
- field << (word || sq || (dq || esc).gsub(/\\(.)/, '\\1'))
+ if garbage
+ b = $~.begin(0)
+ line = $~[0]
+ line = "..." + line if b > 0
+ raise ArgumentError, "#{garbage == "\0" ? 'Nul character' : 'Unmatched quote'} at #{b}: #{line}"
+ end
+ # 2.2.3 Double-Quotes:
+ #
+ # The <backslash> shall retain its special meaning as an
+ # escape character only when followed by one of the following
+ # characters when considered special:
+ #
+ # $ ` " \ <newline>
+ field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))
if sep
words << field
- field = ''
+ field = String.new
end
end
words
@@ -94,6 +126,9 @@ module Shellwords
# command line. +str+ can be a non-string object that responds to
# +to_s+.
#
+ # +str+ must not contain NUL characters because of nature of +exec+
+ # system call.
+ #
# Note that a resulted string should be used unquoted and is not
# intended for use in double quotes nor in single quotes.
#
@@ -107,7 +142,7 @@ module Shellwords
#
# # Search files in lib for method definitions
# pattern = "^[ \t]*def "
- # open("| grep -Ern #{pattern.shellescape} lib") { |grep|
+ # open("| grep -Ern -e #{pattern.shellescape} lib") { |grep|
# grep.each_line { |line|
# file, lineno, matched_line = line.split(':', 3)
# # ...
@@ -117,24 +152,27 @@ module Shellwords
# It is the caller's responsibility to encode the string in the right
# encoding for the shell environment where this string is used.
#
- # Multibyte characters are treated as multibyte characters, not bytes.
+ # Multibyte characters are treated as multibyte characters, not as bytes.
#
# Returns an empty quoted String if +str+ has a length of zero.
def shellescape(str)
str = str.to_s
# An empty argument will be skipped, so return empty quotes.
- return "''" if str.empty?
+ return "''".dup if str.empty?
+
+ # Shellwords cannot contain NUL characters.
+ raise ArgumentError, "NUL character" if str.index("\0")
str = str.dup
- # Treat multibyte characters as is. It is caller's responsibility
+ # Treat multibyte characters as is. It is the caller's responsibility
# to encode the string in the right encoding for the shell
# environment.
- str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1")
+ str.gsub!(/[^A-Za-z0-9_\-.,:+\/@\n]/, "\\\\\\&")
# A LF cannot be escaped with a backslash because a backslash + LF
- # combo is regarded as line continuation and simply ignored.
+ # combo is regarded as a line continuation and simply ignored.
str.gsub!(/\n/, "'\n'")
return str
@@ -149,8 +187,9 @@ module Shellwords
# Builds a command line string from an argument list, +array+.
#
# All elements are joined into a single string with fields separated by a
- # space, where each element is escaped for Bourne shell and stringified using
- # +to_s+.
+ # space, where each element is escaped for the Bourne shell and stringified
+ # using +to_s+.
+ # See also Shellwords.shellescape.
#
# ary = ["There's", "a", "time", "and", "place", "for", "everything"]
# argv = Shellwords.join(ary)
@@ -206,7 +245,7 @@ class Array
# array.shelljoin => string
#
# Builds a command line string from an argument list +array+ joining
- # all elements escaped for Bourne shell and separated by a space.
+ # all elements escaped for the Bourne shell and separated by a space.
#
# See Shellwords.shelljoin for details.
def shelljoin