summaryrefslogtreecommitdiff
path: root/lib/shellwords.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/shellwords.rb')
-rw-r--r--lib/shellwords.rb281
1 files changed, 243 insertions, 38 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb
index 5c31f8ca78..20a85ed9d2 100644
--- a/lib/shellwords.rb
+++ b/lib/shellwords.rb
@@ -1,49 +1,254 @@
-# shellwords.rb
-# original is shellwords.pl
+# frozen-string-literal: true
+##
+# == Manipulates strings like the UNIX Bourne shell
#
-# Usage:
-# require 'shellwords'
-# words = Shellwords.shellwords(line)
+# This module manipulates strings according to the word parsing rules
+# of the UNIX Bourne shell.
#
-# or
+# The <tt>shellwords()</tt> function was originally a port of shellwords.pl, but
+# modified to conform to {the Shell & Utilities volume of the IEEE Std 1003.1-2008, 2016
+# Edition}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html]
#
-# require 'shellwords'
-# include Shellwords
-# words = shellwords(line)
+# === Usage
+#
+# You can use Shellwords to parse a string into a Bourne shell friendly Array.
+#
+# require 'shellwords'
+#
+# argv = Shellwords.split('three blind "mice"')
+# argv #=> ["three", "blind", "mice"]
+#
+# Once you've required Shellwords, you can use the #split alias
+# String#shellsplit.
+#
+# argv = "see how they run".shellsplit
+# argv #=> ["see", "how", "they", "run"]
+#
+# They treat quotes as special characters, so an unmatched quote will
+# cause an ArgumentError.
+#
+# argv = "they all ran after the farmer's wife".shellsplit
+# #=> ArgumentError: Unmatched quote: ...
+#
+# Shellwords also provides methods that do the opposite.
+# Shellwords.escape, or its alias, String#shellescape, escapes
+# shell metacharacters in a string for use in a command line.
+#
+# filename = "special's.txt"
+#
+# system("cat -- #{filename.shellescape}")
+# # runs "cat -- special\\'s.txt"
+#
+# Note the '--'. Without it, cat(1) will treat the following argument
+# as a command line option if it starts with '-'. It is guaranteed
+# that Shellwords.escape converts a string to a form that a Bourne
+# shell will parse back to the original string, but it is the
+# programmer's responsibility to make sure that passing an arbitrary
+# argument to a command does no harm.
+#
+# Shellwords also comes with a core extension for Array, Array#shelljoin.
+#
+# dir = "Funny GIFs"
+# argv = %W[ls -lta -- #{dir}]
+# system(argv.shelljoin + " | less")
+# # runs "ls -lta -- Funny\\ GIFs | less"
+#
+# You can use this method to build a complete command line out of an
+# array of arguments.
+#
+# === Authors
+# * Wakou Aoyama
+# * Akinori MUSHA <knu@iDaemons.org>
+#
+# === Contact
+# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
module Shellwords
- def shellwords(line)
- unless line.kind_of?(String)
- raise ArgumentError, "Argument must be String class object."
- end
- line.sub!(/\A\s+/, '')
+ # The version number string.
+ VERSION = "0.2.2"
+
+ # Splits a string into an array of tokens in the same way the UNIX
+ # Bourne shell does.
+ #
+ # argv = Shellwords.split('here are "two words"')
+ # argv #=> ["here", "are", "two words"]
+ #
+ # +line+ must not contain NUL characters because of nature of
+ # +exec+ system call.
+ #
+ # Note, however, that this is not a command line parser. Shell
+ # metacharacters except for the single and double quotes and
+ # backslash are not treated as such.
+ #
+ # argv = Shellwords.split('ruby my_prog.rb | less')
+ # argv #=> ["ruby", "my_prog.rb", "|", "less"]
+ #
+ # String#shellsplit is a shortcut for this function.
+ #
+ # argv = 'here are "two words"'.shellsplit
+ # argv #=> ["here", "are", "two words"]
+ def shellsplit(line)
words = []
- while line != ''
- field = ''
- while true
- if line.sub!(/\A"(([^"\\]|\\.)*)"/, '') then #"
- snippet = $1
- snippet.gsub!(/\\(.)/, '\1')
- elsif line =~ /\A"/ then #"
- raise ArgumentError, "Unmatched double quote: #{line}"
- elsif line.sub!(/\A'(([^'\\]|\\.)*)'/, '') then #'
- snippet = $1
- snippet.gsub!(/\\(.)/, '\1')
- elsif line =~ /\A'/ then #'
- raise ArgumentError, "Unmatched single quote: #{line}"
- elsif line.sub!(/\A\\(.)/, '') then
- snippet = $1
- elsif line.sub!(/\A([^\s\\'"]+)/, '') then #'
- snippet = $1
- else
- line.sub!(/\A\s+/, '')
- break
- end
- field.concat(snippet)
+ field = String.new
+ line.scan(/\G\s*(?>([^\0\s\\\'\"]+)|'([^\0\']*)'|"((?:[^\0\"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do
+ |word, sq, dq, esc, garbage, sep|
+ if garbage
+ b = $~.begin(0)
+ line = $~[0]
+ line = "..." + line if b > 0
+ raise ArgumentError, "#{garbage == "\0" ? 'Nul character' : 'Unmatched quote'} at #{b}: #{line}"
+ end
+ # 2.2.3 Double-Quotes:
+ #
+ # The <backslash> shall retain its special meaning as an
+ # escape character only when followed by one of the following
+ # characters when considered special:
+ #
+ # $ ` " \ <newline>
+ field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))
+ if sep
+ words << field
+ field = String.new
end
- words.push(field)
end
words
end
- module_function :shellwords
+
+ alias shellwords shellsplit
+
+ module_function :shellsplit, :shellwords
+
+ class << self
+ alias split shellsplit
+ end
+
+ # Escapes a string so that it can be safely used in a Bourne shell
+ # command line. +str+ can be a non-string object that responds to
+ # +to_s+.
+ #
+ # +str+ must not contain NUL characters because of nature of +exec+
+ # system call.
+ #
+ # Note that a resulted string should be used unquoted and is not
+ # intended for use in double quotes nor in single quotes.
+ #
+ # argv = Shellwords.escape("It's better to give than to receive")
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
+ #
+ # String#shellescape is a shorthand for this function.
+ #
+ # argv = "It's better to give than to receive".shellescape
+ # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
+ #
+ # # Search files in lib for method definitions
+ # pattern = "^[ \t]*def "
+ # open("| grep -Ern -e #{pattern.shellescape} lib") { |grep|
+ # grep.each_line { |line|
+ # file, lineno, matched_line = line.split(':', 3)
+ # # ...
+ # }
+ # }
+ #
+ # It is the caller's responsibility to encode the string in the right
+ # encoding for the shell environment where this string is used.
+ #
+ # Multibyte characters are treated as multibyte characters, not as bytes.
+ #
+ # Returns an empty quoted String if +str+ has a length of zero.
+ def shellescape(str)
+ str = str.to_s
+
+ # An empty argument will be skipped, so return empty quotes.
+ return "''".dup if str.empty?
+
+ # Shellwords cannot contain NUL characters.
+ raise ArgumentError, "NUL character" if str.index("\0")
+
+ str = str.dup
+
+ # Treat multibyte characters as is. It is the caller's responsibility
+ # to encode the string in the right encoding for the shell
+ # environment.
+ str.gsub!(/[^A-Za-z0-9_\-.,:+\/@\n]/, "\\\\\\&")
+
+ # A LF cannot be escaped with a backslash because a backslash + LF
+ # combo is regarded as a line continuation and simply ignored.
+ str.gsub!(/\n/, "'\n'")
+
+ return str
+ end
+
+ module_function :shellescape
+
+ class << self
+ alias escape shellescape
+ end
+
+ # Builds a command line string from an argument list, +array+.
+ #
+ # All elements are joined into a single string with fields separated by a
+ # space, where each element is escaped for the Bourne shell and stringified
+ # using +to_s+.
+ # See also Shellwords.shellescape.
+ #
+ # ary = ["There's", "a", "time", "and", "place", "for", "everything"]
+ # argv = Shellwords.join(ary)
+ # argv #=> "There\\'s a time and place for everything"
+ #
+ # Array#shelljoin is a shortcut for this function.
+ #
+ # ary = ["Don't", "rock", "the", "boat"]
+ # argv = ary.shelljoin
+ # argv #=> "Don\\'t rock the boat"
+ #
+ # You can also mix non-string objects in the elements as allowed in Array#join.
+ #
+ # output = `#{['ps', '-p', $$].shelljoin}`
+ #
+ def shelljoin(array)
+ array.map { |arg| shellescape(arg) }.join(' ')
+ end
+
+ module_function :shelljoin
+
+ class << self
+ alias join shelljoin
+ end
+end
+
+class String
+ # call-seq:
+ # str.shellsplit => array
+ #
+ # Splits +str+ into an array of tokens in the same way the UNIX
+ # Bourne shell does.
+ #
+ # See Shellwords.shellsplit for details.
+ def shellsplit
+ Shellwords.split(self)
+ end
+
+ # call-seq:
+ # str.shellescape => string
+ #
+ # Escapes +str+ so that it can be safely used in a Bourne shell
+ # command line.
+ #
+ # See Shellwords.shellescape for details.
+ def shellescape
+ Shellwords.escape(self)
+ end
+end
+
+class Array
+ # call-seq:
+ # array.shelljoin => string
+ #
+ # Builds a command line string from an argument list +array+ joining
+ # all elements escaped for the Bourne shell and separated by a space.
+ #
+ # See Shellwords.shelljoin for details.
+ def shelljoin
+ Shellwords.join(self)
+ end
end