diff options
Diffstat (limited to 'lib/shellwords.rb')
| -rw-r--r-- | lib/shellwords.rb | 71 |
1 files changed, 47 insertions, 24 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb index eb5fa2d226..20a85ed9d2 100644 --- a/lib/shellwords.rb +++ b/lib/shellwords.rb @@ -5,9 +5,9 @@ # This module manipulates strings according to the word parsing rules # of the UNIX Bourne shell. # -# The shellwords() function was originally a port of shellwords.pl, -# but modified to conform to the Shell & Utilities volume of the IEEE -# Std 1003.1-2008, 2016 Edition [1]. +# The <tt>shellwords()</tt> function was originally a port of shellwords.pl, but +# modified to conform to {the Shell & Utilities volume of the IEEE Std 1003.1-2008, 2016 +# Edition}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html] # # === Usage # @@ -24,28 +24,37 @@ # argv = "see how they run".shellsplit # argv #=> ["see", "how", "they", "run"] # -# Be careful you don't leave a quote unmatched. +# They treat quotes as special characters, so an unmatched quote will +# cause an ArgumentError. # # argv = "they all ran after the farmer's wife".shellsplit -# #=> ArgumentError: Unmatched double quote: ... +# #=> ArgumentError: Unmatched quote: ... # -# In this case, you might want to use Shellwords.escape, or its alias -# String#shellescape. +# Shellwords also provides methods that do the opposite. +# Shellwords.escape, or its alias, String#shellescape, escapes +# shell metacharacters in a string for use in a command line. # -# This method will escape the String for you to safely use with a Bourne shell. +# filename = "special's.txt" # -# argv = Shellwords.escape("special's.txt") -# argv #=> "special\\'s.txt" -# system("cat " + argv) +# system("cat -- #{filename.shellescape}") +# # runs "cat -- special\\'s.txt" +# +# Note the '--'. Without it, cat(1) will treat the following argument +# as a command line option if it starts with '-'. It is guaranteed +# that Shellwords.escape converts a string to a form that a Bourne +# shell will parse back to the original string, but it is the +# programmer's responsibility to make sure that passing an arbitrary +# argument to a command does no harm. # # Shellwords also comes with a core extension for Array, Array#shelljoin. # -# argv = %w{ls -lta lib} -# system(argv.shelljoin) +# dir = "Funny GIFs" +# argv = %W[ls -lta -- #{dir}] +# system(argv.shelljoin + " | less") +# # runs "ls -lta -- Funny\\ GIFs | less" # -# You can use this method to create an escaped string out of an array of tokens -# separated by a space. In this example we used the literal shortcut for -# Array.new. +# You can use this method to build a complete command line out of an +# array of arguments. # # === Authors # * Wakou Aoyama @@ -53,18 +62,20 @@ # # === Contact # * Akinori MUSHA <knu@iDaemons.org> (current maintainer) -# -# === Resources -# -# 1: {IEEE Std 1003.1-2008, 2016 Edition, the Shell & Utilities volume}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html] module Shellwords + # The version number string. + VERSION = "0.2.2" + # Splits a string into an array of tokens in the same way the UNIX # Bourne shell does. # # argv = Shellwords.split('here are "two words"') # argv #=> ["here", "are", "two words"] # + # +line+ must not contain NUL characters because of nature of + # +exec+ system call. + # # Note, however, that this is not a command line parser. Shell # metacharacters except for the single and double quotes and # backslash are not treated as such. @@ -79,9 +90,14 @@ module Shellwords def shellsplit(line) words = [] field = String.new - line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do + line.scan(/\G\s*(?>([^\0\s\\\'\"]+)|'([^\0\']*)'|"((?:[^\0\"\\]|\\[^\0])*)"|(\\[^\0]?)|(\S))(\s|\z)?/m) do |word, sq, dq, esc, garbage, sep| - raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage + if garbage + b = $~.begin(0) + line = $~[0] + line = "..." + line if b > 0 + raise ArgumentError, "#{garbage == "\0" ? 'Nul character' : 'Unmatched quote'} at #{b}: #{line}" + end # 2.2.3 Double-Quotes: # # The <backslash> shall retain its special meaning as an @@ -110,6 +126,9 @@ module Shellwords # command line. +str+ can be a non-string object that responds to # +to_s+. # + # +str+ must not contain NUL characters because of nature of +exec+ + # system call. + # # Note that a resulted string should be used unquoted and is not # intended for use in double quotes nor in single quotes. # @@ -123,7 +142,7 @@ module Shellwords # # # Search files in lib for method definitions # pattern = "^[ \t]*def " - # open("| grep -Ern #{pattern.shellescape} lib") { |grep| + # open("| grep -Ern -e #{pattern.shellescape} lib") { |grep| # grep.each_line { |line| # file, lineno, matched_line = line.split(':', 3) # # ... @@ -142,12 +161,15 @@ module Shellwords # An empty argument will be skipped, so return empty quotes. return "''".dup if str.empty? + # Shellwords cannot contain NUL characters. + raise ArgumentError, "NUL character" if str.index("\0") + str = str.dup # Treat multibyte characters as is. It is the caller's responsibility # to encode the string in the right encoding for the shell # environment. - str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1") + str.gsub!(/[^A-Za-z0-9_\-.,:+\/@\n]/, "\\\\\\&") # A LF cannot be escaped with a backslash because a backslash + LF # combo is regarded as a line continuation and simply ignored. @@ -167,6 +189,7 @@ module Shellwords # All elements are joined into a single string with fields separated by a # space, where each element is escaped for the Bourne shell and stringified # using +to_s+. + # See also Shellwords.shellescape. # # ary = ["There's", "a", "time", "and", "place", "for", "everything"] # argv = Shellwords.join(ary) |
