diff options
Diffstat (limited to 'lib/shellwords.rb')
| -rw-r--r-- | lib/shellwords.rb | 191 |
1 file changed, 125 insertions, 66 deletions
diff --git a/lib/shellwords.rb b/lib/shellwords.rb index a4ec552a7e..eb5fa2d226 100644 --- a/lib/shellwords.rb +++ b/lib/shellwords.rb @@ -1,61 +1,99 @@ -# -# shellwords.rb: Manipulates strings a la UNIX Bourne shell -# - +# frozen-string-literal: true +## +# == Manipulates strings like the UNIX Bourne shell # # This module manipulates strings according to the word parsing rules # of the UNIX Bourne shell. # # The shellwords() function was originally a port of shellwords.pl, -# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001). +# but modified to conform to the Shell & Utilities volume of the IEEE +# Std 1003.1-2008, 2016 Edition [1]. +# +# === Usage +# +# You can use Shellwords to parse a string into a Bourne shell friendly Array. +# +# require 'shellwords' +# +# argv = Shellwords.split('three blind "mice"') +# argv #=> ["three", "blind", "mice"] +# +# Once you've required Shellwords, you can use the #split alias +# String#shellsplit. +# +# argv = "see how they run".shellsplit +# argv #=> ["see", "how", "they", "run"] +# +# Be careful you don't leave a quote unmatched. # -# Authors: -# - Wakou Aoyama -# - Akinori MUSHA <knu@iDaemons.org> +# argv = "they all ran after the farmer's wife".shellsplit +# #=> ArgumentError: Unmatched double quote: ... # -# Contact: -# - Akinori MUSHA <knu@iDaemons.org> (current maintainer) +# In this case, you might want to use Shellwords.escape, or its alias +# String#shellescape. # +# This method will escape the String for you to safely use with a Bourne shell. +# +# argv = Shellwords.escape("special's.txt") +# argv #=> "special\\'s.txt" +# system("cat " + argv) +# +# Shellwords also comes with a core extension for Array, Array#shelljoin. +# +# argv = %w{ls -lta lib} +# system(argv.shelljoin) +# +# You can use this method to create an escaped string out of an array of tokens +# separated by a space. In this example we used the literal shortcut for +# Array.new. 
+# +# === Authors +# * Wakou Aoyama +# * Akinori MUSHA <knu@iDaemons.org> +# +# === Contact +# * Akinori MUSHA <knu@iDaemons.org> (current maintainer) +# +# === Resources +# +# 1: {IEEE Std 1003.1-2008, 2016 Edition, the Shell & Utilities volume}[http://pubs.opengroup.org/onlinepubs/9699919799/utilities/contents.html] + module Shellwords - # # Splits a string into an array of tokens in the same way the UNIX # Bourne shell does. # # argv = Shellwords.split('here are "two words"') # argv #=> ["here", "are", "two words"] # - # +String#shellsplit+ is a shorthand for this function. + # Note, however, that this is not a command line parser. Shell + # metacharacters except for the single and double quotes and + # backslash are not treated as such. + # + # argv = Shellwords.split('ruby my_prog.rb | less') + # argv #=> ["ruby", "my_prog.rb", "|", "less"] + # + # String#shellsplit is a shortcut for this function. # # argv = 'here are "two words"'.shellsplit # argv #=> ["here", "are", "two words"] - # def shellsplit(line) - line = String.new(line) rescue - raise(ArgumentError, "Argument must be a string") - line.lstrip! words = [] - until line.empty? - field = '' - loop do - if line.sub!(/\A"(([^"\\]|\\.)*)"/, '') then - snippet = $1.gsub(/\\(.)/, '\1') - elsif line =~ /\A"/ then - raise ArgumentError, "Unmatched double quote: #{line}" - elsif line.sub!(/\A'([^']*)'/, '') then - snippet = $1 - elsif line =~ /\A'/ then - raise ArgumentError, "Unmatched single quote: #{line}" - elsif line.sub!(/\A\\(.)?/, '') then - snippet = $1 || '\\' - elsif line.sub!(/\A([^\s\\'"]+)/, '') then - snippet = $1 - else - line.lstrip! 
- break - end - field.concat(snippet) + field = String.new + line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do + |word, sq, dq, esc, garbage, sep| + raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage + # 2.2.3 Double-Quotes: + # + # The <backslash> shall retain its special meaning as an + # escape character only when followed by one of the following + # characters when considered special: + # + # $ ` " \ <newline> + field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1')) + if sep + words << field + field = String.new end - words.push(field) end words end @@ -68,35 +106,51 @@ module Shellwords alias split shellsplit end - # # Escapes a string so that it can be safely used in a Bourne shell - # command line. + # command line. +str+ can be a non-string object that responds to + # +to_s+. # # Note that a resulted string should be used unquoted and is not # intended for use in double quotes nor in single quotes. # - # open("| grep #{Shellwords.escape(pattern)} file") { |pipe| - # # ... - # } + # argv = Shellwords.escape("It's better to give than to receive") + # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive" + # + # String#shellescape is a shorthand for this function. # - # +String#shellescape+ is a shorthand for this function. + # argv = "It's better to give than to receive".shellescape + # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive" # - # open("| grep #{pattern.shellescape} file") { |pipe| - # # ... + # # Search files in lib for method definitions + # pattern = "^[ \t]*def " + # open("| grep -Ern #{pattern.shellescape} lib") { |grep| + # grep.each_line { |line| + # file, lineno, matched_line = line.split(':', 3) + # # ... + # } # } # + # It is the caller's responsibility to encode the string in the right + # encoding for the shell environment where this string is used. 
+ # + # Multibyte characters are treated as multibyte characters, not as bytes. + # + # Returns an empty quoted String if +str+ has a length of zero. def shellescape(str) + str = str.to_s + # An empty argument will be skipped, so return empty quotes. - return "''" if str.empty? + return "''".dup if str.empty? str = str.dup - # Process as a single byte sequence because not all shell - # implementations are multibyte aware. - str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1") + # Treat multibyte characters as is. It is the caller's responsibility + # to encode the string in the right encoding for the shell + # environment. + str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1") # A LF cannot be escaped with a backslash because a backslash + LF - # combo is regarded as line continuation and simply ignored. + # combo is regarded as a line continuation and simply ignored. str.gsub!(/\n/, "'\n'") return str @@ -108,19 +162,25 @@ module Shellwords alias escape shellescape end + # Builds a command line string from an argument list, +array+. # - # Builds a command line string from an argument list +array+ joining - # all elements escaped for Bourne shell and separated by a space. + # All elements are joined into a single string with fields separated by a + # space, where each element is escaped for the Bourne shell and stringified + # using +to_s+. # - # open('|' + Shellwords.join(['grep', pattern, *files])) { |pipe| - # # ... - # } + # ary = ["There's", "a", "time", "and", "place", "for", "everything"] + # argv = Shellwords.join(ary) + # argv #=> "There\\'s a time and place for everything" # - # +Array#shelljoin+ is a shorthand for this function. + # Array#shelljoin is a shortcut for this function. # - # open('|' + ['grep', pattern, *files].shelljoin) { |pipe| - # # ... - # } + # ary = ["Don't", "rock", "the", "boat"] + # argv = ary.shelljoin + # argv #=> "Don\\'t rock the boat" + # + # You can also mix non-string objects in the elements as allowed in Array#join. 
+ # + # output = `#{['ps', '-p', $$].shelljoin}` # def shelljoin(array) array.map { |arg| shellescape(arg) }.join(' ') @@ -134,38 +194,37 @@ module Shellwords end class String - # # call-seq: # str.shellsplit => array # # Splits +str+ into an array of tokens in the same way the UNIX - # Bourne shell does. See +Shellwords::shellsplit+ for details. + # Bourne shell does. # + # See Shellwords.shellsplit for details. def shellsplit Shellwords.split(self) end - # # call-seq: # str.shellescape => string # # Escapes +str+ so that it can be safely used in a Bourne shell - # command line. See +Shellwords::shellescape+ for details. + # command line. # + # See Shellwords.shellescape for details. def shellescape Shellwords.escape(self) end end class Array - # # call-seq: # array.shelljoin => string # # Builds a command line string from an argument list +array+ joining - # all elements escaped for Bourne shell and separated by a space. - # See +Shellwords::shelljoin+ for details. + # all elements escaped for the Bourne shell and separated by a space. # + # See Shellwords.shelljoin for details. def shelljoin Shellwords.join(self) end |
