## # == Manipulates strings like the UNIX Bourne shell # # This module manipulates strings according to the word parsing rules # of the UNIX Bourne shell. # # The shellwords() function was originally a port of shellwords.pl, # but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001 [1]). # # === Usage # # You can use shellwords to parse a string into a Bourne shell friendly Array. # # require 'shellwords' # # argv = Shellwords.split('three blind "mice"') # argv #=> ["three", "blind", "mice"] # # Once you've required Shellwords, you can use the #split alias # String#shellsplit. # # argv = "see how they run".shellsplit # argv #=> ["see", "how", "they", "run"] # # Be careful you don't leave a quote unmatched. # # argv = "they all ran after the farmer's wife".shellsplit # #=> ArgumentError: Unmatched double quote: ... # # In this case, you might want to use Shellwords.escape, or it's alias # String#shellescape. # # This method will escape the String for you to safely use with a Bourne shell. # # argv = Shellwords.escape("special's.txt") # argv #=> "special\\s.txt" # system("cat " + argv) # # Shellwords also comes with a core extension for Array, Array#shelljoin. # # argv = %w{ls -lta lib} # system(argv.shelljoin) # # You can use this method to create an escaped string out of an array of tokens # separated by a space. In this example we'll use the literal shortcut for # Array.new. # # === Authors # * Wakou Aoyama # * Akinori MUSHA # # === Contact # * Akinori MUSHA (current maintainer) # # === Resources # # 1: {IEEE Std 1003.1-2004}[http://pubs.opengroup.org/onlinepubs/009695399/toc.htm] module Shellwords # Splits a string into an array of tokens in the same way the UNIX # Bourne shell does. # # argv = Shellwords.split('here are "two words"') # argv #=> ["here", "are", "two words"] # # String#shellsplit is a shortcut for this function. # # argv = 'here are "two words"'.shellsplit # argv #=> ["here", "are", "two words"] def shellsplit(line) words = [] field = '' line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do |word, sq, dq, esc, garbage, sep| raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage field << (word || sq || (dq || esc).gsub(/\\(.)/, '\\1')) if sep words << field field = '' end end words end alias shellwords shellsplit module_function :shellsplit, :shellwords class << self alias split shellsplit end # Escapes a string so that it can be safely used in a Bourne shell # command line. +str+ can be a non-string object that responds to # +to_s+. # # Note that a resulted string should be used unquoted and is not # intended for use in double quotes nor in single quotes. # # argv = Shellwords.escape("It's better to give than to receive") # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive" # # String#shellescape is a shorthand for this function. # # argv = "It's better to give than to receive".shellescape # argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive" # # # Search files in lib for method definitions # pattern = "^[ \t]*def " # open("| grep -Ern #{pattern.shellescape} lib") { |grep| # grep.each_line { |line| # file, lineno, matched_line = line.split(':', 3) # # ... # } # } # # It is the caller's responsibility to encode the string in the right # encoding for the shell environment where this string is used. # # Multibyte characters are treated as multibyte characters, not bytes. # # Returns an empty quoted String if +str+ has a length of zero. def shellescape(str) str = str.to_s # An empty argument will be skipped, so return empty quotes. return "''" if str.empty? str = str.dup # Treat multibyte characters as is. It is caller's responsibility # to encode the string in the right encoding for the shell # environment. str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1") # A LF cannot be escaped with a backslash because a backslash + LF # combo is regarded as line continuation and simply ignored. str.gsub!(/\n/, "'\n'") return str end module_function :shellescape class << self alias escape shellescape end # Builds a command line string from an argument list, +array+. # # All elements are joined into a single string with fields separated by a # space, where each element is escaped for Bourne shell and stringified using # +to_s+. # # ary = ["There's", "a", "time", "and", "place", "for", "everything"] # argv = Shellwords.join(ary) # argv #=> "There\\'s a time and place for everything" # # Array#shelljoin is a shortcut for this function. # # ary = ["Don't", "rock", "the", "boat"] # argv = ary.shelljoin # argv #=> "Don\\'t rock the boat" # # You can also mix non-string objects in the elements as allowed in Array#join. # # output = `#{['ps', '-p', $$].shelljoin}` # def shelljoin(array) array.map { |arg| shellescape(arg) }.join(' ') end module_function :shelljoin class << self alias join shelljoin end end class String # call-seq: # str.shellsplit => array # # Splits +str+ into an array of tokens in the same way the UNIX # Bourne shell does. # # See Shellwords.shellsplit for details. def shellsplit Shellwords.split(self) end # call-seq: # str.shellescape => string # # Escapes +str+ so that it can be safely used in a Bourne shell # command line. # # See Shellwords.shellescape for details. def shellescape Shellwords.escape(self) end end class Array # call-seq: # array.shelljoin => string # # Builds a command line string from an argument list +array+ joining # all elements escaped for Bourne shell and separated by a space. # # See Shellwords.shelljoin for details. def shelljoin Shellwords.join(self) end end