diff options
Diffstat (limited to 'lib/cgi/util.rb')
| -rw-r--r-- | lib/cgi/util.rb | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb new file mode 100644 index 0000000000..ce77a0ccd5 --- /dev/null +++ b/lib/cgi/util.rb @@ -0,0 +1,252 @@ +# frozen_string_literal: true +class CGI + module Util; end + include Util + extend Util +end +module CGI::Util + @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) + + # URL-encode a string into application/x-www-form-urlencoded. + # Space characters (+" "+) are encoded with plus signs (+"+"+) + # url_encoded_string = CGI.escape("'Stop!' said Fred") + # # => "%27Stop%21%27+said+Fred" + def escape(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.tr!(' ', '+') + buffer.force_encoding(encoding) + end + + # URL-decode an application/x-www-form-urlencoded string with encoding(optional). + # string = CGI.unescape("%27Stop%21%27+said+Fred") + # # => "'Stop!' said Fred" + def unescape(string, encoding = @@accept_charset) + str = string.tr('+', ' ') + str = str.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # URL-encode a string following RFC 3986 + # Space characters (+" "+) are encoded with (+"%20"+) + # url_encoded_string = CGI.escape("'Stop!' said Fred") + # # => "%27Stop%21%27%20said%20Fred" + def escapeURIComponent(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.force_encoding(encoding) + end + + # URL-decode a string following RFC 3986 with encoding(optional). + # string = CGI.unescape("%27Stop%21%27+said%20Fred") + # # => "'Stop!'+said Fred" + def unescapeURIComponent(string, encoding = @@accept_charset) + str = string.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # The set of special characters and their escaped values + TABLE_FOR_ESCAPE_HTML__ = { + "'" => ''', + '&' => '&', + '"' => '"', + '<' => '<', + '>' => '>', + } + + # Escape special characters in HTML, namely '&\"<> + # CGI.escapeHTML('Usage: foo "bar" <baz>') + # # => "Usage: foo "bar" <baz>" + def escapeHTML(string) + enc = string.encoding + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ? string.encode(enc) : string.b + end + table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] + string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) + string.encode!(origenc) if origenc + string + else + string = string.b + string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) + string.force_encoding(enc) + end + end + + begin + require 'cgi/escape' + rescue LoadError + end + + # Unescape a string that has been HTML-escaped + # CGI.unescapeHTML("Usage: foo "bar" <baz>") + # # => "Usage: foo \"bar\" <baz>" + def unescapeHTML(string) + enc = string.encoding + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ? string.encode(enc) : string.b + end + string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do + case $1.encode(Encoding::US_ASCII) + when 'apos' then "'".encode(enc) + when 'amp' then '&'.encode(enc) + when 'quot' then '"'.encode(enc) + when 'gt' then '>'.encode(enc) + when 'lt' then '<'.encode(enc) + when /\A#0*(\d+)\z/ then $1.to_i.chr(enc) + when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) + end + end + string.encode!(origenc) if origenc + return string + end + return string unless string.include? '&' + charlimit = case enc + when Encoding::UTF_8; 0x10ffff + when Encoding::ISO_8859_1; 256 + else 128 + end + string = string.b + string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do + match = $1.dup + case match + when 'apos' then "'" + when 'amp' then '&' + when 'quot' then '"' + when 'gt' then '>' + when 'lt' then '<' + when /\A#0*(\d+)\z/ + n = $1.to_i + if n < charlimit + n.chr(enc) + else + "&##{$1};" + end + when /\A#x([0-9a-f]+)\z/i + n = $1.hex + if n < charlimit + n.chr(enc) + else + "&#x#{$1};" + end + else + "&#{match};" + end + end + string.force_encoding enc + end + + # Synonym for CGI.escapeHTML(str) + alias escape_html escapeHTML + + # Synonym for CGI.unescapeHTML(str) + alias unescape_html unescapeHTML + + # Escape only the tags of certain HTML elements in +string+. + # + # Takes an element or elements or array of elements. Each element + # is specified by the name of the element, without angle brackets. + # This matches both the start and the end tag of that element. + # The attribute list of the open tag will also be escaped (for + # instance, the double-quotes surrounding attribute values). + # + # print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG") + # # "<BR><A HREF="url"></A>" + # + # print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"]) + # # "<BR><A HREF="url"></A>" + def escapeElement(string, *elements) + elements = elements[0] if elements[0].kind_of?(Array) + unless elements.empty? + string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do + CGI.escapeHTML($&) + end + else + string + end + end + + # Undo escaping such as that done by CGI.escapeElement() + # + # print CGI.unescapeElement( + # CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG") + # # "<BR><A HREF="url"></A>" + # + # print CGI.unescapeElement( + # CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"]) + # # "<BR><A HREF="url"></A>" + def unescapeElement(string, *elements) + elements = elements[0] if elements[0].kind_of?(Array) + unless elements.empty? + string.gsub(/<\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:>)?/im) do + unescapeHTML($&) + end + else + string + end + end + + # Synonym for CGI.escapeElement(str) + alias escape_element escapeElement + + # Synonym for CGI.unescapeElement(str) + alias unescape_element unescapeElement + + # Format a +Time+ object as a String using the format specified by RFC 1123. + # + # CGI.rfc1123_date(Time.now) + # # Sat, 01 Jan 2000 00:00:00 GMT + def rfc1123_date(time) + time.getgm.strftime("%a, %d %b %Y %T GMT") + end + + # Prettify (indent) an HTML string. + # + # +string+ is the HTML string to indent. +shift+ is the indentation + # unit to use; it defaults to two spaces. + # + # print CGI.pretty("<HTML><BODY></BODY></HTML>") + # # <HTML> + # # <BODY> + # # </BODY> + # # </HTML> + # + # print CGI.pretty("<HTML><BODY></BODY></HTML>", "\t") + # # <HTML> + # # <BODY> + # # </BODY> + # # </HTML> + # + def pretty(string, shift = " ") + lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n") + end_pos = 0 + while end_pos = lines.index(/^<\/(\w+)/, end_pos) + element = $1.dup + start_pos = lines.rindex(/^\s*<#{element}/i, end_pos) + lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__" + end + lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1') + end + + alias h escapeHTML +end |
