diff options
Diffstat (limited to 'lib/rubygems/safe_marshal')
-rw-r--r-- | lib/rubygems/safe_marshal/elements.rb | 146 | ||||
-rw-r--r-- | lib/rubygems/safe_marshal/reader.rb | 308 | ||||
-rw-r--r-- | lib/rubygems/safe_marshal/visitors/stream_printer.rb | 31 | ||||
-rw-r--r-- | lib/rubygems/safe_marshal/visitors/to_ruby.rb | 415 | ||||
-rw-r--r-- | lib/rubygems/safe_marshal/visitors/visitor.rb | 74 |
5 files changed, 974 insertions, 0 deletions
diff --git a/lib/rubygems/safe_marshal/elements.rb b/lib/rubygems/safe_marshal/elements.rb new file mode 100644 index 0000000000..f8874b1b2f --- /dev/null +++ b/lib/rubygems/safe_marshal/elements.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +module Gem + module SafeMarshal + module Elements + class Element + end + + class Symbol < Element + def initialize(name) + @name = name + end + attr_reader :name + end + + class UserDefined < Element + def initialize(name, binary_string) + @name = name + @binary_string = binary_string + end + + attr_reader :name, :binary_string + end + + class UserMarshal < Element + def initialize(name, data) + @name = name + @data = data + end + + attr_reader :name, :data + end + + class String < Element + def initialize(str) + @str = str + end + + attr_reader :str + end + + class Hash < Element + def initialize(pairs) + @pairs = pairs + end + + attr_reader :pairs + end + + class HashWithDefaultValue < Hash + def initialize(pairs, default) + super(pairs) + @default = default + end + + attr_reader :default + end + + class Array < Element + def initialize(elements) + @elements = elements + end + + attr_reader :elements + end + + class Integer < Element + def initialize(int) + @int = int + end + + attr_reader :int + end + + class True < Element + def initialize + end + TRUE = new.freeze + end + + class False < Element + def initialize + end + + FALSE = new.freeze + end + + class WithIvars < Element + def initialize(object, ivars) + @object = object + @ivars = ivars + end + + attr_reader :object, :ivars + end + + class Object < Element + def initialize(name) + @name = name + end + attr_reader :name + end + + class Nil < Element + NIL = new.freeze + end + + class ObjectLink < Element + def initialize(offset) + @offset = offset + end + attr_reader :offset + end + + class SymbolLink < Element + def initialize(offset) + @offset = offset + end + attr_reader :offset + end + + class Float < Element + def initialize(string) + @string = string + end + attr_reader :string + end + + class Bignum < Element # rubocop:disable Lint/UnifiedInteger + def initialize(sign, data) + @sign = sign + @data = data + end + attr_reader :sign, :data + end + + class UserClass < Element + def initialize(name, wrapped_object) + @name = name + @wrapped_object = wrapped_object + end + attr_reader :name, :wrapped_object + end + end + end +end diff --git a/lib/rubygems/safe_marshal/reader.rb b/lib/rubygems/safe_marshal/reader.rb new file mode 100644 index 0000000000..740be113e5 --- /dev/null +++ b/lib/rubygems/safe_marshal/reader.rb @@ -0,0 +1,308 @@ +# frozen_string_literal: true + +require_relative "elements" + +module Gem + module SafeMarshal + class Reader + class Error < StandardError + end + + class UnsupportedVersionError < Error + end + + class UnconsumedBytesError < Error + end + + class NotImplementedError < Error + end + + class EOFError < Error + end + + def initialize(io) + @io = io + end + + def read! + read_header + root = read_element + raise UnconsumedBytesError unless @io.eof? + root + end + + private + + MARSHAL_VERSION = [Marshal::MAJOR_VERSION, Marshal::MINOR_VERSION].map(&:chr).join.freeze + private_constant :MARSHAL_VERSION + + def read_header + v = @io.read(2) + raise UnsupportedVersionError, "Unsupported marshal version #{v.bytes.map(&:ord).join(".")}, expected #{Marshal::MAJOR_VERSION}.#{Marshal::MINOR_VERSION}" unless v == MARSHAL_VERSION + end + + def read_byte + @io.getbyte + end + + def read_integer + b = read_byte + + case b + when 0x00 + 0 + when 0x01 + read_byte + when 0x02 + read_byte | (read_byte << 8) + when 0x03 + read_byte | (read_byte << 8) | (read_byte << 16) + when 0x04 + read_byte | (read_byte << 8) | (read_byte << 16) | (read_byte << 24) + when 0xFC + read_byte | (read_byte << 8) | (read_byte << 16) | (read_byte << 24) | -0x100000000 + when 0xFD + read_byte | (read_byte << 8) | (read_byte << 16) | -0x1000000 + when 0xFE + read_byte | (read_byte << 8) | -0x10000 + when 0xFF + read_byte | -0x100 + when nil + raise EOFError, "Unexpected EOF" + else + signed = (b ^ 128) - 128 + if b >= 128 + signed + 5 + else + signed - 5 + end + end + end + + def read_element + type = read_byte + case type + when 34 then read_string # ?" + when 48 then read_nil # ?0 + when 58 then read_symbol # ?: + when 59 then read_symbol_link # ?; + when 64 then read_object_link # ?@ + when 70 then read_false # ?F + when 73 then read_object_with_ivars # ?I + when 84 then read_true # ?T + when 85 then read_user_marshal # ?U + when 91 then read_array # ?[ + when 102 then read_float # ?f + when 105 then Elements::Integer.new(read_integer) # ?i + when 108 then read_bignum # ?l + when 111 then read_object # ?o + when 117 then read_user_defined # ?u + when 123 then read_hash # ?{ + when 125 then read_hash_with_default_value # ?} + when 101 then read_extended_object # ?e + when 99 then read_class # ?c + when 109 then read_module # ?m + when 77 then read_class_or_module # ?M + when 100 then read_data # ?d + when 47 then read_regexp # ?/ + when 83 then read_struct # ?S + when 67 then read_user_class # ?C + when nil + raise EOFError, "Unexpected EOF" + else + raise Error, "Unknown marshal type discriminator #{type.chr.inspect} (#{type})" + end + end + + STRING_E_SYMBOL = Elements::Symbol.new("E").freeze + private_constant :STRING_E_SYMBOL + + def read_symbol + len = read_integer + if len == 1 + byte = read_byte + if byte == 69 # ?E + STRING_E_SYMBOL + else + Elements::Symbol.new(byte.chr) + end + else + name = -@io.read(len) + Elements::Symbol.new(name) + end + end + + EMPTY_STRING = Elements::String.new("".b.freeze).freeze + private_constant :EMPTY_STRING + + def read_string + length = read_integer + return EMPTY_STRING if length == 0 + str = @io.read(length) + Elements::String.new(str) + end + + def read_true + Elements::True::TRUE + end + + def read_false + Elements::False::FALSE + end + + def read_user_defined + name = read_element + binary_string = @io.read(read_integer) + Elements::UserDefined.new(name, binary_string) + end + + EMPTY_ARRAY = Elements::Array.new([].freeze).freeze + private_constant :EMPTY_ARRAY + + def read_array + length = read_integer + return EMPTY_ARRAY if length == 0 + elements = Array.new(length) do + read_element + end + Elements::Array.new(elements) + end + + def read_object_with_ivars + object = read_element + ivars = Array.new(read_integer) do + [read_element, read_element] + end + Elements::WithIvars.new(object, ivars) + end + + def read_symbol_link + offset = read_integer + Elements::SymbolLink.new(offset) + end + + def read_user_marshal + name = read_element + data = read_element + Elements::UserMarshal.new(name, data) + end + + # profiling bundle install --full-index shows that + # offset 6 is by far the most common object link, + # so we special case it to avoid allocating a new + # object a third of the time. + # the following are all the object links that + # appear more than 10000 times in my profiling + + OBJECT_LINKS = { + 6 => Elements::ObjectLink.new(6).freeze, + 30 => Elements::ObjectLink.new(30).freeze, + 81 => Elements::ObjectLink.new(81).freeze, + 34 => Elements::ObjectLink.new(34).freeze, + 38 => Elements::ObjectLink.new(38).freeze, + 50 => Elements::ObjectLink.new(50).freeze, + 91 => Elements::ObjectLink.new(91).freeze, + 42 => Elements::ObjectLink.new(42).freeze, + 46 => Elements::ObjectLink.new(46).freeze, + 150 => Elements::ObjectLink.new(150).freeze, + 100 => Elements::ObjectLink.new(100).freeze, + 104 => Elements::ObjectLink.new(104).freeze, + 108 => Elements::ObjectLink.new(108).freeze, + 242 => Elements::ObjectLink.new(242).freeze, + 246 => Elements::ObjectLink.new(246).freeze, + 139 => Elements::ObjectLink.new(139).freeze, + 143 => Elements::ObjectLink.new(143).freeze, + 114 => Elements::ObjectLink.new(114).freeze, + 308 => Elements::ObjectLink.new(308).freeze, + 200 => Elements::ObjectLink.new(200).freeze, + 54 => Elements::ObjectLink.new(54).freeze, + 62 => Elements::ObjectLink.new(62).freeze, + 1_286_245 => Elements::ObjectLink.new(1_286_245).freeze, + }.freeze + private_constant :OBJECT_LINKS + + def read_object_link + offset = read_integer + OBJECT_LINKS[offset] || Elements::ObjectLink.new(offset) + end + + EMPTY_HASH = Elements::Hash.new([].freeze).freeze + private_constant :EMPTY_HASH + + def read_hash + length = read_integer + return EMPTY_HASH if length == 0 + pairs = Array.new(length) do + [read_element, read_element] + end + Elements::Hash.new(pairs) + end + + def read_hash_with_default_value + pairs = Array.new(read_integer) do + [read_element, read_element] + end + default = read_element + Elements::HashWithDefaultValue.new(pairs, default) + end + + def read_object + name = read_element + object = Elements::Object.new(name) + ivars = Array.new(read_integer) do + [read_element, read_element] + end + Elements::WithIvars.new(object, ivars) + end + + def read_nil + Elements::Nil::NIL + end + + def read_float + string = @io.read(read_integer) + Elements::Float.new(string) + end + + def read_bignum + sign = read_byte + data = @io.read(read_integer * 2) + Elements::Bignum.new(sign, data) + end + + def read_extended_object + raise NotImplementedError, "Reading Marshal objects of type extended_object is not implemented" + end + + def read_class + raise NotImplementedError, "Reading Marshal objects of type class is not implemented" + end + + def read_module + raise NotImplementedError, "Reading Marshal objects of type module is not implemented" + end + + def read_class_or_module + raise NotImplementedError, "Reading Marshal objects of type class_or_module is not implemented" + end + + def read_data + raise NotImplementedError, "Reading Marshal objects of type data is not implemented" + end + + def read_regexp + raise NotImplementedError, "Reading Marshal objects of type regexp is not implemented" + end + + def read_struct + raise NotImplementedError, "Reading Marshal objects of type struct is not implemented" + end + + def read_user_class + name = read_element + wrapped_object = read_element + Elements::UserClass.new(name, wrapped_object) + end + end + end +end diff --git a/lib/rubygems/safe_marshal/visitors/stream_printer.rb b/lib/rubygems/safe_marshal/visitors/stream_printer.rb new file mode 100644 index 0000000000..162b36ad05 --- /dev/null +++ b/lib/rubygems/safe_marshal/visitors/stream_printer.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +require_relative "visitor" + +module Gem::SafeMarshal + module Visitors + class StreamPrinter < Visitor + def initialize(io, indent: "") + @io = io + @indent = indent + @level = 0 + end + + def visit(target) + @io.write("#{@indent * @level}#{target.class}") + target.instance_variables.each do |ivar| + value = target.instance_variable_get(ivar) + next if Elements::Element === value || Array === value + @io.write(" #{ivar}=#{value.inspect}") + end + @io.write("\n") + begin + @level += 1 + super + ensure + @level -= 1 + end + end + end + end +end diff --git a/lib/rubygems/safe_marshal/visitors/to_ruby.rb b/lib/rubygems/safe_marshal/visitors/to_ruby.rb new file mode 100644 index 0000000000..a9f1d048d4 --- /dev/null +++ b/lib/rubygems/safe_marshal/visitors/to_ruby.rb @@ -0,0 +1,415 @@ +# frozen_string_literal: true + +require_relative "visitor" + +module Gem::SafeMarshal + module Visitors + class ToRuby < Visitor + def initialize(permitted_classes:, permitted_symbols:, permitted_ivars:) + @permitted_classes = permitted_classes + @permitted_symbols = ["E"].concat(permitted_symbols).concat(permitted_classes) + @permitted_ivars = permitted_ivars + + @objects = [] + @symbols = [] + @class_cache = {} + + @stack = ["root"] + @stack_idx = 1 + end + + def inspect # :nodoc: + format("#<%s permitted_classes: %p permitted_symbols: %p permitted_ivars: %p>", + self.class, @permitted_classes, @permitted_symbols, @permitted_ivars) + end + + def visit(target) + stack_idx = @stack_idx + super + ensure + @stack_idx = stack_idx - 1 + end + + private + + def push_stack(element) + @stack[@stack_idx] = element + @stack_idx += 1 + end + + def visit_Gem_SafeMarshal_Elements_Array(a) + array = register_object([]) + + elements = a.elements + size = elements.size + idx = 0 + # not idiomatic, but there's a huge number of IMEMOs allocated here, so we avoid the block + # because this is such a hot path when doing a bundle install with the full index + until idx == size + push_stack idx + array << visit(elements[idx]) + idx += 1 + end + + array + end + + def visit_Gem_SafeMarshal_Elements_Symbol(s) + name = s.name + raise UnpermittedSymbolError.new(symbol: name, stack: formatted_stack) unless @permitted_symbols.include?(name) + visit_symbol_type(s) + end + + def map_ivars(klass, ivars) + stack_idx = @stack_idx + ivars.map.with_index do |(k, v), i| + @stack_idx = stack_idx + + push_stack "ivar_" + push_stack i + k = resolve_ivar(klass, k) + + @stack_idx = stack_idx + push_stack k + + next k, visit(v) + end + end + + def visit_Gem_SafeMarshal_Elements_WithIvars(e) + object_offset = @objects.size + push_stack "object" + object = visit(e.object) + ivars = map_ivars(object.class, e.ivars) + + case e.object + when Elements::UserDefined + if object.class == ::Time + internal = [] + + ivars.reject! do |k, v| + case k + when :offset, :zone, :nano_num, :nano_den, :submicro + internal << [k, v] + true + else + false + end + end + + s = e.object.binary_string + + marshal_string = "\x04\bIu:\tTime".b + marshal_string.concat(s.size + 5) + marshal_string << s + marshal_string.concat(internal.size + 5) + + internal.each do |k, v| + marshal_string.concat(":") + marshal_string.concat(k.size + 5) + marshal_string.concat(k.to_s) + dumped = Marshal.dump(v) + dumped[0, 2] = "" + marshal_string.concat(dumped) + end + + object = @objects[object_offset] = Marshal.load(marshal_string) + end + when Elements::String + enc = nil + + ivars.reject! do |k, v| + case k + when :E + case v + when TrueClass + enc = "UTF-8" + when FalseClass + enc = "US-ASCII" + else + raise FormatError, "Unexpected value for String :E #{v.inspect}" + end + when :encoding + enc = v + else + next false + end + true + end + + object.force_encoding(enc) if enc + end + + ivars.each do |k, v| + object.instance_variable_set k, v + end + object + end + + def visit_Gem_SafeMarshal_Elements_Hash(o) + hash = register_object({}) + + o.pairs.each_with_index do |(k, v), i| + push_stack i + k = visit(k) + push_stack k + hash[k] = visit(v) + end + + hash + end + + def visit_Gem_SafeMarshal_Elements_HashWithDefaultValue(o) + hash = visit_Gem_SafeMarshal_Elements_Hash(o) + push_stack :default + hash.default = visit(o.default) + hash + end + + def visit_Gem_SafeMarshal_Elements_Object(o) + register_object(resolve_class(o.name).allocate) + end + + def visit_Gem_SafeMarshal_Elements_ObjectLink(o) + @objects[o.offset] + end + + def visit_Gem_SafeMarshal_Elements_SymbolLink(o) + @symbols[o.offset] + end + + def visit_Gem_SafeMarshal_Elements_UserDefined(o) + register_object(call_method(resolve_class(o.name), :_load, o.binary_string)) + end + + def visit_Gem_SafeMarshal_Elements_UserMarshal(o) + klass = resolve_class(o.name) + compat = COMPAT_CLASSES.fetch(klass, nil) + idx = @objects.size + object = register_object(call_method(compat || klass, :allocate)) + + push_stack :data + ret = call_method(object, :marshal_load, visit(o.data)) + + if compat + object = @objects[idx] = ret + end + + object + end + + def visit_Gem_SafeMarshal_Elements_Integer(i) + i.int + end + + def visit_Gem_SafeMarshal_Elements_Nil(_) + nil + end + + def visit_Gem_SafeMarshal_Elements_True(_) + true + end + + def visit_Gem_SafeMarshal_Elements_False(_) + false + end + + def visit_Gem_SafeMarshal_Elements_String(s) + register_object(+s.str) + end + + def visit_Gem_SafeMarshal_Elements_Float(f) + case f.string + when "inf" + ::Float::INFINITY + when "-inf" + -::Float::INFINITY + when "nan" + ::Float::NAN + else + f.string.to_f + end + end + + def visit_Gem_SafeMarshal_Elements_Bignum(b) + result = 0 + b.data.each_byte.with_index do |byte, exp| + result += (byte * 2**(exp * 8)) + end + + case b.sign + when 43 # ?+ + result + when 45 # ?- + -result + else + raise FormatError, "Unexpected sign for Bignum #{b.sign.chr.inspect} (#{b.sign})" + end + end + + def visit_Gem_SafeMarshal_Elements_UserClass(r) + if resolve_class(r.name) == ::Hash && r.wrapped_object.is_a?(Elements::Hash) + + hash = register_object({}.compare_by_identity) + + o = r.wrapped_object + o.pairs.each_with_index do |(k, v), i| + push_stack i + k = visit(k) + push_stack k + hash[k] = visit(v) + end + + if o.is_a?(Elements::HashWithDefaultValue) + push_stack :default + hash.default = visit(o.default) + end + + hash + else + raise UnsupportedError.new("Unsupported user class #{resolve_class(r.name)} in marshal stream", stack: formatted_stack) + end + end + + def resolve_class(n) + @class_cache[n] ||= begin + to_s = resolve_symbol_name(n) + raise UnpermittedClassError.new(name: to_s, stack: formatted_stack) unless @permitted_classes.include?(to_s) + visit_symbol_type(n) + begin + ::Object.const_get(to_s) + rescue NameError + raise ArgumentError, "Undefined class #{to_s.inspect}" + end + end + end + + class RationalCompat + def marshal_load(s) + num, den = s + raise ArgumentError, "Expected 2 ints" unless s.size == 2 && num.is_a?(Integer) && den.is_a?(Integer) + Rational(num, den) + end + end + private_constant :RationalCompat + + COMPAT_CLASSES = {}.tap do |h| + h[Rational] = RationalCompat + end.compare_by_identity.freeze + private_constant :COMPAT_CLASSES + + def resolve_ivar(klass, name) + to_s = resolve_symbol_name(name) + + raise UnpermittedIvarError.new(symbol: to_s, klass: klass, stack: formatted_stack) unless @permitted_ivars.fetch(klass.name, [].freeze).include?(to_s) + + visit_symbol_type(name) + end + + def visit_symbol_type(element) + case element + when Elements::Symbol + sym = element.name.to_sym + @symbols << sym + sym + when Elements::SymbolLink + visit_Gem_SafeMarshal_Elements_SymbolLink(element) + end + end + + # This is a hot method, so avoid respond_to? checks on every invocation + if :read.respond_to?(:name) + def resolve_symbol_name(element) + case element + when Elements::Symbol + element.name + when Elements::SymbolLink + visit_Gem_SafeMarshal_Elements_SymbolLink(element).name + else + raise FormatError, "Expected symbol or symbol link, got #{element.inspect} @ #{formatted_stack.join(".")}" + end + end + else + def resolve_symbol_name(element) + case element + when Elements::Symbol + element.name + when Elements::SymbolLink + visit_Gem_SafeMarshal_Elements_SymbolLink(element).to_s + else + raise FormatError, "Expected symbol or symbol link, got #{element.inspect} @ #{formatted_stack.join(".")}" + end + end + end + + def register_object(o) + @objects << o + o + end + + def call_method(receiver, method, *args) + receiver.__send__(method, *args) + rescue NoMethodError => e + raise unless e.receiver == receiver + + raise MethodCallError, "Unable to call #{method.inspect} on #{receiver.inspect}, perhaps it is a class using marshal compat, which is not visible in ruby? #{e}" + end + + def formatted_stack + formatted = [] + @stack[0, @stack_idx].each do |e| + if e.is_a?(Integer) + if formatted.last == "ivar_" + formatted[-1] = "ivar_#{e}" + else + formatted << "[#{e}]" + end + else + formatted << e + end + end + formatted + end + + class Error < StandardError + end + + class UnpermittedSymbolError < Error + def initialize(symbol:, stack:) + @symbol = symbol + @stack = stack + super "Attempting to load unpermitted symbol #{symbol.inspect} @ #{stack.join "."}" + end + end + + class UnpermittedIvarError < Error + def initialize(symbol:, klass:, stack:) + @symbol = symbol + @klass = klass + @stack = stack + super "Attempting to set unpermitted ivar #{symbol.inspect} on object of class #{klass} @ #{stack.join "."}" + end + end + + class UnpermittedClassError < Error + def initialize(name:, stack:) + @name = name + @stack = stack + super "Attempting to load unpermitted class #{name.inspect} @ #{stack.join "."}" + end + end + + class UnsupportedError < Error + def initialize(message, stack:) + super "#{message} @ #{stack.join "."}" + end + end + + class FormatError < Error + end + + class MethodCallError < Error + end + end + end +end diff --git a/lib/rubygems/safe_marshal/visitors/visitor.rb b/lib/rubygems/safe_marshal/visitors/visitor.rb new file mode 100644 index 0000000000..c9a079dc0e --- /dev/null +++ b/lib/rubygems/safe_marshal/visitors/visitor.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Gem::SafeMarshal::Visitors + class Visitor + def visit(target) + send DISPATCH.fetch(target.class), target + end + + private + + DISPATCH = Gem::SafeMarshal::Elements.constants.each_with_object({}) do |c, h| + next if c == :Element + + klass = Gem::SafeMarshal::Elements.const_get(c) + h[klass] = :"visit_#{klass.name.gsub("::", "_")}" + h.default = :visit_unknown_element + end.compare_by_identity.freeze + private_constant :DISPATCH + + def visit_unknown_element(e) + raise ArgumentError, "Attempting to visit unknown element #{e.inspect}" + end + + def visit_Gem_SafeMarshal_Elements_Array(target) + target.elements.each {|e| visit(e) } + end + + def visit_Gem_SafeMarshal_Elements_Bignum(target); end + def visit_Gem_SafeMarshal_Elements_False(target); end + def visit_Gem_SafeMarshal_Elements_Float(target); end + + def visit_Gem_SafeMarshal_Elements_Hash(target) + target.pairs.each do |k, v| + visit(k) + visit(v) + end + end + + def visit_Gem_SafeMarshal_Elements_HashWithDefaultValue(target) + visit_Gem_SafeMarshal_Elements_Hash(target) + visit(target.default) + end + + def visit_Gem_SafeMarshal_Elements_Integer(target); end + def visit_Gem_SafeMarshal_Elements_Nil(target); end + + def visit_Gem_SafeMarshal_Elements_Object(target) + visit(target.name) + end + + def visit_Gem_SafeMarshal_Elements_ObjectLink(target); end + def visit_Gem_SafeMarshal_Elements_String(target); end + def visit_Gem_SafeMarshal_Elements_Symbol(target); end + def visit_Gem_SafeMarshal_Elements_SymbolLink(target); end + def visit_Gem_SafeMarshal_Elements_True(target); end + + def visit_Gem_SafeMarshal_Elements_UserDefined(target) + visit(target.name) + end + + def visit_Gem_SafeMarshal_Elements_UserMarshal(target) + visit(target.name) + visit(target.data) + end + + def visit_Gem_SafeMarshal_Elements_WithIvars(target) + visit(target.object) + target.ivars.each do |k, v| + visit(k) + visit(v) + end + end + end +end |