From 7d984d76bac857a54c1775b203c944da6cb20ac4 Mon Sep 17 00:00:00 2001 From: tenderlove Date: Thu, 29 Mar 2012 01:25:11 +0000 Subject: merge revision(s) 32578,33401,33403,33404,33531,33655,33679,33809,33900,33965,34067,34069,34087,34328,34330,34527,34772,34783,34839,34914,34953,34954,35153: [Backport #6212] * ext/psych/lib/psych.rb: updating version to match gem * ext/psych/psych.gemspec: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: fixing deprecation warning * ext/psych/lib/psych.rb: define a new BadAlias error class. * ext/psych/lib/psych/visitors/to_ruby.rb: raise an exception when deserializing an alias that does not exist. * test/psych/test_merge_keys.rb: corresponding test. * ext/psych/lib/psych.rb (load, parse): stop parsing or loading after the first document has been parsed. * test/psych/test_stream.rb: pertinent tests. * ext/psych/lib/psych.rb (parse_stream, load_stream): if a block is given, documents will be yielded to the block as they are parsed. [ruby-core:42404] [Bug #5978] * ext/psych/lib/psych/handlers/document_stream.rb: add a handler that yields documents as they are parsed * test/psych/test_stream.rb: corresponding tests. * ext/psych/lib/psych/core_ext.rb: only extend Kernel if IRB is loaded in order to stop method pollution. * ext/psych/lib/psych.rb: default open YAML files with utf8 external encoding. [ruby-core:42967] * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: prevent a memory leak by protecting calls to handler callbacks. * test/psych/test_parser.rb: test to demonstrate leak. * ext/psych/parser.c: set parser encoding based on the YAML input rather than user configuration. * test/psych/test_encoding.rb: corresponding tests. * test/psych/test_parser.rb: ditto * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: removed external encoding setter, allow parser to be reused. * ext/psych/lib/psych/parser.rb: added external encoding setter. * test/psych/test_parser.rb: test parser reuse * ext/psych/lib/psych/visitors/to_ruby.rb: Added support for loading subclasses of String with ivars * ext/psych/lib/psych/visitors/yaml_tree.rb: Added support for dumping subclasses of String with ivars * test/psych/test_string.rb: corresponding tests * ext/psych/lib/psych/visitors/to_ruby.rb: Added ability to load array subclasses with ivars. * ext/psych/lib/psych/visitors/yaml_tree.rb: Added ability to dump array subclasses with ivars. * test/psych/test_array.rb: corresponding tests * ext/psych/emitter.c: fixing clang warnings. Thanks Joey! * ext/psych/lib/psych/visitors/to_ruby.rb: BigDecimals can be restored from YAML. * ext/psych/lib/psych/visitors/yaml_tree.rb: BigDecimals can be dumped to YAML. * test/psych/test_numeric.rb: tests for BigDecimal serialization * ext/psych/lib/psych/scalar_scanner.rb: Strings that look like dates should be treated as strings and not dates. * test/psych/test_scalar_scanner.rb: corresponding tests. * ext/psych/lib/psych.rb (module Psych): parse and load methods take an optional file name that is used when raising Psych::SyntaxError exceptions * ext/psych/lib/psych/syntax_error.rb (module Psych): allow nil file names and handle nil file names in the exception message * test/psych/test_exception.rb (module Psych): Tests for changes. * ext/psych/parser.c (parse): parse method can take an option file name for use in exception messages. * test/psych/test_parser.rb: corresponding tests. * ext/psych/lib/psych.rb: remove autoload from psych * ext/psych/lib/psych/json.rb: ditto * ext/psych/lib/psych/tree_builder.rb: dump complex numbers, rationals, etc with reference ids. * ext/psych/lib/psych/visitors/yaml_tree.rb: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: loading complex numbers, rationals, etc with reference ids. * test/psych/test_object_references.rb: corresponding tests * ext/psych/lib/psych/scalar_scanner.rb: make sure strings that look like base 60 numbers are serialized as quoted strings. * test/psych/test_string.rb: test for change. * ext/psych/parser.c: remove unused variable. * ext/psych/lib/psych/syntax_error.rb: Add file, line, offset, and message attributes during parse failure. * ext/psych/parser.c: Update parser to raise exception with correct values. * test/psych/test_exception.rb: corresponding tests. * ext/psych/parser.c (parse): Use context_mark for indicating error line and column. * ext/psych/lib/psych/scalar_scanner.rb: use normal begin / rescue since postfix rescue cannot receive the exception class. Thanks nagachika! git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35165 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/psych/emitter.c | 2 +- ext/psych/lib/psych.rb | 108 +++++++-- ext/psych/lib/psych/core_ext.rb | 2 + ext/psych/lib/psych/handlers/document_stream.rb | 22 ++ ext/psych/lib/psych/json.rb | 6 - ext/psych/lib/psych/parser.rb | 4 + ext/psych/lib/psych/scalar_scanner.rb | 20 +- ext/psych/lib/psych/syntax_error.rb | 19 ++ ext/psych/lib/psych/tree_builder.rb | 4 +- ext/psych/lib/psych/visitors/to_ruby.rb | 70 ++++-- ext/psych/lib/psych/visitors/yaml_tree.rb | 69 +++++- ext/psych/parser.c | 303 +++++++++++++++++++----- 12 files changed, 506 insertions(+), 123 deletions(-) create mode 100644 ext/psych/lib/psych/handlers/document_stream.rb delete mode 100644 ext/psych/lib/psych/json.rb create mode 100644 ext/psych/lib/psych/syntax_error.rb (limited to 'ext') diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c index a85fa45ef5..15fdcfe75b 100644 --- a/ext/psych/emitter.c +++ b/ext/psych/emitter.c @@ -351,7 +351,7 @@ static VALUE start_mapping( (yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor)), (yaml_char_t *)(NIL_P(tag) ? NULL : StringValuePtr(tag)), implicit ? 1 : 0, - (yaml_sequence_style_t)NUM2INT(style) + (yaml_mapping_style_t)NUM2INT(style) ); emit(emitter, &event); diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb index f9052f92a4..82578204fb 100644 --- a/ext/psych/lib/psych.rb +++ b/ext/psych/lib/psych.rb @@ -10,7 +10,10 @@ require 'psych/set' require 'psych/coder' require 'psych/core_ext' require 'psych/deprecated' -require 'psych/json' +require 'psych/stream' +require 'psych/json/tree_builder' +require 'psych/json/stream' +require 'psych/handlers/document_stream' ### # = Overview @@ -90,7 +93,7 @@ require 'psych/json' module Psych # The version is Psych you're using - VERSION = '1.2.2' + VERSION = '1.3.1' # The version of libyaml Psych is using LIBYAML_VERSION = Psych.libyaml_version.join '.' @@ -98,39 +101,66 @@ module Psych class Exception < RuntimeError end - autoload :Stream, 'psych/stream' + class BadAlias < Exception + end ### # Load +yaml+ in to a Ruby data structure. If multiple documents are # provided, the object contained in the first document will be returned. + # +filename+ will be used in the exception message if any exception is raised + # while parsing. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. # # Example: # - # Psych.load("--- a") # => 'a' - # Psych.load("---\n - a\n - b") # => ['a', 'b'] - def self.load yaml - result = parse(yaml) + # Psych.load("--- a") # => 'a' + # Psych.load("---\n - a\n - b") # => ['a', 'b'] + # + # begin + # Psych.load("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(foo.txt): found character that cannot start any token" + # end + def self.load yaml, filename = nil + result = parse(yaml, filename) result ? result.to_ruby : result end ### # Parse a YAML string in +yaml+. Returns the first object of a YAML AST. + # +filename+ is used in the exception message if a Psych::SyntaxError is + # raised. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. # # Example: # # Psych.parse("---\n - a\n - b") # => # # + # begin + # Psych.parse("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(foo.txt): found character that cannot start any token" + # end + # # See Psych::Nodes for more information about YAML AST. - def self.parse yaml - children = parse_stream(yaml).children - children.empty? ? false : children.first.children.first + def self.parse yaml, filename = nil + parse_stream(yaml, filename) do |node| + return node + end + false end ### # Parse a file at +filename+. Returns the YAML AST. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. def self.parse_file filename - File.open filename do |f| - parse f + File.open filename, 'r:bom|utf-8' do |f| + parse f, filename end end @@ -143,16 +173,39 @@ module Psych ### # Parse a YAML string in +yaml+. Returns the full AST for the YAML document. # This method can handle multiple YAML documents contained in +yaml+. + # +filename+ is used in the exception message if a Psych::SyntaxError is + # raised. + # + # If a block is given, a Psych::Nodes::Document node will be yielded to the + # block as it's being parsed. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. # # Example: # # Psych.parse_stream("---\n - a\n - b") # => # # + # Psych.parse_stream("--- a\n--- b") do |node| + # node # => # + # end + # + # begin + # Psych.parse_stream("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(foo.txt): found character that cannot start any token" + # end + # # See Psych::Nodes for more information about YAML AST. - def self.parse_stream yaml - parser = self.parser - parser.parse yaml - parser.handler.root + def self.parse_stream yaml, filename = nil, &block + if block_given? + parser = Psych::Parser.new(Handlers::DocumentStream.new(&block)) + parser.parse yaml, filename + else + parser = self.parser + parser.parse yaml, filename + parser.handler.root + end end ### @@ -214,19 +267,34 @@ module Psych ### # Load multiple documents given in +yaml+. Returns the parsed documents - # as a list. For example: + # as a list. If a block is given, each document will be converted to ruby + # and passed to the block during parsing + # + # Example: # # Psych.load_stream("--- foo\n...\n--- bar\n...") # => ['foo', 'bar'] # - def self.load_stream yaml - parse_stream(yaml).children.map { |child| child.to_ruby } + # list = [] + # Psych.load_stream("--- foo\n...\n--- bar\n...") do |ruby| + # list << ruby + # end + # list # => ['foo', 'bar'] + # + def self.load_stream yaml, filename = nil + if block_given? + parse_stream(yaml, filename) do |node| + yield node.to_ruby + end + else + parse_stream(yaml, filename).children.map { |child| child.to_ruby } + end end ### # Load the document contained in +filename+. Returns the yaml contained in # +filename+ as a ruby object def self.load_file filename - File.open(filename) { |f| self.load f } + File.open(filename, 'r:bom|utf-8') { |f| self.load f, filename } end # :stopdoc: diff --git a/ext/psych/lib/psych/core_ext.rb b/ext/psych/lib/psych/core_ext.rb index 2ad75e1661..4a04c2d128 100644 --- a/ext/psych/lib/psych/core_ext.rb +++ b/ext/psych/lib/psych/core_ext.rb @@ -30,6 +30,7 @@ class Module alias :yaml_as :psych_yaml_as end +if defined?(::IRB) module Kernel def psych_y *objects puts Psych.dump_stream(*objects) @@ -38,3 +39,4 @@ module Kernel alias y psych_y private :y end +end diff --git a/ext/psych/lib/psych/handlers/document_stream.rb b/ext/psych/lib/psych/handlers/document_stream.rb new file mode 100644 index 0000000000..e429993c1c --- /dev/null +++ b/ext/psych/lib/psych/handlers/document_stream.rb @@ -0,0 +1,22 @@ +require 'psych/tree_builder' + +module Psych + module Handlers + class DocumentStream < Psych::TreeBuilder # :nodoc: + def initialize &block + super + @block = block + end + + def start_document version, tag_directives, implicit + n = Nodes::Document.new version, tag_directives, implicit + push n + end + + def end_document implicit_end = !streaming? + @last.implicit_end = implicit_end + @block.call pop + end + end + end +end diff --git a/ext/psych/lib/psych/json.rb b/ext/psych/lib/psych/json.rb deleted file mode 100644 index 412ab2708b..0000000000 --- a/ext/psych/lib/psych/json.rb +++ /dev/null @@ -1,6 +0,0 @@ -module Psych - module JSON - autoload :TreeBuilder, 'psych/json/tree_builder' - autoload :Stream, 'psych/json/stream' - end -end diff --git a/ext/psych/lib/psych/parser.rb b/ext/psych/lib/psych/parser.rb index 5d75605d49..84085f1fb0 100644 --- a/ext/psych/lib/psych/parser.rb +++ b/ext/psych/lib/psych/parser.rb @@ -36,12 +36,16 @@ module Psych # The handler on which events will be called attr_accessor :handler + # Set the encoding for this parser to +encoding+ + attr_writer :external_encoding + ### # Creates a new Psych::Parser instance with +handler+. YAML events will # be called on +handler+. See Psych::Parser for more details. def initialize handler = Handler.new @handler = handler + @external_encoding = ANY end end end diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb index 3e8acbb21c..fa2d385a63 100644 --- a/ext/psych/lib/psych/scalar_scanner.rb +++ b/ext/psych/lib/psych/scalar_scanner.rb @@ -46,9 +46,13 @@ module Psych end when TIME parse_time string - when /^\d{4}-\d{1,2}-\d{1,2}$/ + when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/ require 'date' - Date.strptime(string, '%Y-%m-%d') + begin + Date.strptime(string, '%Y-%m-%d') + rescue ArgumentError + string + end when /^\.inf$/i 1 / 0.0 when /^-\.inf$/i @@ -61,7 +65,7 @@ module Psych else string.sub(/^:/, '').to_sym end - when /^[-+]?[1-9][0-9_]*(:[0-5]?[0-9])+$/ + when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+$/ i = 0 string.split(':').each_with_index do |n,e| i += (n.to_i * 60 ** (e - 2).abs) @@ -74,13 +78,19 @@ module Psych end i when FLOAT - return Float(string.gsub(/[,_]/, '')) rescue ArgumentError + begin + return Float(string.gsub(/[,_]/, '')) + rescue ArgumentError + end @string_cache[string] = true string else if string.count('.') < 2 - return Integer(string.gsub(/[,_]/, '')) rescue ArgumentError + begin + return Integer(string.gsub(/[,_]/, '')) + rescue ArgumentError + end end @string_cache[string] = true diff --git a/ext/psych/lib/psych/syntax_error.rb b/ext/psych/lib/psych/syntax_error.rb new file mode 100644 index 0000000000..f79743dba4 --- /dev/null +++ b/ext/psych/lib/psych/syntax_error.rb @@ -0,0 +1,19 @@ +module Psych + class SyntaxError < ::SyntaxError + attr_reader :file, :line, :column, :offset, :problem, :context + + def initialize file, line, col, offset, problem, context + err = [problem, context].compact.join ' ' + filename = file || '' + message = "(%s): %s at line %d column %d" % [filename, err, line, col] + + @file = file + @line = line + @column = col + @offset = offset + @problem = problem + @context = context + super(message) + end + end +end diff --git a/ext/psych/lib/psych/tree_builder.rb b/ext/psych/lib/psych/tree_builder.rb index 8b4e972314..c8f344787c 100644 --- a/ext/psych/lib/psych/tree_builder.rb +++ b/ext/psych/lib/psych/tree_builder.rb @@ -72,7 +72,9 @@ module Psych end def scalar value, anchor, tag, plain, quoted, style - @last.children << Nodes::Scalar.new(value,anchor,tag,plain,quoted,style) + s = Nodes::Scalar.new(value,anchor,tag,plain,quoted,style) + @last.children << s + s end def alias anchor diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index ca046c5da9..2e082f90b4 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -31,9 +31,7 @@ module Psych result end - def visit_Psych_Nodes_Scalar o - @st[o.anchor] = o.value if o.anchor - + def deserialize o if klass = Psych.load_tags[o.tag] instance = klass.allocate @@ -52,8 +50,16 @@ module Psych case o.tag when '!binary', 'tag:yaml.org,2002:binary' o.value.unpack('m').first - when '!str', 'tag:yaml.org,2002:str' - o.value + when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str' + klass = resolve_class($1) + if klass + klass.allocate.replace o.value + else + o.value + end + when '!ruby/object:BigDecimal' + require 'bigdecimal' + BigDecimal._load o.value when "!ruby/object:DateTime" require 'date' @ss.parse_time(o.value).to_datetime @@ -92,6 +98,11 @@ module Psych @ss.tokenize o.value end end + private :deserialize + + def visit_Psych_Nodes_Scalar o + register o, deserialize(o) + end def visit_Psych_Nodes_Sequence o if klass = Psych.load_tags[o.tag] @@ -108,15 +119,18 @@ module Psych case o.tag when '!omap', 'tag:yaml.org,2002:omap' - map = Psych::Omap.new - @st[o.anchor] = map if o.anchor + map = register(o, Psych::Omap.new) o.children.each { |a| map[accept(a.children.first)] = accept a.children.last } map + when /^!(?:seq|ruby\/array):(.*)$/ + klass = resolve_class($1) + list = register(o, klass.allocate) + o.children.each { |c| list.push accept c } + list else - list = [] - @st[o.anchor] = list if o.anchor + list = register(o, []) o.children.each { |c| list.push accept c } list end @@ -127,16 +141,33 @@ module Psych return revive_hash({}, o) unless o.tag case o.tag - when '!str', 'tag:yaml.org,2002:str' + when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str' + klass = resolve_class($1) members = Hash[*o.children.map { |c| accept c }] string = members.delete 'str' + + if klass + string = klass.allocate + string.replace string + end + init_with(string, members.map { |k,v| [k.to_s.sub(/^@/, ''),v] }, o) + when /^!ruby\/array:(.*)$/ + klass = resolve_class($1) + list = register(o, klass.allocate) + + members = Hash[o.children.map { |c| accept c }.each_slice(2).to_a] + list.replace members['internal'] + + members['ivars'].each do |ivar, v| + list.instance_variable_set ivar, v + end + list when /^!ruby\/struct:?(.*)?$/ klass = resolve_class($1) if klass - s = klass.allocate - @st[o.anchor] = s if o.anchor + s = register(o, klass.allocate) members = {} struct_members = s.members.map { |x| x.to_sym } @@ -158,7 +189,7 @@ module Psych when '!ruby/range' h = Hash[*o.children.map { |c| accept c }] - Range.new(h['begin'], h['end'], h['excl']) + register o, Range.new(h['begin'], h['end'], h['excl']) when /^!ruby\/exception:?(.*)?$/ h = Hash[*o.children.map { |c| accept c }] @@ -177,11 +208,11 @@ module Psych when '!ruby/object:Complex' h = Hash[*o.children.map { |c| accept c }] - Complex(h['real'], h['image']) + register o, Complex(h['real'], h['image']) when '!ruby/object:Rational' h = Hash[*o.children.map { |c| accept c }] - Rational(h['numerator'], h['denominator']) + register o, Rational(h['numerator'], h['denominator']) when /^!ruby\/object:?(.*)?$/ name = $1 || 'Object' @@ -205,10 +236,15 @@ module Psych end def visit_Psych_Nodes_Alias o - @st[o.anchor] + @st.fetch(o.anchor) { raise BadAlias, "Unknown alias: #{o.anchor}" } end private + def register node, object + @st[node.anchor] = object if node.anchor + object + end + def revive_hash hash, o @st[o.anchor] = hash if o.anchor @@ -249,7 +285,7 @@ module Psych o.init_with c elsif o.respond_to?(:yaml_initialize) if $VERBOSE - "Implementing #{o.class}#yaml_initialize is deprecated, please implement \"init_with(coder)\"" + warn "Implementing #{o.class}#yaml_initialize is deprecated, please implement \"init_with(coder)\"" end o.yaml_initialize c.tag, c.map else diff --git a/ext/psych/lib/psych/visitors/yaml_tree.rb b/ext/psych/lib/psych/visitors/yaml_tree.rb index 5a092850c3..80af0466eb 100644 --- a/ext/psych/lib/psych/visitors/yaml_tree.rb +++ b/ext/psych/lib/psych/visitors/yaml_tree.rb @@ -159,13 +159,13 @@ module Psych end def visit_Regexp o - @emitter.scalar o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY + register o, @emitter.scalar(o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY) end def visit_DateTime o formatted = format_time o.to_time tag = '!ruby/object:DateTime' - @emitter.scalar formatted, nil, tag, false, false, Nodes::Scalar::ANY + register o, @emitter.scalar(formatted, nil, tag, false, false, Nodes::Scalar::ANY) end def visit_Time o @@ -174,7 +174,7 @@ module Psych end def visit_Rational o - @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK) + register o, @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK) [ 'denominator', o.denominator.to_s, @@ -187,7 +187,7 @@ module Psych end def visit_Complex o - @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK) + register o, @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK) ['real', o.real.to_s, 'image', o.imag.to_s].each do |m| @emitter.scalar m, nil, nil, true, false, Nodes::Scalar::ANY @@ -214,6 +214,10 @@ module Psych end end + def visit_BigDecimal o + @emitter.scalar o._dump, nil, '!ruby/object:BigDecimal', false, false, Nodes::Scalar::ANY + end + def binary? string string.encoding == Encoding::ASCII_8BIT || string.index("\x00") || @@ -241,9 +245,15 @@ module Psych ivars = find_ivars o if ivars.empty? + unless o.class == ::String + tag = "!ruby/string:#{o.class}" + end @emitter.scalar str, nil, tag, plain, quote, style else - @emitter.start_mapping nil, '!str', false, Nodes::Mapping::BLOCK + maptag = '!ruby/string' + maptag << ":#{o.class}" unless o.class == ::String + + @emitter.start_mapping nil, maptag, false, Nodes::Mapping::BLOCK @emitter.scalar 'str', nil, nil, true, false, Nodes::Scalar::ANY @emitter.scalar str, nil, tag, plain, quote, style @@ -255,16 +265,16 @@ module Psych def visit_Module o raise TypeError, "can't dump anonymous module: #{o}" unless o.name - @emitter.scalar o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED + register o, @emitter.scalar(o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED) end def visit_Class o raise TypeError, "can't dump anonymous class: #{o}" unless o.name - @emitter.scalar o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED + register o, @emitter.scalar(o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED) end def visit_Range o - @emitter.start_mapping nil, '!ruby/range', false, Nodes::Mapping::BLOCK + register o, @emitter.start_mapping(nil, '!ruby/range', false, Nodes::Mapping::BLOCK) ['begin', o.begin, 'end', o.end, 'excl', o.exclude_end?].each do |m| accept m end @@ -297,9 +307,13 @@ module Psych end def visit_Array o - register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) - o.each { |c| accept c } - @emitter.end_sequence + if o.class == ::Array + register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) + o.each { |c| accept c } + @emitter.end_sequence + else + visit_array_subclass o + end end def visit_NilClass o @@ -311,6 +325,39 @@ module Psych end private + def visit_array_subclass o + tag = "!ruby/array:#{o.class}" + if o.instance_variables.empty? + node = @emitter.start_sequence(nil, tag, false, Nodes::Sequence::BLOCK) + register o, node + o.each { |c| accept c } + @emitter.end_sequence + else + node = @emitter.start_mapping(nil, tag, false, Nodes::Sequence::BLOCK) + register o, node + + # Dump the internal list + accept 'internal' + @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) + o.each { |c| accept c } + @emitter.end_sequence + + # Dump the ivars + accept 'ivars' + @emitter.start_mapping(nil, nil, true, Nodes::Sequence::BLOCK) + o.instance_variables.each do |ivar| + accept ivar + accept o.instance_variable_get ivar + end + @emitter.end_mapping + + @emitter.end_mapping + end + end + + def dump_list o + end + # '%:z' was no defined until 1.9.3 if RUBY_VERSION < '1.9.3' def format_time time diff --git a/ext/psych/parser.c b/ext/psych/parser.c index e68768f562..9808c6b60e 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -59,6 +59,163 @@ static VALUE allocate(VALUE klass) return Data_Wrap_Struct(klass, 0, dealloc, parser); } +static VALUE make_exception(yaml_parser_t * parser, VALUE path) +{ + size_t line, column; + + line = parser->context_mark.line + 1; + column = parser->context_mark.column + 1; + + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + INT2NUM(line), + INT2NUM(column), + INT2NUM(parser->problem_offset), + parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, + parser->context ? rb_usascii_str_new2(parser->context) : Qnil); +} + +#ifdef HAVE_RUBY_ENCODING_H +static VALUE transcode_string(VALUE src, int * parser_encoding) +{ + int utf8 = rb_utf8_encindex(); + int utf16le = rb_enc_find_index("UTF16_LE"); + int utf16be = rb_enc_find_index("UTF16_BE"); + int source_encoding = rb_enc_get_index(src); + + if (source_encoding == utf8) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (source_encoding == utf16le) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (source_encoding == utf16be) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + src = rb_str_export_to_enc(src, rb_utf8_encoding()); + RB_GC_GUARD(src); + + *parser_encoding = YAML_UTF8_ENCODING; + return src; +} + +static VALUE transcode_io(VALUE src, int * parser_encoding) +{ + VALUE io_external_encoding; + int io_external_enc_index; + + io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); + + /* if no encoding is returned, assume ascii8bit. */ + if (NIL_P(io_external_encoding)) { + io_external_enc_index = rb_ascii8bit_encindex(); + } else { + io_external_enc_index = rb_to_encoding_index(io_external_encoding); + } + + /* Treat US-ASCII as utf_8 */ + if (io_external_enc_index == rb_usascii_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_utf8_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + /* Just guess on ASCII-8BIT */ + if (io_external_enc_index == rb_ascii8bit_encindex()) { + *parser_encoding = YAML_ANY_ENCODING; + return src; + } + + rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s", + rb_enc_name(rb_enc_from_index(io_external_enc_index))); + + return Qnil; +} + +#endif + +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static VALUE protected_end_sequence(VALUE handler) +{ + return rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + /* * call-seq: * parser.parse(yaml) @@ -68,27 +225,48 @@ static VALUE allocate(VALUE klass) * * See Psych::Parser and Psych::Parser#handler */ -static VALUE parse(VALUE self, VALUE yaml) +static VALUE parse(int argc, VALUE *argv, VALUE self) { + VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; int tainted = 0; + int state = 0; + int parser_encoding = YAML_ANY_ENCODING; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); + if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { + if(rb_respond_to(yaml, id_path)) + path = rb_funcall(yaml, id_path, 0); + else + path = rb_str_new2(""); + } + Data_Get_Struct(self, yaml_parser_t, parser); + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + if (OBJ_TAINTED(yaml)) tainted = 1; - if(rb_respond_to(yaml, id_read)) { + if (rb_respond_to(yaml, id_read)) { +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_io(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input(parser, io_reader, (void *)yaml); if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; } else { StringValue(yaml); +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_string(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), @@ -98,32 +276,28 @@ static VALUE parse(VALUE self, VALUE yaml) while(!done) { if(!yaml_parser_parse(parser, &event)) { - VALUE path; - size_t line = parser->mark.line; - size_t column = parser->mark.column; - - if(rb_respond_to(yaml, id_path)) - path = rb_funcall(yaml, id_path, 0); - else - path = rb_str_new2(""); + VALUE exception; + exception = make_exception(parser, path); yaml_parser_delete(parser); yaml_parser_initialize(parser); - rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d", - StringValuePtr(path), - (int)line, (int)column); + rb_exc_raise(exception); } switch(event.type) { - case YAML_STREAM_START_EVENT: - - rb_funcall(handler, id_start_stream, 1, - INT2NUM((long)event.data.stream_start.encoding) - ); - break; + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM((long)event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; case YAML_DOCUMENT_START_EVENT: { + VALUE args[4]; /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ @@ -161,19 +335,25 @@ static VALUE parse(VALUE self, VALUE yaml) rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } - rb_funcall(handler, id_start_document, 3, - version, tag_directives, - event.data.document_start.implicit == 1 ? Qtrue : Qfalse - ); + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); } break; case YAML_DOCUMENT_END_EVENT: - rb_funcall(handler, id_end_document, 1, - event.data.document_end.implicit == 1 ? Qtrue : Qfalse - ); + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } break; case YAML_ALIAS_EVENT: { + VALUE args[2]; VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); @@ -183,11 +363,14 @@ static VALUE parse(VALUE self, VALUE yaml) #endif } - rb_funcall(handler, id_alias, 1, alias); + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); } break; case YAML_SCALAR_EVENT: { + VALUE args[7]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; @@ -225,12 +408,19 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.scalar.style); - rb_funcall(handler, id_scalar, 6, - val, anchor, tag, plain_implicit, quoted_implicit, style); + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); } break; case YAML_SEQUENCE_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -256,15 +446,21 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.sequence_start.style); - rb_funcall(handler, id_start_sequence, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); } break; case YAML_SEQUENCE_END_EVENT: - rb_funcall(handler, id_end_sequence, 0); + rb_protect(protected_end_sequence, handler, &state); break; case YAML_MAPPING_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -289,50 +485,33 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.mapping_start.style); - rb_funcall(handler, id_start_mapping, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); } break; case YAML_MAPPING_END_EVENT: - rb_funcall(handler, id_end_mapping, 0); + rb_protect(protected_end_mapping, handler, &state); break; case YAML_NO_EVENT: - rb_funcall(handler, id_empty, 0); + rb_protect(protected_empty, handler, &state); break; case YAML_STREAM_END_EVENT: - rb_funcall(handler, id_end_stream, 0); + rb_protect(protected_end_stream, handler, &state); done = 1; break; } yaml_event_delete(&event); + if (state) rb_jump_tag(state); } return self; } -/* - * call-seq: - * parser.external_encoding=(encoding) - * - * Set the encoding for this parser to +encoding+ - */ -static VALUE set_external_encoding(VALUE self, VALUE encoding) -{ - yaml_parser_t * parser; - VALUE exception; - - Data_Get_Struct(self, yaml_parser_t, parser); - - if(parser->encoding) { - exception = rb_const_get_at(mPsych, rb_intern("Exception")); - rb_raise(exception, "don't set the encoding twice!"); - } - - yaml_parser_set_encoding(parser, NUM2INT(encoding)); - - return encoding; -} - /* * call-seq: * parser.mark # => # @@ -376,11 +555,11 @@ void Init_psych_parser() /* UTF-16-BE Encoding with BOM */ rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING)); + rb_require("psych/syntax_error"); ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError); - rb_define_method(cPsychParser, "parse", parse, 1); + rb_define_method(cPsychParser, "parse", parse, -1); rb_define_method(cPsychParser, "mark", mark, 0); - rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1); id_read = rb_intern("read"); id_path = rb_intern("path"); -- cgit v1.2.3