summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authortenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-03-29 01:25:11 +0000
committertenderlove <tenderlove@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-03-29 01:25:11 +0000
commit7d984d76bac857a54c1775b203c944da6cb20ac4 (patch)
treefacf5936314b9853ec789343165b79a432efa5e3 /ext
parentc857c7379c8f08d8ca985efa40e8682ab4686984 (diff)
merge revision(s) 32578,33401,33403,33404,33531,33655,33679,33809,33900,33965,34067,34069,34087,34328,34330,34527,34772,34783,34839,34914,34953,34954,35153: [Backport #6212]
* ext/psych/lib/psych.rb: updating version to match gem * ext/psych/psych.gemspec: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: fixing deprecation warning * ext/psych/lib/psych.rb: define a new BadAlias error class. * ext/psych/lib/psych/visitors/to_ruby.rb: raise an exception when deserializing an alias that does not exist. * test/psych/test_merge_keys.rb: corresponding test. * ext/psych/lib/psych.rb (load, parse): stop parsing or loading after the first document has been parsed. * test/psych/test_stream.rb: pertinent tests. * ext/psych/lib/psych.rb (parse_stream, load_stream): if a block is given, documents will be yielded to the block as they are parsed. [ruby-core:42404] [Bug #5978] * ext/psych/lib/psych/handlers/document_stream.rb: add a handler that yields documents as they are parsed * test/psych/test_stream.rb: corresponding tests. * ext/psych/lib/psych/core_ext.rb: only extend Kernel if IRB is loaded in order to stop method pollution. * ext/psych/lib/psych.rb: default open YAML files with utf8 external encoding. [ruby-core:42967] * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: prevent a memory leak by protecting calls to handler callbacks. * test/psych/test_parser.rb: test to demonstrate leak. * ext/psych/parser.c: set parser encoding based on the YAML input rather than user configuration. * test/psych/test_encoding.rb: corresponding tests. * test/psych/test_parser.rb: ditto * test/psych/test_tainted.rb: ditto * ext/psych/parser.c: removed external encoding setter, allow parser to be reused. * ext/psych/lib/psych/parser.rb: added external encoding setter. * test/psych/test_parser.rb: test parser reuse * ext/psych/lib/psych/visitors/to_ruby.rb: Added support for loading subclasses of String with ivars * ext/psych/lib/psych/visitors/yaml_tree.rb: Added support for dumping subclasses of String with ivars * test/psych/test_string.rb: corresponding tests * ext/psych/lib/psych/visitors/to_ruby.rb: Added ability to load array subclasses with ivars. * ext/psych/lib/psych/visitors/yaml_tree.rb: Added ability to dump array subclasses with ivars. * test/psych/test_array.rb: corresponding tests * ext/psych/emitter.c: fixing clang warnings. Thanks Joey! * ext/psych/lib/psych/visitors/to_ruby.rb: BigDecimals can be restored from YAML. * ext/psych/lib/psych/visitors/yaml_tree.rb: BigDecimals can be dumped to YAML. * test/psych/test_numeric.rb: tests for BigDecimal serialization * ext/psych/lib/psych/scalar_scanner.rb: Strings that look like dates should be treated as strings and not dates. * test/psych/test_scalar_scanner.rb: corresponding tests. * ext/psych/lib/psych.rb (module Psych): parse and load methods take an optional file name that is used when raising Psych::SyntaxError exceptions * ext/psych/lib/psych/syntax_error.rb (module Psych): allow nil file names and handle nil file names in the exception message * test/psych/test_exception.rb (module Psych): Tests for changes. * ext/psych/parser.c (parse): parse method can take an option file name for use in exception messages. * test/psych/test_parser.rb: corresponding tests. * ext/psych/lib/psych.rb: remove autoload from psych * ext/psych/lib/psych/json.rb: ditto * ext/psych/lib/psych/tree_builder.rb: dump complex numbers, rationals, etc with reference ids. * ext/psych/lib/psych/visitors/yaml_tree.rb: ditto * ext/psych/lib/psych/visitors/to_ruby.rb: loading complex numbers, rationals, etc with reference ids. * test/psych/test_object_references.rb: corresponding tests * ext/psych/lib/psych/scalar_scanner.rb: make sure strings that look like base 60 numbers are serialized as quoted strings. * test/psych/test_string.rb: test for change. * ext/psych/parser.c: remove unused variable. * ext/psych/lib/psych/syntax_error.rb: Add file, line, offset, and message attributes during parse failure. * ext/psych/parser.c: Update parser to raise exception with correct values. * test/psych/test_exception.rb: corresponding tests. * ext/psych/parser.c (parse): Use context_mark for indicating error line and column. * ext/psych/lib/psych/scalar_scanner.rb: use normal begin / rescue since postfix rescue cannot receive the exception class. Thanks nagachika! git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_3@35165 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext')
-rw-r--r--ext/psych/emitter.c2
-rw-r--r--ext/psych/lib/psych.rb108
-rw-r--r--ext/psych/lib/psych/core_ext.rb2
-rw-r--r--ext/psych/lib/psych/handlers/document_stream.rb22
-rw-r--r--ext/psych/lib/psych/json.rb6
-rw-r--r--ext/psych/lib/psych/parser.rb4
-rw-r--r--ext/psych/lib/psych/scalar_scanner.rb20
-rw-r--r--ext/psych/lib/psych/syntax_error.rb19
-rw-r--r--ext/psych/lib/psych/tree_builder.rb4
-rw-r--r--ext/psych/lib/psych/visitors/to_ruby.rb70
-rw-r--r--ext/psych/lib/psych/visitors/yaml_tree.rb69
-rw-r--r--ext/psych/parser.c303
12 files changed, 506 insertions, 123 deletions
diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c
index a85fa45ef5..15fdcfe75b 100644
--- a/ext/psych/emitter.c
+++ b/ext/psych/emitter.c
@@ -351,7 +351,7 @@ static VALUE start_mapping(
(yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor)),
(yaml_char_t *)(NIL_P(tag) ? NULL : StringValuePtr(tag)),
implicit ? 1 : 0,
- (yaml_sequence_style_t)NUM2INT(style)
+ (yaml_mapping_style_t)NUM2INT(style)
);
emit(emitter, &event);
diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb
index f9052f92a4..82578204fb 100644
--- a/ext/psych/lib/psych.rb
+++ b/ext/psych/lib/psych.rb
@@ -10,7 +10,10 @@ require 'psych/set'
require 'psych/coder'
require 'psych/core_ext'
require 'psych/deprecated'
-require 'psych/json'
+require 'psych/stream'
+require 'psych/json/tree_builder'
+require 'psych/json/stream'
+require 'psych/handlers/document_stream'
###
# = Overview
@@ -90,7 +93,7 @@ require 'psych/json'
module Psych
# The version is Psych you're using
- VERSION = '1.2.2'
+ VERSION = '1.3.1'
# The version of libyaml Psych is using
LIBYAML_VERSION = Psych.libyaml_version.join '.'
@@ -98,39 +101,66 @@ module Psych
class Exception < RuntimeError
end
- autoload :Stream, 'psych/stream'
+ class BadAlias < Exception
+ end
###
# Load +yaml+ in to a Ruby data structure. If multiple documents are
# provided, the object contained in the first document will be returned.
+ # +filename+ will be used in the exception message if any exception is raised
+ # while parsing.
+ #
+ # Raises a Psych::SyntaxError when a YAML syntax error is detected.
#
# Example:
#
- # Psych.load("--- a") # => 'a'
- # Psych.load("---\n - a\n - b") # => ['a', 'b']
- def self.load yaml
- result = parse(yaml)
+ # Psych.load("--- a") # => 'a'
+ # Psych.load("---\n - a\n - b") # => ['a', 'b']
+ #
+ # begin
+ # Psych.load("--- `", "file.txt")
+ # rescue Psych::SyntaxError => ex
+ # ex.file # => 'file.txt'
+ # ex.message # => "(foo.txt): found character that cannot start any token"
+ # end
+ def self.load yaml, filename = nil
+ result = parse(yaml, filename)
result ? result.to_ruby : result
end
###
# Parse a YAML string in +yaml+. Returns the first object of a YAML AST.
+ # +filename+ is used in the exception message if a Psych::SyntaxError is
+ # raised.
+ #
+ # Raises a Psych::SyntaxError when a YAML syntax error is detected.
#
# Example:
#
# Psych.parse("---\n - a\n - b") # => #<Psych::Nodes::Sequence:0x00>
#
+ # begin
+ # Psych.parse("--- `", "file.txt")
+ # rescue Psych::SyntaxError => ex
+ # ex.file # => 'file.txt'
+ # ex.message # => "(foo.txt): found character that cannot start any token"
+ # end
+ #
# See Psych::Nodes for more information about YAML AST.
- def self.parse yaml
- children = parse_stream(yaml).children
- children.empty? ? false : children.first.children.first
+ def self.parse yaml, filename = nil
+ parse_stream(yaml, filename) do |node|
+ return node
+ end
+ false
end
###
# Parse a file at +filename+. Returns the YAML AST.
+ #
+ # Raises a Psych::SyntaxError when a YAML syntax error is detected.
def self.parse_file filename
- File.open filename do |f|
- parse f
+ File.open filename, 'r:bom|utf-8' do |f|
+ parse f, filename
end
end
@@ -143,16 +173,39 @@ module Psych
###
# Parse a YAML string in +yaml+. Returns the full AST for the YAML document.
# This method can handle multiple YAML documents contained in +yaml+.
+ # +filename+ is used in the exception message if a Psych::SyntaxError is
+ # raised.
+ #
+ # If a block is given, a Psych::Nodes::Document node will be yielded to the
+ # block as it's being parsed.
+ #
+ # Raises a Psych::SyntaxError when a YAML syntax error is detected.
#
# Example:
#
# Psych.parse_stream("---\n - a\n - b") # => #<Psych::Nodes::Stream:0x00>
#
+ # Psych.parse_stream("--- a\n--- b") do |node|
+ # node # => #<Psych::Nodes::Document:0x00>
+ # end
+ #
+ # begin
+ # Psych.parse_stream("--- `", "file.txt")
+ # rescue Psych::SyntaxError => ex
+ # ex.file # => 'file.txt'
+ # ex.message # => "(foo.txt): found character that cannot start any token"
+ # end
+ #
# See Psych::Nodes for more information about YAML AST.
- def self.parse_stream yaml
- parser = self.parser
- parser.parse yaml
- parser.handler.root
+ def self.parse_stream yaml, filename = nil, &block
+ if block_given?
+ parser = Psych::Parser.new(Handlers::DocumentStream.new(&block))
+ parser.parse yaml, filename
+ else
+ parser = self.parser
+ parser.parse yaml, filename
+ parser.handler.root
+ end
end
###
@@ -214,19 +267,34 @@ module Psych
###
# Load multiple documents given in +yaml+. Returns the parsed documents
- # as a list. For example:
+ # as a list. If a block is given, each document will be converted to ruby
+ # and passed to the block during parsing
+ #
+ # Example:
#
# Psych.load_stream("--- foo\n...\n--- bar\n...") # => ['foo', 'bar']
#
- def self.load_stream yaml
- parse_stream(yaml).children.map { |child| child.to_ruby }
+ # list = []
+ # Psych.load_stream("--- foo\n...\n--- bar\n...") do |ruby|
+ # list << ruby
+ # end
+ # list # => ['foo', 'bar']
+ #
+ def self.load_stream yaml, filename = nil
+ if block_given?
+ parse_stream(yaml, filename) do |node|
+ yield node.to_ruby
+ end
+ else
+ parse_stream(yaml, filename).children.map { |child| child.to_ruby }
+ end
end
###
# Load the document contained in +filename+. Returns the yaml contained in
# +filename+ as a ruby object
def self.load_file filename
- File.open(filename) { |f| self.load f }
+ File.open(filename, 'r:bom|utf-8') { |f| self.load f, filename }
end
# :stopdoc:
diff --git a/ext/psych/lib/psych/core_ext.rb b/ext/psych/lib/psych/core_ext.rb
index 2ad75e1661..4a04c2d128 100644
--- a/ext/psych/lib/psych/core_ext.rb
+++ b/ext/psych/lib/psych/core_ext.rb
@@ -30,6 +30,7 @@ class Module
alias :yaml_as :psych_yaml_as
end
+if defined?(::IRB)
module Kernel
def psych_y *objects
puts Psych.dump_stream(*objects)
@@ -38,3 +39,4 @@ module Kernel
alias y psych_y
private :y
end
+end
diff --git a/ext/psych/lib/psych/handlers/document_stream.rb b/ext/psych/lib/psych/handlers/document_stream.rb
new file mode 100644
index 0000000000..e429993c1c
--- /dev/null
+++ b/ext/psych/lib/psych/handlers/document_stream.rb
@@ -0,0 +1,22 @@
+require 'psych/tree_builder'
+
+module Psych
+ module Handlers
+ class DocumentStream < Psych::TreeBuilder # :nodoc:
+ def initialize &block
+ super
+ @block = block
+ end
+
+ def start_document version, tag_directives, implicit
+ n = Nodes::Document.new version, tag_directives, implicit
+ push n
+ end
+
+ def end_document implicit_end = !streaming?
+ @last.implicit_end = implicit_end
+ @block.call pop
+ end
+ end
+ end
+end
diff --git a/ext/psych/lib/psych/json.rb b/ext/psych/lib/psych/json.rb
deleted file mode 100644
index 412ab2708b..0000000000
--- a/ext/psych/lib/psych/json.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module Psych
- module JSON
- autoload :TreeBuilder, 'psych/json/tree_builder'
- autoload :Stream, 'psych/json/stream'
- end
-end
diff --git a/ext/psych/lib/psych/parser.rb b/ext/psych/lib/psych/parser.rb
index 5d75605d49..84085f1fb0 100644
--- a/ext/psych/lib/psych/parser.rb
+++ b/ext/psych/lib/psych/parser.rb
@@ -36,12 +36,16 @@ module Psych
# The handler on which events will be called
attr_accessor :handler
+ # Set the encoding for this parser to +encoding+
+ attr_writer :external_encoding
+
###
# Creates a new Psych::Parser instance with +handler+. YAML events will
# be called on +handler+. See Psych::Parser for more details.
def initialize handler = Handler.new
@handler = handler
+ @external_encoding = ANY
end
end
end
diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb
index 3e8acbb21c..fa2d385a63 100644
--- a/ext/psych/lib/psych/scalar_scanner.rb
+++ b/ext/psych/lib/psych/scalar_scanner.rb
@@ -46,9 +46,13 @@ module Psych
end
when TIME
parse_time string
- when /^\d{4}-\d{1,2}-\d{1,2}$/
+ when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/
require 'date'
- Date.strptime(string, '%Y-%m-%d')
+ begin
+ Date.strptime(string, '%Y-%m-%d')
+ rescue ArgumentError
+ string
+ end
when /^\.inf$/i
1 / 0.0
when /^-\.inf$/i
@@ -61,7 +65,7 @@ module Psych
else
string.sub(/^:/, '').to_sym
end
- when /^[-+]?[1-9][0-9_]*(:[0-5]?[0-9])+$/
+ when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+$/
i = 0
string.split(':').each_with_index do |n,e|
i += (n.to_i * 60 ** (e - 2).abs)
@@ -74,13 +78,19 @@ module Psych
end
i
when FLOAT
- return Float(string.gsub(/[,_]/, '')) rescue ArgumentError
+ begin
+ return Float(string.gsub(/[,_]/, ''))
+ rescue ArgumentError
+ end
@string_cache[string] = true
string
else
if string.count('.') < 2
- return Integer(string.gsub(/[,_]/, '')) rescue ArgumentError
+ begin
+ return Integer(string.gsub(/[,_]/, ''))
+ rescue ArgumentError
+ end
end
@string_cache[string] = true
diff --git a/ext/psych/lib/psych/syntax_error.rb b/ext/psych/lib/psych/syntax_error.rb
new file mode 100644
index 0000000000..f79743dba4
--- /dev/null
+++ b/ext/psych/lib/psych/syntax_error.rb
@@ -0,0 +1,19 @@
+module Psych
+ class SyntaxError < ::SyntaxError
+ attr_reader :file, :line, :column, :offset, :problem, :context
+
+ def initialize file, line, col, offset, problem, context
+ err = [problem, context].compact.join ' '
+ filename = file || '<unknown>'
+ message = "(%s): %s at line %d column %d" % [filename, err, line, col]
+
+ @file = file
+ @line = line
+ @column = col
+ @offset = offset
+ @problem = problem
+ @context = context
+ super(message)
+ end
+ end
+end
diff --git a/ext/psych/lib/psych/tree_builder.rb b/ext/psych/lib/psych/tree_builder.rb
index 8b4e972314..c8f344787c 100644
--- a/ext/psych/lib/psych/tree_builder.rb
+++ b/ext/psych/lib/psych/tree_builder.rb
@@ -72,7 +72,9 @@ module Psych
end
def scalar value, anchor, tag, plain, quoted, style
- @last.children << Nodes::Scalar.new(value,anchor,tag,plain,quoted,style)
+ s = Nodes::Scalar.new(value,anchor,tag,plain,quoted,style)
+ @last.children << s
+ s
end
def alias anchor
diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb
index ca046c5da9..2e082f90b4 100644
--- a/ext/psych/lib/psych/visitors/to_ruby.rb
+++ b/ext/psych/lib/psych/visitors/to_ruby.rb
@@ -31,9 +31,7 @@ module Psych
result
end
- def visit_Psych_Nodes_Scalar o
- @st[o.anchor] = o.value if o.anchor
-
+ def deserialize o
if klass = Psych.load_tags[o.tag]
instance = klass.allocate
@@ -52,8 +50,16 @@ module Psych
case o.tag
when '!binary', 'tag:yaml.org,2002:binary'
o.value.unpack('m').first
- when '!str', 'tag:yaml.org,2002:str'
- o.value
+ when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str'
+ klass = resolve_class($1)
+ if klass
+ klass.allocate.replace o.value
+ else
+ o.value
+ end
+ when '!ruby/object:BigDecimal'
+ require 'bigdecimal'
+ BigDecimal._load o.value
when "!ruby/object:DateTime"
require 'date'
@ss.parse_time(o.value).to_datetime
@@ -92,6 +98,11 @@ module Psych
@ss.tokenize o.value
end
end
+ private :deserialize
+
+ def visit_Psych_Nodes_Scalar o
+ register o, deserialize(o)
+ end
def visit_Psych_Nodes_Sequence o
if klass = Psych.load_tags[o.tag]
@@ -108,15 +119,18 @@ module Psych
case o.tag
when '!omap', 'tag:yaml.org,2002:omap'
- map = Psych::Omap.new
- @st[o.anchor] = map if o.anchor
+ map = register(o, Psych::Omap.new)
o.children.each { |a|
map[accept(a.children.first)] = accept a.children.last
}
map
+ when /^!(?:seq|ruby\/array):(.*)$/
+ klass = resolve_class($1)
+ list = register(o, klass.allocate)
+ o.children.each { |c| list.push accept c }
+ list
else
- list = []
- @st[o.anchor] = list if o.anchor
+ list = register(o, [])
o.children.each { |c| list.push accept c }
list
end
@@ -127,16 +141,33 @@ module Psych
return revive_hash({}, o) unless o.tag
case o.tag
- when '!str', 'tag:yaml.org,2002:str'
+ when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str'
+ klass = resolve_class($1)
members = Hash[*o.children.map { |c| accept c }]
string = members.delete 'str'
+
+ if klass
+ string = klass.allocate
+ string.replace string
+ end
+
init_with(string, members.map { |k,v| [k.to_s.sub(/^@/, ''),v] }, o)
+ when /^!ruby\/array:(.*)$/
+ klass = resolve_class($1)
+ list = register(o, klass.allocate)
+
+ members = Hash[o.children.map { |c| accept c }.each_slice(2).to_a]
+ list.replace members['internal']
+
+ members['ivars'].each do |ivar, v|
+ list.instance_variable_set ivar, v
+ end
+ list
when /^!ruby\/struct:?(.*)?$/
klass = resolve_class($1)
if klass
- s = klass.allocate
- @st[o.anchor] = s if o.anchor
+ s = register(o, klass.allocate)
members = {}
struct_members = s.members.map { |x| x.to_sym }
@@ -158,7 +189,7 @@ module Psych
when '!ruby/range'
h = Hash[*o.children.map { |c| accept c }]
- Range.new(h['begin'], h['end'], h['excl'])
+ register o, Range.new(h['begin'], h['end'], h['excl'])
when /^!ruby\/exception:?(.*)?$/
h = Hash[*o.children.map { |c| accept c }]
@@ -177,11 +208,11 @@ module Psych
when '!ruby/object:Complex'
h = Hash[*o.children.map { |c| accept c }]
- Complex(h['real'], h['image'])
+ register o, Complex(h['real'], h['image'])
when '!ruby/object:Rational'
h = Hash[*o.children.map { |c| accept c }]
- Rational(h['numerator'], h['denominator'])
+ register o, Rational(h['numerator'], h['denominator'])
when /^!ruby\/object:?(.*)?$/
name = $1 || 'Object'
@@ -205,10 +236,15 @@ module Psych
end
def visit_Psych_Nodes_Alias o
- @st[o.anchor]
+ @st.fetch(o.anchor) { raise BadAlias, "Unknown alias: #{o.anchor}" }
end
private
+ def register node, object
+ @st[node.anchor] = object if node.anchor
+ object
+ end
+
def revive_hash hash, o
@st[o.anchor] = hash if o.anchor
@@ -249,7 +285,7 @@ module Psych
o.init_with c
elsif o.respond_to?(:yaml_initialize)
if $VERBOSE
- "Implementing #{o.class}#yaml_initialize is deprecated, please implement \"init_with(coder)\""
+ warn "Implementing #{o.class}#yaml_initialize is deprecated, please implement \"init_with(coder)\""
end
o.yaml_initialize c.tag, c.map
else
diff --git a/ext/psych/lib/psych/visitors/yaml_tree.rb b/ext/psych/lib/psych/visitors/yaml_tree.rb
index 5a092850c3..80af0466eb 100644
--- a/ext/psych/lib/psych/visitors/yaml_tree.rb
+++ b/ext/psych/lib/psych/visitors/yaml_tree.rb
@@ -159,13 +159,13 @@ module Psych
end
def visit_Regexp o
- @emitter.scalar o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY
+ register o, @emitter.scalar(o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY)
end
def visit_DateTime o
formatted = format_time o.to_time
tag = '!ruby/object:DateTime'
- @emitter.scalar formatted, nil, tag, false, false, Nodes::Scalar::ANY
+ register o, @emitter.scalar(formatted, nil, tag, false, false, Nodes::Scalar::ANY)
end
def visit_Time o
@@ -174,7 +174,7 @@ module Psych
end
def visit_Rational o
- @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK)
+ register o, @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK)
[
'denominator', o.denominator.to_s,
@@ -187,7 +187,7 @@ module Psych
end
def visit_Complex o
- @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK)
+ register o, @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK)
['real', o.real.to_s, 'image', o.imag.to_s].each do |m|
@emitter.scalar m, nil, nil, true, false, Nodes::Scalar::ANY
@@ -214,6 +214,10 @@ module Psych
end
end
+ def visit_BigDecimal o
+ @emitter.scalar o._dump, nil, '!ruby/object:BigDecimal', false, false, Nodes::Scalar::ANY
+ end
+
def binary? string
string.encoding == Encoding::ASCII_8BIT ||
string.index("\x00") ||
@@ -241,9 +245,15 @@ module Psych
ivars = find_ivars o
if ivars.empty?
+ unless o.class == ::String
+ tag = "!ruby/string:#{o.class}"
+ end
@emitter.scalar str, nil, tag, plain, quote, style
else
- @emitter.start_mapping nil, '!str', false, Nodes::Mapping::BLOCK
+ maptag = '!ruby/string'
+ maptag << ":#{o.class}" unless o.class == ::String
+
+ @emitter.start_mapping nil, maptag, false, Nodes::Mapping::BLOCK
@emitter.scalar 'str', nil, nil, true, false, Nodes::Scalar::ANY
@emitter.scalar str, nil, tag, plain, quote, style
@@ -255,16 +265,16 @@ module Psych
def visit_Module o
raise TypeError, "can't dump anonymous module: #{o}" unless o.name
- @emitter.scalar o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED
+ register o, @emitter.scalar(o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED)
end
def visit_Class o
raise TypeError, "can't dump anonymous class: #{o}" unless o.name
- @emitter.scalar o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED
+ register o, @emitter.scalar(o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED)
end
def visit_Range o
- @emitter.start_mapping nil, '!ruby/range', false, Nodes::Mapping::BLOCK
+ register o, @emitter.start_mapping(nil, '!ruby/range', false, Nodes::Mapping::BLOCK)
['begin', o.begin, 'end', o.end, 'excl', o.exclude_end?].each do |m|
accept m
end
@@ -297,9 +307,13 @@ module Psych
end
def visit_Array o
- register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK)
- o.each { |c| accept c }
- @emitter.end_sequence
+ if o.class == ::Array
+ register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK)
+ o.each { |c| accept c }
+ @emitter.end_sequence
+ else
+ visit_array_subclass o
+ end
end
def visit_NilClass o
@@ -311,6 +325,39 @@ module Psych
end
private
+ def visit_array_subclass o
+ tag = "!ruby/array:#{o.class}"
+ if o.instance_variables.empty?
+ node = @emitter.start_sequence(nil, tag, false, Nodes::Sequence::BLOCK)
+ register o, node
+ o.each { |c| accept c }
+ @emitter.end_sequence
+ else
+ node = @emitter.start_mapping(nil, tag, false, Nodes::Sequence::BLOCK)
+ register o, node
+
+ # Dump the internal list
+ accept 'internal'
+ @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK)
+ o.each { |c| accept c }
+ @emitter.end_sequence
+
+ # Dump the ivars
+ accept 'ivars'
+ @emitter.start_mapping(nil, nil, true, Nodes::Sequence::BLOCK)
+ o.instance_variables.each do |ivar|
+ accept ivar
+ accept o.instance_variable_get ivar
+ end
+ @emitter.end_mapping
+
+ @emitter.end_mapping
+ end
+ end
+
+ def dump_list o
+ end
+
# '%:z' was no defined until 1.9.3
if RUBY_VERSION < '1.9.3'
def format_time time
diff --git a/ext/psych/parser.c b/ext/psych/parser.c
index e68768f562..9808c6b60e 100644
--- a/ext/psych/parser.c
+++ b/ext/psych/parser.c
@@ -59,6 +59,163 @@ static VALUE allocate(VALUE klass)
return Data_Wrap_Struct(klass, 0, dealloc, parser);
}
+static VALUE make_exception(yaml_parser_t * parser, VALUE path)
+{
+ size_t line, column;
+
+ line = parser->context_mark.line + 1;
+ column = parser->context_mark.column + 1;
+
+ return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
+ path,
+ INT2NUM(line),
+ INT2NUM(column),
+ INT2NUM(parser->problem_offset),
+ parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
+ parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
+}
+
+#ifdef HAVE_RUBY_ENCODING_H
+static VALUE transcode_string(VALUE src, int * parser_encoding)
+{
+ int utf8 = rb_utf8_encindex();
+ int utf16le = rb_enc_find_index("UTF16_LE");
+ int utf16be = rb_enc_find_index("UTF16_BE");
+ int source_encoding = rb_enc_get_index(src);
+
+ if (source_encoding == utf8) {
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
+ }
+
+ if (source_encoding == utf16le) {
+ *parser_encoding = YAML_UTF16LE_ENCODING;
+ return src;
+ }
+
+ if (source_encoding == utf16be) {
+ *parser_encoding = YAML_UTF16BE_ENCODING;
+ return src;
+ }
+
+ src = rb_str_export_to_enc(src, rb_utf8_encoding());
+ RB_GC_GUARD(src);
+
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
+}
+
+static VALUE transcode_io(VALUE src, int * parser_encoding)
+{
+ VALUE io_external_encoding;
+ int io_external_enc_index;
+
+ io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);
+
+ /* if no encoding is returned, assume ascii8bit. */
+ if (NIL_P(io_external_encoding)) {
+ io_external_enc_index = rb_ascii8bit_encindex();
+ } else {
+ io_external_enc_index = rb_to_encoding_index(io_external_encoding);
+ }
+
+ /* Treat US-ASCII as utf_8 */
+ if (io_external_enc_index == rb_usascii_encindex()) {
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
+ }
+
+ if (io_external_enc_index == rb_utf8_encindex()) {
+ *parser_encoding = YAML_UTF8_ENCODING;
+ return src;
+ }
+
+ if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
+ *parser_encoding = YAML_UTF16LE_ENCODING;
+ return src;
+ }
+
+ if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
+ *parser_encoding = YAML_UTF16BE_ENCODING;
+ return src;
+ }
+
+ /* Just guess on ASCII-8BIT */
+ if (io_external_enc_index == rb_ascii8bit_encindex()) {
+ *parser_encoding = YAML_ANY_ENCODING;
+ return src;
+ }
+
+ rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s",
+ rb_enc_name(rb_enc_from_index(io_external_enc_index)));
+
+ return Qnil;
+}
+
+#endif
+
+static VALUE protected_start_stream(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall(args[0], id_start_stream, 1, args[1]);
+}
+
+static VALUE protected_start_document(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall3(args[0], id_start_document, 3, args + 1);
+}
+
+static VALUE protected_end_document(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall(args[0], id_end_document, 1, args[1]);
+}
+
+static VALUE protected_alias(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall(args[0], id_alias, 1, args[1]);
+}
+
+static VALUE protected_scalar(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall3(args[0], id_scalar, 6, args + 1);
+}
+
+static VALUE protected_start_sequence(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall3(args[0], id_start_sequence, 4, args + 1);
+}
+
+static VALUE protected_end_sequence(VALUE handler)
+{
+ return rb_funcall(handler, id_end_sequence, 0);
+}
+
+static VALUE protected_start_mapping(VALUE pointer)
+{
+ VALUE *args = (VALUE *)pointer;
+ return rb_funcall3(args[0], id_start_mapping, 4, args + 1);
+}
+
+static VALUE protected_end_mapping(VALUE handler)
+{
+ return rb_funcall(handler, id_end_mapping, 0);
+}
+
+static VALUE protected_empty(VALUE handler)
+{
+ return rb_funcall(handler, id_empty, 0);
+}
+
+static VALUE protected_end_stream(VALUE handler)
+{
+ return rb_funcall(handler, id_end_stream, 0);
+}
+
/*
* call-seq:
* parser.parse(yaml)
@@ -68,27 +225,48 @@ static VALUE allocate(VALUE klass)
*
* See Psych::Parser and Psych::Parser#handler
*/
-static VALUE parse(VALUE self, VALUE yaml)
+static VALUE parse(int argc, VALUE *argv, VALUE self)
{
+ VALUE yaml, path;
yaml_parser_t * parser;
yaml_event_t event;
int done = 0;
int tainted = 0;
+ int state = 0;
+ int parser_encoding = YAML_ANY_ENCODING;
#ifdef HAVE_RUBY_ENCODING_H
int encoding = rb_utf8_encindex();
rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
VALUE handler = rb_iv_get(self, "@handler");
+ if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
+ if(rb_respond_to(yaml, id_path))
+ path = rb_funcall(yaml, id_path, 0);
+ else
+ path = rb_str_new2("<unknown>");
+ }
+
Data_Get_Struct(self, yaml_parser_t, parser);
+ yaml_parser_delete(parser);
+ yaml_parser_initialize(parser);
+
if (OBJ_TAINTED(yaml)) tainted = 1;
- if(rb_respond_to(yaml, id_read)) {
+ if (rb_respond_to(yaml, id_read)) {
+#ifdef HAVE_RUBY_ENCODING_H
+ yaml = transcode_io(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+#endif
yaml_parser_set_input(parser, io_reader, (void *)yaml);
if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
} else {
StringValue(yaml);
+#ifdef HAVE_RUBY_ENCODING_H
+ yaml = transcode_string(yaml, &parser_encoding);
+ yaml_parser_set_encoding(parser, parser_encoding);
+#endif
yaml_parser_set_input_string(
parser,
(const unsigned char *)RSTRING_PTR(yaml),
@@ -98,32 +276,28 @@ static VALUE parse(VALUE self, VALUE yaml)
while(!done) {
if(!yaml_parser_parse(parser, &event)) {
- VALUE path;
- size_t line = parser->mark.line;
- size_t column = parser->mark.column;
-
- if(rb_respond_to(yaml, id_path))
- path = rb_funcall(yaml, id_path, 0);
- else
- path = rb_str_new2("<unknown>");
+ VALUE exception;
+ exception = make_exception(parser, path);
yaml_parser_delete(parser);
yaml_parser_initialize(parser);
- rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d",
- StringValuePtr(path),
- (int)line, (int)column);
+ rb_exc_raise(exception);
}
switch(event.type) {
- case YAML_STREAM_START_EVENT:
-
- rb_funcall(handler, id_start_stream, 1,
- INT2NUM((long)event.data.stream_start.encoding)
- );
- break;
+ case YAML_STREAM_START_EVENT:
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = INT2NUM((long)event.data.stream_start.encoding);
+ rb_protect(protected_start_stream, (VALUE)args, &state);
+ }
+ break;
case YAML_DOCUMENT_START_EVENT:
{
+ VALUE args[4];
/* Get a list of tag directives (if any) */
VALUE tag_directives = rb_ary_new();
/* Grab the document version */
@@ -161,19 +335,25 @@ static VALUE parse(VALUE self, VALUE yaml)
rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
}
}
- rb_funcall(handler, id_start_document, 3,
- version, tag_directives,
- event.data.document_start.implicit == 1 ? Qtrue : Qfalse
- );
+ args[0] = handler;
+ args[1] = version;
+ args[2] = tag_directives;
+ args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_start_document, (VALUE)args, &state);
}
break;
case YAML_DOCUMENT_END_EVENT:
- rb_funcall(handler, id_end_document, 1,
- event.data.document_end.implicit == 1 ? Qtrue : Qfalse
- );
+ {
+ VALUE args[2];
+
+ args[0] = handler;
+ args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
+ rb_protect(protected_end_document, (VALUE)args, &state);
+ }
break;
case YAML_ALIAS_EVENT:
{
+ VALUE args[2];
VALUE alias = Qnil;
if(event.data.alias.anchor) {
alias = rb_str_new2((const char *)event.data.alias.anchor);
@@ -183,11 +363,14 @@ static VALUE parse(VALUE self, VALUE yaml)
#endif
}
- rb_funcall(handler, id_alias, 1, alias);
+ args[0] = handler;
+ args[1] = alias;
+ rb_protect(protected_alias, (VALUE)args, &state);
}
break;
case YAML_SCALAR_EVENT:
{
+ VALUE args[7];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE plain_implicit, quoted_implicit, style;
@@ -225,12 +408,19 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.scalar.style);
- rb_funcall(handler, id_scalar, 6,
- val, anchor, tag, plain_implicit, quoted_implicit, style);
+ args[0] = handler;
+ args[1] = val;
+ args[2] = anchor;
+ args[3] = tag;
+ args[4] = plain_implicit;
+ args[5] = quoted_implicit;
+ args[6] = style;
+ rb_protect(protected_scalar, (VALUE)args, &state);
}
break;
case YAML_SEQUENCE_START_EVENT:
{
+ VALUE args[5];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE implicit, style;
@@ -256,15 +446,21 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.sequence_start.style);
- rb_funcall(handler, id_start_sequence, 4,
- anchor, tag, implicit, style);
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_sequence, (VALUE)args, &state);
}
break;
case YAML_SEQUENCE_END_EVENT:
- rb_funcall(handler, id_end_sequence, 0);
+ rb_protect(protected_end_sequence, handler, &state);
break;
case YAML_MAPPING_START_EVENT:
{
+ VALUE args[5];
VALUE anchor = Qnil;
VALUE tag = Qnil;
VALUE implicit, style;
@@ -289,22 +485,28 @@ static VALUE parse(VALUE self, VALUE yaml)
style = INT2NUM((long)event.data.mapping_start.style);
- rb_funcall(handler, id_start_mapping, 4,
- anchor, tag, implicit, style);
+ args[0] = handler;
+ args[1] = anchor;
+ args[2] = tag;
+ args[3] = implicit;
+ args[4] = style;
+
+ rb_protect(protected_start_mapping, (VALUE)args, &state);
}
break;
case YAML_MAPPING_END_EVENT:
- rb_funcall(handler, id_end_mapping, 0);
+ rb_protect(protected_end_mapping, handler, &state);
break;
case YAML_NO_EVENT:
- rb_funcall(handler, id_empty, 0);
+ rb_protect(protected_empty, handler, &state);
break;
case YAML_STREAM_END_EVENT:
- rb_funcall(handler, id_end_stream, 0);
+ rb_protect(protected_end_stream, handler, &state);
done = 1;
break;
}
yaml_event_delete(&event);
+ if (state) rb_jump_tag(state);
}
return self;
@@ -312,29 +514,6 @@ static VALUE parse(VALUE self, VALUE yaml)
/*
* call-seq:
- * parser.external_encoding=(encoding)
- *
- * Set the encoding for this parser to +encoding+
- */
-static VALUE set_external_encoding(VALUE self, VALUE encoding)
-{
- yaml_parser_t * parser;
- VALUE exception;
-
- Data_Get_Struct(self, yaml_parser_t, parser);
-
- if(parser->encoding) {
- exception = rb_const_get_at(mPsych, rb_intern("Exception"));
- rb_raise(exception, "don't set the encoding twice!");
- }
-
- yaml_parser_set_encoding(parser, NUM2INT(encoding));
-
- return encoding;
-}
-
-/*
- * call-seq:
* parser.mark # => #<Psych::Parser::Mark>
*
* Returns a Psych::Parser::Mark object that contains line, column, and index
@@ -376,11 +555,11 @@ void Init_psych_parser()
/* UTF-16-BE Encoding with BOM */
rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
+ rb_require("psych/syntax_error");
ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
- rb_define_method(cPsychParser, "parse", parse, 1);
+ rb_define_method(cPsychParser, "parse", parse, -1);
rb_define_method(cPsychParser, "mark", mark, 0);
- rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1);
id_read = rb_intern("read");
id_path = rb_intern("path");