From 48706204503ee83a9925f2a482bcf37ddcc7fa48 Mon Sep 17 00:00:00 2001 From: usa Date: Mon, 5 Apr 2021 11:45:31 +0000 Subject: REXML 3.1.7.4 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@67937 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/doctype.rb | 50 +++- lib/rexml/parsers/baseparser.rb | 323 ++++++++++++++++----- lib/rexml/rexml.rb | 2 +- test/rexml/parse/test_document_type_declaration.rb | 193 +++++++++++- test/rexml/parse/test_notation_declaration.rb | 181 ++++++++++++ test/rexml/parser/test_tree.rb | 2 +- test/rexml/parser/test_ultra_light.rb | 1 - test/rexml/test_core.rb | 17 +- test/rexml/test_doctype.rb | 151 +++++++--- version.h | 8 +- 10 files changed, 782 insertions(+), 146 deletions(-) diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index 1eb1f5b4e1..cb9bf57406 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -7,6 +7,39 @@ require 'rexml/attlistdecl' require 'rexml/xmltokens' module REXML + class ReferenceWriter + def initialize(id_type, + public_id_literal, + system_literal) + @id_type = id_type + @public_id_literal = public_id_literal + @system_literal = system_literal + @default_quote = "\"" + end + + def write(output) + output << " #{@id_type}" + if @public_id_literal + if @public_id_literal.include?("'") + quote = "\"" + else + quote = @default_quote + end + output << " #{quote}#{@public_id_literal}#{quote}" + end + if @system_literal + if @system_literal.include?("'") + quote = "\"" + elsif @system_literal.include?("\"") + quote = "'" + else + quote = @default_quote + end + output << " #{quote}#{@system_literal}#{quote}" + end + end + end + # Represents an XML DOCTYPE declaration; that is, the contents of . DOCTYPES can be used to declare the DTD of a document, as well as # being used to declare entities used in the document. @@ -50,6 +83,8 @@ module REXML super( parent ) @name = first.name @external_id = first.external_id + @long_name = first.instance_variable_get(:@long_name) + @uri = first.instance_variable_get(:@uri) elsif first.kind_of? Array super( parent ) @name = first[0] @@ -112,9 +147,12 @@ module REXML output << START output << ' ' output << @name - output << " #@external_id" if @external_id - output << " #{@long_name.inspect}" if @long_name - output << " #{@uri.inspect}" if @uri + if @external_id + reference_writer = ReferenceWriter.new(@external_id, + @long_name, + @uri) + reference_writer.write(output) + end unless @children.empty? output << ' [' @children.each { |child| @@ -249,9 +287,9 @@ module REXML end def to_s - notation = "" notation end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 80eeb0fa79..e7ef695912 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -1,4 +1,7 @@ # frozen_string_literal: false + +require "strscan" + require 'rexml/parseexception' require 'rexml/undefinednamespaceexception' require 'rexml/source' @@ -32,8 +35,12 @@ module REXML COMBININGCHAR = '' # TODO EXTENDER = '' # TODO - NCNAME_STR= "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*" - NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*" + QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" + QNAME = /(#{QNAME_STR})/ + + # Just for backward compatibility. For example, kramdown uses this. + # It's not used in REXML. UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" NAMECHAR = '[\-\w\.:]' @@ -45,8 +52,7 @@ module REXML DOCTYPE_START = /\A\s*/um - DOCTYPE_PATTERN = /\s*)/um - ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um + ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A/um CDATA_START = /\A/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um - CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um + TAG_MATCH = /\A<((?>#{QNAME_STR}))/um + CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um ENTITY_START = /\A\s*/um SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um @@ -78,9 +83,6 @@ module REXML ATTDEF_RE = /#{ATTDEF}/ ATTLISTDECL_START = /\A\s*/um - NOTATIONDECL_START = /\A\s*/um - SYSTEM = /\A\s*/um TEXT_PATTERN = /\A([^<]*)/um @@ -98,6 +100,11 @@ module REXML GEDECL = "" ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + NOTATIONDECL_START = /\A\s* 0 #STDERR.puts @source.encoding - @source.read if @source.buffer.size<2 #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil - #@source.consume( /^\s*/um ) - word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um ) + word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) word = word[1] unless word.nil? #STDERR.puts "WORD = #{word.inspect}" case word @@ -226,38 +231,49 @@ module REXML when INSTRUCTION_START return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] when DOCTYPE_START - md = @source.match( DOCTYPE_PATTERN, true ) + base_error_message = "Malformed DOCTYPE" + @source.match(DOCTYPE_START, true) @nsstack.unshift(curr_ns=Set.new) - identity = md[1] - close = md[2] - identity =~ IDENTITY - name = $1 - raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil? - pub_sys = $2.nil? ? nil : $2.strip - long_name = $4.nil? ? nil : $4.strip - uri = $6.nil? ? nil : $6.strip - args = [ :start_doctype, name, pub_sys, long_name, uri ] - if close == ">" + name = parse_name(base_error_message) + if @source.match(/\A\s*\[/um, true) + id = [nil, nil, nil] + @document_status = :in_doctype + elsif @source.match(/\A\s*>/um, true) + id = [nil, nil, nil] @document_status = :after_doctype - @source.read if @source.buffer.size<2 - md = @source.match(/^\s*/um, true) - @stack << [ :end_doctype ] else - @document_status = :in_doctype + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: false) + if id[0] == "SYSTEM" + # For backward compatibility + id[1], id[2] = id[2], nil + end + if @source.match(/\A\s*\[/um, true) + @document_status = :in_doctype + elsif @source.match(/\A\s*>/um, true) + @document_status = :after_doctype + else + message = "#{base_error_message}: garbage after external ID" + raise REXML::ParseException.new(message, @source) + end + end + args = [:start_doctype, name, *id] + if @document_status == :after_doctype + @source.match(/\A\s*/um, true) + @stack << [ :end_doctype ] end return args - when /^\s+/ + when /\A\s+/ else @document_status = :after_doctype - @source.read if @source.buffer.size<2 - md = @source.match(/\s*/um, true) if @source.encoding == "UTF-8" @source.buffer.force_encoding(::Encoding::UTF_8) end end end if @document_status == :in_doctype - md = @source.match(/\s*(.*?>)/um) + md = @source.match(/\A\s*(.*?>)/um) case md[1] when SYSTEMENTITY match = @source.match( SYSTEMENTITY, true )[1] @@ -314,33 +330,50 @@ module REXML end return [ :attlistdecl, element, pairs, contents ] when NOTATIONDECL_START - md = nil - if @source.match( PUBLIC ) - md = @source.match( PUBLIC, true ) - vals = [md[1],md[2],md[4],md[6]] - elsif @source.match( SYSTEM ) - md = @source.match( SYSTEM, true ) - vals = [md[1],md[2],nil,md[4]] - else - raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) + base_error_message = "Malformed notation declaration" + unless @source.match(/\A\s*/um) + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid declaration name" + end + raise REXML::ParseException.new(message, @source) end - return [ :notationdecl, *vals ] + name = parse_name(base_error_message) + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) + unless @source.match(/\A\s*>/um, true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end + return [:notationdecl, name, *id] when DOCTYPE_END @document_status = :after_doctype @source.match( DOCTYPE_END, true ) return [ :end_doctype ] end end + if @document_status == :after_doctype + @source.match(/\A\s*/um, true) + end begin + @source.read if @source.buffer.size<2 if @source.buffer[0] == ?< if @source.buffer[1] == ?/ @nsstack.shift last_tag = @tags.pop #md = @source.match_to_consume( '>', CLOSE_MATCH) md = @source.match( CLOSE_MATCH, true ) - raise REXML::ParseException.new( "Missing end tag for "+ - "'#{last_tag}' (got \"#{md[1]}\")", - @source) unless last_tag == md[1] + if md and !last_tag + message = "Unexpected top-level end tag (got '#{md[1]}')" + raise REXML::ParseException.new(message, @source) + end + if md.nil? or last_tag != md[1] + message = "Missing end tag for '#{last_tag}'" + message << " (got '#{md[1]}')" if md + raise REXML::ParseException.new(message, @source) + end return [ :end_element, last_tag ] elsif @source.buffer[1] == ?! md = @source.match(/\A(\s*[^>]*>)/um) @@ -374,40 +407,11 @@ module REXML raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) raise REXML::ParseException.new("malformed XML: missing tag start", @source) end - attributes = {} + @document_status = :in_element prefixes = Set.new prefixes << md[2] if md[2] @nsstack.unshift(curr_ns=Set.new) - if md[4].size > 0 - attrs = md[4].scan( ATTRIBUTE_PATTERN ) - raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 - attrs.each do |attr_name, prefix, local_part, quote, value| - if prefix == "xmlns" - if local_part == "xml" - if value != "http://www.w3.org/XML/1998/namespace" - msg = "The 'xml' prefix must not be bound to any other namespace "+ - "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self ) - end - elsif local_part == "xmlns" - msg = "The 'xmlns' prefix must not be declared "+ - "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self) - end - curr_ns << local_part - elsif prefix - prefixes << prefix unless prefix == "xml" - end - - if attributes.has_key?(attr_name) - msg = "Duplicate attribute #{attr_name.inspect}" - raise REXML::ParseException.new(msg, @source, self) - end - - attributes[attr_name] = value - end - end - + attributes, closed = parse_attributes(prefixes, curr_ns) # Verify that all of the prefixes have been defined for prefix in prefixes unless @nsstack.find{|k| k.member?(prefix)} @@ -415,7 +419,7 @@ module REXML end end - if md[6] + if closed @closed = md[1] @nsstack.shift else @@ -508,6 +512,169 @@ module REXML return false if /\AUTF-16\z/i =~ xml_declaration_encoding true end + + def parse_name(base_error_message) + md = @source.match(/\A\s*#{NAME}/um, true) + unless md + if @source.match(/\A\s*\S/um) + message = "#{base_error_message}: invalid name" + else + message = "#{base_error_message}: name is missing" + end + raise REXML::ParseException.new(message, @source) + end + md[1] + end + + def parse_id(base_error_message, + accept_external_id:, + accept_public_id:) + if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + system_literal = md[2] + system = system_literal[1..-2] if system_literal # Remove quote + ["PUBLIC", pubid, system] + elsif accept_public_id and (md = @source.match(PUBLIC_ID, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + ["PUBLIC", pubid, nil] + elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true)) + system = nil + system_literal = md[1] + system = system_literal[1..-2] if system_literal # Remove quote + ["SYSTEM", nil, system] + else + details = parse_id_invalid_details(accept_external_id: accept_external_id, + accept_public_id: accept_public_id) + message = "#{base_error_message}: #{details}" + raise REXML::ParseException.new(message, @source) + end + end + + def parse_id_invalid_details(accept_external_id:, + accept_public_id:) + public = /\A\s*PUBLIC/um + system = /\A\s*SYSTEM/um + if (accept_external_id or accept_public_id) and @source.match(/#{public}/um) + if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + return "public ID literal is missing" + end + unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um) + return "invalid public ID literal" + end + if accept_public_id + if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + return "system ID literal is missing" + end + unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else + "garbage after public ID literal" + end + elsif accept_external_id and @source.match(/#{system}/um) + if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + return "system literal is missing" + end + unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else + unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + return "invalid ID type" + end + "ID type is missing" + end + end + + def parse_attributes(prefixes, curr_ns) + attributes = {} + closed = false + match_data = @source.match(/^(.*?)(\/)?>/um, true) + if match_data.nil? + message = "Start tag isn't ended" + raise REXML::ParseException.new(message, @source) + end + + raw_attributes = match_data[1] + closed = !match_data[2].nil? + return attributes, closed if raw_attributes.nil? + return attributes, closed if raw_attributes.empty? + + scanner = StringScanner.new(raw_attributes) + until scanner.eos? + if scanner.scan(/\s+/) + break if scanner.eos? + end + + pos = scanner.pos + loop do + break if scanner.scan(ATTRIBUTE_PATTERN) + unless scanner.scan(QNAME) + message = "Invalid attribute name: <#{scanner.rest}>" + raise REXML::ParseException.new(message, @source) + end + name = scanner[0] + unless scanner.scan(/\s*=\s*/um) + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end + quote = scanner.scan(/['"]/) + unless quote + message = "Missing attribute value start quote: <#{name}>" + raise REXML::ParseException.new(message, @source) + end + unless scanner.scan(/.*#{Regexp.escape(quote)}/um) + match_data = @source.match(/^(.*?)(\/)?>/um, true) + if match_data + scanner << "/" if closed + scanner << ">" + scanner << match_data[1] + scanner.pos = pos + closed = !match_data[2].nil? + next + end + message = + "Missing attribute value end quote: <#{name}>: <#{quote}>" + raise REXML::ParseException.new(message, @source) + end + end + name = scanner[1] + prefix = scanner[2] + local_part = scanner[3] + # quote = scanner[4] + value = scanner[5] + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif local_part == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self) + end + curr_ns << local_part + elsif prefix + prefixes << prefix unless prefix == "xml" + end + + if attributes.has_key?(name) + msg = "Duplicate attribute #{name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end + + attributes[name] = value + end + return attributes, closed + end end end end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index fbc0d339d8..652d6429af 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -24,7 +24,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell " DATE = "2008/019" - VERSION = "3.1.7.3" + VERSION = "3.1.7.4" REVISION = %w$Revision$[1] || '' Copyright = COPYRIGHT diff --git a/test/rexml/parse/test_document_type_declaration.rb b/test/rexml/parse/test_document_type_declaration.rb index 80f70888fb..55713909e7 100644 --- a/test/rexml/parse/test_document_type_declaration.rb +++ b/test/rexml/parse/test_document_type_declaration.rb @@ -5,17 +5,187 @@ require "rexml/document" module REXMLTests class TestParseDocumentTypeDeclaration < Test::Unit::TestCase private - def xml(internal_subset) - <<-XML - + def parse(doctype) + REXML::Document.new(<<-XML).doctype +#{doctype} XML end - def parse(internal_subset) - REXML::Document.new(xml(internal_subset)).doctype + class TestName < self + def test_valid + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("r", doctype.name) + end + + def test_garbage_plus_before_name_at_line_start + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: invalid name +Line: 5 +Position: 51 +Last 80 unconsumed characters: ++ r SYSTEM "urn:x-rexml:test" [ ]> + DETAIL + end + end + + class TestExternalID < self + class TestSystem < self + def test_left_bracket_in_system_literal + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal([ + "r", + "SYSTEM", + nil, + "urn:x-rexml:[test", + ], + [ + doctype.name, + doctype.external_id, + doctype.public, + doctype.system, + ]) + end + + def test_greater_than_in_system_literal + doctype = parse(<<-DOCTYPE) +test" [ +]> + DOCTYPE + assert_equal([ + "r", + "SYSTEM", + nil, + "urn:x-rexml:>test", + ], + [ + doctype.name, + doctype.external_id, + doctype.public, + doctype.system, + ]) + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: system literal is missing +Line: 3 +Position: 26 +Last 80 unconsumed characters: + SYSTEM> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: garbage after external ID +Line: 3 +Position: 36 +Last 80 unconsumed characters: +x'> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("r\".dtd", doctype.system) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("r'.dtd", doctype.system) + end + end + + class TestPublic < self + class TestPublicIDLiteral < self + def test_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: invalid public ID literal +Line: 3 +Position: 62 +Last 80 unconsumed characters: + PUBLIC 'double quote " is invalid' "r.dtd"> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("public-id-literal", doctype.public) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("public'-id-literal", doctype.public) + end + end + + class TestSystemLiteral < self + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) + + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: garbage after external ID +Line: 3 +Position: 65 +Last 80 unconsumed characters: +x'> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("system\"-literal", doctype.system) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) + + DOCTYPE + assert_equal("system'-literal", doctype.system) + end + end + end end class TestMixed < self @@ -45,6 +215,15 @@ module REXMLTests assert_equal([REXML::NotationDecl, REXML::AttlistDecl], doctype.children.collect(&:class)) end + + private + def parse(internal_subset) + super(<<-DOCTYPE) + + DOCTYPE + end end end end diff --git a/test/rexml/parse/test_notation_declaration.rb b/test/rexml/parse/test_notation_declaration.rb index 0d29f0d81f..19a0536d0a 100644 --- a/test/rexml/parse/test_notation_declaration.rb +++ b/test/rexml/parse/test_notation_declaration.rb @@ -23,10 +23,100 @@ module REXMLTests doctype = parse("") assert_equal("name", doctype.notation("name").name) end + + def test_no_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: name is missing +Line: 5 +Position: 72 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_invalid_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid name +Line: 5 +Position: 74 +Last 80 unconsumed characters: +'> ]> + DETAIL + end + + def test_no_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid ID type +Line: 5 +Position: 77 +Last 80 unconsumed characters: +> ]> + DETAIL + end + + def test_invalid_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid ID type +Line: 5 +Position: 85 +Last 80 unconsumed characters: + INVALID> ]> + DETAIL + end end class TestExternalID < self class TestSystem < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: system literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + SYSTEM> ]> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 103 +Last 80 unconsumed characters: +x'> ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -44,6 +134,21 @@ module REXMLTests class TestPublic < self class TestPublicIDLiteral < self + def test_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 129 +Last 80 unconsumed characters: + PUBLIC 'double quote " is invalid' "system-literal"> ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -60,6 +165,21 @@ module REXMLTests end class TestSystemLiteral < self + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 123 +Last 80 unconsumed characters: +x'> ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -96,5 +216,66 @@ module REXMLTests end end end + + class TestPublicID < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: public ID literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + PUBLIC> ]> + DETAIL + end + + def test_literal_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 128 +Last 80 unconsumed characters: + PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 106 +Last 80 unconsumed characters: +x'> ]> + DETAIL + end + + def test_literal_single_quote + doctype = parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + + def test_literal_double_quote + doctype = parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + end end end diff --git a/test/rexml/parser/test_tree.rb b/test/rexml/parser/test_tree.rb index 7ab0addca1..8a5d9d1223 100644 --- a/test/rexml/parser/test_tree.rb +++ b/test/rexml/parser/test_tree.rb @@ -12,7 +12,7 @@ class TestTreeParser < Test::Unit::TestCase parse(xml) end assert_equal(<<-MESSAGE, exception.to_s) -Missing end tag for 'root' (got "not-root") +Missing end tag for 'root' (got 'not-root') Line: 1 Position: #{xml.bytesize} Last 80 unconsumed characters: diff --git a/test/rexml/parser/test_ultra_light.rb b/test/rexml/parser/test_ultra_light.rb index c48a13d311..cb6ee5a8ab 100644 --- a/test/rexml/parser/test_ultra_light.rb +++ b/test/rexml/parser/test_ultra_light.rb @@ -16,7 +16,6 @@ class TestUltraLightParser < Test::Unit::TestCase nil, [:entitydecl, "name", "value"] ], - [:text, "\n"], [:start_element, :parent, "root", {}], [:text, "\n"], ], diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb index 0071063128..ee5438d5e5 100644 --- a/test/rexml/test_core.rb +++ b/test/rexml/test_core.rb @@ -1,4 +1,4 @@ -# coding: binary +# coding: utf-8 # frozen_string_literal: false require_relative "rexml_test_utils" @@ -995,7 +995,7 @@ EOL document.write(s) ## XML Doctype - str = '' + str = '' source = REXML::Source.new(str) doctype = REXML::DocType.new(source) document.add(doctype) @@ -1274,14 +1274,15 @@ EOL def test_ticket_21 src = "" - assert_raise( ParseException, "invalid XML should be caught" ) { + exception = assert_raise(ParseException) do Document.new(src) - } - begin - Document.new(src) - rescue - assert_match( /missing attribute quote/, $!.message ) end + assert_equal(<<-DETAIL, exception.to_s) +Missing attribute value start quote: +Line: 1 +Position: 16 +Last 80 unconsumed characters: + DETAIL end def test_ticket_63 diff --git a/test/rexml/test_doctype.rb b/test/rexml/test_doctype.rb index 91de05b05f..d728cba606 100644 --- a/test/rexml/test_doctype.rb +++ b/test/rexml/test_doctype.rb @@ -4,65 +4,111 @@ require 'rexml/document' module REXMLTests class TestDocTypeAccessor < Test::Unit::TestCase - def setup @sysid = "urn:x-test:sysid1" - @notid1 = "urn:x-test:notation1" - @notid2 = "urn:x-test:notation2" - document_string1 = <<-"XMLEND" - - + @notation_id1 = "urn:x-test:notation1" + @notation_id2 = "urn:x-test:notation2" + xml_system = <<-XML + + ]> - - XMLEND - @doctype1 = REXML::Document.new(document_string1).doctype + + XML + @doc_type_system = REXML::Document.new(xml_system).doctype @pubid = "TEST_ID" - document_string2 = <<-"XMLEND" - - - XMLEND - @doctype2 = REXML::Document.new(document_string2).doctype - - document_string3 = <<-"XMLEND" - - - XMLEND - @doctype3 = REXML::Document.new(document_string3).doctype - + xml_public_system = <<-XML + + + XML + @doc_type_public_system = REXML::Document.new(xml_public_system).doctype end def test_public - assert_equal(nil, @doctype1.public) - assert_equal(@pubid, @doctype2.public) - assert_equal(@pubid, @doctype3.public) + assert_equal([ + nil, + @pubid, + ], + [ + @doc_type_system.public, + @doc_type_public_system.public, + ]) + end + + def test_to_s + assert_equal("", + @doc_type_public_system.to_s) end def test_system - assert_equal(@sysid, @doctype1.system) - assert_equal(nil, @doctype2.system) - assert_equal(@sysid, @doctype3.system) + assert_equal([ + @sysid, + @sysid, + ], + [ + @doc_type_system.system, + @doc_type_public_system.system, + ]) end def test_notation - assert_equal(@notid1, @doctype1.notation("n1").system) - assert_equal(@notid2, @doctype1.notation("n2").system) + assert_equal([ + @notation_id1, + @notation_id2, + ], + [ + @doc_type_system.notation("n1").system, + @doc_type_system.notation("n2").system, + ]) end def test_notations - notations = @doctype1.notations - assert_equal(2, notations.length) - assert_equal(@notid1, find_notation(notations, "n1").system) - assert_equal(@notid2, find_notation(notations, "n2").system) + notations = @doc_type_system.notations + assert_equal([ + @notation_id1, + @notation_id2, + ], + notations.collect(&:system)) end + end - def find_notation(notations, name) - notations.find { |notation| - name == notation.name - } + class TestDocType < Test::Unit::TestCase + class TestExternalID < self + class TestSystem < self + class TestSystemLiteral < self + def test_to_s + doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root.dtd"]) + assert_equal("", + doctype.to_s) + end + end + end + + class TestPublic < self + class TestPublicIDLiteral < self + def test_to_s + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"]) + assert_equal("", + doctype.to_s) + end + end + + class TestSystemLiteral < self + def test_to_s + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"]) + assert_equal("", + doctype.to_s) + end + + def test_to_s_double_quote + doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root\".dtd"]) + assert_equal("", + doctype.to_s) + end + end + end end - end class TestNotationDeclPublic < Test::Unit::TestCase @@ -77,11 +123,26 @@ module REXMLTests decl(@id, nil).to_s) end + def test_to_s_pubid_literal_include_apostrophe + assert_equal("", + decl("#{@id}'", nil).to_s) + end + def test_to_s_with_uri assert_equal("", decl(@id, @uri).to_s) end + def test_to_s_system_literal_include_apostrophe + assert_equal("", + decl(@id, "system'literal").to_s) + end + + def test_to_s_system_literal_include_double_quote + assert_equal("", + decl(@id, "system\"literal").to_s) + end + private def decl(id, uri) REXML::NotationDecl.new(@name, "PUBLIC", id, uri) @@ -99,9 +160,19 @@ module REXMLTests decl(@id).to_s) end + def test_to_s_include_apostrophe + assert_equal("", + decl("#{@id}'").to_s) + end + + def test_to_s_include_double_quote + assert_equal("", + decl("#{@id}\"").to_s) + end + private def decl(id) - REXML::NotationDecl.new(@name, "SYSTEM", id, nil) + REXML::NotationDecl.new(@name, "SYSTEM", nil, id) end end end diff --git a/version.h b/version.h index 4239637fe4..b7ac766eb0 100644 --- a/version.h +++ b/version.h @@ -1,10 +1,10 @@ #define RUBY_VERSION "2.5.9" -#define RUBY_RELEASE_DATE "2021-02-16" -#define RUBY_PATCHLEVEL 228 +#define RUBY_RELEASE_DATE "2021-04-05" +#define RUBY_PATCHLEVEL 229 #define RUBY_RELEASE_YEAR 2021 -#define RUBY_RELEASE_MONTH 2 -#define RUBY_RELEASE_DAY 16 +#define RUBY_RELEASE_MONTH 4 +#define RUBY_RELEASE_DAY 5 #include "ruby/version.h" -- cgit v1.2.3