summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorusa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2021-04-05 11:48:23 +0000
committerusa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2021-04-05 11:48:23 +0000
commit1b59a4dc76caa061355f4289d2c54d4625671735 (patch)
tree51ccf20baa839ba3c28955a4a81d9efbe4c0e86b
parent1a38986aedf63f676efded3e834c959059d21760 (diff)
REXML 3.1.9.1
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67940 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--lib/rexml/doctype.rb71
-rw-r--r--lib/rexml/parsers/baseparser.rb178
-rw-r--r--lib/rexml/rexml.rb2
-rw-r--r--test/rexml/parse/test_document_type_declaration.rb193
-rw-r--r--test/rexml/parse/test_element.rb26
-rw-r--r--test/rexml/parse/test_notation_declaration.rb181
-rw-r--r--test/rexml/parse/test_processing_instruction.rb19
-rw-r--r--test/rexml/parser/test_ultra_light.rb1
-rw-r--r--test/rexml/test_core.rb4
-rw-r--r--test/rexml/test_doctype.rb10
-rw-r--r--version.h2
11 files changed, 607 insertions, 80 deletions
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index 757b639639..a4e91529ac 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -7,6 +7,44 @@ require_relative 'attlistdecl'
require_relative 'xmltokens'
module REXML
+ class ReferenceWriter
+ def initialize(id_type,
+ public_id_literal,
+ system_literal,
+ context=nil)
+ @id_type = id_type
+ @public_id_literal = public_id_literal
+ @system_literal = system_literal
+ if context and context[:prologue_quote] == :apostrophe
+ @default_quote = "'"
+ else
+ @default_quote = "\""
+ end
+ end
+
+ def write(output)
+ output << " #{@id_type}"
+ if @public_id_literal
+ if @public_id_literal.include?("'")
+ quote = "\""
+ else
+ quote = @default_quote
+ end
+ output << " #{quote}#{@public_id_literal}#{quote}"
+ end
+ if @system_literal
+ if @system_literal.include?("'")
+ quote = "\""
+ elsif @system_literal.include?("\"")
+ quote = "'"
+ else
+ quote = @default_quote
+ end
+ output << " #{quote}#{@system_literal}#{quote}"
+ end
+ end
+ end
+
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document.
@@ -50,6 +88,8 @@ module REXML
super( parent )
@name = first.name
@external_id = first.external_id
+ @long_name = first.instance_variable_get(:@long_name)
+ @uri = first.instance_variable_get(:@uri)
elsif first.kind_of? Array
super( parent )
@name = first[0]
@@ -108,19 +148,17 @@ module REXML
# Ignored
def write( output, indent=0, transitive=false, ie_hack=false )
f = REXML::Formatters::Default.new
- c = context
- if c and c[:prologue_quote] == :apostrophe
- quote = "'"
- else
- quote = "\""
- end
indent( output, indent )
output << START
output << ' '
output << @name
- output << " #{@external_id}" if @external_id
- output << " #{quote}#{@long_name}#{quote}" if @long_name
- output << " #{quote}#{@uri}#{quote}" if @uri
+ if @external_id
+ reference_writer = ReferenceWriter.new(@external_id,
+ @long_name,
+ @uri,
+ context)
+ reference_writer.write(output)
+ end
unless @children.empty?
output << ' ['
@children.each { |child|
@@ -259,16 +297,11 @@ module REXML
end
def to_s
- c = nil
- c = parent.context if parent
- if c and c[:prologue_quote] == :apostrophe
- quote = "'"
- else
- quote = "\""
- end
- notation = "<!NOTATION #{@name} #{@middle}"
- notation << " #{quote}#{@public}#{quote}" if @public
- notation << " #{quote}#{@system}#{quote}" if @system
+ context = nil
+ context = parent.context if parent
+ notation = "<!NOTATION #{@name}"
+ reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
+ reference_writer.write(notation)
notation << ">"
notation
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 39e9ec3fb1..b10cdcd30e 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -50,7 +50,6 @@ module REXML
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
@@ -61,15 +60,14 @@ module REXML
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
ENTITY_START = /\A\s*<!ENTITY/
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
@@ -83,9 +81,6 @@ module REXML
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
TEXT_PATTERN = /\A([^<]*)/um
@@ -103,6 +98,11 @@ module REXML
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
+
EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = {
@@ -195,11 +195,9 @@ module REXML
return [ :end_document ] if empty?
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
- @source.read if @source.buffer.size<2
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
if @document_status == nil
- #@source.consume( /^\s*/um )
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+ word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
word = word[1] unless word.nil?
#STDERR.puts "WORD = #{word.inspect}"
case word
@@ -224,38 +222,49 @@ module REXML
when INSTRUCTION_START
return process_instruction
when DOCTYPE_START
- md = @source.match( DOCTYPE_PATTERN, true )
+ base_error_message = "Malformed DOCTYPE"
+ @source.match(DOCTYPE_START, true)
@nsstack.unshift(curr_ns=Set.new)
- identity = md[1]
- close = md[2]
- identity =~ IDENTITY
- name = $1
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
- pub_sys = $2.nil? ? nil : $2.strip
- long_name = $4.nil? ? nil : $4.strip
- uri = $6.nil? ? nil : $6.strip
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
- if close == ">"
+ name = parse_name(base_error_message)
+ if @source.match(/\A\s*\[/um, true)
+ id = [nil, nil, nil]
+ @document_status = :in_doctype
+ elsif @source.match(/\A\s*>/um, true)
+ id = [nil, nil, nil]
@document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/^\s*/um, true)
- @stack << [ :end_doctype ]
else
- @document_status = :in_doctype
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: false)
+ if id[0] == "SYSTEM"
+ # For backward compatibility
+ id[1], id[2] = id[2], nil
+ end
+ if @source.match(/\A\s*\[/um, true)
+ @document_status = :in_doctype
+ elsif @source.match(/\A\s*>/um, true)
+ @document_status = :after_doctype
+ else
+ message = "#{base_error_message}: garbage after external ID"
+ raise REXML::ParseException.new(message, @source)
+ end
+ end
+ args = [:start_doctype, name, *id]
+ if @document_status == :after_doctype
+ @source.match(/\A\s*/um, true)
+ @stack << [ :end_doctype ]
end
return args
- when /^\s+/
+ when /\A\s+/
else
@document_status = :after_doctype
- @source.read if @source.buffer.size<2
- md = @source.match(/\s*/um, true)
if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
end
end
end
if @document_status == :in_doctype
- md = @source.match(/\s*(.*?>)/um)
+ md = @source.match(/\A\s*(.*?>)/um)
case md[1]
when SYSTEMENTITY
match = @source.match( SYSTEMENTITY, true )[1]
@@ -312,24 +321,35 @@ module REXML
end
return [ :attlistdecl, element, pairs, contents ]
when NOTATIONDECL_START
- md = nil
- if @source.match( PUBLIC )
- md = @source.match( PUBLIC, true )
- vals = [md[1],md[2],md[4],md[6]]
- elsif @source.match( SYSTEM )
- md = @source.match( SYSTEM, true )
- vals = [md[1],md[2],nil,md[4]]
- else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ base_error_message = "Malformed notation declaration"
+ unless @source.match(/\A\s*<!NOTATION\s+/um, true)
+ if @source.match(/\A\s*<!NOTATION\s*>/um)
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid declaration name"
+ end
+ raise REXML::ParseException.new(message, @source)
end
- return [ :notationdecl, *vals ]
+ name = parse_name(base_error_message)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: true)
+ unless @source.match(/\A\s*>/um, true)
+ message = "#{base_error_message}: garbage before end >"
+ raise REXML::ParseException.new(message, @source)
+ end
+ return [:notationdecl, name, *id]
when DOCTYPE_END
@document_status = :after_doctype
@source.match( DOCTYPE_END, true )
return [ :end_doctype ]
end
end
+ if @document_status == :after_doctype
+ @source.match(/\A\s*/um, true)
+ end
begin
+ @source.read if @source.buffer.size<2
if @source.buffer[0] == ?<
if @source.buffer[1] == ?/
@nsstack.shift
@@ -368,6 +388,7 @@ module REXML
unless md
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
+ @document_status = :in_element
prefixes = Set.new
prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
@@ -473,6 +494,85 @@ module REXML
true
end
+ def parse_name(base_error_message)
+ md = @source.match(/\A\s*#{NAME}/um, true)
+ unless md
+ if @source.match(/\A\s*\S/um)
+ message = "#{base_error_message}: invalid name"
+ else
+ message = "#{base_error_message}: name is missing"
+ end
+ raise REXML::ParseException.new(message, @source)
+ end
+ md[1]
+ end
+
+ def parse_id(base_error_message,
+ accept_external_id:,
+ accept_public_id:)
+ if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ system_literal = md[2]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["PUBLIC", pubid, system]
+ elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
+ pubid = system = nil
+ pubid_literal = md[1]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ ["PUBLIC", pubid, nil]
+ elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
+ system = nil
+ system_literal = md[1]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ ["SYSTEM", nil, system]
+ else
+ details = parse_id_invalid_details(accept_external_id: accept_external_id,
+ accept_public_id: accept_public_id)
+ message = "#{base_error_message}: #{details}"
+ raise REXML::ParseException.new(message, @source)
+ end
+ end
+
+ def parse_id_invalid_details(accept_external_id:,
+ accept_public_id:)
+ public = /\A\s*PUBLIC/um
+ system = /\A\s*SYSTEM/um
+ if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
+ if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
+ return "public ID literal is missing"
+ end
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
+ return "invalid public ID literal"
+ end
+ if accept_public_id
+ if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
+ return "system ID literal is missing"
+ end
+ unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ end
+ "garbage after system literal"
+ else
+ "garbage after public ID literal"
+ end
+ elsif accept_external_id and @source.match(/#{system}/um)
+ if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
+ return "system literal is missing"
+ end
+ unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ end
+ "garbage after system literal"
+ else
+ unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
+ return "invalid ID type"
+ end
+ "ID type is missing"
+ end
+ end
+
def process_instruction
match_data = @source.match(INSTRUCTION_PATTERN, true)
unless match_data
diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb
index 7851c78bf5..627dff4768 100644
--- a/lib/rexml/rexml.rb
+++ b/lib/rexml/rexml.rb
@@ -24,7 +24,7 @@
module REXML
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
DATE = "2008/019"
- VERSION = "3.1.9"
+ VERSION = "3.1.9.1"
REVISION = ""
Copyright = COPYRIGHT
diff --git a/test/rexml/parse/test_document_type_declaration.rb b/test/rexml/parse/test_document_type_declaration.rb
index 80f70888fb..55713909e7 100644
--- a/test/rexml/parse/test_document_type_declaration.rb
+++ b/test/rexml/parse/test_document_type_declaration.rb
@@ -5,17 +5,187 @@ require "rexml/document"
module REXMLTests
class TestParseDocumentTypeDeclaration < Test::Unit::TestCase
private
- def xml(internal_subset)
- <<-XML
-<!DOCTYPE r SYSTEM "urn:x-rexml:test" [
-#{internal_subset}
-]>
+ def parse(doctype)
+ REXML::Document.new(<<-XML).doctype
+#{doctype}
<r/>
XML
end
- def parse(internal_subset)
- REXML::Document.new(xml(internal_subset)).doctype
+ class TestName < self
+ def test_valid
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r>
+ DOCTYPE
+ assert_equal("r", doctype.name)
+ end
+
+ def test_garbage_plus_before_name_at_line_start
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE +
+r SYSTEM "urn:x-rexml:test" [
+]>
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: invalid name
+Line: 5
+Position: 51
+Last 80 unconsumed characters:
++ r SYSTEM "urn:x-rexml:test" [ ]> <r/>
+ DETAIL
+ end
+ end
+
+ class TestExternalID < self
+ class TestSystem < self
+ def test_left_bracket_in_system_literal
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM "urn:x-rexml:[test" [
+]>
+ DOCTYPE
+ assert_equal([
+ "r",
+ "SYSTEM",
+ nil,
+ "urn:x-rexml:[test",
+ ],
+ [
+ doctype.name,
+ doctype.external_id,
+ doctype.public,
+ doctype.system,
+ ])
+ end
+
+ def test_greater_than_in_system_literal
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM "urn:x-rexml:>test" [
+]>
+ DOCTYPE
+ assert_equal([
+ "r",
+ "SYSTEM",
+ nil,
+ "urn:x-rexml:>test",
+ ],
+ [
+ doctype.name,
+ doctype.external_id,
+ doctype.public,
+ doctype.system,
+ ])
+ end
+
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM>
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: system literal is missing
+Line: 3
+Position: 26
+Last 80 unconsumed characters:
+ SYSTEM> <r/>
+ DETAIL
+ end
+
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM 'r.dtd'x'>
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: garbage after external ID
+Line: 3
+Position: 36
+Last 80 unconsumed characters:
+x'> <r/>
+ DETAIL
+ end
+
+ def test_single_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM 'r".dtd'>
+ DOCTYPE
+ assert_equal("r\".dtd", doctype.system)
+ end
+
+ def test_double_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM "r'.dtd">
+ DOCTYPE
+ assert_equal("r'.dtd", doctype.system)
+ end
+ end
+
+ class TestPublic < self
+ class TestPublicIDLiteral < self
+ def test_content_double_quote
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC 'double quote " is invalid' "r.dtd">
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: invalid public ID literal
+Line: 3
+Position: 62
+Last 80 unconsumed characters:
+ PUBLIC 'double quote " is invalid' "r.dtd"> <r/>
+ DETAIL
+ end
+
+ def test_single_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC 'public-id-literal' "r.dtd">
+ DOCTYPE
+ assert_equal("public-id-literal", doctype.public)
+ end
+
+ def test_double_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC "public'-id-literal" "r.dtd">
+ DOCTYPE
+ assert_equal("public'-id-literal", doctype.public)
+ end
+ end
+
+ class TestSystemLiteral < self
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC 'public-id-literal' 'system-literal'x'>
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: garbage after external ID
+Line: 3
+Position: 65
+Last 80 unconsumed characters:
+x'> <r/>
+ DETAIL
+ end
+
+ def test_single_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC "public-id-literal" 'system"-literal'>
+ DOCTYPE
+ assert_equal("system\"-literal", doctype.system)
+ end
+
+ def test_double_quote
+ doctype = parse(<<-DOCTYPE)
+<!DOCTYPE r PUBLIC "public-id-literal" "system'-literal">
+ DOCTYPE
+ assert_equal("system'-literal", doctype.system)
+ end
+ end
+ end
end
class TestMixed < self
@@ -45,6 +215,15 @@ module REXMLTests
assert_equal([REXML::NotationDecl, REXML::AttlistDecl],
doctype.children.collect(&:class))
end
+
+ private
+ def parse(internal_subset)
+ super(<<-DOCTYPE)
+<!DOCTYPE r SYSTEM "urn:x-rexml:test" [
+#{internal_subset}
+]>
+ DOCTYPE
+ end
end
end
end
diff --git a/test/rexml/parse/test_element.rb b/test/rexml/parse/test_element.rb
index aad915fe7b..7206fe595a 100644
--- a/test/rexml/parse/test_element.rb
+++ b/test/rexml/parse/test_element.rb
@@ -33,6 +33,32 @@ Last 80 unconsumed characters:
DETAIL
end
+
+ def test_garbage_less_than_before_root_element_at_line_start
+ exception = assert_raise(REXML::ParseException) do
+ parse("<\n<x/>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+malformed XML: missing tag start
+Line: 2
+Position: 6
+Last 80 unconsumed characters:
+< <x/>
+ DETAIL
+ end
+
+ def test_garbage_less_than_slash_before_end_tag_at_line_start
+ exception = assert_raise(REXML::ParseException) do
+ parse("<x></\n</x>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Missing end tag for 'x'
+Line: 2
+Position: 10
+Last 80 unconsumed characters:
+</ </x>
+ DETAIL
+ end
end
end
end
diff --git a/test/rexml/parse/test_notation_declaration.rb b/test/rexml/parse/test_notation_declaration.rb
index 0d29f0d81f..19a0536d0a 100644
--- a/test/rexml/parse/test_notation_declaration.rb
+++ b/test/rexml/parse/test_notation_declaration.rb
@@ -23,10 +23,100 @@ module REXMLTests
doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>")
assert_equal("name", doctype.notation("name").name)
end
+
+ def test_no_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: name is missing
+Line: 5
+Position: 72
+Last 80 unconsumed characters:
+ <!NOTATION> ]> <r/>
+ DETAIL
+ end
+
+ def test_invalid_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION '>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid name
+Line: 5
+Position: 74
+Last 80 unconsumed characters:
+'> ]> <r/>
+ DETAIL
+ end
+
+ def test_no_id_type
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid ID type
+Line: 5
+Position: 77
+Last 80 unconsumed characters:
+> ]> <r/>
+ DETAIL
+ end
+
+ def test_invalid_id_type
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name INVALID>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid ID type
+Line: 5
+Position: 85
+Last 80 unconsumed characters:
+ INVALID> ]> <r/>
+ DETAIL
+ end
end
class TestExternalID < self
class TestSystem < self
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name SYSTEM>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: system literal is missing
+Line: 5
+Position: 84
+Last 80 unconsumed characters:
+ SYSTEM> ]> <r/>
+ DETAIL
+ end
+
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name SYSTEM 'system-literal'x'>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage before end >
+Line: 5
+Position: 103
+Last 80 unconsumed characters:
+x'> ]> <r/>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name SYSTEM 'system-literal'>
@@ -44,6 +134,21 @@ module REXMLTests
class TestPublic < self
class TestPublicIDLiteral < self
+ def test_content_double_quote
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal">
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid public ID literal
+Line: 5
+Position: 129
+Last 80 unconsumed characters:
+ PUBLIC 'double quote " is invalid' "system-literal"> ]> <r/>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC 'public-id-literal' "system-literal">
@@ -60,6 +165,21 @@ module REXMLTests
end
class TestSystemLiteral < self
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage before end >
+Line: 5
+Position: 123
+Last 80 unconsumed characters:
+x'> ]> <r/>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
<!NOTATION name PUBLIC "public-id-literal" 'system-literal'>
@@ -96,5 +216,66 @@ module REXMLTests
end
end
end
+
+ class TestPublicID < self
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: public ID literal is missing
+Line: 5
+Position: 84
+Last 80 unconsumed characters:
+ PUBLIC> ]> <r/>
+ DETAIL
+ end
+
+ def test_literal_content_double_quote
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC 'double quote " is invalid in PubidLiteral'>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid public ID literal
+Line: 5
+Position: 128
+Last 80 unconsumed characters:
+ PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> <r/>
+ DETAIL
+ end
+
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC 'public-id-literal'x'>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage before end >
+Line: 5
+Position: 106
+Last 80 unconsumed characters:
+x'> ]> <r/>
+ DETAIL
+ end
+
+ def test_literal_single_quote
+ doctype = parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC 'public-id-literal'>
+ INTERNAL_SUBSET
+ assert_equal("public-id-literal", doctype.notation("name").public)
+ end
+
+ def test_literal_double_quote
+ doctype = parse(<<-INTERNAL_SUBSET)
+<!NOTATION name PUBLIC "public-id-literal">
+ INTERNAL_SUBSET
+ assert_equal("public-id-literal", doctype.notation("name").public)
+ end
+ end
end
end
diff --git a/test/rexml/parse/test_processing_instruction.rb b/test/rexml/parse/test_processing_instruction.rb
index a23513fc6e..f0c0c24e67 100644
--- a/test/rexml/parse/test_processing_instruction.rb
+++ b/test/rexml/parse/test_processing_instruction.rb
@@ -20,6 +20,25 @@ Last 80 unconsumed characters:
<??>
DETAIL
end
+
+ def test_garbage_text
+ # TODO: This should be parse error.
+ # Create test/parse/test_document.rb or something and move this to it.
+ doc = parse(<<-XML)
+x<?x y
+<!--?><?x -->?>
+<r/>
+ XML
+ pi = doc.children[1]
+ assert_equal([
+ "x",
+ "y\n<!--",
+ ],
+ [
+ pi.target,
+ pi.content,
+ ])
+ end
end
end
end
diff --git a/test/rexml/parser/test_ultra_light.rb b/test/rexml/parser/test_ultra_light.rb
index 8f4a3980d5..44fd1d1ec0 100644
--- a/test/rexml/parser/test_ultra_light.rb
+++ b/test/rexml/parser/test_ultra_light.rb
@@ -16,7 +16,6 @@ class TestUltraLightParser < Test::Unit::TestCase
nil,
[:entitydecl, "name", "value"]
],
- [:text, "\n"],
[:start_element, :parent, "root", {}],
[:text, "\n"],
],
diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb
index 46036d7f12..ae528d75d3 100644
--- a/test/rexml/test_core.rb
+++ b/test/rexml/test_core.rb
@@ -1,4 +1,4 @@
-# coding: binary
+# coding: utf-8
# frozen_string_literal: false
require_relative "rexml_test_utils"
@@ -995,7 +995,7 @@ EOL
document.write(s)
## XML Doctype
- str = '<!DOCTYPE foo "bar">'
+ str = '<!DOCTYPE foo SYSTEM "bar">'
source = REXML::Source.new(str)
doctype = REXML::DocType.new(source)
document.add(doctype)
diff --git a/test/rexml/test_doctype.rb b/test/rexml/test_doctype.rb
index 7f42669170..915717de8e 100644
--- a/test/rexml/test_doctype.rb
+++ b/test/rexml/test_doctype.rb
@@ -18,12 +18,6 @@ module REXMLTests
@doc_type_system = REXML::Document.new(xml_system).doctype
@pubid = "TEST_ID"
- xml_public = <<-XML
- <!DOCTYPE root PUBLIC "#{@pubid}">
- <root/>
- XML
- @doc_type_public = REXML::Document.new(xml_public).doctype
-
xml_public_system = <<-XML
<!DOCTYPE root PUBLIC "#{@pubid}" "#{@sysid}">
<root/>
@@ -35,11 +29,9 @@ module REXMLTests
assert_equal([
nil,
@pubid,
- @pubid,
],
[
@doc_type_system.public,
- @doc_type_public.public,
@doc_type_public_system.public,
])
end
@@ -58,12 +50,10 @@ module REXMLTests
def test_system
assert_equal([
@sysid,
- nil,
@sysid,
],
[
@doc_type_system.system,
- @doc_type_public.system,
@doc_type_public_system.system,
])
end
diff --git a/version.h b/version.h
index c435e392b5..1c491eb59c 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.6.7"
#define RUBY_RELEASE_DATE "2021-04-05"
-#define RUBY_PATCHLEVEL 196
+#define RUBY_PATCHLEVEL 197
#define RUBY_RELEASE_YEAR 2021
#define RUBY_RELEASE_MONTH 4