diff options
author | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2021-04-05 11:48:23 +0000 |
---|---|---|
committer | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2021-04-05 11:48:23 +0000 |
commit | 1b59a4dc76caa061355f4289d2c54d4625671735 (patch) | |
tree | 51ccf20baa839ba3c28955a4a81d9efbe4c0e86b | |
parent | 1a38986aedf63f676efded3e834c959059d21760 (diff) |
REXML 3.1.9.1
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67940 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | lib/rexml/doctype.rb | 71 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 178 | ||||
-rw-r--r-- | lib/rexml/rexml.rb | 2 | ||||
-rw-r--r-- | test/rexml/parse/test_document_type_declaration.rb | 193 | ||||
-rw-r--r-- | test/rexml/parse/test_element.rb | 26 | ||||
-rw-r--r-- | test/rexml/parse/test_notation_declaration.rb | 181 | ||||
-rw-r--r-- | test/rexml/parse/test_processing_instruction.rb | 19 | ||||
-rw-r--r-- | test/rexml/parser/test_ultra_light.rb | 1 | ||||
-rw-r--r-- | test/rexml/test_core.rb | 4 | ||||
-rw-r--r-- | test/rexml/test_doctype.rb | 10 | ||||
-rw-r--r-- | version.h | 2 |
11 files changed, 607 insertions, 80 deletions
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index 757b639639..a4e91529ac 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -7,6 +7,44 @@ require_relative 'attlistdecl' require_relative 'xmltokens' module REXML + class ReferenceWriter + def initialize(id_type, + public_id_literal, + system_literal, + context=nil) + @id_type = id_type + @public_id_literal = public_id_literal + @system_literal = system_literal + if context and context[:prologue_quote] == :apostrophe + @default_quote = "'" + else + @default_quote = "\"" + end + end + + def write(output) + output << " #{@id_type}" + if @public_id_literal + if @public_id_literal.include?("'") + quote = "\"" + else + quote = @default_quote + end + output << " #{quote}#{@public_id_literal}#{quote}" + end + if @system_literal + if @system_literal.include?("'") + quote = "\"" + elsif @system_literal.include?("\"") + quote = "'" + else + quote = @default_quote + end + output << " #{quote}#{@system_literal}#{quote}" + end + end + end + # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE # ... >. DOCTYPES can be used to declare the DTD of a document, as well as # being used to declare entities used in the document. @@ -50,6 +88,8 @@ module REXML super( parent ) @name = first.name @external_id = first.external_id + @long_name = first.instance_variable_get(:@long_name) + @uri = first.instance_variable_get(:@uri) elsif first.kind_of? Array super( parent ) @name = first[0] @@ -108,19 +148,17 @@ module REXML # Ignored def write( output, indent=0, transitive=false, ie_hack=false ) f = REXML::Formatters::Default.new - c = context - if c and c[:prologue_quote] == :apostrophe - quote = "'" - else - quote = "\"" - end indent( output, indent ) output << START output << ' ' output << @name - output << " #{@external_id}" if @external_id - output << " #{quote}#{@long_name}#{quote}" if @long_name - output << " #{quote}#{@uri}#{quote}" if @uri + if @external_id + reference_writer = ReferenceWriter.new(@external_id, + @long_name, + @uri, + context) + reference_writer.write(output) + end unless @children.empty? output << ' [' @children.each { |child| @@ -259,16 +297,11 @@ module REXML end def to_s - c = nil - c = parent.context if parent - if c and c[:prologue_quote] == :apostrophe - quote = "'" - else - quote = "\"" - end - notation = "<!NOTATION #{@name} #{@middle}" - notation << " #{quote}#{@public}#{quote}" if @public - notation << " #{quote}#{@system}#{quote}" if @system + context = nil + context = parent.context if parent + notation = "<!NOTATION #{@name}" + reference_writer = ReferenceWriter.new(@middle, @public, @system, context) + reference_writer.write(notation) notation << ">" notation end diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 39e9ec3fb1..b10cdcd30e 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -50,7 +50,6 @@ module REXML DOCTYPE_START = /\A\s*<!DOCTYPE\s/um DOCTYPE_END = /\A\s*\]\s*>/um - DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um COMMENT_START = /\A<!--/u COMMENT_PATTERN = /<!--(.*?)-->/um @@ -61,15 +60,14 @@ module REXML XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um INSTRUCTION_START = /\A<\?/u INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um - TAG_MATCH = /^<((?>#{QNAME_STR}))/um - CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um + TAG_MATCH = /\A<((?>#{QNAME_STR}))/um + CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um VERSION = /\bversion\s*=\s*["'](.*?)['"]/um ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um ENTITY_START = /\A\s*<!ENTITY/ - IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u ELEMENTDECL_START = /\A\s*<!ELEMENT/um ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um @@ -83,9 +81,6 @@ module REXML ATTDEF_RE = /#{ATTDEF}/ ATTLISTDECL_START = /\A\s*<!ATTLIST/um ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um - NOTATIONDECL_START = /\A\s*<!NOTATION/um - PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um - SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um TEXT_PATTERN = /\A([^<]*)/um @@ -103,6 +98,11 @@ module REXML GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + NOTATIONDECL_START = /\A\s*<!NOTATION/um + EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um + EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um + PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um + EREFERENCE = /&(?!#{NAME};)/ DEFAULT_ENTITIES = { @@ -195,11 +195,9 @@ module REXML return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 #STDERR.puts @source.encoding - @source.read if @source.buffer.size<2 #STDERR.puts "BUFFER = #{@source.buffer.inspect}" if @document_status == nil - #@source.consume( /^\s*/um ) - word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um ) + word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) word = word[1] unless word.nil? #STDERR.puts "WORD = #{word.inspect}" case word @@ -224,38 +222,49 @@ module REXML when INSTRUCTION_START return process_instruction when DOCTYPE_START - md = @source.match( DOCTYPE_PATTERN, true ) + base_error_message = "Malformed DOCTYPE" + @source.match(DOCTYPE_START, true) @nsstack.unshift(curr_ns=Set.new) - identity = md[1] - close = md[2] - identity =~ IDENTITY - name = $1 - raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil? - pub_sys = $2.nil? ? nil : $2.strip - long_name = $4.nil? ? nil : $4.strip - uri = $6.nil? ? nil : $6.strip - args = [ :start_doctype, name, pub_sys, long_name, uri ] - if close == ">" + name = parse_name(base_error_message) + if @source.match(/\A\s*\[/um, true) + id = [nil, nil, nil] + @document_status = :in_doctype + elsif @source.match(/\A\s*>/um, true) + id = [nil, nil, nil] @document_status = :after_doctype - @source.read if @source.buffer.size<2 - md = @source.match(/^\s*/um, true) - @stack << [ :end_doctype ] else - @document_status = :in_doctype + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: false) + if id[0] == "SYSTEM" + # For backward compatibility + id[1], id[2] = id[2], nil + end + if @source.match(/\A\s*\[/um, true) + @document_status = :in_doctype + elsif @source.match(/\A\s*>/um, true) + @document_status = :after_doctype + else + message = "#{base_error_message}: garbage after external ID" + raise REXML::ParseException.new(message, @source) + end + end + args = [:start_doctype, name, *id] + if @document_status == :after_doctype + @source.match(/\A\s*/um, true) + @stack << [ :end_doctype ] end return args - when /^\s+/ + when /\A\s+/ else @document_status = :after_doctype - @source.read if @source.buffer.size<2 - md = @source.match(/\s*/um, true) if @source.encoding == "UTF-8" @source.buffer.force_encoding(::Encoding::UTF_8) end end end if @document_status == :in_doctype - md = @source.match(/\s*(.*?>)/um) + md = @source.match(/\A\s*(.*?>)/um) case md[1] when SYSTEMENTITY match = @source.match( SYSTEMENTITY, true )[1] @@ -312,24 +321,35 @@ module REXML end return [ :attlistdecl, element, pairs, contents ] when NOTATIONDECL_START - md = nil - if @source.match( PUBLIC ) - md = @source.match( PUBLIC, true ) - vals = [md[1],md[2],md[4],md[6]] - elsif @source.match( SYSTEM ) - md = @source.match( SYSTEM, true ) - vals = [md[1],md[2],nil,md[4]] - else - raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) + base_error_message = "Malformed notation declaration" + unless @source.match(/\A\s*<!NOTATION\s+/um, true) + if @source.match(/\A\s*<!NOTATION\s*>/um) + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid declaration name" + end + raise REXML::ParseException.new(message, @source) end - return [ :notationdecl, *vals ] + name = parse_name(base_error_message) + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) + unless @source.match(/\A\s*>/um, true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end + return [:notationdecl, name, *id] when DOCTYPE_END @document_status = :after_doctype @source.match( DOCTYPE_END, true ) return [ :end_doctype ] end end + if @document_status == :after_doctype + @source.match(/\A\s*/um, true) + end begin + @source.read if @source.buffer.size<2 if @source.buffer[0] == ?< if @source.buffer[1] == ?/ @nsstack.shift @@ -368,6 +388,7 @@ module REXML unless md raise REXML::ParseException.new("malformed XML: missing tag start", @source) end + @document_status = :in_element prefixes = Set.new prefixes << md[2] if md[2] @nsstack.unshift(curr_ns=Set.new) @@ -473,6 +494,85 @@ module REXML true end + def parse_name(base_error_message) + md = @source.match(/\A\s*#{NAME}/um, true) + unless md + if @source.match(/\A\s*\S/um) + message = "#{base_error_message}: invalid name" + else + message = "#{base_error_message}: name is missing" + end + raise REXML::ParseException.new(message, @source) + end + md[1] + end + + def parse_id(base_error_message, + accept_external_id:, + accept_public_id:) + if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + system_literal = md[2] + system = system_literal[1..-2] if system_literal # Remove quote + ["PUBLIC", pubid, system] + elsif accept_public_id and (md = @source.match(PUBLIC_ID, true)) + pubid = system = nil + pubid_literal = md[1] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + ["PUBLIC", pubid, nil] + elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true)) + system = nil + system_literal = md[1] + system = system_literal[1..-2] if system_literal # Remove quote + ["SYSTEM", nil, system] + else + details = parse_id_invalid_details(accept_external_id: accept_external_id, + accept_public_id: accept_public_id) + message = "#{base_error_message}: #{details}" + raise REXML::ParseException.new(message, @source) + end + end + + def parse_id_invalid_details(accept_external_id:, + accept_public_id:) + public = /\A\s*PUBLIC/um + system = /\A\s*SYSTEM/um + if (accept_external_id or accept_public_id) and @source.match(/#{public}/um) + if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + return "public ID literal is missing" + end + unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um) + return "invalid public ID literal" + end + if accept_public_id + if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + return "system ID literal is missing" + end + unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else + "garbage after public ID literal" + end + elsif accept_external_id and @source.match(/#{system}/um) + if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + return "system literal is missing" + end + unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else + unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + return "invalid ID type" + end + "ID type is missing" + end + end + def process_instruction match_data = @source.match(INSTRUCTION_PATTERN, true) unless match_data diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 7851c78bf5..627dff4768 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -24,7 +24,7 @@ module REXML COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>" DATE = "2008/019" - VERSION = "3.1.9" + VERSION = "3.1.9.1" REVISION = "" Copyright = COPYRIGHT diff --git a/test/rexml/parse/test_document_type_declaration.rb b/test/rexml/parse/test_document_type_declaration.rb index 80f70888fb..55713909e7 100644 --- a/test/rexml/parse/test_document_type_declaration.rb +++ b/test/rexml/parse/test_document_type_declaration.rb @@ -5,17 +5,187 @@ require "rexml/document" module REXMLTests class TestParseDocumentTypeDeclaration < Test::Unit::TestCase private - def xml(internal_subset) - <<-XML -<!DOCTYPE r SYSTEM "urn:x-rexml:test" [ -#{internal_subset} -]> + def parse(doctype) + REXML::Document.new(<<-XML).doctype +#{doctype} <r/> XML end - def parse(internal_subset) - REXML::Document.new(xml(internal_subset)).doctype + class TestName < self + def test_valid + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r> + DOCTYPE + assert_equal("r", doctype.name) + end + + def test_garbage_plus_before_name_at_line_start + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE + +r SYSTEM "urn:x-rexml:test" [ +]> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: invalid name +Line: 5 +Position: 51 +Last 80 unconsumed characters: ++ r SYSTEM "urn:x-rexml:test" [ ]> <r/> + DETAIL + end + end + + class TestExternalID < self + class TestSystem < self + def test_left_bracket_in_system_literal + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM "urn:x-rexml:[test" [ +]> + DOCTYPE + assert_equal([ + "r", + "SYSTEM", + nil, + "urn:x-rexml:[test", + ], + [ + doctype.name, + doctype.external_id, + doctype.public, + doctype.system, + ]) + end + + def test_greater_than_in_system_literal + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM "urn:x-rexml:>test" [ +]> + DOCTYPE + assert_equal([ + "r", + "SYSTEM", + nil, + "urn:x-rexml:>test", + ], + [ + doctype.name, + doctype.external_id, + doctype.public, + doctype.system, + ]) + end + + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: system literal is missing +Line: 3 +Position: 26 +Last 80 unconsumed characters: + SYSTEM> <r/> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM 'r.dtd'x'> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: garbage after external ID +Line: 3 +Position: 36 +Last 80 unconsumed characters: +x'> <r/> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM 'r".dtd'> + DOCTYPE + assert_equal("r\".dtd", doctype.system) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r SYSTEM "r'.dtd"> + DOCTYPE + assert_equal("r'.dtd", doctype.system) + end + end + + class TestPublic < self + class TestPublicIDLiteral < self + def test_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC 'double quote " is invalid' "r.dtd"> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: invalid public ID literal +Line: 3 +Position: 62 +Last 80 unconsumed characters: + PUBLIC 'double quote " is invalid' "r.dtd"> <r/> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC 'public-id-literal' "r.dtd"> + DOCTYPE + assert_equal("public-id-literal", doctype.public) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC "public'-id-literal" "r.dtd"> + DOCTYPE + assert_equal("public'-id-literal", doctype.public) + end + end + + class TestSystemLiteral < self + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC 'public-id-literal' 'system-literal'x'> + DOCTYPE + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed DOCTYPE: garbage after external ID +Line: 3 +Position: 65 +Last 80 unconsumed characters: +x'> <r/> + DETAIL + end + + def test_single_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC "public-id-literal" 'system"-literal'> + DOCTYPE + assert_equal("system\"-literal", doctype.system) + end + + def test_double_quote + doctype = parse(<<-DOCTYPE) +<!DOCTYPE r PUBLIC "public-id-literal" "system'-literal"> + DOCTYPE + assert_equal("system'-literal", doctype.system) + end + end + end end class TestMixed < self @@ -45,6 +215,15 @@ module REXMLTests assert_equal([REXML::NotationDecl, REXML::AttlistDecl], doctype.children.collect(&:class)) end + + private + def parse(internal_subset) + super(<<-DOCTYPE) +<!DOCTYPE r SYSTEM "urn:x-rexml:test" [ +#{internal_subset} +]> + DOCTYPE + end end end end diff --git a/test/rexml/parse/test_element.rb b/test/rexml/parse/test_element.rb index aad915fe7b..7206fe595a 100644 --- a/test/rexml/parse/test_element.rb +++ b/test/rexml/parse/test_element.rb @@ -33,6 +33,32 @@ Last 80 unconsumed characters: DETAIL end + + def test_garbage_less_than_before_root_element_at_line_start + exception = assert_raise(REXML::ParseException) do + parse("<\n<x/>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +malformed XML: missing tag start +Line: 2 +Position: 6 +Last 80 unconsumed characters: +< <x/> + DETAIL + end + + def test_garbage_less_than_slash_before_end_tag_at_line_start + exception = assert_raise(REXML::ParseException) do + parse("<x></\n</x>") + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Missing end tag for 'x' +Line: 2 +Position: 10 +Last 80 unconsumed characters: +</ </x> + DETAIL + end end end end diff --git a/test/rexml/parse/test_notation_declaration.rb b/test/rexml/parse/test_notation_declaration.rb index 0d29f0d81f..19a0536d0a 100644 --- a/test/rexml/parse/test_notation_declaration.rb +++ b/test/rexml/parse/test_notation_declaration.rb @@ -23,10 +23,100 @@ module REXMLTests doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>") assert_equal("name", doctype.notation("name").name) end + + def test_no_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: name is missing +Line: 5 +Position: 72 +Last 80 unconsumed characters: + <!NOTATION> ]> <r/> + DETAIL + end + + def test_invalid_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION '> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid name +Line: 5 +Position: 74 +Last 80 unconsumed characters: +'> ]> <r/> + DETAIL + end + + def test_no_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid ID type +Line: 5 +Position: 77 +Last 80 unconsumed characters: +> ]> <r/> + DETAIL + end + + def test_invalid_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name INVALID> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid ID type +Line: 5 +Position: 85 +Last 80 unconsumed characters: + INVALID> ]> <r/> + DETAIL + end end class TestExternalID < self class TestSystem < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name SYSTEM> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: system literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + SYSTEM> ]> <r/> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name SYSTEM 'system-literal'x'> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 103 +Last 80 unconsumed characters: +x'> ]> <r/> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) <!NOTATION name SYSTEM 'system-literal'> @@ -44,6 +134,21 @@ module REXMLTests class TestPublic < self class TestPublicIDLiteral < self + def test_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal"> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 129 +Last 80 unconsumed characters: + PUBLIC 'double quote " is invalid' "system-literal"> ]> <r/> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) <!NOTATION name PUBLIC 'public-id-literal' "system-literal"> @@ -60,6 +165,21 @@ module REXMLTests end class TestSystemLiteral < self + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 123 +Last 80 unconsumed characters: +x'> ]> <r/> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) <!NOTATION name PUBLIC "public-id-literal" 'system-literal'> @@ -96,5 +216,66 @@ module REXMLTests end end end + + class TestPublicID < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: public ID literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + PUBLIC> ]> <r/> + DETAIL + end + + def test_literal_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC 'double quote " is invalid in PubidLiteral'> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 128 +Last 80 unconsumed characters: + PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> <r/> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC 'public-id-literal'x'> + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage before end > +Line: 5 +Position: 106 +Last 80 unconsumed characters: +x'> ]> <r/> + DETAIL + end + + def test_literal_single_quote + doctype = parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC 'public-id-literal'> + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + + def test_literal_double_quote + doctype = parse(<<-INTERNAL_SUBSET) +<!NOTATION name PUBLIC "public-id-literal"> + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + end end end diff --git a/test/rexml/parse/test_processing_instruction.rb b/test/rexml/parse/test_processing_instruction.rb index a23513fc6e..f0c0c24e67 100644 --- a/test/rexml/parse/test_processing_instruction.rb +++ b/test/rexml/parse/test_processing_instruction.rb @@ -20,6 +20,25 @@ Last 80 unconsumed characters: <??> DETAIL end + + def test_garbage_text + # TODO: This should be parse error. + # Create test/parse/test_document.rb or something and move this to it. + doc = parse(<<-XML) +x<?x y +<!--?><?x -->?> +<r/> + XML + pi = doc.children[1] + assert_equal([ + "x", + "y\n<!--", + ], + [ + pi.target, + pi.content, + ]) + end end end end diff --git a/test/rexml/parser/test_ultra_light.rb b/test/rexml/parser/test_ultra_light.rb index 8f4a3980d5..44fd1d1ec0 100644 --- a/test/rexml/parser/test_ultra_light.rb +++ b/test/rexml/parser/test_ultra_light.rb @@ -16,7 +16,6 @@ class TestUltraLightParser < Test::Unit::TestCase nil, [:entitydecl, "name", "value"] ], - [:text, "\n"], [:start_element, :parent, "root", {}], [:text, "\n"], ], diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb index 46036d7f12..ae528d75d3 100644 --- a/test/rexml/test_core.rb +++ b/test/rexml/test_core.rb @@ -1,4 +1,4 @@ -# coding: binary +# coding: utf-8 # frozen_string_literal: false require_relative "rexml_test_utils" @@ -995,7 +995,7 @@ EOL document.write(s) ## XML Doctype - str = '<!DOCTYPE foo "bar">' + str = '<!DOCTYPE foo SYSTEM "bar">' source = REXML::Source.new(str) doctype = REXML::DocType.new(source) document.add(doctype) diff --git a/test/rexml/test_doctype.rb b/test/rexml/test_doctype.rb index 7f42669170..915717de8e 100644 --- a/test/rexml/test_doctype.rb +++ b/test/rexml/test_doctype.rb @@ -18,12 +18,6 @@ module REXMLTests @doc_type_system = REXML::Document.new(xml_system).doctype @pubid = "TEST_ID" - xml_public = <<-XML - <!DOCTYPE root PUBLIC "#{@pubid}"> - <root/> - XML - @doc_type_public = REXML::Document.new(xml_public).doctype - xml_public_system = <<-XML <!DOCTYPE root PUBLIC "#{@pubid}" "#{@sysid}"> <root/> @@ -35,11 +29,9 @@ module REXMLTests assert_equal([ nil, @pubid, - @pubid, ], [ @doc_type_system.public, - @doc_type_public.public, @doc_type_public_system.public, ]) end @@ -58,12 +50,10 @@ module REXMLTests def test_system assert_equal([ @sysid, - nil, @sysid, ], [ @doc_type_system.system, - @doc_type_public.system, @doc_type_public_system.system, ]) end @@ -1,6 +1,6 @@ #define RUBY_VERSION "2.6.7" #define RUBY_RELEASE_DATE "2021-04-05" -#define RUBY_PATCHLEVEL 196 +#define RUBY_PATCHLEVEL 197 #define RUBY_RELEASE_YEAR 2021 #define RUBY_RELEASE_MONTH 4 |