# coding: binary require_relative "rexml_test_utils" require "rexml/document" require "rexml/parseexception" require "rexml/output" require "rexml/source" require "rexml/formatters/pretty" require "rexml/undefinednamespaceexception" require_relative "listener" class Tester < Test::Unit::TestCase include REXMLTestUtils include REXML def setup @xsa_source = <<-EOL Lars Marius Garshol larsga@garshol.priv.no http://www.stud.ifi.uio.no/~lmariusg/ EOL end def test_bad_markup [ " foo ", '<0/>', '&', '&a', # '&a;', # FIXME '', '', '', '', '', '', '', "", "\f", "", "\000", # FIXME '', '', '￿', # FIXME '', # FIXME '', ].each do |src| assert_raise( ParseException, %Q{Parse #{src.inspect} should have failed!} ) do Document.new(src) end end end def test_attribute # Testing constructors #a = Attribute.new "hello", "dolly" #b = Attribute.new a #d = Document.new( "" ) #c = d[0].attributes.get_attribute( "hello" ) #assert_equal a, b #for attr in [ a, b, c] # assert_equal "hello", attr.name # assert_equal "dolly", attr.value #end # This because of a reported bug in attribute handling in 1.0a8 source = 'blah' doc = Document.new source doc.elements.each do |a| a.attributes['att'] << 'B' assert_equal "AB", a.attributes['att'] a.attributes['att'] = 'C' assert_equal "C", a.attributes['att'] end # Bryan Murphy text = "this is a {target[@name='test']/@value} test" source = <<-EOL EOL xml = Document.new source value = xml.root.attributes["search"] assert_equal text, value.to_s e = Element.new "test" e.add_attributes({ "name1" => "test1", "name4" => "test4" }) e.add_attributes([["name3","test3"], ["name2","test2"]]) assert_equal "test1", e.attributes["name1"] assert_equal "test2", e.attributes["name2"] assert_equal "test3", e.attributes["name3"] assert_equal "test4", e.attributes["name4"] # ensure that the attributes come out in sorted order assert_equal %w().join(' '), e.to_s end def test_cdata test = "The quick brown fox jumped & < & < \" ' over the lazy dog." source = "" d = REXML::Document.new( source ) # Test constructors cdata = d[0][0] assert_equal test, cdata.value end def test_comment string = "This is a new comment!" source = "" comment = Comment.new string REXML::Formatters::Default.new.write( comment, out = "" ) assert_equal(source, out) comment2 = Comment.new comment assert_equal(comment, comment2) assert_raise(ParseException) { REXML::Document.new("") } assert_raise(ParseException) { REXML::Document.new(" EOF doc = Document.new source assert_equal "someuri", doc.root.namespace assert_equal "default", doc.root.elements[1].namespace assert_equal "someuri", doc.root.elements[2].namespace assert_equal "notdefault", doc.root.elements[ 3 ].namespace # Testing namespaces in attributes source = <<-EOF EOF doc = Document.new source b = doc.root.elements["b"] assert_equal "x", b.attributes["b:a"] assert_equal "y", b.attributes["a"] doc = Document.new doc.add_element "sean:blah" doc.root.text = "Some text" out = "" doc.write(out) assert_equal "Some text", out end def test_add_namespace e = Element.new 'a' e.add_namespace 'someuri' e.add_namespace 'foo', 'otheruri' e.add_namespace 'xmlns:bar', 'thirduri' assert_equal 'someuri', e.attributes['xmlns'] assert_equal 'otheruri', e.attributes['xmlns:foo'] assert_equal 'thirduri', e.attributes['xmlns:bar'] end def test_big_documentation f = File.new(fixture_path("documentation.xml")) d = Document.new f assert_equal "Sean Russell", d.elements["documentation/head/author"].text.tr("\n\t", " ").squeeze(" ") out = "" d.write out end def test_tutorial doc = Document.new File.new(fixture_path("tutorial.xml")) out = "" doc.write out end def test_stream c = Listener.new Document.parse_stream( File.new(fixture_path("documentation.xml")), c ) assert(c.ts, "Stream parsing apparantly didn't parse the whole file") assert(c.te, "Stream parsing dropped end tag for documentation") Document.parse_stream(" ", c) Document.parse_stream("<>&", c) assert_equal('<>&', c.normalize) end def test_line Document.new File.new(fixture_path("bad.xml")) assert_fail "There should have been an error" rescue Exception # We should get here assert($!.line == 5, "Should have been an error on line 5, "+ "but was reported as being on line #{$!.line}" ) end def test_substitution val = "a'b\"c" el = Element.new("a") el.attributes["x"] = val REXML::Formatters::Default.new.write(el, out="") nel = Document.new( out) assert_equal( val, nel.root.attributes["x"] ) end def test_exception source = SourceFactory.create_from "" p = ParseException.new( "dummy message", source ) begin raise "dummy" rescue Exception p.continued_exception = $! end end def test_bad_content in_gt = 'content>content' in_lt = 'content' # This is OK tree_gt = Document.new in_gt assert_equal "content>content", tree_gt.elements[1].text # This isn't begin Document.new in_lt assert_fail "Should have gotten a parse error" rescue ParseException end end def test_iso_8859_1_output_function out = "" output = Output.new( out ) koln_iso_8859_1 = "K\xF6ln" koln_utf8 = "K\xc3\xb6ln" source = Source.new( koln_iso_8859_1, 'iso-8859-1' ) results = source.scan(/.*/)[0] koln_utf8.force_encoding('UTF-8') if koln_utf8.respond_to?(:force_encoding) assert_equal koln_utf8, results output << results if koln_iso_8859_1.respond_to?(:force_encoding) koln_iso_8859_1.force_encoding('ISO-8859-1') end assert_equal koln_iso_8859_1, out end def test_attributes_each doc = Document.new("") count = 0 doc.root.elements[1].attributes.each {|k,v| count += 1 } assert_equal 4, count end def test_delete_namespace doc = Document.new "" doc.root.delete_namespace doc.root.delete_namespace 'x' assert_equal "", doc.to_s end def test_each_element_with_attribute doc = Document.new "" arry = [] block = proc { |e| assert arry.include?(e.name) arry.delete e.name } # Yields b, c, d arry = %w{b c d} doc.root.each_element_with_attribute( 'id', &block ) assert_equal 0, arry.size # Yields b, d arry = %w{b d} doc.root.each_element_with_attribute( 'id', '1', &block ) assert_equal 0, arry.size # Yields b arry = ['b'] doc.root.each_element_with_attribute( 'id', '1', 1, &block ) assert_equal 0, arry.size # Yields d arry = ['d'] doc.root.each_element_with_attribute( 'id', '1', 0, 'd', &block ) assert_equal 0, arry.size end def test_each_element_with_text doc = Document.new 'bbd' arry = [] block = proc { |e| assert arry.include?(e.name) arry.delete e.name } # Yields b, c, d arry = %w{b c d} doc.root.each_element_with_text(&block) assert_equal 0, arry.size # Yields b, d arry = %w{b c} doc.root.each_element_with_text( 'b', &block ) assert_equal 0, arry.size # Yields b arry = ['b'] doc.root.each_element_with_text( 'b', 1, &block ) assert_equal 0, arry.size # Yields d arry = ['d'] doc.root.each_element_with_text( nil, 0, 'd', &block ) assert_equal 0, arry.size end def test_element_parse_stream s = Source.new( "some text" ) l = Listener.new class << l def tag_start name, attributes raise "Didn't find proper tag name" unless 'a'==name end end Document::parse_stream(s, l) end def test_deep_clone a = Document.new( 'texttexttext' ) b = a.deep_clone assert_equal a.to_s, b.to_s a = Document.new( 'some < text more > text > ' ) b = a.deep_clone assert_equal a.to_s, b.to_s c = Document.new( b.to_s ) assert_equal a.to_s, c.to_s end def test_whitespace_before_root a = < EOL d = Document.new(a) b = "" d.write( b ) assert_equal a,b end def test_entities a = Document.new( 'eeü' ) assert_equal('eeĆ¼'.force_encoding("UTF-8"), a.root.text) end def test_element_decl element_decl = Source.new(" ]>") doc = Document.new( element_decl ) d = doc[0] assert_equal("", d.to_s.split(/\n/)[1].strip) end def test_attlist_decl doc = Document.new <<-EOL ]> EOL assert_equal 'gobble', doc.root.attributes['bar'] assert_equal 'xxx', doc.root.elements[2].namespace assert_equal 'two', doc.root.elements[1].namespace assert_equal 'foo', doc.root.namespace doc = Document.new <<-EOL ]> EOL prefixes = doc.root.prefixes.sort correct = ['svg', 'xlink', 'xml'] assert_equal correct, prefixes end def test_attlist_write file=File.new(fixture_path("foo.xml")) doc=Document.new file out = '' doc.write(out) end def test_more_namespaces assert_raise( REXML::UndefinedNamespaceException, %Q{Should have gotten an Undefined Namespace error} ) { Document.new("

") } doc2 = Document.new("

") es = XPath.match(doc2, '//c') assert_equal 0, es.size es = XPath.match(doc2, '//n:c') assert_equal 1, es.size doc2.root.add_namespace('m', '2') doc2.root.add_element("m:o") es = XPath.match(doc2, './/o') assert_equal 0, es.size es = XPath.match(doc2, '//n:c') assert_equal 1, es.size end def test_ticket_51 doc = REXML::Document.new <<-EOL X Y Z EOL # The most common case. People not caring about the namespaces much. assert_equal( "XY", XPath.match( doc, "/test/a/text()" ).join ) assert_equal( "XY", XPath.match( doc, "/test/x:a/text()" ).join ) # Surprising? I don't think so, if you believe my definition of the "common case" assert_equal( "XYZ", XPath.match( doc, "//a/text()" ).join ) # These are the uncommon cases. Namespaces are actually important, so we define our own # mappings, and pass them in. assert_equal( "XY", XPath.match( doc, "/f:test/f:a/text()", { "f" => "1" } ).join ) # The namespaces are defined, and override the original mappings assert_equal( "", XPath.match( doc, "/test/a/text()", { "f" => "1" } ).join ) assert_equal( "", XPath.match( doc, "/x:test/x:a/text()", { "f" => "1" } ).join ) assert_equal( "", XPath.match( doc, "//a/text()", { "f" => "1" } ).join ) end def test_processing_instruction d = Document.new("") assert_equal 4, XPath.match(d, '//processing-instruction()' ).size match = XPath.match(d, "//processing-instruction('foo3')" ) assert_equal 1, match.size assert_equal 'bar3', match[0].content end def test_oses_with_bad_EOLs Document.new("\n\n\n\n\n\n\n\n") end # Contributed (with patch to fix bug) by Kouhei def test_ignore_whitespace source = " abc " context_all = {:ignore_whitespace_nodes => :all} context_a = {:ignore_whitespace_nodes => %(a)} context_b = {:ignore_whitespace_nodes => %(b)} tests = [[[" abc ", "def"], context_all], [[" abc ", "def"], context_a], [[" ", " abc ", "def", " "], context_b]] tests.each do |test| assert_equal(test[0], Document.new(source, test[1]).root.texts.collect{|x| x.to_s}) end end def test_0xD_in_preface doc = "\x0D\x0D" doc = Document.new doc end def test_hyphens_in_doctype doc = REXML::Document.new <<-EOQ EOQ assert_equal('a-b-c', doc.doctype.name) end def test_accents docs = [ %Q{ }, ' ', %Q{ }, %Q{ } ] docs.each_with_index { |d,i| begin REXML::Document.new(d) rescue puts "#{i} => #{docs[i]}" raise end } end def test_replace_text e = REXML::Element.new( "a" ) e.add_text( "foo" ) assert_equal( "foo", e.to_s ) e[0].value = "bar" assert_equal( "bar", e.to_s ) e[0].value = "<" assert_equal( "<", e.to_s ) assert_equal( "<", e[0].value ) end def test_write_doctype ## XML Document and Declaration document = REXML::Document.new xmldecl = REXML::XMLDecl.new("1.0", "UTF-8") document.add(xmldecl) s = "" document.write(s) ## XML Doctype str = '' source = REXML::Source.new(str) doctype = REXML::DocType.new(source) document.add(doctype) document.write(s) ## Element element = REXML::Element.new("hoge") document.add(element) document.write(s) end def test_write_cdata src = "A" doc = REXML::Document.new( src ) out = "" doc.write( out ) assert_equal( src, out ) src = "" doc = REXML::Document.new( src ) out = "" doc.write( out ) assert_equal( src, out ) end def test_namespace_attributes source = <<-EOL EOL d = Document.new( source ) assert_equal( 'foo', REXML::XPath.first(d.root, "//x:b/@x:n").value ) assert_equal( nil, REXML::XPath.first(d.root, "//x:b/@x:n", {})) end def test_null_element_name a = REXML::Document.new assert_raise( RuntimeError ) { a.add_element( nil ) } end def test_text_raw # From the REXML tutorial # (http://www.germane-software.com/software/rexml/test/data/tutorial.html) doc = Document.new <<-EOL ]> EOL a = doc.root # This makes sure that RAW text nodes don't have their entity strings # replaced t = Text.new "Sean", false, nil, true a.text = t assert_equal( "Sean", t.to_s ) assert_equal( "Sean", t.value ) # This makes sure that they do t = Text.new "Sean", false, nil, false a.text = t assert_equal( "&s;", t.to_s ) assert_equal( "Sean", t.value ) t = Text.new "&s;", false, nil, true a.text = t assert_equal( "&s;", t.to_s ) assert_equal( "Sean", t.value ) t = Text.new "&s;", false, nil, true a.text = t assert_equal( "&s;", t.to_s ) assert_equal( "Sean", t.value ) # Ticket #44 t = REXML::Text.new( "&", false, nil, true ) assert_equal( "&", t.to_s ) t = REXML::Text.new("&", false, false) assert_equal( "&amp;", t.to_s ) end def test_to_xpath doc = REXML::Document.new( %q{ }) names = %w{ /tag1/tag2[1] /tag1/tag2[2] } doc.root.elements.each_with_index {|el, i| assert_equal( names[i], el.xpath ) } end def test_transitive doc = REXML::Document.new( "") s = "" doc.write( s, 0, true ) end # This is issue #40 def test_replace_with old = 'oldold' d = REXML::Document.new(old).root new = REXML::Text.new('new',true,nil,true) child = d.children[2] child.replace_with(new) assert_equal( new, d.children[2] ) end def test_repeated_writes a = IO.read(fixture_path("iso8859-1.xml")) f = REXML::Formatters::Pretty.new xmldoc = REXML::Document.new( a ) a_andre = xmldoc.elements['//image'].attributes['caption'] f.write(xmldoc,b="") xmldoc = REXML::Document.new(b) b_andre = xmldoc.elements['//image'].attributes['caption'] assert_equal( a_andre, b_andre ) f.write(xmldoc,c="") xmldoc = REXML::Document.new(c) c_andre = xmldoc.elements['//image'].attributes['caption'] assert_equal( b_andre, c_andre ) o = Output.new(d="","UTF-8") f.write(xmldoc,o) assert_not_equal( c, d ) end def test_pretty_format_long_text_finite n = 1_000_000 long_text = 'aaaa ' * n xml = "#{long_text}" formatter = REXML::Formatters::Pretty.new document = nil begin document = REXML::Document.new(xml) rescue REXML::ParseException skip_message = "skip this test because we can't check Pretty#wrap " + "works without # on " + "small memory system. # " + "will be raised on the system. See also [ruby-dev:42599]." return skip_message end output = "" formatter.write(document, output) assert_equal("\n" + ((" " + (" aaaa" * 15) + "\n") * (n / 15)) + " " + ("aaaa " * (n % 15)) + "\n" + "", output) end def test_pretty_format_deep_indent n = 6 elements = "" n.times do |i| elements << "" elements << "element#{i} " * 5 end (n - 1).downto(0) do |i| elements << "" end xml = "#{elements}" document = REXML::Document.new(xml) formatter = REXML::Formatters::Pretty.new formatter.width = 20 output = "" formatter.write(document, output) assert_equal(<<-XML.strip, output) element0 element0 element0 element0 element0\s element1 element1 element1 element1 element1\s element2 element2 element2 element2 element2\s element3 element3 element3 element3 element3\s element4 element4 element4 element4 element4 \s element5 element5 element5 element5 element5\s XML end def test_ticket_58 doc = REXML::Document.new doc << REXML::XMLDecl.default doc << REXML::Element.new("a") str = "" doc.write(str) assert_equal("", str) doc = REXML::Document.new doc << REXML::XMLDecl.new("1.0", "UTF-8") doc << REXML::Element.new("a") str = "" doc.write(str) assert_equal("", str) end # Incomplete tags should generate an error def test_ticket_53 assert_raise( REXML::ParseException ) { REXML::Document.new( "" ) } assert_raise( REXML::ParseException ) { REXML::Document.new( "" ) } assert_raise( REXML::ParseException ) { REXML::Document.new( "" ) } end def test_ticket_52 source = "" d = REXML::Document.new(source) d.write(k="") assert_equal( source, k ) source = "" target = "\n \n" d = REXML::Document.new(source) REXML::Formatters::Pretty.new(4).write(d,k="") assert_equal( target, k ) end def test_ticket_76 src = "
at&t" assert_raise( ParseException, %Q{"#{src}" is invalid XML} ) { REXML::Document.new(src) } end def test_ticket_21 src = "" assert_raise( ParseException, "invalid XML should be caught" ) { Document.new(src) } begin Document.new(src) rescue assert_match( /missing attribute quote/, $!.message ) end end def test_ticket_63 Document.new(File.new(fixture_path("t63-1.xml"))) end def test_ticket_75 d = REXML::Document.new(File.new(fixture_path("t75.xml"))) assert_equal("tree", d.root.name) end def test_ticket_48_part_II f = REXML::Formatters::Pretty.new #- rexml sanity check (bugs in ruby 1.8.4, ruby 1.8.6) xmldoc = Document.new("") xmldoc << XMLDecl.new(XMLDecl::DEFAULT_VERSION, "UTF-8") content = ['61c3a927223c3e26'].pack("H*") content.force_encoding('UTF-8') if content.respond_to?(:force_encoding) #- is some UTF-8 text but just to make sure my editor won't magically convert.. xmldoc.root.add_attribute('attr', content) f.write(xmldoc,out=[]) xmldoc = REXML::Document.new(out.join) sanity1 = xmldoc.root.attributes['attr'] f.write(xmldoc,out=[]) xmldoc = REXML::Document.new(out.join) sanity2 = xmldoc.root.attributes['attr'] f.write(xmldoc,out=[]) assert_equal( sanity1, sanity2 ) end def test_ticket_88 doc = REXML::Document.new("") assert_equal("", doc.to_s) doc = REXML::Document.new("") assert_equal("", doc.to_s) end def test_ticket_85 xml = < ENDXML yml = " " # The pretty printer ignores all whitespace, anyway so output1 == output2 f = REXML::Formatters::Pretty.new( 2 ) d = Document.new( xml, :ignore_whitespace_nodes=>:all ) f.write( d, output1="" ) d = Document.new( xml ) f.write( d, output2="" ) # Output directives should override whitespace directives. assert_equal( output1, output2 ) # The base case. d = Document.new(yml) f.write( d, output3="" ) assert_equal( output3.strip, output2.strip ) d = Document.new(yml) f.write( d, output4="" ) assert_equal( output3.strip, output4.strip ) end def test_ticket_91 source=" great " expected=" great " d = Document.new( source ) d.root.add_element( "bah" ) p=REXML::Formatters::Pretty.new(2) p.compact = true # Don't add whitespace to text nodes unless necessary p.write(d,out="") assert_equal( expected, out ) end def test_ticket_95 testd = REXML::Document.new "" testd.write(out1="") testd.elements["//c[2]"].xpath testd.write(out2="") assert_equal(out1,out2) end def test_ticket_102 doc = REXML::Document.new '' assert_equal( "foo", doc.root.elements["item"].attribute("name","ns").to_s ) assert_equal( "item", doc.root.elements["item[@name='foo']"].name ) end def test_ticket_14 # Per .2.5 Node Tests of XPath spec assert_raise( REXML::UndefinedNamespaceException, %Q{Should have gotten an Undefined Namespace error} ) { Document.new("") } end # 5.7 Text Nodes # Character data is grouped into text nodes. As much character data as # possible is grouped into each text node: a text node never has an # immediately following or preceding sibling that is a text node. The # string-value of a text node is the character data. A text node always has # at least one character of data. def test_ticket_105 d = Document.new("") d.root.add_text( "a" ) d.root.add_text( "b" ) assert_equal( 1, d.root.children.size ) end # phantom namespace same as default namespace def test_ticket_121 doc = REXML::Document.new( 'text' ) assert_equal 'text', doc.text( "/doc/item[@name='foo']" ) assert_equal "name='foo'", doc.root.elements["item"].attribute("name", "ns").inspect assert_equal "text", doc.root.elements["item[@name='foo']"].to_s end def test_ticket_135 bean_element = REXML::Element.new("bean") textToAdd = "(&(|(memberof=CN=somegroupabcdefgh,OU=OUsucks,DC=hookemhorns,DC=com)(mail=*someco.com))(acct=%u)(!(extraparameter:2.2.222.222222.2.2.222:=2)))" bean_element.add_element("prop", {"key"=> "filter"}).add_text(textToAdd) doc = REXML::Document.new doc.add_element(bean_element) REXML::Formatters::Pretty.new(3).write( doc, out = "" ) assert_equal "\n \n (&#38;(|(memberof=CN=somegroupabcdefgh,OU=OUsucks,DC=hookemhorns,DC=com)(mail=*someco.com))(acct=%u)(!(extraparameter:2.2.222.222222.2.2.222:=2)))\n \n", out end def test_ticket_138 doc = REXML::Document.new( '' ) expected = { "inkscape" => attribute("xmlns:inkscape", "http://www.inkscape.org/namespaces/inkscape"), "version" => { "inkscape" => attribute("inkscape:version", "0.44"), "" => attribute("version", "1.0"), }, } assert_equal(expected, doc.root.attributes) assert_equal(expected, REXML::Document.new(doc.root.to_s).root.attributes) end def test_empty_doc assert(REXML::Document.new('').children.empty?) end private def attribute(name, value) REXML::Attribute.new(name, value) end end