summaryrefslogtreecommitdiff
path: root/lib/rexml/parsers/baseparser.rb
diff options
context:
space:
mode:
authorser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-06-15 18:31:16 +0000
committerser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2003-06-15 18:31:16 +0000
commit2403ad9e7de8976a83798f9d86f82f3b5ae0d2cd (patch)
treec0f4098655ece836190b90b412481b561d07e640 /lib/rexml/parsers/baseparser.rb
parent4d33715fe4b0c9750b4cf4e71166e8d67ecc8395 (diff)
REXML hadn't been tested with Ruby 1.8.0, which was really, really,
unbelievably stupid of me. There were a lot of warnings and some errors that were caused by Block vs. Proc differences; these have been fixed. REXML passes all of the tests under Ruby 1.8.0. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3943 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers/baseparser.rb')
-rw-r--r--lib/rexml/parsers/baseparser.rb23
1 files changed, 13 insertions, 10 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 614bdc3948..cfcea16f1d 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -23,14 +23,15 @@ module REXML
#
# Nat Price gave me some good ideas for the API.
class BaseParser
- NCNAME_STR= '[\w:][-\w\d.]*'
+ NCNAME_STR= '[\w:][\-\w\d.]*'
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
- NAMECHAR = '[-\w\d\.:]'
+ NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+ REFERENCE_RE = /#{REFERENCE}/
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
@@ -38,6 +39,7 @@ module REXML
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
+ CDATA_END = /^\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um
@@ -51,7 +53,7 @@ module REXML
STANDALONE = /\bstandalone=["'](.*?)['"]/um
ENTITY_START = /^\s*<!ENTITY/
- IDENTITY = /^([!\*\w-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
@@ -61,16 +63,17 @@ module REXML
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+ ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /^\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /^\s*<!NOTATION/um
- PUBLIC = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
- SYSTEM = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+ PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+ SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
TEXT_PATTERN = /\A([^<]*)/um
# Entity constants
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9-()+,./:=?;!*@$_%#"
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
@@ -243,7 +246,7 @@ module REXML
contents = md[0]
pairs = {}
- values = md[0].scan( ATTDEF )
+ values = md[0].scan( ATTDEF_RE )
values.each do |attdef|
unless attdef[3] == "#IMPLIED"
attdef.compact!
@@ -263,9 +266,9 @@ module REXML
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
end
return [ :notationdecl, md[1], md[2], md[3] ]
- when /^\s*\]\s*>/um
+ when CDATA_END
@document_status = :after_doctype
- @source.match( /^\s*\]\s*>/um, true )
+ @source.match( CDATA_END, true )
return [ :end_doctype ]
end
end
@@ -358,7 +361,7 @@ module REXML
def unnormalize( string, entities=nil, filter=nil )
rv = string.clone
rv.gsub!( /\r\n?/, "\n" )
- matches = rv.scan( REFERENCE)
+ matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
m=$1