summary | refs | log | tree | commit | diff
path: root/lib/rexml/source.rb
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-06-10 01:31:01 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-06-10 01:31:01 +0000
commit ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d (patch)
tree d3e1f95a5acf262a9dd46e9663b7034bb285b406 /lib/rexml/source.rb
parent ca02190d8887ecd852e4e3f18f3a3ea91e9c6f7a (diff)
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/source.rb')
-rw-r--r-- lib/rexml/source.rb 191
1 files changed, 191 insertions, 0 deletions
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
new file mode 100644
index 0000000000..8c175785b7
--- /dev/null
+++ b/lib/rexml/source.rb
@@ -0,0 +1,191 @@
+require 'rexml/encoding'
+
+module REXML
+ # Generates Source-s. USE THIS CLASS.
+ class SourceFactory
+ # Generates a Source object
+ # @param arg Either a String, or an IO
+ # @return a Source, or nil if a bad argument was given
+ def SourceFactory::create_from arg#, slurp=true
+ if arg.kind_of? String
+ source = Source.new(arg)
+ elsif arg.kind_of? IO
+ source = IOSource.new(arg)
+ end
+ source
+ end
+ end
+
+ # A Source can be searched for patterns, and wraps buffers and other
+ # objects and provides consumption of text
+ class Source
+ include Encoding
+ # The current buffer (what we're going to read next)
+ attr_reader :buffer
+ # The line number of the last consumed text
+ attr_reader :line
+ attr_reader :encoding
+
+ # Constructor
+ # @param arg must be a String, and should be a valid XML document
+ def initialize arg
+ @orig = @buffer = arg
+ self.encoding = check_encoding( @buffer )
+ #@buffer = decode(@buffer) unless @encoding == UTF_8
+ @line = 0
+ end
+
+ # Inherited from Encoding
+ # Overridden to support optimized en/decoding
+ def encoding=(enc)
+ super
+ eval <<-EOL
+ alias :encode :to_#{encoding.tr('-', '_').downcase}
+ alias :decode :from_#{encoding.tr('-', '_').downcase}
+ EOL
+ @line_break = encode( '>' )
+ if enc != UTF_8
+ @buffer = decode(@buffer)
+ @to_utf = true
+ else
+ @to_utf = false
+ end
+ end
+
+ # Scans the source for a given pattern. Note, that this is not your
+ # usual scan() method. For one thing, the pattern argument has some
+ # requirements; for another, the source can be consumed. You can easily
+ # confuse this method. Originally, the patterns were easier
+ # to construct and this method more robust, because this method
+ # generated search regexes on the fly; however, this was
+ # computationally expensive and slowed down the entire REXML package
+ # considerably, since this is by far the most commonly called method.
+ # @param pattern must be a Regexp, and must be in the form of
+ # /^\s*(#{your pattern, with no groups})(.*)/. The first group
+ # will be returned; the second group is used if the consume flag is
+ # set.
+ # @param consume if true, the pattern returned will be consumed, leaving
+ # everything after it in the Source.
+ # @return the pattern, if found, or nil if the Source is empty or the
+ # pattern is not found.
+ def scan pattern, consume=false
+ return nil if @buffer.nil?
+ rv = @buffer.scan(pattern)
+ @buffer = $' if consume and rv.size>0
+ rv
+ end
+
+ def read
+ end
+
+ def match pattern, consume=false
+ md = pattern.match @buffer
+ @buffer = $' if consume and md
+ return md
+ end
+
+ # @return true if the Source is exhausted
+ def empty?
+ @buffer.nil? or @buffer.strip.nil?
+ end
+
+ # @return the current line in the source
+ def current_line
+ lines = @orig.split
+ res = lines.grep @buffer[0..30]
+ res = res[-1] if res.kind_of? Array
+ lines.index( res ) if res
+ end
+ end
+
+ # A Source that wraps an IO. See the Source class for method
+ # documentation
+ class IOSource < Source
+ #attr_reader :block_size
+
+ def initialize arg, block_size=500
+ @er_source = @source = arg
+ @to_utf = false
+ # READLINE OPT
+ # The following was commented out when IOSource started using readline
+ # to pull the data from the stream.
+ #@block_size = block_size
+ #super @source.read(@block_size)
+ @line_break = '>'
+ super @source.readline( @line_break )
+ end
+
+ def scan pattern, consume=false
+ rv = super
+ # You'll notice that this next section is very similar to the same
+ # section in match(), but just a liiittle different. This is
+ # because it is a touch faster to do it this way with scan()
+ # than the way match() does it; enough faster to warrent duplicating
+ # some code
+ if rv.size == 0
+ until @buffer =~ pattern or @source.nil?
+ begin
+ # READLINE OPT
+ #str = @source.read(@block_size)
+ str = @source.readline(@line_break)
+ str = decode(str) if @to_utf and str
+ @buffer << str
+ rescue
+ @source = nil
+ end
+ end
+ rv = super
+ end
+ rv.taint
+ rv
+ end
+
+ def read
+ begin
+ str = @source.readline('>')
+ str = decode(str) if @to_utf and str
+ @buffer << str
+ rescue
+ @source = nil
+ end
+ end
+
+ def match pattern, consume=false
+ rv = pattern.match(@buffer)
+ @buffer = $' if consume and rv
+ while !rv and @source
+ begin
+ str = @source.readline('>')
+ str = decode(str) if @to_utf and str
+ @buffer << str
+ rv = pattern.match(@buffer)
+ @buffer = $' if consume and rv
+ rescue
+ @source = nil
+ end
+ end
+ rv.taint
+ rv
+ end
+
+ def empty?
+ super and ( @source.nil? || @source.eof? )
+ end
+
+ # @return the current line in the source
+ def current_line
+ pos = @er_source.pos # The byte position in the source
+ lineno = @er_source.lineno # The XML < position in the source
+ @er_source.rewind
+ line = 0 # The \r\n position in the source
+ begin
+ while @er_source.pos < pos
+ @er_source.readline
+ line += 1
+ end
+ rescue
+ end
+ [pos, lineno, line]
+ end
+ end
+end