1 files changed, 59 insertions, 42 deletions
diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb
index 14f1f6c719..600eb841ac 100644
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@@ -80,10 +80,6 @@ class RDoc::Markup::Parser
     @binary_input   = nil
     @current_token  = nil
     @debug          = false
-    @input          = nil
-    @input_encoding = nil
-    @line           = 0
-    @line_pos       = 0
     @s              = nil
     @tokens         = []
   end
@@ -320,13 +316,6 @@ class RDoc::Markup::Parser
   end
 
   ##
-  # The character offset for the input string at the given +byte_offset+
-
-  def char_pos byte_offset
-    @input.byteslice(0, byte_offset).length
-  end
-
-  ##
   # Pulls the next token from the stream.
 
   def get
@@ -425,14 +414,53 @@ class RDoc::Markup::Parser
   end
 
   ##
+  # A thin wrapper around StringScanner that tracks the current column (in characters) and line number
+
+  class MyStringScanner
+    def initialize(input)
+      @line = @column = 0 # both are zero-based
+      @s = StringScanner.new input
+    end
+
+    def scan(re)
+      # Advance the column by characters (String#length), not by bytes.
+      ret = @s.scan(re)
+      @column += ret.length if ret
+      ret
+    end
+
+    def unscan(s)
+      @s.pos -= s.bytesize # StringScanner#pos is a byte offset
+      @column -= s.length  # the column is a character offset
+    end
+
+    def pos
+      [@column, @line] # note: column first, then line
+    end
+
+    def newline!
+      @column = 0
+      @line += 1
+    end
+
+    def eos?
+      @s.eos?
+    end
+
+    def matched
+      @s.matched
+    end
+
+    def [](i)
+      @s[i]
+    end
+  end
+
+  ##
   # Creates the StringScanner
 
   def setup_scanner input
-    @line     = 0
-    @line_pos = 0
-    @input    = input.dup
-
-    @s = StringScanner.new input
+    @s = MyStringScanner.new input # the wrapper tracks column and line itself
   end
 
   ##
@@ -467,31 +495,30 @@ class RDoc::Markup::Parser
       @tokens << case
                  # [CR]LF => :NEWLINE
                  when @s.scan(/\r?\n/) then
-                   token = [:NEWLINE, @s.matched, *token_pos(pos)]
-                   @line_pos = char_pos @s.pos
-                   @line += 1
+                   token = [:NEWLINE, @s.matched, *pos]
+                   @s.newline!
                    token
                  # === text => :HEADER then :TEXT
                  when @s.scan(/(=+)(\s*)/) then
                    level = @s[1].length
-                   header = [:HEADER, level, *token_pos(pos)]
+                   header = [:HEADER, level, *pos]
 
                    if @s[2] =~ /^\r?\n/ then
-                     @s.pos -= @s[2].length
+                     @s.unscan(@s[2])
                      header
                    else
                      pos = @s.pos
                      @s.scan(/.*/)
                      @tokens << header
-                     [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
+                     [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                    end
                  # --- (at least 3) and nothing else on the line => :RULE
                  when @s.scan(/(-{3,}) *\r?$/) then
-                   [:RULE, @s[1].length - 2, *token_pos(pos)]
+                   [:RULE, @s[1].length - 2, *pos]
                  # * or - followed by white space and text => :BULLET
                  when @s.scan(/([*-]) +(\S)/) then
-                   @s.pos -= @s[2].bytesize # unget \S
-                   [:BULLET, @s[1], *token_pos(pos)]
+                   @s.unscan(@s[2])
+                   [:BULLET, @s[1], *pos]
                  # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
                  when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                    # FIXME if tab(s), the column will be wrong
@@ -500,7 +527,7 @@ class RDoc::Markup::Parser
                    # before (and provide a check for that at least in debug
                    # mode)
                    list_label = @s[1]
-                   @s.pos -= @s[2].bytesize # unget \S
+                   @s.unscan(@s[2])
                    list_type =
                      case list_label
                      when /[a-z]/ then :LALPHA
@@ -509,24 +536,24 @@ class RDoc::Markup::Parser
                      else
                        raise ParseError, "BUG token #{list_label}"
                      end
-                   [list_type, list_label, *token_pos(pos)]
+                   [list_type, list_label, *pos]
                  # [text] followed by spaces or end of line => :LABEL
                  when @s.scan(/\[(.*?)\]( +|\r?$)/) then
-                   [:LABEL, @s[1], *token_pos(pos)]
+                   [:LABEL, @s[1], *pos]
                  # text:: followed by spaces or end of line => :NOTE
                  when @s.scan(/(.*?)::( +|\r?$)/) then
-                   [:NOTE, @s[1], *token_pos(pos)]
+                   [:NOTE, @s[1], *pos]
                  # >>> followed by end of line => :BLOCKQUOTE
                  when @s.scan(/>>> *(\w+)?$/) then
-                   [:BLOCKQUOTE, @s[1], *token_pos(pos)]
+                   [:BLOCKQUOTE, @s[1], *pos]
                  # anything else: :TEXT
                  else
                    @s.scan(/(.*?)(  )?\r?$/)
-                   token = [:TEXT, @s[1], *token_pos(pos)]
+                   token = [:TEXT, @s[1], *pos]
 
                    if @s[2] then
                      @tokens << token
-                     [:BREAK, @s[2], *token_pos(pos + @s[1].length)]
+                     [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
                    else
                      token
                    end
@@ -537,16 +564,6 @@ class RDoc::Markup::Parser
   end
 
   ##
-  # Calculates the column (by character) and line of the current token based
-  # on +byte_offset+.
-
-  def token_pos byte_offset
-    offset = char_pos byte_offset
-
-    [offset - @line_pos, @line]
-  end
-
-  ##
   # Returns the current token to the token stream
 
   def unget