8 files changed, 312 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/lexer/grammar_file.rb b/tool/lrama/lib/lrama/lexer/grammar_file.rb
new file mode 100644
index 0000000000..3d3368625d
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/grammar_file.rb
@@ -0,0 +1,44 @@
+module Lrama
+  class Lexer
+    # A grammar source kept in memory: a path plus the text that belongs to it.
+    class GrammarFile
+      # String whose #inspect output is shortened when the content is long.
+      class Text < String
+        # Up to 50 characters: the regular String#inspect result. Longer:
+        # only the first 48 characters followed by "..." are inspected.
+        def inspect
+          return super if length <= 50
+
+          shortened = "#{self[0..47]}..."
+          shortened.inspect
+        end
+      end
+
+      attr_reader :path, :text
+
+      # Keeps path as given and stores text as a frozen Text copy.
+      def initialize(path, text)
+        @path = path
+        @text = Text.new(text).freeze
+      end
+
+      # Compact representation for debugging; long text is shortened by Text#inspect.
+      def inspect
+        "<#{self.class}: @path=#{path}, @text=#{text.inspect}>"
+      end
+
+      # Equal when both objects have the same class and the same path.
+      # The text is not part of the comparison.
+      def ==(other)
+        return false unless self.class == other.class
+
+        path == other.path
+      end
+
+      # The text split on "\n"; the resulting array is cached.
+      def lines
+        @lines ||= text.split("\n")
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb
new file mode 100644
index 0000000000..aefce3e16b
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/location.rb
@@ -0,0 +1,120 @@
+module Lrama
+  class Lexer
+    # A span inside a grammar file, from (first_line, first_column) to
+    # (last_line, last_column). Lines are 1-based: line N is read from
+    # grammar_file.lines[N - 1].
+    class Location
+      attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
+
+      def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
+        @grammar_file = grammar_file
+        @first_line = first_line
+        @first_column = first_column
+        @last_line = last_line
+        @last_column = last_column
+      end
+
+      # Equal when the class, the grammar file and all four coordinates match.
+      def ==(other)
+        self.class == other.class &&
+        self.grammar_file == other.grammar_file &&
+        self.first_line == other.first_line &&
+        self.first_column == other.first_column &&
+        self.last_line == other.last_line &&
+        self.last_column == other.last_column
+      end
+
+      # Builds a Location for a sub-span of this one. left and right are
+      # character offsets counted from this location's starting position;
+      # every line break inside the span counts as one character.
+      # A coordinate stays -1 when its offset falls outside the covered lines.
+      def partial_location(left, right)
+        # Offset of the current line's column 0, relative to the start position.
+        offset = -first_column
+        new_first_line = -1
+        new_first_column = -1
+        new_last_line = -1
+        new_last_column = -1
+
+        _text.each.with_index do |line, index|
+          # The extra 1 stands for the line break between this line and the next.
+          new_offset = offset + line.length + 1
+
+          if offset <= left && left <= new_offset
+            new_first_line = first_line + index
+            new_first_column = left - offset
+          end
+
+          if offset <= right && right <= new_offset
+            new_last_line = first_line + index
+            new_last_column = right - offset
+          end
+
+          offset = new_offset
+        end
+
+        Location.new(
+          grammar_file: grammar_file,
+          first_line: new_first_line, first_column: new_first_column,
+          last_line: new_last_line, last_column: new_last_column
+        )
+      end
+
+      def to_s
+        "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
+      end
+
+      # Returns "path:first_line:first_column: message" followed by the source
+      # text of this location and a caret line marking the span.
+      def generate_error_message(error_message)
+        <<~ERROR.chomp
+          #{path}:#{first_line}:#{first_column}: #{error_message}
+          #{line_with_carets}
+        ERROR
+      end
+
+      # The source text of this location with a line of "^" markers below it.
+      def line_with_carets
+        <<~TEXT
+          #{text}
+          #{carets}
+        TEXT
+      end
+
+      private
+
+      def path
+        grammar_file.path
+      end
+
+      # Padding placed before the carets: every character in front of
+      # first_column becomes a space, while tabs are kept as tabs.
+      def blanks
+        prefix = text[0...first_column] || raise("#{first_column} is invalid")
+        prefix.gsub(/[^\t]/, ' ')
+      end
+
+      # One "^" per column between first_column and last_column.
+      def carets
+        # When the span covers several lines, last_column can be smaller than
+        # first_column. String#* raises ArgumentError for a negative count, so
+        # the width is clamped to zero to keep error reporting from failing.
+        width = [last_column - first_column, 0].max
+        blanks + '^' * width
+      end
+
+      # All lines of this location joined with "\n".
+      def text
+        @text ||= _text.join("\n")
+      end
+
+      # The grammar file lines from first_line through last_line.
+      def _text
+        @_text ||= begin
+          range = (first_line - 1)...last_line
+          grammar_file.lines[range] || raise("#{range} is invalid")
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb
new file mode 100644
index 0000000000..59b49d5fba
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token.rb
@@ -0,0 +1,66 @@
+require 'lrama/lexer/token/char'
+require 'lrama/lexer/token/ident'
+require 'lrama/lexer/token/instantiate_rule'
+require 'lrama/lexer/token/tag'
+require 'lrama/lexer/token/user_code'
+
+module Lrama
+  class Lexer
+    # Base token: a string value with an optional alias name and an optional
+    # location. The line/column readers delegate to the location.
+    class Token
+      attr_reader :s_value, :location
+      attr_accessor :alias_name, :referred
+
+      # The s_value object passed in is frozen in place, not copied.
+      def initialize(s_value:, alias_name: nil, location: nil)
+        @s_value = s_value.freeze
+        @alias_name = alias_name
+        @location = location
+      end
+
+      def to_s
+        "value: `#{s_value}`, location: #{location}"
+      end
+
+      # True when string equals the token's value or its alias name (if set).
+      def referred_by?(string)
+        names = [s_value, alias_name].compact
+        names.include?(string)
+      end
+
+      # Tokens are equal when their class and string value match; alias and
+      # location are not compared.
+      def ==(other)
+        return false unless self.class == other.class
+
+        s_value == other.s_value
+      end
+
+      def first_line
+        location.first_line
+      end
+      alias_method :line, :first_line
+
+      def first_column
+        location.first_column
+      end
+      alias_method :column, :first_column
+
+      def last_line
+        location.last_line
+      end
+
+      def last_column
+        location.last_column
+      end
+
+      # Raises an error whose message points at ref inside this token. The
+      # ref's first_column/last_column are handed to location.partial_location.
+      def invalid_ref(ref, message)
+        ref_location = location.partial_location(ref.first_column, ref.last_column)
+        raise ref_location.generate_error_message(message)
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb
new file mode 100644
index 0000000000..ec3560ca09
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/char.rb
@@ -0,0 +1,8 @@
+module Lrama
+  class Lexer
+    class Token
+      class Char < Token # Subclass with no members of its own; it differs from Token only by class.
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb
new file mode 100644
index 0000000000..e576eaeccd
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/ident.rb
@@ -0,0 +1,8 @@
+module Lrama
+  class Lexer
+    class Token
+      class Ident < Token # Subclass with no members of its own; it differs from Token only by class.
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb
new file mode 100644
index 0000000000..1c4d1095c8
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb
@@ -0,0 +1,27 @@
+module Lrama
+  class Lexer
+    class Token
+      # Token that carries a list of args and an optional lhs_tag in addition
+      # to the base Token attributes.
+      class InstantiateRule < Token
+        attr_reader :args, :lhs_tag
+
+        def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
+          super(s_value: s_value, alias_name: alias_name, location: location)
+          @lhs_tag = lhs_tag
+          @args = args
+        end
+
+        # The rule name is the token's string value.
+        def rule_name
+          s_value
+        end
+
+        # How many elements args holds.
+        def args_count
+          args.count
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb
new file mode 100644
index 0000000000..e54d773915
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/tag.rb
@@ -0,0 +1,17 @@
+module Lrama
+  class Lexer
+    class Token
+      # Token whose string value is a tag written with surrounding "<" and ">".
+      class Tag < Token
+        # The value without its first and last character, i.e. without "<>".
+        # Raises when the slice yields nil (e.g. for an empty value).
+        def member
+          inner = s_value[1..-2]
+          raise "Unexpected Tag format (#{s_value})" unless inner
+
+          inner
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb
new file mode 100644
index 0000000000..4d487bf01c
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb
@@ -0,0 +1,90 @@
+require "strscan"
+
+module Lrama
+  class Lexer
+    class Token
+      # Token for a piece of user code. It can report the $, @ and $:
+      # references that appear in its string value.
+      class UserCode < Token
+        attr_accessor :tag
+
+        # References found in s_value; scanned on first use and then cached.
+        def references
+          @references ||= _references
+        end
+
+        private
+
+        # Scans s_value from start to end. Whatever lies between "/*" and the
+        # next "*/" is skipped, so references inside such comments are ignored.
+        # If no closing "*/" follows, scanning continues right after the "/*".
+        def _references
+          scanner = StringScanner.new(s_value)
+          references = []
+
+          until scanner.eos?
+            if (reference = scan_reference(scanner))
+              references << reference
+            elsif scanner.scan(/\/\*/)
+              scanner.scan_until(/\*\//)
+            else
+              scanner.getch
+            end
+          end
+
+          references
+        end
+
+        # Tries to read one reference at the scanner's current position.
+        # Returns a Lrama::Grammar::Reference, or nil (without moving the
+        # scanner) when no reference starts here. first_column/last_column
+        # are scanner positions inside s_value.
+        #
+        # Numbers are parsed in base 10 explicitly: a plain Integer("010")
+        # is read as octal and Integer("08") raises.
+        def scan_reference(scanner)
+          start = scanner.pos
+          case
+          # $ references
+          # An identifier that contains "." or "-" must be wrapped in brackets
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+            tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+            return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+            tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+            number = Integer(scanner[2], 10)
+            return Lrama::Grammar::Reference.new(type: :dollar, number: number, index: number, ex_tag: tag, first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+            tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
+            tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+            return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
+
+          # @ references
+          # An identifier that contains "." or "-" must be wrapped in brackets
+          when scanner.scan(/@\$/) # @$
+            return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
+          when scanner.scan(/@(\d+)/) # @1
+            number = Integer(scanner[1], 10)
+            return Lrama::Grammar::Reference.new(type: :at, number: number, index: number, first_column: start, last_column: scanner.pos)
+          when scanner.scan(/@([a-zA-Z_][a-zA-Z0-9_]*)/) # @foo, @expr, @_foo (named reference without brackets; same name rule as $foo)
+            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
+          when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right]  (named reference with brackets)
+            return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
+
+          # $: references
+          when scanner.scan(/\$:\$/) # $:$
+            return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:(\d+)/) # $:1
+            return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1], 10), first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
+            return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
+          when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
+            return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
+          end
+        end
+      end
+    end
+  end
+end