summary refs log tree commit diff
path: root/tool/lrama/lib/lrama/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'tool/lrama/lib/lrama/lexer')
-rw-r--r-- tool/lrama/lib/lrama/lexer/grammar_file.rb 31
-rw-r--r-- tool/lrama/lib/lrama/lexer/location.rb 97
-rw-r--r-- tool/lrama/lib/lrama/lexer/token.rb 56
-rw-r--r-- tool/lrama/lib/lrama/lexer/token/char.rb 8
-rw-r--r-- tool/lrama/lib/lrama/lexer/token/ident.rb 8
-rw-r--r-- tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb 23
-rw-r--r-- tool/lrama/lib/lrama/lexer/token/tag.rb 12
-rw-r--r-- tool/lrama/lib/lrama/lexer/token/user_code.rb 77
8 files changed, 312 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/lexer/grammar_file.rb b/tool/lrama/lib/lrama/lexer/grammar_file.rb
new file mode 100644
index 0000000000..3d3368625d
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/grammar_file.rb
@@ -0,0 +1,31 @@
+module Lrama
+  class Lexer
+    # A grammar source file: the path it is known by plus its complete text.
+    class GrammarFile
+      # String subclass whose #inspect output is abbreviated for long content,
+      # so inspecting a GrammarFile does not print the entire grammar.
+      class Text < String
+        # Up to 50 characters: the regular String#inspect.
+        # Longer: the first 48 characters followed by "...", inspected.
+        def inspect
+          return super if length <= 50
+
+          "#{self[0..47]}...".inspect
+        end
+      end
+
+      attr_reader :path, :text
+
+      # path - identifier of the file; the only attribute compared by #==
+      # text - the file's contents; kept as a frozen Text copy
+      def initialize(path, text)
+        @path = path
+        @text = Text.new(text).freeze
+      end
+
+      def inspect
+        "<#{self.class}: @path=#{path}, @text=#{text.inspect}>"
+      end
+
+      # Equal when the other object has the same class and the same path;
+      # the text is not compared.
+      def ==(other)
+        return false unless self.class == other.class
+
+        self.path == other.path
+      end
+
+      # The text split on "\n", computed on first use and cached.
+      def lines
+        return @lines if @lines
+
+        @lines = text.split("\n")
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb
new file mode 100644
index 0000000000..aefce3e16b
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/location.rb
@@ -0,0 +1,97 @@
+module Lrama
+  class Lexer
+    # A span inside a grammar file, running from (first_line, first_column)
+    # to (last_line, last_column).
+    #
+    # Lines are used as 1-based indexes into grammar_file.lines; columns are
+    # used as 0-based offsets into the line text.
+    class Location
+      attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
+
+      def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
+        @grammar_file = grammar_file
+        @first_line = first_line
+        @first_column = first_column
+        @last_line = last_line
+        @last_column = last_column
+      end
+
+      # Equal when the other object is a Location of the same class with the
+      # same grammar file and the same four coordinates.
+      def ==(other)
+        self.class == other.class &&
+          self.grammar_file == other.grammar_file &&
+          self.first_line == other.first_line &&
+          self.first_column == other.first_column &&
+          self.last_line == other.last_line &&
+          self.last_column == other.last_column
+      end
+
+      # Builds the Location of a sub-span of this location.
+      #
+      # left, right - offsets measured from the start of this location
+      #               (offset 0 is first_column on first_line); every line
+      #               break counts as one position.
+      #
+      # Returns a new Location in the same grammar file. A coordinate whose
+      # offset falls outside the covered lines is left at -1.
+      def partial_location(left, right)
+        # Offset of column 0 of the line being visited, relative to the
+        # start of this location.
+        offset = -first_column
+        new_first_line = -1
+        new_first_column = -1
+        new_last_line = -1
+        new_last_column = -1
+
+        _text.each_with_index do |line, index|
+          # "+ 1" accounts for the newline that terminates the line.
+          new_offset = offset + line.length + 1
+
+          if offset <= left && left <= new_offset
+            new_first_line = first_line + index
+            new_first_column = left - offset
+          end
+
+          if offset <= right && right <= new_offset
+            new_last_line = first_line + index
+            new_last_column = right - offset
+          end
+
+          offset = new_offset
+        end
+
+        Location.new(
+          grammar_file: grammar_file,
+          first_line: new_first_line, first_column: new_first_column,
+          last_line: new_last_line, last_column: new_last_column
+        )
+      end
+
+      def to_s
+        "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
+      end
+
+      # Formats error_message as "path:line:column: message", followed by the
+      # source text of this location and a line of carets.
+      def generate_error_message(error_message)
+        <<~ERROR.chomp
+          #{path}:#{first_line}:#{first_column}: #{error_message}
+          #{line_with_carets}
+        ERROR
+      end
+
+      # The source text covered by this location followed by the caret line.
+      def line_with_carets
+        <<~TEXT
+          #{text}
+          #{carets}
+        TEXT
+      end
+
+      private
+
+      def path
+        grammar_file.path
+      end
+
+      # Padding that lines the carets up under first_column. Tabs are kept so
+      # the padding occupies the same width as the source text above it.
+      def blanks
+        prefix = text[0...first_column]
+        raise "#{first_column} is invalid" unless prefix
+
+        prefix.gsub(/[^\t]/, ' ')
+      end
+
+      def carets
+        # For a location spanning several lines last_column may be smaller
+        # than first_column; clamp the width so that formatting an error
+        # message does not itself fail with "negative argument".
+        width = [last_column - first_column, 0].max
+        blanks + '^' * width
+      end
+
+      def text
+        @text ||= _text.join("\n")
+      end
+
+      # The grammar file lines first_line..last_line (1-based, inclusive).
+      def _text
+        @_text ||= begin
+          range = (first_line - 1)...last_line
+          grammar_file.lines[range] or raise "#{range} is invalid"
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb
new file mode 100644
index 0000000000..59b49d5fba
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token.rb
@@ -0,0 +1,56 @@
+require 'lrama/lexer/token/char'
+require 'lrama/lexer/token/ident'
+require 'lrama/lexer/token/instantiate_rule'
+require 'lrama/lexer/token/tag'
+require 'lrama/lexer/token/user_code'
+
+module Lrama
+  class Lexer
+    # A lexed token: a frozen string value, an optional alias name and an
+    # optional location.
+    class Token
+      attr_reader :s_value, :location
+      attr_accessor :alias_name, :referred
+
+      # s_value    - the token's text; it is frozen in place
+      # alias_name - optional second name accepted by #referred_by?
+      # location   - optional object answering first_line, first_column,
+      #              last_line, last_column and partial_location
+      def initialize(s_value:, alias_name: nil, location: nil)
+        @s_value = s_value.freeze
+        @alias_name = alias_name
+        @location = location
+      end
+
+      def to_s
+        "value: `#{s_value}`, location: #{location}"
+      end
+
+      # True when string equals the token's value or its alias name.
+      # A nil value or alias never matches.
+      def referred_by?(string)
+        [s_value, alias_name].any? { |name| !name.nil? && name == string }
+      end
+
+      # Tokens are equal when they have the same class and the same s_value;
+      # alias_name and location are not compared.
+      def ==(other)
+        return false unless self.class == other.class
+
+        s_value == other.s_value
+      end
+
+      # The four coordinate readers below delegate to location.
+      def first_line
+        location.first_line
+      end
+      alias_method :line, :first_line
+
+      def first_column
+        location.first_column
+      end
+      alias_method :column, :first_column
+
+      def last_line
+        location.last_line
+      end
+
+      def last_column
+        location.last_column
+      end
+
+      # Raises a RuntimeError whose message is built by the part of this
+      # token's location that ref covers (ref.first_column..ref.last_column).
+      def invalid_ref(ref, message)
+        ref_location = location.partial_location(ref.first_column, ref.last_column)
+        raise ref_location.generate_error_message(message)
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb
new file mode 100644
index 0000000000..ec3560ca09
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/char.rb
@@ -0,0 +1,8 @@
+module Lrama
+ class Lexer
+ class Token
+ # Token subclass with an empty body: it adds no state or behavior of its
+ # own and exists only as a distinct class.
+ # NOTE(review): presumably marks character-literal tokens - confirm against the lexer.
+ class Char < Token
+ end
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb
new file mode 100644
index 0000000000..e576eaeccd
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/ident.rb
@@ -0,0 +1,8 @@
+module Lrama
+ class Lexer
+ class Token
+ # Token subclass with an empty body: it adds no state or behavior of its
+ # own and exists only as a distinct class.
+ # NOTE(review): presumably marks identifier tokens - confirm against the lexer.
+ class Ident < Token
+ end
+ end
+ end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb
new file mode 100644
index 0000000000..1c4d1095c8
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb
@@ -0,0 +1,23 @@
+module Lrama
+  class Lexer
+    class Token
+      # Token that additionally carries an argument list and an optional
+      # left-hand-side tag.
+      class InstantiateRule < Token
+        attr_reader :args, :lhs_tag
+
+        # s_value, alias_name and location are handed to Token#initialize;
+        # args and lhs_tag are stored on this object.
+        def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
+          super(s_value: s_value, alias_name: alias_name, location: location)
+          @lhs_tag = lhs_tag
+          @args = args
+        end
+
+        # Number of entries in args.
+        def args_count
+          args.count
+        end
+
+        # The token's s_value under the name "rule name".
+        def rule_name
+          s_value
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb
new file mode 100644
index 0000000000..e54d773915
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/tag.rb
@@ -0,0 +1,12 @@
+module Lrama
+  class Lexer
+    class Token
+      # Token whose s_value is a tag written with its delimiters, e.g. "<long>".
+      class Tag < Token
+        # Returns s_value without its first and last character (the "<" and ">").
+        # Raises when s_value is too short for that slice to exist.
+        def member
+          inner = s_value[1..-2]
+          raise "Unexpected Tag format (#{s_value})" unless inner
+
+          inner
+        end
+      end
+    end
+  end
+end
diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb
new file mode 100644
index 0000000000..4d487bf01c
--- /dev/null
+++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb
@@ -0,0 +1,77 @@
+require "strscan"
+
+module Lrama
+  class Lexer
+    class Token
+      # Token whose s_value is a chunk of user code. #references lists the
+      # "$..." / "@..." / "$:..." references that occur in that code.
+      class UserCode < Token
+        attr_accessor :tag
+
+        # The references found in s_value, in order of appearance.
+        # The scan runs once; the result is memoized.
+        def references
+          @references ||= _references
+        end
+
+        private
+
+        # Walks s_value from left to right collecting references.
+        # Text inside /* ... */ comments is skipped.
+        def _references
+          scanner = StringScanner.new(s_value)
+          references = []
+
+          until scanner.eos?
+            if (reference = scan_reference(scanner))
+              references << reference
+            elsif scanner.scan(/\/\*/)
+              # When the comment is never closed scan_until matches nothing
+              # and scanning simply continues after the "/*".
+              scanner.scan_until(/\*\//)
+            else
+              scanner.getch
+            end
+          end
+
+          references
+        end
+
+        # Tries to read one reference at the scanner's current position.
+        #
+        # Returns a Lrama::Grammar::Reference (the scanner is advanced past
+        # it), or nil when no reference starts here (the scanner is not moved).
+        # first_column / last_column of the reference are scanner positions
+        # within s_value.
+        def scan_reference(scanner)
+          start = scanner.pos
+          case
+          # $ references
+          # An identifier must be wrapped in brackets to contain "." or "-"
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+            build_reference(scanner, start, type: :dollar, name: "$", ex_tag: tag_for(scanner[1]))
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+            # Base 10 is explicit so that leading zeros are not read as a
+            # radix prefix ("010" would otherwise be 8 and "08" would raise).
+            number = Integer(scanner[2], 10)
+            build_reference(scanner, start, type: :dollar, number: number, index: number, ex_tag: tag_for(scanner[1]))
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+            build_reference(scanner, start, type: :dollar, name: scanner[2], ex_tag: tag_for(scanner[1]))
+          when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
+            build_reference(scanner, start, type: :dollar, name: scanner[2], ex_tag: tag_for(scanner[1]))
+
+          # @ references
+          # An identifier must be wrapped in brackets to contain "." or "-"
+          when scanner.scan(/@\$/) # @$
+            build_reference(scanner, start, type: :at, name: "$")
+          when scanner.scan(/@(\d+)/) # @1
+            number = Integer(scanner[1], 10)
+            build_reference(scanner, start, type: :at, number: number, index: number)
+          when scanner.scan(/@([a-zA-Z_][a-zA-Z0-9_]*)/) # @foo, @expr, @_foo (named reference without brackets)
+            # A leading underscore is accepted, as in the $name and $:name forms.
+            build_reference(scanner, start, type: :at, name: scanner[1])
+          when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
+            build_reference(scanner, start, type: :at, name: scanner[1])
+
+          # $: references
+          when scanner.scan(/\$:\$/) # $:$
+            build_reference(scanner, start, type: :index, name: "$")
+          when scanner.scan(/\$:(\d+)/) # $:1
+            build_reference(scanner, start, type: :index, number: Integer(scanner[1], 10))
+          when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
+            build_reference(scanner, start, type: :index, name: scanner[1])
+          when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
+            build_reference(scanner, start, type: :index, name: scanner[1])
+          end
+        end
+
+        # Creates the Reference for the text just matched: it spans from
+        # start to the scanner's current position.
+        def build_reference(scanner, start, **attributes)
+          Lrama::Grammar::Reference.new(**attributes, first_column: start, last_column: scanner.pos)
+        end
+
+        # Wraps a captured "<tag>" in a Tag token; nil when no tag was written.
+        def tag_for(tag_text)
+          tag_text && Lrama::Lexer::Token::Tag.new(s_value: tag_text)
+        end
+      end
+    end
+  end
+end