diff options
Diffstat (limited to 'tool/lrama/lib/lrama/lexer')
-rw-r--r-- | tool/lrama/lib/lrama/lexer/grammar_file.rb | 31 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/location.rb | 97 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token.rb | 56 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token/char.rb | 8 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token/ident.rb | 8 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb | 23 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token/tag.rb | 12 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/lexer/token/user_code.rb | 77 |
8 files changed, 312 insertions, 0 deletions
diff --git a/tool/lrama/lib/lrama/lexer/grammar_file.rb b/tool/lrama/lib/lrama/lexer/grammar_file.rb new file mode 100644 index 0000000000..3d3368625d --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/grammar_file.rb @@ -0,0 +1,31 @@ +module Lrama + class Lexer + class GrammarFile + class Text < String + def inspect + length <= 50 ? super : "#{self[0..47]}...".inspect + end + end + + attr_reader :path, :text + + def initialize(path, text) + @path = path + @text = Text.new(text).freeze + end + + def inspect + "<#{self.class}: @path=#{path}, @text=#{text.inspect}>" + end + + def ==(other) + self.class == other.class && + self.path == other.path + end + + def lines + @lines ||= text.split("\n") + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb new file mode 100644 index 0000000000..aefce3e16b --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/location.rb @@ -0,0 +1,97 @@ +module Lrama + class Lexer + class Location + attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column + + def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:) + @grammar_file = grammar_file + @first_line = first_line + @first_column = first_column + @last_line = last_line + @last_column = last_column + end + + def ==(other) + self.class == other.class && + self.grammar_file == other.grammar_file && + self.first_line == other.first_line && + self.first_column == other.first_column && + self.last_line == other.last_line && + self.last_column == other.last_column + end + + def partial_location(left, right) + offset = -first_column + new_first_line = -1 + new_first_column = -1 + new_last_line = -1 + new_last_column = -1 + + _text.each.with_index do |line, index| + new_offset = offset + line.length + 1 + + if offset <= left && left <= new_offset + new_first_line = first_line + index + new_first_column = left - offset + end + + if offset <= right && right <= new_offset + new_last_line = first_line + index + new_last_column = right - offset + end + + offset = new_offset + end + + Location.new( + grammar_file: grammar_file, + first_line: new_first_line, first_column: new_first_column, + last_line: new_last_line, last_column: new_last_column + ) + end + + def to_s + "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})" + end + + def generate_error_message(error_message) + <<~ERROR.chomp + #{path}:#{first_line}:#{first_column}: #{error_message} + #{line_with_carets} + ERROR + end + + def line_with_carets + <<~TEXT + #{text} + #{carets} + TEXT + end + + private + + def path + grammar_file.path + end + + def blanks + (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ') + end + + def carets + blanks + '^' * (last_column - first_column) + end + + def text + @text ||= _text.join("\n") + end + + def _text + @_text ||=begin + range = (first_line - 1)...last_line + grammar_file.lines[range] or raise "#{range} is invalid" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb new file mode 100644 index 0000000000..59b49d5fba --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token.rb @@ -0,0 +1,56 @@ +require 'lrama/lexer/token/char' +require 'lrama/lexer/token/ident' +require 'lrama/lexer/token/instantiate_rule' +require 'lrama/lexer/token/tag' +require 'lrama/lexer/token/user_code' + +module Lrama + class Lexer + class Token + attr_reader :s_value, :location + attr_accessor :alias_name, :referred + + def initialize(s_value:, alias_name: nil, location: nil) + s_value.freeze + @s_value = s_value + @alias_name = alias_name + @location = location + end + + def to_s + "value: `#{s_value}`, location: #{location}" + end + + def referred_by?(string) + [self.s_value, self.alias_name].compact.include?(string) + end + + def ==(other) + self.class == other.class && self.s_value == other.s_value + end + + def first_line + location.first_line + end + alias :line :first_line + + def first_column + location.first_column + end + alias :column :first_column + + def last_line + location.last_line + end + + def last_column + location.last_column + end + + def invalid_ref(ref, message) + location = self.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message(message) + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb new file mode 100644 index 0000000000..ec3560ca09 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/char.rb @@ -0,0 +1,8 @@ +module Lrama + class Lexer + class Token + class Char < Token + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb new file mode 100644 index 0000000000..e576eaeccd --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/ident.rb @@ -0,0 +1,8 @@ +module Lrama + class Lexer + class Token + class Ident < Token + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb new file mode 100644 index 0000000000..1c4d1095c8 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb @@ -0,0 +1,23 @@ +module Lrama + class Lexer + class Token + class InstantiateRule < Token + attr_reader :args, :lhs_tag + + def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil) + super s_value: s_value, alias_name: alias_name, location: location + @args = args + @lhs_tag = lhs_tag + end + + def rule_name + s_value + end + + def args_count + args.count + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb new file mode 100644 index 0000000000..e54d773915 --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/tag.rb @@ -0,0 +1,12 @@ +module Lrama + class Lexer + class Token + class Tag < Token + # Omit "<>" + def member + s_value[1..-2] or raise "Unexpected Tag format (#{s_value})" + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb new file mode 100644 index 0000000000..4d487bf01c --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb @@ -0,0 +1,77 @@ +require "strscan" + +module Lrama + class Lexer + class Token + class UserCode < Token + attr_accessor :tag + + def references + @references ||= _references + end + + private + + def _references + scanner = StringScanner.new(s_value) + references = [] + + while !scanner.eos? do + case + when reference = scan_reference(scanner) + references << reference + when scanner.scan(/\/\*/) + scanner.scan_until(/\*\//) + else + scanner.getch + end + end + + references + end + + def scan_reference(scanner) + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil + return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/@(\d+)/) # @1 + return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) + + # $: references + when scanner.scan(/\$:\$/) # $:$ + return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:(\d+)/) # $:1 + return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets) + return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos) + + end + end + end + end + end +end |