diff options
Diffstat (limited to 'tool/lrama/lib/lrama/lexer.rb')
-rw-r--r-- | tool/lrama/lib/lrama/lexer.rb | 188 |
1 files changed, 188 insertions, 0 deletions
require "strscan"

require "lrama/lexer/grammar_file"
require "lrama/lexer/location"
require "lrama/lexer/token"

module Lrama
  # Hand-written scanner for grammar files, built on StringScanner.
  #
  # The lexer runs in one of two modes selected through +status+:
  # * +:initial+       - grammar tokens are produced by #lex_token
  # * +:c_declaration+ - raw user code is collected by #lex_c_code until
  #   +end_symbol+ is reached
  #
  # While scanning it tracks the current line and the scanner offset at which
  # that line starts, so that every token can be given a Location.
  class Lexer
    attr_reader :head_line, :head_column, :line
    attr_accessor :status, :end_symbol

    # Punctuation tokens, written as regexp source fragments (hence the
    # backslash escapes). They are joined with "|" into SYMBOLS_PATTERN.
    SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze

    # Directive keywords. Order matters: alternation picks the first matching
    # entry, so a longer directive must precede any directive that is a
    # prefix of it (e.g. %precedence before %prec, %after-shift-error-token
    # before %after-shift).
    PERCENT_TOKENS = %w(
      %union
      %token
      %type
      %left
      %right
      %nonassoc
      %expect
      %define
      %require
      %printer
      %destructor
      %lex-param
      %parse-param
      %initial-action
      %precedence
      %prec
      %error-token
      %before-reduce
      %after-reduce
      %after-shift-error-token
      %after-shift
      %after-pop-stack
      %empty
      %code
      %rule
      %no-stdlib
      %inline
    ).freeze

    # Compiled once at load time instead of being rebuilt on every
    # #lex_token call.
    SYMBOLS_PATTERN = /#{SYMBOLS.join('|')}/.freeze
    PERCENT_TOKENS_PATTERN = /#{PERCENT_TOKENS.join('|')}/.freeze

    # @param grammar_file [#text] object whose +text+ is the grammar source;
    #   it is also passed through to every Location that is created
    def initialize(grammar_file)
      @grammar_file = grammar_file
      @scanner = StringScanner.new(grammar_file.text)
      @head_column = @head = @scanner.pos
      @head_line = @line = 1
      @status = :initial
      @end_symbol = nil
    end

    # Produces the next token according to the current +status+.
    #
    # @return [Array, nil] a +[type, value]+ pair; nil at end of input in
    #   +:initial+ mode, or when +status+ is neither +:initial+ nor
    #   +:c_declaration+
    def next_token
      case @status
      when :initial
        lex_token
      when :c_declaration
        lex_c_code
      end
    end

    # @return [Integer] scanner offset relative to the start of the current line
    def column
      @scanner.pos - @head
    end

    # @return [Location] span from the position recorded by the last
    #   reset_first_position call up to the current scanner position
    def location
      Location.new(
        grammar_file: @grammar_file,
        first_line: @head_line, first_column: @head_column,
        last_line: line, last_column: column
      )
    end

    # Skips whitespace and comments, then scans a single grammar token.
    #
    # @return [Array, nil] +[type, value]+, or nil when the input is exhausted.
    #   Symbols, percent directives and the ?, +, * operators use the matched
    #   text as both type and value; the other types are :TAG, :CHARACTER,
    #   :STRING, :INTEGER, :IDENTIFIER and :IDENT_COLON.
    # @raise [ParseError] when no token pattern matches at the current position
    def lex_token
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/[ \t\r\f\v]+/)
          # Whitespace other than "\n". A plain /\s+/ here would also consume
          # newlines that follow spaces, tabs or "\r" without calling
          # #newline, leaving @line and @head (and so every later Location)
          # out of date.
        when @scanner.scan(/\/\*/)
          lex_comment
        when @scanner.scan(/\/\/.*(?<newline>\n)?/)
          newline if @scanner[:newline]
        else
          break
        end
      end

      # The token starts here, after all leading trivia has been skipped.
      reset_first_position

      case
      when @scanner.eos?
        nil
      when @scanner.scan(SYMBOLS_PATTERN)
        [@scanner.matched, @scanner.matched]
      when @scanner.scan(PERCENT_TOKENS_PATTERN)
        [@scanner.matched, @scanner.matched]
      when @scanner.scan(/[\?\+\*]/)
        [@scanner.matched, @scanner.matched]
      when @scanner.scan(/<\w+>/)
        [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
      when @scanner.scan(/'.'/)
        [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
      when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
        [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
      when @scanner.scan(/".*?"/)
        [:STRING, %Q(#{@scanner.matched})]
      when @scanner.scan(/\d+/)
        [:INTEGER, Integer(@scanner.matched)]
      when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
        token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
        # Look ahead without consuming: an identifier followed by an optional
        # "[name]" and then ":" is reported as IDENT_COLON.
        type =
          if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
            :IDENT_COLON
          else
            :IDENTIFIER
          end
        [type, token]
      else
        raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}."
      end
    end

    # Collects raw user code up to (but not including) +end_symbol+.
    #
    # Braces are counted so that a "}" only terminates the code when it is not
    # closing a "{" opened inside the collected text.
    #
    # @return [Array] +[:C_DECLARATION, Token::UserCode]+
    # @raise [ParseError] when the input ends before the end symbol is found
    def lex_c_code
      nested = 0
      # Unfrozen buffer appended to in place; `+=` would copy the whole
      # string for every scanned fragment.
      code = +''
      reset_first_position

      until @scanner.eos?
        case
        when @scanner.scan(/{/)
          code << @scanner.matched
          nested += 1
        when @scanner.scan(/}/)
          if nested == 0 && @end_symbol == '}'
            # Leave the closing brace in the input for the next token.
            @scanner.unscan
            return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
          else
            code << @scanner.matched
            nested -= 1
          end
        when @scanner.check(/#{@end_symbol}/)
          # NOTE(review): end_symbol is interpolated unescaped, i.e. treated
          # as a regexp pattern, presumably so callers can pass an anchor -
          # confirm against the code that sets end_symbol.
          return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
        when @scanner.scan(/\n/)
          code << @scanner.matched
          newline
        when @scanner.scan(/".*?"/)
          code << %Q(#{@scanner.matched})
          # "." does not match "\n" without the /m flag, so this currently
          # always adds zero.
          @line += @scanner.matched.count("\n")
        when @scanner.scan(/'.*?'/)
          code << %Q(#{@scanner.matched})
        when @scanner.scan(/[^\"'\{\}\n]+/)
          code << @scanner.matched
        when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
          # Only reached when the unescaped pattern above did not match here
          # but the literal text of end_symbol does.
          code << @scanner.matched
        else
          code << @scanner.getch
        end
      end
      raise ParseError, "Unexpected code: #{code}."
    end

    private

    # Consumes the body of a /* ... */ comment (the opening "/*" has already
    # been scanned), counting any newlines inside it. Returns silently if the
    # input ends before "*/" is found.
    def lex_comment
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/\*\//)
          return
        else
          @scanner.getch
        end
      end
    end

    # Records the current line/column as the start of the next token.
    def reset_first_position
      @head_line = line
      @head_column = column
    end

    # Advances the line counter and marks the current scanner offset as the
    # start of the new line. Must be called right after a "\n" is consumed.
    def newline
      @line += 1
      @head = @scanner.pos
    end
  end
end