From dee374059771bf9d7e9551c906414b446c719b4a Mon Sep 17 00:00:00 2001 From: aamine Date: Tue, 1 Feb 2005 18:32:40 +0000 Subject: * ext/ripper/lib/ripper/tokenizer.rb -> lexer.rb. * ext/ripper/lib/ripper/lexer.rb: new method Ripper.slice. [experimental] * ext/ripper/lib/ripper/sexp.rb: new file. [experimental] * ext/ripper/lib/ripper.rb: require ripper/lexer and ripper/sexp. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7861 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ext/ripper/lib/ripper.rb | 3 +- ext/ripper/lib/ripper/lexer.rb | 175 +++++++++++++++++++++++++++++++++++++ ext/ripper/lib/ripper/sexp.rb | 60 +++++++++++++ ext/ripper/lib/ripper/tokenizer.rb | 64 -------------- 4 files changed, 237 insertions(+), 65 deletions(-) create mode 100644 ext/ripper/lib/ripper/lexer.rb create mode 100644 ext/ripper/lib/ripper/sexp.rb delete mode 100644 ext/ripper/lib/ripper/tokenizer.rb (limited to 'ext/ripper/lib') diff --git a/ext/ripper/lib/ripper.rb b/ext/ripper/lib/ripper.rb index 38a5a2bd19..cb19da334a 100644 --- a/ext/ripper/lib/ripper.rb +++ b/ext/ripper/lib/ripper.rb @@ -1,3 +1,4 @@ require 'ripper/core' -require 'ripper/tokenizer' +require 'ripper/lexer' require 'ripper/filter' +require 'ripper/sexp' diff --git a/ext/ripper/lib/ripper/lexer.rb b/ext/ripper/lib/ripper/lexer.rb new file mode 100644 index 0000000000..35074acbf4 --- /dev/null +++ b/ext/ripper/lib/ripper/lexer.rb @@ -0,0 +1,175 @@ +# +# ripper/lexer.rb +# +# Copyright (C) 2004,2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper/core' + +class Ripper + + # Tokenizes Ruby program and returns an Array of String. + def Ripper.tokenize(src, filename = '-', lineno = 1) + Lexer.new(src, filename, lineno).tokenize + end + + # Tokenizes Ruby program and returns an Array of Array, + # which is formatted like [[lineno, column], type, token]. 
+ # + # require 'ripper' + # require 'pp' + # + # p Ripper.lex("def m(a) nil end") + # #=> [[[1, 0], :on_kw, "def"], + # [[1, 3], :on_sp, " " ], + # [[1, 4], :on_ident, "m" ], + # [[1, 5], :on_lparen, "(" ], + # [[1, 6], :on_ident, "a" ], + # [[1, 7], :on_rparen, ")" ], + # [[1, 8], :on_sp, " " ], + # [[1, 9], :on_kw, "nil"], + # [[1, 12], :on_sp, " " ], + # [[1, 13], :on_kw, "end"]] + # + def Ripper.lex(src, filename = '-', lineno = 1) + Lexer.new(src, filename, lineno).lex + end + + class Lexer < ::Ripper #:nodoc: internal use only + def tokenize + parse().map {|pos, event, tok| tok } + end + + def lex + @buf = [] + parse + @buf.sort_by {|pos, event, tok| pos } + end + + private + + SCANNER_EVENTS.each do |event| + module_eval(<<-End) + def on_#{event}(tok) + @buf.push [[lineno(), column()], :on_#{event}, tok] + end + End + end + end + + # [EXPERIMENTAL] + # Parses +src+ and returns a string which was matched to +pattern+. + # +pattern+ should be described as Regexp. + # + # require 'ripper' + # + # p Ripper.slice('def m(a) nil end', 'ident') #=> "m" + # p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+') #=> "m(a)" + # p Ripper.slice("<<EOS\nstring\nEOS", + # 'heredoc_beg nl $(tstring_content*) heredoc_end', 1) + # #=> "string\n" + # + def Ripper.slice(src, pattern, n = 0) + if m = token_match(src, pattern) + then m.string(n) + else nil + end + end + + def Ripper.token_match(src, pattern) #:nodoc: + TokenPattern.compile(pattern).match(src) + end + + class TokenPattern #:nodoc: + + class Error < ::StandardError; end + class CompileError < Error; end + class MatchError < Error; end + + class << self + alias compile new + end + + def initialize(pattern) + @source = pattern + @re = compile(pattern) + end + + def match(str) + match_list(::Ripper.lex(str)) + end + + def match_list(tokens) + if m = @re.match(map_tokens(tokens)) + then MatchData.new(tokens, m) + else nil + end + end + + private + + def compile(pattern) + if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern) + raise CompileError, "invalid char in pattern: #{m[0].inspect}" + end + buf = 
'' + pattern.scan(/(?:\w+|\$\(|[()\[\]\{\}?*+\.]+)/) do |tok| + case tok + when /\w/ + buf.concat map_token(tok) + when '$(' + buf.concat '(' + when '(' + buf.concat '(?:' + when /[?*\[\])\.]/ + buf.concat tok + else + raise 'must not happen' + end + end + Regexp.compile(buf) + rescue RegexpError => err + raise CompileError, err.message + end + + def map_tokens(tokens) + tokens.map {|pos,type,str| map_token(type.to_s.sub(/\Aon_/,'')) }.join + end + + MAP = {} + seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a + SCANNER_EVENT_TABLE.each do |ev, | + raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty? + MAP[ev.to_s.sub(/\Aon_/,'')] = seed.shift + end + + def map_token(tok) + MAP[tok] or raise CompileError, "unknown token: #{tok}" + end + + class MatchData + def initialize(tokens, match) + @tokens = tokens + @match = match + end + + def string(n = 0) + return nil unless @match + match(n).join + end + + private + + def match(n = 0) + return [] unless @match + @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str } + end + end + + end + +end diff --git a/ext/ripper/lib/ripper/sexp.rb b/ext/ripper/lib/ripper/sexp.rb new file mode 100644 index 0000000000..a55e5f2658 --- /dev/null +++ b/ext/ripper/lib/ripper/sexp.rb @@ -0,0 +1,60 @@ +# +# ripper/sexp.rb +# +# Copyright (C) 2004,2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper/core' + +class Ripper + + # [EXPERIMENTAL] + # Parses +src+ and creates an S-exp tree. + # This method is mainly for developer use. 
+ # + # require 'ripper' + # require 'pp' + # + # pp Ripper.sexp("def m(a) nil end") + # #=> [:program, + # [:stmts_add, + # [:stmts_new], + # [:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], + # [:bodystmt, + # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], + # nil, + # nil, + # nil]]]] + # + def Ripper.sexp(src, filename = '-', lineno = 1) + SexpBuilder.new(src, filename, lineno).parse + end + + class SexpBuilder < ::Ripper #:nodoc: + private + + PARSER_EVENTS.each do |event| + module_eval(<<-End) + def on_#{event}(*list) + list.unshift :#{event} + list + end + End + end + + SCANNER_EVENTS.each do |event| + module_eval(<<-End) + def on_#{event}(tok) + [:@#{event}, tok, [lineno(), column()]] + end + End + end + end + +end diff --git a/ext/ripper/lib/ripper/tokenizer.rb b/ext/ripper/lib/ripper/tokenizer.rb deleted file mode 100644 index 4209903ba7..0000000000 --- a/ext/ripper/lib/ripper/tokenizer.rb +++ /dev/null @@ -1,64 +0,0 @@ -# -# ripper/tokenizer.rb -# -# Copyright (C) 2004 Minero Aoki -# -# This program is free software. -# You can distribute and/or modify this program under the Ruby License. -# For details of Ruby License, see ruby/COPYING. -# - -require 'ripper/core' - -class Ripper - - # Tokenizes Ruby program and returns an Array of String. - def Ripper.tokenize(src, filename = '-', lineno = 1) - Tokenizer.new(src, filename, lineno).tokenize - end - - # Tokenizes Ruby program and returns an Array of Array, - # which is formatted like [[lineno, column], type, token]. 
- # - # require 'ripper' - # require 'pp' - # - # p Ripper.scan("def m(a) nil end") - # #=> [[[1, 0], :on_kw, "def"], - # [[1, 3], :on_sp, " " ], - # [[1, 4], :on_ident, "m" ], - # [[1, 5], :on_lparen, "(" ], - # [[1, 6], :on_ident, "a" ], - # [[1, 7], :on_rparen, ")" ], - # [[1, 8], :on_sp, " " ], - # [[1, 9], :on_kw, "nil"], - # [[1, 12], :on_sp, " " ], - # [[1, 13], :on_kw, "end"]] - # - def Ripper.scan(src, filename = '-', lineno = 1) - Tokenizer.new(src, filename, lineno).parse - end - - class Tokenizer < ::Ripper #:nodoc: internal use only - def tokenize - parse().map {|pos, event, tok| tok } - end - - def parse - @buf = [] - super - @buf.sort_by {|pos, event, tok| pos } - end - - private - - SCANNER_EVENTS.each do |event| - module_eval(<<-End) - def on_#{event}(tok) - @buf.push [[lineno(), column()], :on_#{event}, tok] - end - End - end - end - -end -- cgit v1.2.3