diff options
Diffstat (limited to 'ext/ripper/lib')
| -rw-r--r-- | ext/ripper/lib/ripper.rb | 74 | ||||
| -rw-r--r-- | ext/ripper/lib/ripper/core.rb | 74 | ||||
| -rw-r--r-- | ext/ripper/lib/ripper/filter.rb | 86 | ||||
| -rw-r--r-- | ext/ripper/lib/ripper/lexer.rb | 379 | ||||
| -rw-r--r-- | ext/ripper/lib/ripper/sexp.rb | 187 |
5 files changed, 800 insertions, 0 deletions
# ---- ext/ripper/lib/ripper.rb (new file, mode 100644, blob e937d65217) ----
# frozen_string_literal: true
require 'ripper/core'
require 'ripper/lexer'
require 'ripper/filter'
require 'ripper/sexp'

# Ripper is a Ruby script parser.
#
# You can get information from the parser with event-based style.
# Information such as abstract syntax trees or simple lexical analysis of the
# Ruby program.
#
# == Usage
#
# Ripper provides an easy interface for parsing your program into a symbolic
# expression tree (or S-expression).
#
# Understanding the output of the parser may come as a challenge; it is
# recommended that you use PP to format the output for legibility.
#
#   require 'ripper'
#   require 'pp'
#
#   pp Ripper.sexp('def hello(world) "Hello, #{world}!"; end')
#     #=> [:program,
#          [[:def,
#            [:@ident, "hello", [1, 4]],
#            [:paren,
#             [:params, [[:@ident, "world", [1, 10]]], nil, nil, nil, nil, nil, nil]],
#            [:bodystmt,
#             [[:string_literal,
#               [:string_content,
#                [:@tstring_content, "Hello, ", [1, 18]],
#                [:string_embexpr, [[:var_ref, [:@ident, "world", [1, 27]]]]],
#                [:@tstring_content, "!", [1, 33]]]]],
#             nil,
#             nil,
#             nil]]]]
#
# You can see in the example above, the expression starts with +:program+.
#
# From here, a method definition at +:def+, followed by the method's identifier
# <code>:@ident</code>. After the method's identifier comes the parentheses
# +:paren+ and the method parameters under +:params+.
#
# Next is the method body, starting at +:bodystmt+ (+stmt+ meaning statement),
# which contains the full definition of the method.
#
# In our case, we're simply returning a String, so next we have the
# +:string_literal+ expression.
#
# Within our +:string_literal+ you'll notice two <code>@tstring_content</code>,
# this is the literal part for <code>Hello, </code> and <code>!</code>. Between
# the two <code>@tstring_content</code> statements is a +:string_embexpr+,
# where _embexpr_ is an embedded expression. Our expression consists of a local
# variable, or +var_ref+, with the identifier (<code>@ident</code>) of +world+.
#
# == Resources
#
# * {Ruby Inside}[http://www.rubyinside.com/using-ripper-to-see-how-ruby-is-parsing-your-code-5270.html]
#
# == Requirements
#
# * ruby 1.9 (support CVS HEAD only)
# * bison 1.28 or later (Other yaccs do not work)
#
# == License
#
# Ruby License.
#
# - Minero Aoki
# - aamine@loveruby.net
# - http://i.loveruby.net
class Ripper; end

# ---- ext/ripper/lib/ripper/core.rb (new file, mode 100644, blob fa075da5b9) ----
# frozen_string_literal: true
#
# $Id$
#
# Copyright (c) 2003-2005 Minero Aoki
#
# This program is free software.
# You can distribute and/or modify this program under the Ruby License.
# For details of Ruby License, see ruby/COPYING.
#

require 'ripper.so'

class Ripper

  # Parses the given Ruby program read from +src+.
  # +src+ must be a String or an IO or an object with a #gets method.
  def Ripper.parse(src, filename = '(ripper)', lineno = 1)
    new(src, filename, lineno).parse
  end

  # This array contains the names of the parser events.
  PARSER_EVENTS = PARSER_EVENT_TABLE.keys

  # This array contains the names of the scanner events.
  SCANNER_EVENTS = SCANNER_EVENT_TABLE.keys

  # This array contains the names of all ripper events.
  EVENTS = PARSER_EVENTS + SCANNER_EVENTS

  private

  # :stopdoc:
  # Default handlers, one per arity: each returns its first argument
  # (or nil when the event carries no argument).
  def _dispatch_0() nil end
  def _dispatch_1(a) a end
  def _dispatch_2(a, b) a end
  def _dispatch_3(a, b, c) a end
  def _dispatch_4(a, b, c, d) a end
  def _dispatch_5(a, b, c, d, e) a end
  def _dispatch_6(a, b, c, d, e, f) a end
  def _dispatch_7(a, b, c, d, e, f, g) a end
  # :startdoc:

  #
  # Parser Events
  #

  PARSER_EVENT_TABLE.each do |id, arity|
    alias_method "on_#{id}", "_dispatch_#{arity}"
  end

  # This method is called when a weak warning is produced by the parser.
  # +fmt+ and +args+ are printf style.
  def warn(fmt, *args)
  end

  # This method is called when a strong warning is produced by the parser.
  # +fmt+ and +args+ are printf style.
  def warning(fmt, *args)
  end

  # This method is called when the parser finds a syntax error.
  def compile_error(msg)
  end

  #
  # Scanner Events
  #

  SCANNER_EVENTS.each do |id|
    alias_method "on_#{id}", :_dispatch_1
  end

end

# ---- ext/ripper/lib/ripper/filter.rb (new file, mode 100644, blob 9955d30550) ----
# frozen_string_literal: true
#
# $Id$
#
# Copyright (c) 2004,2005 Minero Aoki
#
# This program is free software.
# You can distribute and/or modify this program under the Ruby License.
# For details of Ruby License, see ruby/COPYING.
#

require 'ripper/lexer'

class Ripper

  # This class handles only scanner events,
  # which are dispatched in the 'right' order (same with input).
  class Filter

    # Creates a new Ripper::Filter instance, passes parameters +src+,
    # +filename+, and +lineno+ to Ripper::Lexer.new
    #
    # The lexer is for internal use only.
    def initialize(src, filename = '-', lineno = 1)
      @__lexer = Lexer.new(src, filename, lineno)
      @__line = nil
      @__col = nil
      @__state = nil
    end

    # The file name of the input.
    def filename
      @__lexer.filename
    end

    # The line number of the current token.
    # This value starts from 1.
    # This method is valid only in event handlers.
    def lineno
      @__line
    end

    # The column number of the current token.
    # This value starts from 0.
    # This method is valid only in event handlers.
    def column
      @__col
    end

    # The scanner's state of the current token.
    # This value is the bitwise OR of zero or more of the +Ripper::EXPR_*+ constants.
    def state
      @__state
    end

    # Starts the parser.
    # +init+ is a data accumulator and is passed to the next event handler (as
    # of Enumerable#inject).
    def parse(init = nil)
      data = init
      @__lexer.lex.each do |pos, event, tok, state|
        @__line, @__col = *pos
        @__state = state
        data = if respond_to?(event, true)
               then __send__(event, tok, data)
               else on_default(event, tok, data)
               end
      end
      data
    end

    private

    # This method is called when some event handler is undefined.
    # +event+ is :on_XXX, +token+ is the scanned token, and +data+ is a data
    # accumulator.
    #
    # The return value of this method is passed to the next event handler (as
    # of Enumerable#inject).
    def on_default(event, token, data)
      data
    end

  end

end

# ---- ext/ripper/lib/ripper/lexer.rb (new file, mode 100644, blob 9b849dfeae) ----
# frozen_string_literal: true
#
# $Id$
#
# Copyright (c) 2004,2005 Minero Aoki
#
# This program is free software.
# You can distribute and/or modify this program under the Ruby License.
# For details of Ruby License, see ruby/COPYING.
#

require 'ripper/core'

class Ripper

  # Tokenizes the Ruby program and returns an array of strings.
  # The +filename+ and +lineno+ arguments are mostly ignored, since the
  # return value is just the tokenized input.
  # By default, this method does not handle syntax errors in +src+,
  # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
  #
  #   p Ripper.tokenize("def m(a) nil end")
  #      # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
  #
  def Ripper.tokenize(src, filename = '-', lineno = 1, **kw)
    Lexer.new(src, filename, lineno).tokenize(**kw)
  end

  # Tokenizes the Ruby program and returns an array of arrays,
  # each of which is formatted like
  # <code>[[lineno, column], type, token, state]</code>.
  # The +filename+ argument is mostly ignored.
  # By default, this method does not handle syntax errors in +src+,
  # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
  #
  #   require 'ripper'
  #   require 'pp'
  #
  #   pp Ripper.lex("def m(a) nil end")
  #   #=> [[[1,  0], :on_kw,     "def", FNAME    ],
  #        [[1,  3], :on_sp,     " ",   FNAME    ],
  #        [[1,  4], :on_ident,  "m",   ENDFN    ],
  #        [[1,  5], :on_lparen, "(",   BEG|LABEL],
  #        [[1,  6], :on_ident,  "a",   ARG      ],
  #        [[1,  7], :on_rparen, ")",   ENDFN    ],
  #        [[1,  8], :on_sp,     " ",   BEG      ],
  #        [[1,  9], :on_kw,     "nil", END      ],
  #        [[1, 12], :on_sp,     " ",   END      ],
  #        [[1, 13], :on_kw,     "end", END      ]]
  #
  def Ripper.lex(src, filename = '-', lineno = 1, **kw)
    Lexer.new(src, filename, lineno).lex(**kw)
  end

  class Lexer < ::Ripper #:nodoc: internal use only
    # :stopdoc:

    # Frozen wrapper around an integer lexer state together with the name
    # reported for it by Ripper.lex_state_name.
    class State
      attr_reader :to_int, :to_s

      def initialize(i)
        @to_int = i
        @to_s = Ripper.lex_state_name(i)
        freeze
      end

      # Array/Hash-like access: 0 or :to_int, 1 or :to_s; nil otherwise.
      def [](index)
        case index
        when 0, :to_int
          @to_int
        when 1, :to_s
          @to_s
        else
          nil
        end
      end

      alias to_i to_int
      alias inspect to_s
      def pretty_print(q) q.text(to_s) end
      def ==(i) super or to_int == i end
      def &(i) self.class.new(to_int & i) end
      def |(i) self.class.new(to_int | i) end
      def allbits?(i) to_int.allbits?(i) end
      def anybits?(i) to_int.anybits?(i) end
      def nobits?(i) to_int.nobits?(i) end
    end

    # One scanned element: position, event name, token text, lexer state and
    # an optional error message.
    class Elem
      attr_accessor :pos, :event, :tok, :state, :message

      def initialize(pos, event, tok, state, message = nil)
        @pos = pos
        @event = event
        @tok = tok
        @state = State.new(state)
        @message = message
      end

      # Array/Hash-like access by index (0..4) or by attribute name.
      def [](index)
        case index
        when 0, :pos
          @pos
        when 1, :event
          @event
        when 2, :tok
          @tok
        when 3, :state
          @state
        when 4, :message
          @message
        else
          nil
        end
      end

      def inspect
        "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
      end

      alias to_s inspect

      def pretty_print(q)
        q.group(2, "#<#{self.class}:", ">") {
          q.breakable
          q.text("#{event}@#{pos[0]}:#{pos[1]}")
          q.breakable
          state.pretty_print(q)
          q.breakable
          q.text("token: ")
          tok.pretty_print(q)
          if message
            q.breakable
            q.text("message: ")
            q.text(message)
          end
        }
      end

      # Returns the element as an array; the message is appended only when
      # one is set.
      def to_a
        if @message
          [@pos, @event, @tok, @state, @message]
        else
          [@pos, @event, @tok, @state]
        end
      end
    end

    attr_reader :errors

    # Returns the token strings in position order.
    def tokenize(**kw)
      parse(**kw).sort_by(&:pos).map(&:tok)
    end

    # Returns the elements, as arrays, in position order.
    def lex(**kw)
      parse(**kw).sort_by(&:pos).map(&:to_a)
    end

    # Parses the code and returns elements including errors.
    def scan(**kw)
      result = (parse(**kw) + errors + @stack.flatten).uniq.sort_by {|e| [*e.pos, (e.message ? -1 : 0)]}
      result.each_with_index do |e, i|
        # The first element has no predecessor; without the +i > 0+ guard
        # result[i-1] would wrap around to the last element.
        if i > 0 and e.event == :on_parse_error and e.tok.empty? and (pre = result[i-1]) and
          pre.pos[0] == e.pos[0] and (pre.pos[1] + pre.tok.size) == e.pos[1]
          e.tok = pre.tok
          e.pos[1] = pre.pos[1]
          result[i-1] = e
          result[i] = pre
        end
      end
      result
    end

    # Runs the parser and returns the collected elements.  With
    # +raise_errors+ true, a SyntaxError is raised when any error was
    # recorded.
    def parse(raise_errors: false)
      @errors = []
      @buf = []
      @stack = []
      super()
      @buf = @stack.pop unless @stack.empty?
      if raise_errors and !@errors.empty?
        raise SyntaxError, @errors.map(&:message).join(' ;')
      end
      @buf.flatten!
      unless (result = @buf).empty?
        # Keep calling the parser and appending whatever it yields until a
        # pass produces no further elements.
        result.concat(@buf) until (@buf = []; super(); @buf.flatten!; @buf.empty?)
      end
      result
    end

    private

    unless SCANNER_EVENT_TABLE.key?(:ignored_sp)
      SCANNER_EVENT_TABLE[:ignored_sp] = 1
      SCANNER_EVENTS << :ignored_sp
      EVENTS << :ignored_sp
    end

    # Dedents the string contents of the most recent heredoc buffer by +w+
    # and records the removed leading whitespace as :on_ignored_sp elements.
    def on_heredoc_dedent(v, w)
      ignored_sp = []
      heredoc = @buf.last
      if Array === heredoc
        heredoc.each_with_index do |e, i|
          if Elem === e and e.event == :on_tstring_content and e.pos[1].zero?
            tok = e.tok.dup if w > 0 and /\A\s/ =~ e.tok
            if (n = dedent_string(e.tok, w)) > 0
              if e.tok.empty?
                e.tok = tok[0, n]
                e.event = :on_ignored_sp
                next
              end
              ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n], e.state)]
              e.pos[1] += n
            end
          end
        end
      end
      # Insert from the back so that earlier indexes stay valid.
      ignored_sp.reverse_each do |i, e|
        heredoc[i, 0] = [e]
      end
      v
    end

    # Starts a nested buffer for the heredoc; the outer buffer is kept on
    # @stack.
    def on_heredoc_beg(tok)
      @stack.push @buf
      buf = []
      @buf.push buf
      @buf = buf
      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
    end

    # Closes the heredoc buffer and restores the enclosing one.
    def on_heredoc_end(tok)
      @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
      @buf = @stack.pop unless @stack.empty?
    end

    # Generic scanner-event handler: records the token under the name it
    # was called by (see the alias loop below).
    def _push_token(tok)
      e = Elem.new([lineno(), column()], __callee__, tok, state())
      @buf.push(e)
      e
    end

    def on_error1(mesg)
      @errors.push Elem.new([lineno(), column()], __callee__, token(), state(), mesg)
    end

    def on_error2(mesg, elem)
      if elem
        elem = Elem.new(elem.pos, __callee__, elem.tok, elem.state, mesg)
      else
        elem = Elem.new([lineno(), column()], __callee__, token(), state(), mesg)
      end
      @errors.push elem
    end
    PARSER_EVENTS.grep(/_error\z/) do |e|
      arity = PARSER_EVENT_TABLE.fetch(e)
      alias_method "on_#{e}", "on_error#{arity}"
    end
    alias compile_error on_error1

    (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
      alias_method event, :_push_token
    end
    # :startdoc:
  end

  # [EXPERIMENTAL]
  # Parses +src+ and returns a string which was matched to +pattern+.
  # +pattern+ should be described as Regexp.
  #
  #   require 'ripper'
  #
  #   p Ripper.slice('def m(a) nil end', 'ident')                   #=> "m"
  #   p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+')  #=> "m(a)"
  #   p Ripper.slice("<<EOS\nstring\nEOS",
  #                  'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
  #       #=> "string\n"
  #
  def Ripper.slice(src, pattern, n = 0)
    if m = token_match(src, pattern)
    then m.string(n)
    else nil
    end
  end

  def Ripper.token_match(src, pattern) #:nodoc:
    TokenPattern.compile(pattern).match(src)
  end

  class TokenPattern #:nodoc:

    class Error < ::StandardError # :nodoc:
    end
    class CompileError < Error # :nodoc:
    end
    class MatchError < Error # :nodoc:
    end

    class << self
      alias compile new
    end

    def initialize(pattern)
      @source = pattern
      @re = compile(pattern)
    end

    def match(str)
      match_list(::Ripper.lex(str))
    end

    def match_list(tokens)
      if m = @re.match(map_tokens(tokens))
      then MatchData.new(tokens, m)
      else nil
      end
    end

    private

    # Translates a token pattern into a Regexp over the one-character-per-
    # token string built by #map_tokens.
    #
    # Event names are replaced by their MAP character, <code>$(</code> opens
    # a capturing group, a plain <code>(</code> opens a non-capturing group,
    # and every other accepted punctuation character is copied through.
    #
    # Raises CompileError for characters outside the accepted set, for
    # unknown event names, and when the resulting regexp is invalid.
    def compile(pattern)
      if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern)
        raise CompileError, "invalid char in pattern: #{m[0].inspect}"
      end
      buf = +''
      # Punctuation is scanned one character at a time so that runs such as
      # "((" or ")(" are translated piecewise, and so that a quantifier
      # following an event name ("ident+") is accepted.
      pattern.scan(/\w+|\$\(|[()\[\]{}?*+.]/) do |tok|
        case tok
        when /\w/
          buf.concat map_token(tok)
        when '$('
          buf.concat '('
        when '('
          buf.concat '(?:'
        else
          buf.concat tok
        end
      end
      Regexp.compile(buf)
    rescue RegexpError => err
      raise CompileError, err.message
    end

    def map_tokens(tokens)
      tokens.map {|pos,type,str| map_token(type.to_s.delete_prefix('on_')) }.join
    end

    # Scanner event name => single character standing for it in the
    # string matched by the compiled regexp.
    MAP = {}
    seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a
    SCANNER_EVENT_TABLE.each do |ev, |
      raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty?
      MAP[ev.to_s.delete_prefix('on_')] = seed.shift
    end

    def map_token(tok)
      MAP[tok] or raise CompileError, "unknown token: #{tok}"
    end

    class MatchData # :nodoc:
      def initialize(tokens, match)
        @tokens = tokens
        @match = match
      end

      def string(n = 0)
        return nil unless @match
        match(n).join
      end

      private

      def match(n = 0)
        return [] unless @match
        @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str }
      end
    end

  end

end

# ---- ext/ripper/lib/ripper/sexp.rb (new file, mode 100644, blob b1d553b1da) ----
# frozen_string_literal: true
#
# $Id$
#
# Copyright (c) 2004,2005 Minero Aoki
#
# This program is free software.
# You can distribute and/or modify this program under the Ruby License.
# For details of Ruby License, see ruby/COPYING.
#

require 'ripper/core'

class Ripper

  # [EXPERIMENTAL]
  # Parses +src+ and creates an S-exp tree.
  # Returns a more readable tree than Ripper.sexp_raw.
  # This method is mainly for developer use.
  # The +filename+ argument is mostly ignored.
  # By default, this method does not handle syntax errors in +src+,
  # returning +nil+ in such cases. Use the +raise_errors+ keyword
  # to raise a SyntaxError for an error in +src+.
  #
  #   require 'ripper'
  #   require 'pp'
  #
  #   pp Ripper.sexp("def m(a) nil end")
  #     #=> [:program,
  #          [[:def,
  #           [:@ident, "m", [1, 4]],
  #           [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]],
  #           [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]
  #
  def Ripper.sexp(src, filename = '-', lineno = 1, raise_errors: false)
    builder = SexpBuilderPP.new(src, filename, lineno)
    sexp = builder.parse
    if builder.error?
      if raise_errors
        raise SyntaxError, builder.error
      end
    else
      sexp
    end
  end

  # [EXPERIMENTAL]
  # Parses +src+ and creates an S-exp tree.
  # This method is mainly for developer use.
  # The +filename+ argument is mostly ignored.
  # By default, this method does not handle syntax errors in +src+,
  # returning +nil+ in such cases. Use the +raise_errors+ keyword
  # to raise a SyntaxError for an error in +src+.
  #
  #   require 'ripper'
  #   require 'pp'
  #
  #   pp Ripper.sexp_raw("def m(a) nil end")
  #     #=> [:program,
  #          [:stmts_add,
  #           [:stmts_new],
  #           [:def,
  #            [:@ident, "m", [1, 4]],
  #            [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
  #            [:bodystmt,
  #             [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
  #             nil,
  #             nil,
  #             nil]]]]
  #
  def Ripper.sexp_raw(src, filename = '-', lineno = 1, raise_errors: false)
    builder = SexpBuilder.new(src, filename, lineno)
    sexp = builder.parse
    if builder.error?
      if raise_errors
        raise SyntaxError, builder.error
      end
    else
      sexp
    end
  end

  class SexpBuilder < ::Ripper #:nodoc:
    attr_reader :error

    private

    # Dedents the string at e[1] by +width+ and advances the column stored
    # at e[2][1] by the amount reported by dedent_string.
    def dedent_element(e, width)
      if (n = dedent_string(e[1], width)) > 0
        e[2][1] += n
      end
      e
    end

    # Walks +val+ (descending into *_add nodes) and dedents every
    # :@tstring_content element and bare String by +width+.
    def on_heredoc_dedent(val, width)
      sub = proc do |cont|
        cont.map! do |e|
          if Array === e
            case e[0]
            when :@tstring_content
              e = dedent_element(e, width)
            when /_add\z/
              e[1] = sub[e[1]]
            end
          elsif String === e
            dedent_string(e, width)
          end
          e
        end
      end
      sub[val]
      val
    end

    # Define a generic handler for every parser event that does not already
    # have one: it returns [event_name, *args].
    events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
    (PARSER_EVENTS - events).each do |event|
      module_eval(<<-End, __FILE__, __LINE__ + 1)
        def on_#{event}(*args)
          args.unshift :#{event}
          args
        end
      End
    end

    # Scanner events become [:@event, token, [lineno, column]].
    SCANNER_EVENTS.each do |event|
      module_eval(<<-End, __FILE__, __LINE__ + 1)
        def on_#{event}(tok)
          [:@#{event}, tok, [lineno(), column()]]
        end
      End
    end

    # Remembers the error message so that it is available through #error.
    def on_error(mesg)
      @error = mesg
    end
    remove_method :on_parse_error
    alias on_parse_error on_error
    alias compile_error on_error
  end

  class SexpBuilderPP < SexpBuilder #:nodoc:
    private

    def on_heredoc_dedent(val, width)
      val.map! do |e|
        next e if Symbol === e and /_content\z/ =~ e
        if Array === e and e[0] == :@tstring_content
          e = dedent_element(e, width)
        elsif String === e
          dedent_string(e, width)
        end
        e
      end
      val
    end

    # Handler used for zero-arity *_new events: starts an empty list.
    def _dispatch_event_new
      []
    end

    # Handler used for *_add events: appends +item+ and returns the list.
    def _dispatch_event_push(list, item)
      list.push item
      list
    end

    def on_mlhs_paren(list)
      [:mlhs, *list]
    end

    def on_mlhs_add_star(list, star)
      list.push([:rest_param, star])
    end

    def on_mlhs_add_post(list, post)
      list.concat(post)
    end

    PARSER_EVENT_TABLE.each do |event, arity|
      if /_new\z/ =~ event and arity == 0
        alias_method "on_#{event}", :_dispatch_event_new
      elsif /_add\z/ =~ event
        alias_method "on_#{event}", :_dispatch_event_push
      end
    end
  end

end
