diff options
Diffstat (limited to 'trunk/ext/ripper')
-rw-r--r-- | trunk/ext/ripper/.cvsignore | 8 | ||||
-rw-r--r-- | trunk/ext/ripper/README | 30 | ||||
-rw-r--r-- | trunk/ext/ripper/depend | 34 | ||||
-rw-r--r-- | trunk/ext/ripper/eventids2.c | 277 | ||||
-rw-r--r-- | trunk/ext/ripper/extconf.rb | 22 | ||||
-rw-r--r-- | trunk/ext/ripper/lib/ripper.rb | 4 | ||||
-rw-r--r-- | trunk/ext/ripper/lib/ripper/core.rb | 70 | ||||
-rw-r--r-- | trunk/ext/ripper/lib/ripper/filter.rb | 70 | ||||
-rw-r--r-- | trunk/ext/ripper/lib/ripper/lexer.rb | 179 | ||||
-rw-r--r-- | trunk/ext/ripper/lib/ripper/sexp.rb | 99 | ||||
-rwxr-xr-x | trunk/ext/ripper/tools/generate-param-macros.rb | 14 | ||||
-rwxr-xr-x | trunk/ext/ripper/tools/generate.rb | 152 | ||||
-rwxr-xr-x | trunk/ext/ripper/tools/preproc.rb | 91 | ||||
-rwxr-xr-x | trunk/ext/ripper/tools/strip.rb | 12 |
14 files changed, 1062 insertions, 0 deletions
diff --git a/trunk/ext/ripper/.cvsignore b/trunk/ext/ripper/.cvsignore new file mode 100644 index 0000000000..c9adce0b1a --- /dev/null +++ b/trunk/ext/ripper/.cvsignore @@ -0,0 +1,8 @@ +Makefile +mkmf.log +eventids1.c +eventids2table.c +ripper.* +ids1 +ids2 +extconf.h diff --git a/trunk/ext/ripper/README b/trunk/ext/ripper/README new file mode 100644 index 0000000000..0825013ba9 --- /dev/null +++ b/trunk/ext/ripper/README @@ -0,0 +1,30 @@ +Ripper README +============= + + Ripper is a Ruby script parser. You can get information + by event-based style from the parser. + + !! WARNING !! + + Ripper is still early-alpha version. + I never assure any kind of backward compatibility. + +Requirements +------------ + + * ruby 1.9 (support CVS HEAD only) + * bison 1.28 or later (Other yaccs do not work) + +Usage +----- + + See test/ripper/* and sample/ripper/*. + +License +------- + + Ruby License. + + Minero Aoki + aamine@loveruby.net + http://i.loveruby.net diff --git a/trunk/ext/ripper/depend b/trunk/ext/ripper/depend new file mode 100644 index 0000000000..120fdc58e7 --- /dev/null +++ b/trunk/ext/ripper/depend @@ -0,0 +1,34 @@ +GEN = $(srcdir)/tools/generate.rb +SRC1 = $(top_srcdir)/parse.y +SRC2 = $(srcdir)/eventids2.c +BISON = bison + +src: ripper.c eventids1.c eventids2table.c + +ripper.o: ripper.c lex.c eventids1.c eventids2.c eventids2table.c + +.y.c: + $(BISON) -t -v -oy.tab.c $< + sed -f $(top_srcdir)/tool/ytab.sed -e "/^#/s!y\.tab\.c!$@!" 
y.tab.c > $@ + @$(RM) y.tab.c + +all: check +static: check + +ripper.y: $(srcdir)/tools/preproc.rb $(top_srcdir)/parse.y + $(RUBY) $(srcdir)/tools/preproc.rb $(top_srcdir)/parse.y --output=$@ + +check: $(GEN) $(SRC1) $(SRC2) + $(RUBY) $(GEN) --mode=check --ids1src=$(SRC1) --ids2src=$(SRC2) + +eventids1.c: $(srcdir)/tools/generate.rb $(SRC1) + $(RUBY) $(GEN) --mode=eventids1 --ids1src=$(SRC1) --output=$@ + +eventids2table.c: $(srcdir)/tools/generate.rb $(SRC2) + $(RUBY) $(GEN) --mode=eventids2table --ids2src=$(SRC2) --output=$@ + +# Entries for Ripper maintainer + +preproc: ripper.E +ripper.E: ripper.c + $(CC) -E $(CPPFLAGS) ripper.c | $(RUBY) $(srcdir)/tools/strip.rb > $@ diff --git a/trunk/ext/ripper/eventids2.c b/trunk/ext/ripper/eventids2.c new file mode 100644 index 0000000000..629381448a --- /dev/null +++ b/trunk/ext/ripper/eventids2.c @@ -0,0 +1,277 @@ +#define tIGNORED_NL (tLAST_TOKEN + 1) +#define tCOMMENT (tLAST_TOKEN + 2) +#define tEMBDOC_BEG (tLAST_TOKEN + 3) +#define tEMBDOC (tLAST_TOKEN + 4) +#define tEMBDOC_END (tLAST_TOKEN + 5) +#define tSP (tLAST_TOKEN + 6) +#define tHEREDOC_BEG (tLAST_TOKEN + 7) +#define tHEREDOC_END (tLAST_TOKEN + 8) +#define k__END__ (tLAST_TOKEN + 9) + +static ID ripper_id_backref; +static ID ripper_id_backtick; +static ID ripper_id_comma; +static ID ripper_id_const; +static ID ripper_id_cvar; +static ID ripper_id_embexpr_beg; +static ID ripper_id_embexpr_end; +static ID ripper_id_embvar; +static ID ripper_id_float; +static ID ripper_id_gvar; +static ID ripper_id_ident; +static ID ripper_id_int; +static ID ripper_id_ivar; +static ID ripper_id_kw; +static ID ripper_id_lbrace; +static ID ripper_id_lbracket; +static ID ripper_id_lparen; +static ID ripper_id_nl; +static ID ripper_id_op; +static ID ripper_id_period; +static ID ripper_id_rbrace; +static ID ripper_id_rbracket; +static ID ripper_id_rparen; +static ID ripper_id_semicolon; +static ID ripper_id_symbeg; +static ID ripper_id_tstring_beg; +static ID ripper_id_tstring_content; 
+static ID ripper_id_tstring_end; +static ID ripper_id_words_beg; +static ID ripper_id_qwords_beg; +static ID ripper_id_words_sep; +static ID ripper_id_regexp_beg; +static ID ripper_id_regexp_end; +static ID ripper_id_label; +static ID ripper_id_tlambda; +static ID ripper_id_tlambeg; + +static ID ripper_id_ignored_nl; +static ID ripper_id_comment; +static ID ripper_id_embdoc_beg; +static ID ripper_id_embdoc; +static ID ripper_id_embdoc_end; +static ID ripper_id_sp; +static ID ripper_id_heredoc_beg; +static ID ripper_id_heredoc_end; +static ID ripper_id___end__; +static ID ripper_id_CHAR; + +#include "eventids2table.c" + +static void +ripper_init_eventids2(VALUE self) +{ + ripper_id_backref = rb_intern("on_backref"); + ripper_id_backtick = rb_intern("on_backtick"); + ripper_id_comma = rb_intern("on_comma"); + ripper_id_const = rb_intern("on_const"); + ripper_id_cvar = rb_intern("on_cvar"); + ripper_id_embexpr_beg = rb_intern("on_embexpr_beg"); + ripper_id_embexpr_end = rb_intern("on_embexpr_end"); + ripper_id_embvar = rb_intern("on_embvar"); + ripper_id_float = rb_intern("on_float"); + ripper_id_gvar = rb_intern("on_gvar"); + ripper_id_ident = rb_intern("on_ident"); + ripper_id_int = rb_intern("on_int"); + ripper_id_ivar = rb_intern("on_ivar"); + ripper_id_kw = rb_intern("on_kw"); + ripper_id_lbrace = rb_intern("on_lbrace"); + ripper_id_lbracket = rb_intern("on_lbracket"); + ripper_id_lparen = rb_intern("on_lparen"); + ripper_id_nl = rb_intern("on_nl"); + ripper_id_op = rb_intern("on_op"); + ripper_id_period = rb_intern("on_period"); + ripper_id_rbrace = rb_intern("on_rbrace"); + ripper_id_rbracket = rb_intern("on_rbracket"); + ripper_id_rparen = rb_intern("on_rparen"); + ripper_id_semicolon = rb_intern("on_semicolon"); + ripper_id_symbeg = rb_intern("on_symbeg"); + ripper_id_tstring_beg = rb_intern("on_tstring_beg"); + ripper_id_tstring_content = rb_intern("on_tstring_content"); + ripper_id_tstring_end = rb_intern("on_tstring_end"); + ripper_id_words_beg = 
rb_intern("on_words_beg"); + ripper_id_qwords_beg = rb_intern("on_qwords_beg"); + ripper_id_words_sep = rb_intern("on_words_sep"); + ripper_id_regexp_beg = rb_intern("on_regexp_beg"); + ripper_id_regexp_end = rb_intern("on_regexp_end"); + ripper_id_label = rb_intern("on_label"); + ripper_id_tlambda = rb_intern("on_tlambda"); + ripper_id_tlambeg = rb_intern("on_tlambeg"); + + ripper_id_ignored_nl = rb_intern("on_ignored_nl"); + ripper_id_comment = rb_intern("on_comment"); + ripper_id_embdoc_beg = rb_intern("on_embdoc_beg"); + ripper_id_embdoc = rb_intern("on_embdoc"); + ripper_id_embdoc_end = rb_intern("on_embdoc_end"); + ripper_id_sp = rb_intern("on_sp"); + ripper_id_heredoc_beg = rb_intern("on_heredoc_beg"); + ripper_id_heredoc_end = rb_intern("on_heredoc_end"); + ripper_id___end__ = rb_intern("on___end__"); + ripper_id_CHAR = rb_intern("on_CHAR"); + + ripper_init_eventids2_table(self); +} + +static const struct token_assoc { + int token; + ID *id; +} token_to_eventid[] = { + {' ', &ripper_id_words_sep}, + {'!', &ripper_id_op}, + {'%', &ripper_id_op}, + {'&', &ripper_id_op}, + {'*', &ripper_id_op}, + {'+', &ripper_id_op}, + {'-', &ripper_id_op}, + {'/', &ripper_id_op}, + {'<', &ripper_id_op}, + {'=', &ripper_id_op}, + {'>', &ripper_id_op}, + {'?', &ripper_id_op}, + {'^', &ripper_id_op}, + {'|', &ripper_id_op}, + {'~', &ripper_id_op}, + {':', &ripper_id_op}, + {',', &ripper_id_comma}, + {'.', &ripper_id_period}, + {';', &ripper_id_semicolon}, + {'`', &ripper_id_backtick}, + {'\n', &ripper_id_nl}, + {keyword_alias, &ripper_id_kw}, + {keyword_and, &ripper_id_kw}, + {keyword_begin, &ripper_id_kw}, + {keyword_break, &ripper_id_kw}, + {keyword_case, &ripper_id_kw}, + {keyword_class, &ripper_id_kw}, + {keyword_def, &ripper_id_kw}, + {keyword_defined, &ripper_id_kw}, + {keyword_do, &ripper_id_kw}, + {keyword_do_block, &ripper_id_kw}, + {keyword_do_cond, &ripper_id_kw}, + {keyword_else, &ripper_id_kw}, + {keyword_elsif, &ripper_id_kw}, + {keyword_end, &ripper_id_kw}, + 
{keyword_ensure, &ripper_id_kw}, + {keyword_false, &ripper_id_kw}, + {keyword_for, &ripper_id_kw}, + {keyword_if, &ripper_id_kw}, + {modifier_if, &ripper_id_kw}, + {keyword_in, &ripper_id_kw}, + {keyword_module, &ripper_id_kw}, + {keyword_next, &ripper_id_kw}, + {keyword_nil, &ripper_id_kw}, + {keyword_not, &ripper_id_kw}, + {keyword_or, &ripper_id_kw}, + {keyword_redo, &ripper_id_kw}, + {keyword_rescue, &ripper_id_kw}, + {modifier_rescue, &ripper_id_kw}, + {keyword_retry, &ripper_id_kw}, + {keyword_return, &ripper_id_kw}, + {keyword_self, &ripper_id_kw}, + {keyword_super, &ripper_id_kw}, + {keyword_then, &ripper_id_kw}, + {keyword_true, &ripper_id_kw}, + {keyword_undef, &ripper_id_kw}, + {keyword_unless, &ripper_id_kw}, + {modifier_unless, &ripper_id_kw}, + {keyword_until, &ripper_id_kw}, + {modifier_until, &ripper_id_kw}, + {keyword_when, &ripper_id_kw}, + {keyword_while, &ripper_id_kw}, + {modifier_while, &ripper_id_kw}, + {keyword_yield, &ripper_id_kw}, + {keyword__FILE__, &ripper_id_kw}, + {keyword__LINE__, &ripper_id_kw}, + {keyword_BEGIN, &ripper_id_kw}, + {keyword_END, &ripper_id_kw}, + {tAMPER, &ripper_id_op}, + {tANDOP, &ripper_id_op}, + {tAREF, &ripper_id_op}, + {tASET, &ripper_id_op}, + {tASSOC, &ripper_id_op}, + {tBACK_REF, &ripper_id_backref}, + {tCHAR, &ripper_id_CHAR}, + {tCMP, &ripper_id_op}, + {tCOLON2, &ripper_id_op}, + {tCOLON3, &ripper_id_op}, + {tCONSTANT, &ripper_id_const}, + {tCVAR, &ripper_id_cvar}, + {tDOT2, &ripper_id_op}, + {tDOT3, &ripper_id_op}, + {tEQ, &ripper_id_op}, + {tEQQ, &ripper_id_op}, + {tFID, &ripper_id_ident}, + {tFLOAT, &ripper_id_float}, + {tGEQ, &ripper_id_op}, + {tGVAR, &ripper_id_gvar}, + {tIDENTIFIER, &ripper_id_ident}, + {tINTEGER, &ripper_id_int}, + {tIVAR, &ripper_id_ivar}, + {tLBRACE, &ripper_id_lbrace}, + {tLBRACE_ARG, &ripper_id_lbrace}, + {'{', &ripper_id_lbrace}, + {'}', &ripper_id_rbrace}, + {tLBRACK, &ripper_id_lbracket}, + {'[', &ripper_id_lbracket}, + {']', &ripper_id_rbracket}, + {tLEQ, &ripper_id_op}, + 
{tLPAREN, &ripper_id_lparen}, + {tLPAREN_ARG, &ripper_id_lparen}, + {'(', &ripper_id_lparen}, + {')', &ripper_id_rparen}, + {tLSHFT, &ripper_id_op}, + {tMATCH, &ripper_id_op}, + {tNEQ, &ripper_id_op}, + {tNMATCH, &ripper_id_op}, + {tNTH_REF, &ripper_id_backref}, + {tOP_ASGN, &ripper_id_op}, + {tOROP, &ripper_id_op}, + {tPOW, &ripper_id_op}, + {tQWORDS_BEG, &ripper_id_qwords_beg}, + {tREGEXP_BEG, &ripper_id_regexp_beg}, + {tREGEXP_END, &ripper_id_regexp_end}, + {tRPAREN, &ripper_id_rparen}, + {tRSHFT, &ripper_id_op}, + {tSTAR, &ripper_id_op}, + {tSTRING_BEG, &ripper_id_tstring_beg}, + {tSTRING_CONTENT, &ripper_id_tstring_content}, + {tSTRING_DBEG, &ripper_id_embexpr_beg}, + {tSTRING_DVAR, &ripper_id_embvar}, + {tSTRING_END, &ripper_id_tstring_end}, + {tSYMBEG, &ripper_id_symbeg}, + {tUMINUS, &ripper_id_op}, + {tUMINUS_NUM, &ripper_id_op}, + {tUPLUS, &ripper_id_op}, + {tWORDS_BEG, &ripper_id_words_beg}, + {tXSTRING_BEG, &ripper_id_backtick}, + {tLABEL, &ripper_id_label}, + {tLAMBDA, &ripper_id_tlambda}, + {tLAMBEG, &ripper_id_tlambeg}, + + /* ripper specific tokens */ + {tIGNORED_NL, &ripper_id_ignored_nl}, + {tCOMMENT, &ripper_id_comment}, + {tEMBDOC_BEG, &ripper_id_embdoc_beg}, + {tEMBDOC, &ripper_id_embdoc}, + {tEMBDOC_END, &ripper_id_embdoc_end}, + {tSP, &ripper_id_sp}, + {tHEREDOC_BEG, &ripper_id_heredoc_beg}, + {tHEREDOC_END, &ripper_id_heredoc_end}, + {k__END__, &ripper_id___end__}, + {0, NULL} +}; + +static ID +ripper_token2eventid(int tok) +{ + const struct token_assoc *a; + + for (a = token_to_eventid; a->id != NULL; a++) { + if (a->token == tok) + return *a->id; + } + if (tok < 256) { + return ripper_id_CHAR; + } + rb_raise(rb_eRuntimeError, "[Ripper FATAL] unknown token %d", tok); +} diff --git a/trunk/ext/ripper/extconf.rb b/trunk/ext/ripper/extconf.rb new file mode 100644 index 0000000000..a41a6af2d8 --- /dev/null +++ b/trunk/ext/ripper/extconf.rb @@ -0,0 +1,22 @@ +#!ruby -s + +require 'mkmf' +require 'rbconfig' + +def main + unless 
find_executable('bison') + unless File.exist?('ripper.c') or File.exist?("#{$srcdir}/ripper.c") + Logging.message 'missing bison; abort' + return + end + end + $objs = %w(ripper.o) + $cleanfiles.concat %w(ripper.y ripper.c ripper.E ripper.output eventids1.c eventids2table.c) + $defs << '-DRIPPER' + $defs << '-DRIPPER_DEBUG' if $debug + $VPATH << '$(topdir)' << '$(top_srcdir)' + $INCFLAGS << ' -I$(topdir) -I$(top_srcdir)' + create_makefile 'ripper' +end + +main diff --git a/trunk/ext/ripper/lib/ripper.rb b/trunk/ext/ripper/lib/ripper.rb new file mode 100644 index 0000000000..cb19da334a --- /dev/null +++ b/trunk/ext/ripper/lib/ripper.rb @@ -0,0 +1,4 @@ +require 'ripper/core' +require 'ripper/lexer' +require 'ripper/filter' +require 'ripper/sexp' diff --git a/trunk/ext/ripper/lib/ripper/core.rb b/trunk/ext/ripper/lib/ripper/core.rb new file mode 100644 index 0000000000..35aa54d090 --- /dev/null +++ b/trunk/ext/ripper/lib/ripper/core.rb @@ -0,0 +1,70 @@ +# +# $Id$ +# +# Copyright (c) 2003-2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper.so' + +class Ripper + + # Parses Ruby program read from _src_. + # _src_ must be a String or a IO or a object which has #gets method. + def Ripper.parse(src, filename = '(ripper)', lineno = 1) + new(src, filename, lineno).parse + end + + # This array contains name of parser events. + PARSER_EVENTS = PARSER_EVENT_TABLE.keys + + # This array contains name of scanner events. + SCANNER_EVENTS = SCANNER_EVENT_TABLE.keys + + # This array contains name of all ripper events. + EVENTS = PARSER_EVENTS + SCANNER_EVENTS + + private + + # + # Parser Events + # + + PARSER_EVENT_TABLE.each do |id, arity| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{id}(#{ ('a'..'z').to_a[0, arity].join(', ') }) + #{arity == 0 ? 
'nil' : 'a'} + end + End + end + + # This method is called when weak warning is produced by the parser. + # _fmt_ and _args_ is printf style. + def warn(fmt, *args) + end + + # This method is called when strong warning is produced by the parser. + # _fmt_ and _args_ is printf style. + def warning(fmt, *args) + end + + # This method is called when the parser found syntax error. + def compile_error(msg) + end + + # + # Scanner Events + # + + SCANNER_EVENTS.each do |id| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{id}(token) + token + end + End + end + +end diff --git a/trunk/ext/ripper/lib/ripper/filter.rb b/trunk/ext/ripper/lib/ripper/filter.rb new file mode 100644 index 0000000000..898501b23c --- /dev/null +++ b/trunk/ext/ripper/lib/ripper/filter.rb @@ -0,0 +1,70 @@ +# +# $Id$ +# +# Copyright (c) 2004,2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper/lexer' + +class Ripper + + # This class handles only scanner events, + # and they are dispatched in the `right' order (same with input). + class Filter + + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + end + + # The file name of the input. + def filename + @__lexer.filename + end + + # The line number of the current token. + # This value starts from 1. + # This method is valid only in event handlers. + def lineno + @__line + end + + # The column number of the current token. + # This value starts from 0. + # This method is valid only in event handlers. + def column + @__col + end + + # Starts parsing. _init_ is a data accumulator. + # It is passed to the next event handler (as of Enumerable#inject). 
+ def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok| + @__line, @__col = *pos + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + # This method is called when some event handler have not defined. + # _event_ is :on_XXX, _token_ is scanned token, _data_ is a data + # accumulator. The return value of this method is passed to the + # next event handler (as of Enumerable#inject). + def on_default(event, token, data) + data + end + + end + +end diff --git a/trunk/ext/ripper/lib/ripper/lexer.rb b/trunk/ext/ripper/lib/ripper/lexer.rb new file mode 100644 index 0000000000..14ef99f034 --- /dev/null +++ b/trunk/ext/ripper/lib/ripper/lexer.rb @@ -0,0 +1,179 @@ +# +# $Id$ +# +# Copyright (c) 2004,2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper/core' + +class Ripper + + # Tokenizes Ruby program and returns an Array of String. + def Ripper.tokenize(src, filename = '-', lineno = 1) + Lexer.new(src, filename, lineno).tokenize + end + + # Tokenizes Ruby program and returns an Array of Array, + # which is formatted like [[lineno, column], type, token]. 
+ # + # require 'ripper' + # require 'pp' + # + # p Ripper.lex("def m(a) nil end") + # #=> [[[1, 0], :on_kw, "def"], + # [[1, 3], :on_sp, " " ], + # [[1, 4], :on_ident, "m" ], + # [[1, 5], :on_lparen, "(" ], + # [[1, 6], :on_ident, "a" ], + # [[1, 7], :on_rparen, ")" ], + # [[1, 8], :on_sp, " " ], + # [[1, 9], :on_kw, "nil"], + # [[1, 12], :on_sp, " " ], + # [[1, 13], :on_kw, "end"]] + # + def Ripper.lex(src, filename = '-', lineno = 1) + Lexer.new(src, filename, lineno).lex + end + + class Lexer < ::Ripper #:nodoc: internal use only + def tokenize + lex().map {|pos, event, tok| tok } + end + + def lex + parse().sort_by {|pos, event, tok| pos } + end + + def parse + @buf = [] + super + @buf + end + + private + + SCANNER_EVENTS.each do |event| + module_eval(<<-End, __FILE__+'/module_eval', __LINE__ + 1) + def on_#{event}(tok) + @buf.push [[lineno(), column()], :on_#{event}, tok] + end + End + end + end + + # [EXPERIMENTAL] + # Parses +src+ and returns the string that matched +pattern+. + # +pattern+ should be described as a Regexp.
+ # + # require 'ripper' + # + # p Ripper.slice('def m(a) nil end', 'ident') #=> "m" + # p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+') #=> "m(a)" + # p Ripper.slice("<<EOS\nstring\nEOS", + # 'heredoc_beg nl $(tstring_content*) heredoc_end', 1) + # #=> "string\n" + # + def Ripper.slice(src, pattern, n = 0) + if m = token_match(src, pattern) + then m.string(n) + else nil + end + end + + def Ripper.token_match(src, pattern) #:nodoc: + TokenPattern.compile(pattern).match(src) + end + + class TokenPattern #:nodoc: + + class Error < ::StandardError; end + class CompileError < Error; end + class MatchError < Error; end + + class << self + alias compile new + end + + def initialize(pattern) + @source = pattern + @re = compile(pattern) + end + + def match(str) + match_list(::Ripper.lex(str)) + end + + def match_list(tokens) + if m = @re.match(map_tokens(tokens)) + then MatchData.new(tokens, m) + else nil + end + end + + private + + def compile(pattern) + if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern) + raise CompileError, "invalid char in pattern: #{m[0].inspect}" + end + buf = '' + pattern.scan(/(?:\w+|\$\(|[()\[\]\{\}?*+\.]+)/) do |tok| + case tok + when /\w/ + buf.concat map_token(tok) + when '$(' + buf.concat '(' + when '(' + buf.concat '(?:' + when /[?*\[\])\.]/ + buf.concat tok + else + raise 'must not happen' + end + end + Regexp.compile(buf) + rescue RegexpError => err + raise CompileError, err.message + end + + def map_tokens(tokens) + tokens.map {|pos,type,str| map_token(type.to_s.sub(/\Aon_/,'')) }.join + end + + MAP = {} + seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a + SCANNER_EVENT_TABLE.each do |ev, | + raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty? 
+ MAP[ev.to_s.sub(/\Aon_/,'')] = seed.shift + end + + def map_token(tok) + MAP[tok] or raise CompileError, "unknown token: #{tok}" + end + + class MatchData + def initialize(tokens, match) + @tokens = tokens + @match = match + end + + def string(n = 0) + return nil unless @match + match(n).join + end + + private + + def match(n = 0) + return [] unless @match + @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str } + end + end + + end + +end diff --git a/trunk/ext/ripper/lib/ripper/sexp.rb b/trunk/ext/ripper/lib/ripper/sexp.rb new file mode 100644 index 0000000000..f2260fe8dd --- /dev/null +++ b/trunk/ext/ripper/lib/ripper/sexp.rb @@ -0,0 +1,99 @@ +# +# $Id$ +# +# Copyright (c) 2004,2005 Minero Aoki +# +# This program is free software. +# You can distribute and/or modify this program under the Ruby License. +# For details of Ruby License, see ruby/COPYING. +# + +require 'ripper/core' + +class Ripper + + # [EXPERIMENTAL] + # Parses +src+ and creates an S-exp tree. + # This method is mainly for developer use.
+ # + # require 'ripper' + # require 'pp' + # + # pp Ripper.sexp("def m(a) nil end") + # #=> [:program, + # [:stmts_add, + # [:stmts_new], + # [:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], + # [:bodystmt, + # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], + # nil, + # nil, + # nil]]]] + # + def Ripper.sexp(src, filename = '-', lineno = 1) + SexpBuilderPP.new(src, filename, lineno).parse + end + + def Ripper.sexp_raw(src, filename = '-', lineno = 1) + SexpBuilder.new(src, filename, lineno).parse + end + + class SexpBuilderPP < ::Ripper #:nodoc: + private + + PARSER_EVENT_TABLE.each do |event, arity| + if /_new\z/ =~ event.to_s and arity == 0 + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event} + [] + end + End + elsif /_add\z/ =~ event.to_s + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(list, item) + list.push item + list + end + End + else + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(*args) + [:#{event}, *args] + end + End + end + end + + SCANNER_EVENTS.each do |event| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(tok) + [:@#{event}, tok, [lineno(), column()]] + end + End + end + end + + class SexpBuilder < ::Ripper #:nodoc: + private + + PARSER_EVENTS.each do |event| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(*args) + args.unshift :#{event} + args + end + End + end + + SCANNER_EVENTS.each do |event| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(tok) + [:@#{event}, tok, [lineno(), column()]] + end + End + end + end + +end diff --git a/trunk/ext/ripper/tools/generate-param-macros.rb b/trunk/ext/ripper/tools/generate-param-macros.rb new file mode 100755 index 0000000000..b19f6e8d5c --- /dev/null +++ b/trunk/ext/ripper/tools/generate-param-macros.rb @@ -0,0 +1,14 @@ +off = true +ARGF.each do |line| + case line + when /RIPPER_PARAMS_DECL_BEGIN/ + off = false + when /RIPPER_PARAMS_DECL_END/ + exit +
when /ripper/ + next if off + var = line.scan(/\w+/).last or next + base = var.sub(/ripper_/, '') + puts %"\#define #{base}\t\t(parser->ripper_#{base})" + end +end diff --git a/trunk/ext/ripper/tools/generate.rb b/trunk/ext/ripper/tools/generate.rb new file mode 100755 index 0000000000..0efb997604 --- /dev/null +++ b/trunk/ext/ripper/tools/generate.rb @@ -0,0 +1,152 @@ +# $Id$ + +require 'optparse' + +def main + mode = nil + ids1src = nil + ids2src = nil + template = nil + output = nil + + parser = @parser = OptionParser.new + parser.banner = "Usage: #{File.basename($0)} --mode=MODE [--ids1src=PATH] [--ids2src=PATH] [--output=PATH]" + parser.on('--mode=MODE', 'check, eventids1, or eventids2table.') {|m| + mode = m + } + parser.on('--ids1src=PATH', 'A source file of event-IDs 1 (parse.y).') {|path| + ids1src = path + } + parser.on('--ids2src=PATH', 'A source file of event-IDs 2 (eventids2.c).') {|path| + ids2src = path + } + parser.on('--output=PATH', 'An output file.') {|path| + output = path + } + parser.on('--help', 'Prints this message and quit.') { + puts parser.help + exit true + } + begin + parser.parse! + rescue OptionParser::ParseError => err + usage err.message + end + usage 'no mode given' unless mode + case mode + when 'check' + usage 'no --ids1src' unless ids1src + usage 'no --ids2src' unless ids2src + h = read_ids1_with_locations(ids1src) + check_arity h + ids2 = read_ids2(ids2src) + common = h.keys & ids2 + unless common.empty? 
+ abort "event crash: #{common.join(' ')}" + end + exit 0 + when 'eventids1' + usage 'no --ids1src' unless ids1src + result = generate_eventids1(read_ids1(ids1src)) + when 'eventids2table' + usage 'no --ids2src' unless ids2src + result = generate_eventids2_table(read_ids2(ids2src)) + end + if output + File.open(output, 'w') {|f| + f.write result + } + else + puts result + end +end + +def usage(msg) + $stderr.puts msg + $stderr.puts @parser.help + exit false +end + +def generate_eventids1(ids) + buf = "" + ids.each do |id, arity| + buf << %Q[static ID ripper_id_#{id};\n] + end + buf << %Q[\n] + buf << %Q[static void\n] + buf << %Q[ripper_init_eventids1(VALUE self)\n] + buf << %Q[{\n] + buf << %Q[ VALUE h;\n] + buf << %Q[ ID id;\n] + ids.each do |id, arity| + buf << %Q[ ripper_id_#{id} = rb_intern("on_#{id}");\n] + end + buf << %Q[\n] + buf << %Q[ h = rb_hash_new();\n] + buf << %Q[ rb_define_const(self, "PARSER_EVENT_TABLE", h);\n] + ids.each do |id, arity| + buf << %Q[ id = rb_intern("#{id}");\n] + buf << %Q[ rb_hash_aset(h, ID2SYM(id), INT2NUM(#{arity}));\n] + end + buf << %Q[}\n] + buf +end + +def generate_eventids2_table(ids) + buf = "" + buf << %Q[static void\n] + buf << %Q[ripper_init_eventids2_table(VALUE self)\n] + buf << %Q[{\n] + buf << %Q[ VALUE h = rb_hash_new();\n] + buf << %Q[ ID id;\n] + buf << %Q[ rb_define_const(self, "SCANNER_EVENT_TABLE", h);\n] + ids.each do |id| + buf << %Q[ id = rb_intern("#{id}");\n] + buf << %Q[ rb_hash_aset(h, ID2SYM(id), INT2NUM(1));\n] + end + buf << %Q[}\n] + buf +end + +def read_ids1(path) + strip_locations(read_ids1_with_locations(path)) +end + +def strip_locations(h) + h.map {|event, list| [event, list.first[1]] }\ + .sort_by {|event, arity| event.to_s } +end + +def check_arity(h) + invalid = false + h.each do |event, list| + unless list.map {|line, arity| arity }.uniq.size == 1 + invalid = true + locations = list.map {|line, a| "#{line}:#{a}" }.join(', ') + $stderr.puts "arity crash [event=#{event}]: #{locations}" + 
end + end + abort if invalid +end + +def read_ids1_with_locations(path) + h = {} + File.open(path) {|f| + f.each do |line| + next if /\A\#\s*define\s+s?dispatch/ =~ line + next if /ripper_dispatch/ =~ line + line.scan(/dispatch(\d)\((\w+)/) do |arity, event| + (h[event] ||= []).push [f.lineno, arity.to_i] + end + end + } + h +end + +def read_ids2(path) + File.open(path) {|f| + return f.read.scan(/ripper_id_(\w+)/).flatten.uniq.sort + } +end + +main diff --git a/trunk/ext/ripper/tools/preproc.rb b/trunk/ext/ripper/tools/preproc.rb new file mode 100755 index 0000000000..06397cea05 --- /dev/null +++ b/trunk/ext/ripper/tools/preproc.rb @@ -0,0 +1,91 @@ +# $Id$ + +require 'optparse' + +def main + output = nil + parser = OptionParser.new + parser.banner = "Usage: #{File.basename($0)} [--output=PATH] <parse.y>" + parser.on('--output=PATH', 'An output file.') {|path| + output = path + } + parser.on('--help', 'Prints this message and quit.') { + puts parser.help + exit true + } + begin + parser.parse! 
+ rescue OptionParser::ParseError => err + $stderr.puts err.message + $stderr.puts parser.help + exit false + end + unless ARGV.size == 1 + abort "wrong number of arguments (#{ARGV.size} for 1)" + end + out = "" + File.open(ARGV[0]) {|f| + prelude f, out + grammar f, out + usercode f, out + } + if output + File.open(output, 'w') {|f| + f.write out + } + else + print out + end +end + +def prelude(f, out) + while line = f.gets + case line + when %r</\*%%%\*/> + out << '/*' << $/ + when %r</\*%> + out << '*/' << $/ + when %r<%\*/> + out << $/ + when /\A%%/ + out << '%%' << $/ + return + when /\A%token/ + out << line.sub(/<\w+>/, '<val>') + when /\A%type/ + out << line.sub(/<\w+>/, '<val>') + else + out << line + end + end +end + +def grammar(f, out) + while line = f.gets + case line + when %r</\*%%%\*/> + out << '#if 0' << $/ + when %r</\*%c%\*/> + out << '/*' << $/ + when %r</\*%c> + out << '*/' << $/ + when %r</\*%> + out << '#endif' << $/ + when %r<%\*/> + out << $/ + when /\A%%/ + out << '%%' << $/ + return + else + out << line + end + end +end + +def usercode(f, out) + while line = f.gets + out << line + end +end + +main diff --git a/trunk/ext/ripper/tools/strip.rb b/trunk/ext/ripper/tools/strip.rb new file mode 100755 index 0000000000..99413c361d --- /dev/null +++ b/trunk/ext/ripper/tools/strip.rb @@ -0,0 +1,12 @@ +last_is_void = false +ARGF.each do |line| + if line.strip.empty? + #puts() unless last_is_void + last_is_void = true + elsif /\A\#/ === line + ; + else + print line + last_is_void = false + end +end |