summaryrefslogtreecommitdiff
path: root/trunk/ext/ripper
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/ext/ripper')
-rw-r--r--trunk/ext/ripper/.cvsignore8
-rw-r--r--trunk/ext/ripper/README30
-rw-r--r--trunk/ext/ripper/depend34
-rw-r--r--trunk/ext/ripper/eventids2.c277
-rw-r--r--trunk/ext/ripper/extconf.rb22
-rw-r--r--trunk/ext/ripper/lib/ripper.rb4
-rw-r--r--trunk/ext/ripper/lib/ripper/core.rb70
-rw-r--r--trunk/ext/ripper/lib/ripper/filter.rb70
-rw-r--r--trunk/ext/ripper/lib/ripper/lexer.rb179
-rw-r--r--trunk/ext/ripper/lib/ripper/sexp.rb99
-rwxr-xr-xtrunk/ext/ripper/tools/generate-param-macros.rb14
-rwxr-xr-xtrunk/ext/ripper/tools/generate.rb152
-rwxr-xr-xtrunk/ext/ripper/tools/preproc.rb91
-rwxr-xr-xtrunk/ext/ripper/tools/strip.rb12
14 files changed, 1062 insertions, 0 deletions
diff --git a/trunk/ext/ripper/.cvsignore b/trunk/ext/ripper/.cvsignore
new file mode 100644
index 0000000000..c9adce0b1a
--- /dev/null
+++ b/trunk/ext/ripper/.cvsignore
@@ -0,0 +1,8 @@
+Makefile
+mkmf.log
+eventids1.c
+eventids2table.c
+ripper.*
+ids1
+ids2
+extconf.h
diff --git a/trunk/ext/ripper/README b/trunk/ext/ripper/README
new file mode 100644
index 0000000000..0825013ba9
--- /dev/null
+++ b/trunk/ext/ripper/README
@@ -0,0 +1,30 @@
+Ripper README
+=============
+
+ Ripper is a Ruby script parser. You can get information
+ from the parser in an event-based style.
+
+ !! WARNING !!
+
+ Ripper is still an early-alpha version.
+ I do not guarantee any kind of backward compatibility.
+
+Requirements
+------------
+
+ * ruby 1.9 (only CVS HEAD is supported)
+ * bison 1.28 or later (Other yaccs do not work)
+
+Usage
+-----
+
+ See test/ripper/* and sample/ripper/*.
+
+License
+-------
+
+ Ruby License.
+
+ Minero Aoki
+ aamine@loveruby.net
+ http://i.loveruby.net
diff --git a/trunk/ext/ripper/depend b/trunk/ext/ripper/depend
new file mode 100644
index 0000000000..120fdc58e7
--- /dev/null
+++ b/trunk/ext/ripper/depend
@@ -0,0 +1,34 @@
+GEN = $(srcdir)/tools/generate.rb
+SRC1 = $(top_srcdir)/parse.y
+SRC2 = $(srcdir)/eventids2.c
+BISON = bison
+
+src: ripper.c eventids1.c eventids2table.c
+
+ripper.o: ripper.c lex.c eventids1.c eventids2.c eventids2table.c
+
+.y.c:
+ $(BISON) -t -v -oy.tab.c $<
+ sed -f $(top_srcdir)/tool/ytab.sed -e "/^#/s!y\.tab\.c!$@!" y.tab.c > $@
+ @$(RM) y.tab.c
+
+all: check
+static: check
+
+ripper.y: $(srcdir)/tools/preproc.rb $(top_srcdir)/parse.y
+ $(RUBY) $(srcdir)/tools/preproc.rb $(top_srcdir)/parse.y --output=$@
+
+check: $(GEN) $(SRC1) $(SRC2)
+ $(RUBY) $(GEN) --mode=check --ids1src=$(SRC1) --ids2src=$(SRC2)
+
+eventids1.c: $(srcdir)/tools/generate.rb $(SRC1)
+ $(RUBY) $(GEN) --mode=eventids1 --ids1src=$(SRC1) --output=$@
+
+eventids2table.c: $(srcdir)/tools/generate.rb $(SRC2)
+ $(RUBY) $(GEN) --mode=eventids2table --ids2src=$(SRC2) --output=$@
+
+# Entries for Ripper maintainer
+
+preproc: ripper.E
+ripper.E: ripper.c
+ $(CC) -E $(CPPFLAGS) ripper.c | $(RUBY) $(srcdir)/tools/strip.rb > $@
diff --git a/trunk/ext/ripper/eventids2.c b/trunk/ext/ripper/eventids2.c
new file mode 100644
index 0000000000..629381448a
--- /dev/null
+++ b/trunk/ext/ripper/eventids2.c
@@ -0,0 +1,277 @@
+#define tIGNORED_NL (tLAST_TOKEN + 1)
+#define tCOMMENT (tLAST_TOKEN + 2)
+#define tEMBDOC_BEG (tLAST_TOKEN + 3)
+#define tEMBDOC (tLAST_TOKEN + 4)
+#define tEMBDOC_END (tLAST_TOKEN + 5)
+#define tSP (tLAST_TOKEN + 6)
+#define tHEREDOC_BEG (tLAST_TOKEN + 7)
+#define tHEREDOC_END (tLAST_TOKEN + 8)
+#define k__END__ (tLAST_TOKEN + 9)
+
+static ID ripper_id_backref;
+static ID ripper_id_backtick;
+static ID ripper_id_comma;
+static ID ripper_id_const;
+static ID ripper_id_cvar;
+static ID ripper_id_embexpr_beg;
+static ID ripper_id_embexpr_end;
+static ID ripper_id_embvar;
+static ID ripper_id_float;
+static ID ripper_id_gvar;
+static ID ripper_id_ident;
+static ID ripper_id_int;
+static ID ripper_id_ivar;
+static ID ripper_id_kw;
+static ID ripper_id_lbrace;
+static ID ripper_id_lbracket;
+static ID ripper_id_lparen;
+static ID ripper_id_nl;
+static ID ripper_id_op;
+static ID ripper_id_period;
+static ID ripper_id_rbrace;
+static ID ripper_id_rbracket;
+static ID ripper_id_rparen;
+static ID ripper_id_semicolon;
+static ID ripper_id_symbeg;
+static ID ripper_id_tstring_beg;
+static ID ripper_id_tstring_content;
+static ID ripper_id_tstring_end;
+static ID ripper_id_words_beg;
+static ID ripper_id_qwords_beg;
+static ID ripper_id_words_sep;
+static ID ripper_id_regexp_beg;
+static ID ripper_id_regexp_end;
+static ID ripper_id_label;
+static ID ripper_id_tlambda;
+static ID ripper_id_tlambeg;
+
+static ID ripper_id_ignored_nl;
+static ID ripper_id_comment;
+static ID ripper_id_embdoc_beg;
+static ID ripper_id_embdoc;
+static ID ripper_id_embdoc_end;
+static ID ripper_id_sp;
+static ID ripper_id_heredoc_beg;
+static ID ripper_id_heredoc_end;
+static ID ripper_id___end__;
+static ID ripper_id_CHAR;
+
+#include "eventids2table.c"
+
+static void
+ripper_init_eventids2(VALUE self)
+{
+ ripper_id_backref = rb_intern("on_backref");
+ ripper_id_backtick = rb_intern("on_backtick");
+ ripper_id_comma = rb_intern("on_comma");
+ ripper_id_const = rb_intern("on_const");
+ ripper_id_cvar = rb_intern("on_cvar");
+ ripper_id_embexpr_beg = rb_intern("on_embexpr_beg");
+ ripper_id_embexpr_end = rb_intern("on_embexpr_end");
+ ripper_id_embvar = rb_intern("on_embvar");
+ ripper_id_float = rb_intern("on_float");
+ ripper_id_gvar = rb_intern("on_gvar");
+ ripper_id_ident = rb_intern("on_ident");
+ ripper_id_int = rb_intern("on_int");
+ ripper_id_ivar = rb_intern("on_ivar");
+ ripper_id_kw = rb_intern("on_kw");
+ ripper_id_lbrace = rb_intern("on_lbrace");
+ ripper_id_lbracket = rb_intern("on_lbracket");
+ ripper_id_lparen = rb_intern("on_lparen");
+ ripper_id_nl = rb_intern("on_nl");
+ ripper_id_op = rb_intern("on_op");
+ ripper_id_period = rb_intern("on_period");
+ ripper_id_rbrace = rb_intern("on_rbrace");
+ ripper_id_rbracket = rb_intern("on_rbracket");
+ ripper_id_rparen = rb_intern("on_rparen");
+ ripper_id_semicolon = rb_intern("on_semicolon");
+ ripper_id_symbeg = rb_intern("on_symbeg");
+ ripper_id_tstring_beg = rb_intern("on_tstring_beg");
+ ripper_id_tstring_content = rb_intern("on_tstring_content");
+ ripper_id_tstring_end = rb_intern("on_tstring_end");
+ ripper_id_words_beg = rb_intern("on_words_beg");
+ ripper_id_qwords_beg = rb_intern("on_qwords_beg");
+ ripper_id_words_sep = rb_intern("on_words_sep");
+ ripper_id_regexp_beg = rb_intern("on_regexp_beg");
+ ripper_id_regexp_end = rb_intern("on_regexp_end");
+ ripper_id_label = rb_intern("on_label");
+ ripper_id_tlambda = rb_intern("on_tlambda");
+ ripper_id_tlambeg = rb_intern("on_tlambeg");
+
+ ripper_id_ignored_nl = rb_intern("on_ignored_nl");
+ ripper_id_comment = rb_intern("on_comment");
+ ripper_id_embdoc_beg = rb_intern("on_embdoc_beg");
+ ripper_id_embdoc = rb_intern("on_embdoc");
+ ripper_id_embdoc_end = rb_intern("on_embdoc_end");
+ ripper_id_sp = rb_intern("on_sp");
+ ripper_id_heredoc_beg = rb_intern("on_heredoc_beg");
+ ripper_id_heredoc_end = rb_intern("on_heredoc_end");
+ ripper_id___end__ = rb_intern("on___end__");
+ ripper_id_CHAR = rb_intern("on_CHAR");
+
+ ripper_init_eventids2_table(self);
+}
+
+static const struct token_assoc {
+ int token;
+ ID *id;
+} token_to_eventid[] = {
+ {' ', &ripper_id_words_sep},
+ {'!', &ripper_id_op},
+ {'%', &ripper_id_op},
+ {'&', &ripper_id_op},
+ {'*', &ripper_id_op},
+ {'+', &ripper_id_op},
+ {'-', &ripper_id_op},
+ {'/', &ripper_id_op},
+ {'<', &ripper_id_op},
+ {'=', &ripper_id_op},
+ {'>', &ripper_id_op},
+ {'?', &ripper_id_op},
+ {'^', &ripper_id_op},
+ {'|', &ripper_id_op},
+ {'~', &ripper_id_op},
+ {':', &ripper_id_op},
+ {',', &ripper_id_comma},
+ {'.', &ripper_id_period},
+ {';', &ripper_id_semicolon},
+ {'`', &ripper_id_backtick},
+ {'\n', &ripper_id_nl},
+ {keyword_alias, &ripper_id_kw},
+ {keyword_and, &ripper_id_kw},
+ {keyword_begin, &ripper_id_kw},
+ {keyword_break, &ripper_id_kw},
+ {keyword_case, &ripper_id_kw},
+ {keyword_class, &ripper_id_kw},
+ {keyword_def, &ripper_id_kw},
+ {keyword_defined, &ripper_id_kw},
+ {keyword_do, &ripper_id_kw},
+ {keyword_do_block, &ripper_id_kw},
+ {keyword_do_cond, &ripper_id_kw},
+ {keyword_else, &ripper_id_kw},
+ {keyword_elsif, &ripper_id_kw},
+ {keyword_end, &ripper_id_kw},
+ {keyword_ensure, &ripper_id_kw},
+ {keyword_false, &ripper_id_kw},
+ {keyword_for, &ripper_id_kw},
+ {keyword_if, &ripper_id_kw},
+ {modifier_if, &ripper_id_kw},
+ {keyword_in, &ripper_id_kw},
+ {keyword_module, &ripper_id_kw},
+ {keyword_next, &ripper_id_kw},
+ {keyword_nil, &ripper_id_kw},
+ {keyword_not, &ripper_id_kw},
+ {keyword_or, &ripper_id_kw},
+ {keyword_redo, &ripper_id_kw},
+ {keyword_rescue, &ripper_id_kw},
+ {modifier_rescue, &ripper_id_kw},
+ {keyword_retry, &ripper_id_kw},
+ {keyword_return, &ripper_id_kw},
+ {keyword_self, &ripper_id_kw},
+ {keyword_super, &ripper_id_kw},
+ {keyword_then, &ripper_id_kw},
+ {keyword_true, &ripper_id_kw},
+ {keyword_undef, &ripper_id_kw},
+ {keyword_unless, &ripper_id_kw},
+ {modifier_unless, &ripper_id_kw},
+ {keyword_until, &ripper_id_kw},
+ {modifier_until, &ripper_id_kw},
+ {keyword_when, &ripper_id_kw},
+ {keyword_while, &ripper_id_kw},
+ {modifier_while, &ripper_id_kw},
+ {keyword_yield, &ripper_id_kw},
+ {keyword__FILE__, &ripper_id_kw},
+ {keyword__LINE__, &ripper_id_kw},
+ {keyword_BEGIN, &ripper_id_kw},
+ {keyword_END, &ripper_id_kw},
+ {tAMPER, &ripper_id_op},
+ {tANDOP, &ripper_id_op},
+ {tAREF, &ripper_id_op},
+ {tASET, &ripper_id_op},
+ {tASSOC, &ripper_id_op},
+ {tBACK_REF, &ripper_id_backref},
+ {tCHAR, &ripper_id_CHAR},
+ {tCMP, &ripper_id_op},
+ {tCOLON2, &ripper_id_op},
+ {tCOLON3, &ripper_id_op},
+ {tCONSTANT, &ripper_id_const},
+ {tCVAR, &ripper_id_cvar},
+ {tDOT2, &ripper_id_op},
+ {tDOT3, &ripper_id_op},
+ {tEQ, &ripper_id_op},
+ {tEQQ, &ripper_id_op},
+ {tFID, &ripper_id_ident},
+ {tFLOAT, &ripper_id_float},
+ {tGEQ, &ripper_id_op},
+ {tGVAR, &ripper_id_gvar},
+ {tIDENTIFIER, &ripper_id_ident},
+ {tINTEGER, &ripper_id_int},
+ {tIVAR, &ripper_id_ivar},
+ {tLBRACE, &ripper_id_lbrace},
+ {tLBRACE_ARG, &ripper_id_lbrace},
+ {'{', &ripper_id_lbrace},
+ {'}', &ripper_id_rbrace},
+ {tLBRACK, &ripper_id_lbracket},
+ {'[', &ripper_id_lbracket},
+ {']', &ripper_id_rbracket},
+ {tLEQ, &ripper_id_op},
+ {tLPAREN, &ripper_id_lparen},
+ {tLPAREN_ARG, &ripper_id_lparen},
+ {'(', &ripper_id_lparen},
+ {')', &ripper_id_rparen},
+ {tLSHFT, &ripper_id_op},
+ {tMATCH, &ripper_id_op},
+ {tNEQ, &ripper_id_op},
+ {tNMATCH, &ripper_id_op},
+ {tNTH_REF, &ripper_id_backref},
+ {tOP_ASGN, &ripper_id_op},
+ {tOROP, &ripper_id_op},
+ {tPOW, &ripper_id_op},
+ {tQWORDS_BEG, &ripper_id_qwords_beg},
+ {tREGEXP_BEG, &ripper_id_regexp_beg},
+ {tREGEXP_END, &ripper_id_regexp_end},
+ {tRPAREN, &ripper_id_rparen},
+ {tRSHFT, &ripper_id_op},
+ {tSTAR, &ripper_id_op},
+ {tSTRING_BEG, &ripper_id_tstring_beg},
+ {tSTRING_CONTENT, &ripper_id_tstring_content},
+ {tSTRING_DBEG, &ripper_id_embexpr_beg},
+ {tSTRING_DVAR, &ripper_id_embvar},
+ {tSTRING_END, &ripper_id_tstring_end},
+ {tSYMBEG, &ripper_id_symbeg},
+ {tUMINUS, &ripper_id_op},
+ {tUMINUS_NUM, &ripper_id_op},
+ {tUPLUS, &ripper_id_op},
+ {tWORDS_BEG, &ripper_id_words_beg},
+ {tXSTRING_BEG, &ripper_id_backtick},
+ {tLABEL, &ripper_id_label},
+ {tLAMBDA, &ripper_id_tlambda},
+ {tLAMBEG, &ripper_id_tlambeg},
+
+ /* ripper specific tokens */
+ {tIGNORED_NL, &ripper_id_ignored_nl},
+ {tCOMMENT, &ripper_id_comment},
+ {tEMBDOC_BEG, &ripper_id_embdoc_beg},
+ {tEMBDOC, &ripper_id_embdoc},
+ {tEMBDOC_END, &ripper_id_embdoc_end},
+ {tSP, &ripper_id_sp},
+ {tHEREDOC_BEG, &ripper_id_heredoc_beg},
+ {tHEREDOC_END, &ripper_id_heredoc_end},
+ {k__END__, &ripper_id___end__},
+ {0, NULL}
+};
+
+static ID
+ripper_token2eventid(int tok)
+{
+ const struct token_assoc *a;
+
+ for (a = token_to_eventid; a->id != NULL; a++) {
+ if (a->token == tok)
+ return *a->id;
+ }
+ if (tok < 256) {
+ return ripper_id_CHAR;
+ }
+ rb_raise(rb_eRuntimeError, "[Ripper FATAL] unknown token %d", tok);
+}
diff --git a/trunk/ext/ripper/extconf.rb b/trunk/ext/ripper/extconf.rb
new file mode 100644
index 0000000000..a41a6af2d8
--- /dev/null
+++ b/trunk/ext/ripper/extconf.rb
@@ -0,0 +1,22 @@
+#!ruby -s
+
+require 'mkmf'
+require 'rbconfig'
+
+def main
+ unless find_executable('bison')
+ unless File.exist?('ripper.c') or File.exist?("#{$srcdir}/ripper.c")
+ Logging.message 'missing bison; abort'
+ return
+ end
+ end
+ $objs = %w(ripper.o)
+ $cleanfiles.concat %w(ripper.y ripper.c ripper.E ripper.output eventids1.c eventids2table.c)
+ $defs << '-DRIPPER'
+ $defs << '-DRIPPER_DEBUG' if $debug
+ $VPATH << '$(topdir)' << '$(top_srcdir)'
+ $INCFLAGS << ' -I$(topdir) -I$(top_srcdir)'
+ create_makefile 'ripper'
+end
+
+main
diff --git a/trunk/ext/ripper/lib/ripper.rb b/trunk/ext/ripper/lib/ripper.rb
new file mode 100644
index 0000000000..cb19da334a
--- /dev/null
+++ b/trunk/ext/ripper/lib/ripper.rb
@@ -0,0 +1,4 @@
+require 'ripper/core'
+require 'ripper/lexer'
+require 'ripper/filter'
+require 'ripper/sexp'
diff --git a/trunk/ext/ripper/lib/ripper/core.rb b/trunk/ext/ripper/lib/ripper/core.rb
new file mode 100644
index 0000000000..35aa54d090
--- /dev/null
+++ b/trunk/ext/ripper/lib/ripper/core.rb
@@ -0,0 +1,70 @@
+#
+# $Id$
+#
+# Copyright (c) 2003-2005 Minero Aoki
+#
+# This program is free software.
+# You can distribute and/or modify this program under the Ruby License.
+# For details of Ruby License, see ruby/COPYING.
+#
+
+require 'ripper.so'
+
+class Ripper
+
+ # Parses a Ruby program read from _src_.
+ # _src_ must be a String, an IO, or an object which has a #gets method.
+ def Ripper.parse(src, filename = '(ripper)', lineno = 1)
+ new(src, filename, lineno).parse
+ end
+
+ # This array contains the names of parser events.
+ PARSER_EVENTS = PARSER_EVENT_TABLE.keys
+
+ # This array contains the names of scanner events.
+ SCANNER_EVENTS = SCANNER_EVENT_TABLE.keys
+
+ # This array contains the names of all ripper events.
+ EVENTS = PARSER_EVENTS + SCANNER_EVENTS
+
+ private
+
+ #
+ # Parser Events
+ #
+
+ PARSER_EVENT_TABLE.each do |id, arity|
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{id}(#{ ('a'..'z').to_a[0, arity].join(', ') })
+ #{arity == 0 ? 'nil' : 'a'}
+ end
+ End
+ end
+
+ # This method is called when a weak warning is produced by the parser.
+ # _fmt_ and _args_ are printf-style.
+ def warn(fmt, *args)
+ end
+
+ # This method is called when a strong warning is produced by the parser.
+ # _fmt_ and _args_ are printf-style.
+ def warning(fmt, *args)
+ end
+
+ # This method is called when the parser finds a syntax error.
+ def compile_error(msg)
+ end
+
+ #
+ # Scanner Events
+ #
+
+ SCANNER_EVENTS.each do |id|
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{id}(token)
+ token
+ end
+ End
+ end
+
+end
diff --git a/trunk/ext/ripper/lib/ripper/filter.rb b/trunk/ext/ripper/lib/ripper/filter.rb
new file mode 100644
index 0000000000..898501b23c
--- /dev/null
+++ b/trunk/ext/ripper/lib/ripper/filter.rb
@@ -0,0 +1,70 @@
+#
+# $Id$
+#
+# Copyright (c) 2004,2005 Minero Aoki
+#
+# This program is free software.
+# You can distribute and/or modify this program under the Ruby License.
+# For details of Ruby License, see ruby/COPYING.
+#
+
+require 'ripper/lexer'
+
+class Ripper
+
+ # This class handles only scanner events,
+ # which are dispatched in the `right' order (the same order as the input).
+ class Filter
+
+ def initialize(src, filename = '-', lineno = 1)
+ @__lexer = Lexer.new(src, filename, lineno)
+ @__line = nil
+ @__col = nil
+ end
+
+ # The file name of the input.
+ def filename
+ @__lexer.filename
+ end
+
+ # The line number of the current token.
+ # This value starts from 1.
+ # This method is valid only in event handlers.
+ def lineno
+ @__line
+ end
+
+ # The column number of the current token.
+ # This value starts from 0.
+ # This method is valid only in event handlers.
+ def column
+ @__col
+ end
+
+ # Starts parsing. _init_ is a data accumulator.
+ # It is passed to the next event handler (as in Enumerable#inject).
+ def parse(init = nil)
+ data = init
+ @__lexer.lex.each do |pos, event, tok|
+ @__line, @__col = *pos
+ data = if respond_to?(event, true)
+ then __send__(event, tok, data)
+ else on_default(event, tok, data)
+ end
+ end
+ data
+ end
+
+ private
+
+ # This method is called when no handler is defined for an event.
+ # _event_ is :on_XXX, _token_ is the scanned token, and _data_ is the data
+ # accumulator. The return value of this method is passed to the
+ # next event handler (as in Enumerable#inject).
+ def on_default(event, token, data)
+ data
+ end
+
+ end
+
+end
diff --git a/trunk/ext/ripper/lib/ripper/lexer.rb b/trunk/ext/ripper/lib/ripper/lexer.rb
new file mode 100644
index 0000000000..14ef99f034
--- /dev/null
+++ b/trunk/ext/ripper/lib/ripper/lexer.rb
@@ -0,0 +1,179 @@
+#
+# $Id$
+#
+# Copyright (c) 2004,2005 Minero Aoki
+#
+# This program is free software.
+# You can distribute and/or modify this program under the Ruby License.
+# For details of Ruby License, see ruby/COPYING.
+#
+
+require 'ripper/core'
+
+class Ripper
+
+ # Tokenizes a Ruby program and returns an Array of Strings.
+ def Ripper.tokenize(src, filename = '-', lineno = 1)
+ Lexer.new(src, filename, lineno).tokenize
+ end
+
+ # Tokenizes a Ruby program and returns an Array of Arrays,
+ # each of which is formatted like [[lineno, column], type, token].
+ #
+ # require 'ripper'
+ # require 'pp'
+ #
+ # pp Ripper.lex("def m(a) nil end")
+ # #=> [[[1, 0], :on_kw, "def"],
+ # [[1, 3], :on_sp, " " ],
+ # [[1, 4], :on_ident, "m" ],
+ # [[1, 5], :on_lparen, "(" ],
+ # [[1, 6], :on_ident, "a" ],
+ # [[1, 7], :on_rparen, ")" ],
+ # [[1, 8], :on_sp, " " ],
+ # [[1, 9], :on_kw, "nil"],
+ # [[1, 12], :on_sp, " " ],
+ # [[1, 13], :on_kw, "end"]]
+ #
+ def Ripper.lex(src, filename = '-', lineno = 1)
+ Lexer.new(src, filename, lineno).lex
+ end
+
+ class Lexer < ::Ripper #:nodoc: internal use only
+ def tokenize
+ lex().map {|pos, event, tok| tok }
+ end
+
+ def lex
+ parse().sort_by {|pos, event, tok| pos }
+ end
+
+ def parse
+ @buf = []
+ super
+ @buf
+ end
+
+ private
+
+ SCANNER_EVENTS.each do |event|
+ module_eval(<<-End, __FILE__+'/module_eval', __LINE__ + 1)
+ def on_#{event}(tok)
+ @buf.push [[lineno(), column()], :on_#{event}, tok]
+ end
+ End
+ end
+ end
+
+ # [EXPERIMENTAL]
+ # Parses +src+ and returns the string that matched +pattern+.
+ # +pattern+ should be written like a Regexp.
+ #
+ # require 'ripper'
+ #
+ # p Ripper.slice('def m(a) nil end', 'ident') #=> "m"
+ # p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+') #=> "m(a)"
+ # p Ripper.slice("<<EOS\nstring\nEOS",
+ # 'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
+ # #=> "string\n"
+ #
+ def Ripper.slice(src, pattern, n = 0)
+ if m = token_match(src, pattern)
+ then m.string(n)
+ else nil
+ end
+ end
+
+ def Ripper.token_match(src, pattern) #:nodoc:
+ TokenPattern.compile(pattern).match(src)
+ end
+
+ class TokenPattern #:nodoc:
+
+ class Error < ::StandardError; end
+ class CompileError < Error; end
+ class MatchError < Error; end
+
+ class << self
+ alias compile new
+ end
+
+ def initialize(pattern)
+ @source = pattern
+ @re = compile(pattern)
+ end
+
+ def match(str)
+ match_list(::Ripper.lex(str))
+ end
+
+ def match_list(tokens)
+ if m = @re.match(map_tokens(tokens))
+ then MatchData.new(tokens, m)
+ else nil
+ end
+ end
+
+ private
+
+ def compile(pattern)
+ if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern)
+ raise CompileError, "invalid char in pattern: #{m[0].inspect}"
+ end
+ buf = ''
+ pattern.scan(/(?:\w+|\$\(|[()\[\]\{\}?*+\.]+)/) do |tok|
+ case tok
+ when /\w/
+ buf.concat map_token(tok)
+ when '$('
+ buf.concat '('
+ when '('
+ buf.concat '(?:'
+ when /[?*\[\])\.]/
+ buf.concat tok
+ else
+ raise 'must not happen'
+ end
+ end
+ Regexp.compile(buf)
+ rescue RegexpError => err
+ raise CompileError, err.message
+ end
+
+ def map_tokens(tokens)
+ tokens.map {|pos,type,str| map_token(type.to_s.sub(/\Aon_/,'')) }.join
+ end
+
+ MAP = {}
+ seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a
+ SCANNER_EVENT_TABLE.each do |ev, |
+ raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty?
+ MAP[ev.to_s.sub(/\Aon_/,'')] = seed.shift
+ end
+
+ def map_token(tok)
+ MAP[tok] or raise CompileError, "unknown token: #{tok}"
+ end
+
+ class MatchData
+ def initialize(tokens, match)
+ @tokens = tokens
+ @match = match
+ end
+
+ def string(n = 0)
+ return nil unless @match
+ match(n).join
+ end
+
+ private
+
+ def match(n = 0)
+ return [] unless @match
+ @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str }
+ end
+ end
+
+ end
+
+end
diff --git a/trunk/ext/ripper/lib/ripper/sexp.rb b/trunk/ext/ripper/lib/ripper/sexp.rb
new file mode 100644
index 0000000000..f2260fe8dd
--- /dev/null
+++ b/trunk/ext/ripper/lib/ripper/sexp.rb
@@ -0,0 +1,99 @@
+#
+# $Id$
+#
+# Copyright (c) 2004,2005 Minero Aoki
+#
+# This program is free software.
+# You can distribute and/or modify this program under the Ruby License.
+# For details of Ruby License, see ruby/COPYING.
+#
+
+require 'ripper/core'
+
+class Ripper
+
+ # [EXPERIMENTAL]
+ # Parses +src+ and creates an S-exp tree.
+ # This method is mainly for developer use.
+ #
+ # require 'ripper'
+ # require 'pp'
+ #
+ # pp Ripper.sexp("def m(a) nil end")
+ # #=> [:program,
+ # [:stmts_add,
+ # [:stmts_new],
+ # [:def,
+ # [:@ident, "m", [1, 4]],
+ # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
+ # [:bodystmt,
+ # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
+ # nil,
+ # nil,
+ # nil]]]]
+ #
+ def Ripper.sexp(src, filename = '-', lineno = 1)
+ SexpBuilderPP.new(src, filename, lineno).parse
+ end
+
+ def Ripper.sexp_raw(src, filename = '-', lineno = 1)
+ SexpBuilder.new(src, filename, lineno).parse
+ end
+
+ class SexpBuilderPP < ::Ripper #:nodoc:
+ private
+
+ PARSER_EVENT_TABLE.each do |event, arity|
+ if /_new\z/ =~ event.to_s and arity == 0
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}
+ []
+ end
+ End
+ elsif /_add\z/ =~ event.to_s
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}(list, item)
+ list.push item
+ list
+ end
+ End
+ else
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}(*args)
+ [:#{event}, *args]
+ end
+ End
+ end
+ end
+
+ SCANNER_EVENTS.each do |event|
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}(tok)
+ [:@#{event}, tok, [lineno(), column()]]
+ end
+ End
+ end
+ end
+
+ class SexpBuilder < ::Ripper #:nodoc:
+ private
+
+ PARSER_EVENTS.each do |event|
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}(*args)
+ args.unshift :#{event}
+ args
+ end
+ End
+ end
+
+ SCANNER_EVENTS.each do |event|
+ module_eval(<<-End, __FILE__, __LINE__ + 1)
+ def on_#{event}(tok)
+ [:@#{event}, tok, [lineno(), column()]]
+ end
+ End
+ end
+ end
+
+end
diff --git a/trunk/ext/ripper/tools/generate-param-macros.rb b/trunk/ext/ripper/tools/generate-param-macros.rb
new file mode 100755
index 0000000000..b19f6e8d5c
--- /dev/null
+++ b/trunk/ext/ripper/tools/generate-param-macros.rb
@@ -0,0 +1,14 @@
+off = true
+ARGF.each do |line|
+ case line
+ when /RIPPER_PARAMS_DECL_BEGIN/
+ off = false
+ when /RIPPER_PARAMS_DECL_END/
+ exit
+ when /ripper/
+ next if off
+ var = line.scan(/\w+/).last or next
+ base = var.sub(/ripper_/, '')
+ puts %"\#define #{base}\t\t(parser->ripper_#{base})"
+ end
+end
diff --git a/trunk/ext/ripper/tools/generate.rb b/trunk/ext/ripper/tools/generate.rb
new file mode 100755
index 0000000000..0efb997604
--- /dev/null
+++ b/trunk/ext/ripper/tools/generate.rb
@@ -0,0 +1,152 @@
+# $Id$
+
+require 'optparse'
+
+def main
+ mode = nil
+ ids1src = nil
+ ids2src = nil
+ template = nil
+ output = nil
+
+ parser = @parser = OptionParser.new
+ parser.banner = "Usage: #{File.basename($0)} --mode=MODE [--ids1src=PATH] [--ids2src=PATH] [--output=PATH]"
+ parser.on('--mode=MODE', 'check, eventids1, or eventids2table.') {|m|
+ mode = m
+ }
+ parser.on('--ids1src=PATH', 'A source file of event-IDs 1 (parse.y).') {|path|
+ ids1src = path
+ }
+ parser.on('--ids2src=PATH', 'A source file of event-IDs 2 (eventids2.c).') {|path|
+ ids2src = path
+ }
+ parser.on('--output=PATH', 'An output file.') {|path|
+ output = path
+ }
+ parser.on('--help', 'Prints this message and quit.') {
+ puts parser.help
+ exit true
+ }
+ begin
+ parser.parse!
+ rescue OptionParser::ParseError => err
+ usage err.message
+ end
+ usage 'no mode given' unless mode
+ case mode
+ when 'check'
+ usage 'no --ids1src' unless ids1src
+ usage 'no --ids2src' unless ids2src
+ h = read_ids1_with_locations(ids1src)
+ check_arity h
+ ids2 = read_ids2(ids2src)
+ common = h.keys & ids2
+ unless common.empty?
+ abort "event crash: #{common.join(' ')}"
+ end
+ exit 0
+ when 'eventids1'
+ usage 'no --ids1src' unless ids1src
+ result = generate_eventids1(read_ids1(ids1src))
+ when 'eventids2table'
+ usage 'no --ids2src' unless ids2src
+ result = generate_eventids2_table(read_ids2(ids2src))
+ end
+ if output
+ File.open(output, 'w') {|f|
+ f.write result
+ }
+ else
+ puts result
+ end
+end
+
+def usage(msg)
+ $stderr.puts msg
+ $stderr.puts @parser.help
+ exit false
+end
+
+def generate_eventids1(ids)
+ buf = ""
+ ids.each do |id, arity|
+ buf << %Q[static ID ripper_id_#{id};\n]
+ end
+ buf << %Q[\n]
+ buf << %Q[static void\n]
+ buf << %Q[ripper_init_eventids1(VALUE self)\n]
+ buf << %Q[{\n]
+ buf << %Q[ VALUE h;\n]
+ buf << %Q[ ID id;\n]
+ ids.each do |id, arity|
+ buf << %Q[ ripper_id_#{id} = rb_intern("on_#{id}");\n]
+ end
+ buf << %Q[\n]
+ buf << %Q[ h = rb_hash_new();\n]
+ buf << %Q[ rb_define_const(self, "PARSER_EVENT_TABLE", h);\n]
+ ids.each do |id, arity|
+ buf << %Q[ id = rb_intern("#{id}");\n]
+ buf << %Q[ rb_hash_aset(h, ID2SYM(id), INT2NUM(#{arity}));\n]
+ end
+ buf << %Q[}\n]
+ buf
+end
+
+def generate_eventids2_table(ids)
+ buf = ""
+ buf << %Q[static void\n]
+ buf << %Q[ripper_init_eventids2_table(VALUE self)\n]
+ buf << %Q[{\n]
+ buf << %Q[ VALUE h = rb_hash_new();\n]
+ buf << %Q[ ID id;\n]
+ buf << %Q[ rb_define_const(self, "SCANNER_EVENT_TABLE", h);\n]
+ ids.each do |id|
+ buf << %Q[ id = rb_intern("#{id}");\n]
+ buf << %Q[ rb_hash_aset(h, ID2SYM(id), INT2NUM(1));\n]
+ end
+ buf << %Q[}\n]
+ buf
+end
+
+def read_ids1(path)
+ strip_locations(read_ids1_with_locations(path))
+end
+
+def strip_locations(h)
+ h.map {|event, list| [event, list.first[1]] }\
+ .sort_by {|event, arity| event.to_s }
+end
+
+def check_arity(h)
+ invalid = false
+ h.each do |event, list|
+ unless list.map {|line, arity| arity }.uniq.size == 1
+ invalid = true
+ locations = list.map {|line, a| "#{line}:#{a}" }.join(', ')
+ $stderr.puts "arity crash [event=#{event}]: #{locations}"
+ end
+ end
+ abort if invalid
+end
+
+def read_ids1_with_locations(path)
+ h = {}
+ File.open(path) {|f|
+ f.each do |line|
+ next if /\A\#\s*define\s+s?dispatch/ =~ line
+ next if /ripper_dispatch/ =~ line
+ line.scan(/dispatch(\d)\((\w+)/) do |arity, event|
+ (h[event] ||= []).push [f.lineno, arity.to_i]
+ end
+ end
+ }
+ h
+end
+
+def read_ids2(path)
+ File.open(path) {|f|
+ return f.read.scan(/ripper_id_(\w+)/).flatten.uniq.sort
+ }
+end
+
+main
diff --git a/trunk/ext/ripper/tools/preproc.rb b/trunk/ext/ripper/tools/preproc.rb
new file mode 100755
index 0000000000..06397cea05
--- /dev/null
+++ b/trunk/ext/ripper/tools/preproc.rb
@@ -0,0 +1,91 @@
+# $Id$
+
+require 'optparse'
+
+def main
+ output = nil
+ parser = OptionParser.new
+ parser.banner = "Usage: #{File.basename($0)} [--output=PATH] <parse.y>"
+ parser.on('--output=PATH', 'An output file.') {|path|
+ output = path
+ }
+ parser.on('--help', 'Prints this message and quit.') {
+ puts parser.help
+ exit true
+ }
+ begin
+ parser.parse!
+ rescue OptionParser::ParseError => err
+ $stderr.puts err.message
+ $stderr.puts parser.help
+ exit false
+ end
+ unless ARGV.size == 1
+ abort "wrong number of arguments (#{ARGV.size} for 1)"
+ end
+ out = ""
+ File.open(ARGV[0]) {|f|
+ prelude f, out
+ grammar f, out
+ usercode f, out
+ }
+ if output
+ File.open(output, 'w') {|f|
+ f.write out
+ }
+ else
+ print out
+ end
+end
+
+def prelude(f, out)
+ while line = f.gets
+ case line
+ when %r</\*%%%\*/>
+ out << '/*' << $/
+ when %r</\*%>
+ out << '*/' << $/
+ when %r<%\*/>
+ out << $/
+ when /\A%%/
+ out << '%%' << $/
+ return
+ when /\A%token/
+ out << line.sub(/<\w+>/, '<val>')
+ when /\A%type/
+ out << line.sub(/<\w+>/, '<val>')
+ else
+ out << line
+ end
+ end
+end
+
+def grammar(f, out)
+ while line = f.gets
+ case line
+ when %r</\*%%%\*/>
+ out << '#if 0' << $/
+ when %r</\*%c%\*/>
+ out << '/*' << $/
+ when %r</\*%c>
+ out << '*/' << $/
+ when %r</\*%>
+ out << '#endif' << $/
+ when %r<%\*/>
+ out << $/
+ when /\A%%/
+ out << '%%' << $/
+ return
+ else
+ out << line
+ end
+ end
+end
+
+def usercode(f, out)
+ while line = f.gets
+ out << line
+ end
+end
+
+main
diff --git a/trunk/ext/ripper/tools/strip.rb b/trunk/ext/ripper/tools/strip.rb
new file mode 100755
index 0000000000..99413c361d
--- /dev/null
+++ b/trunk/ext/ripper/tools/strip.rb
@@ -0,0 +1,12 @@
+last_is_void = false
+ARGF.each do |line|
+ if line.strip.empty?
+ #puts() unless last_is_void
+ last_is_void = true
+ elsif /\A\#/ === line
+ ;
+ else
+ print line
+ last_is_void = false
+ end
+end