summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-14 10:53:47 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-14 10:53:47 +0000
commit 7df1e45bb6b4326da5c799dcf58d38f0a14362af (patch)
tree c742cebc6f6a1bc37d74c7a24b9d59b68971afdd
parent a61ae940c68c4401a42487ee2b670e145dd56dcd (diff)
ripper: add states of scanner
* parse.y (ripper_state): add states of scanner to tokens from
  Ripper.lex and Ripper::Filter#on_*. based on the patch by
  aycabta (Code Ahss) at [ruby-core:81789]. [Feature #13686]

* ext/ripper/tools/preproc.rb (prelude, usercode): generate EXPR_*
  constants from enums.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59896 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- NEWS 7
-rw-r--r-- ext/ripper/lib/ripper/filter.rb 10
-rw-r--r-- ext/ripper/lib/ripper/lexer.rb 33
-rwxr-xr-x ext/ripper/tools/preproc.rb 20
-rw-r--r-- parse.y 24
-rw-r--r-- test/ripper/test_filter.rb 11
-rw-r--r-- test/ripper/test_ripper.rb 4
-rw-r--r-- test/ripper/test_scanner_events.rb 98
8 files changed, 138 insertions, 69 deletions
diff --git a/NEWS b/NEWS
index 2b0ae345618..f289bd776a5 100644
--- a/NEWS
+++ b/NEWS
@@ -133,6 +133,13 @@ with all sufficient information, see the ChangeLog file or Redmine
* New constants:
* RbConfig::LIMITS is added to provide the limits of C types.
+* Ripper
+ * New method:
+ * Ripper#state is added to tell the state of scanner. [Feature #13686]
+
+ * New constants:
+ * Ripper::EXPR_BEG and so on for Ripper#state.
+
* Set
* Add Set#to_s as alias to #inspect [Feature #13676]
diff --git a/ext/ripper/lib/ripper/filter.rb b/ext/ripper/lib/ripper/filter.rb
index 455197f6906..9955d30550b 100644
--- a/ext/ripper/lib/ripper/filter.rb
+++ b/ext/ripper/lib/ripper/filter.rb
@@ -25,6 +25,7 @@ class Ripper
@__lexer = Lexer.new(src, filename, lineno)
@__line = nil
@__col = nil
+ @__state = nil
end
# The file name of the input.
@@ -46,13 +47,20 @@ class Ripper
@__col
end
+ # The scanner's state of the current token.
+ # This value is the bitwise OR of zero or more of the +Ripper::EXPR_*+ constants.
+ def state
+ @__state
+ end
+
# Starts the parser.
# +init+ is a data accumulator and is passed to the next event handler (as
# of Enumerable#inject).
def parse(init = nil)
data = init
- @__lexer.lex.each do |pos, event, tok|
+ @__lexer.lex.each do |pos, event, tok, state|
@__line, @__col = *pos
+ @__state = state
data = if respond_to?(event, true)
then __send__(event, tok, data)
else on_default(event, tok, data)
diff --git a/ext/ripper/lib/ripper/lexer.rb b/ext/ripper/lib/ripper/lexer.rb
index 300c8eef518..4e82a1275c2 100644
--- a/ext/ripper/lib/ripper/lexer.rb
+++ b/ext/ripper/lib/ripper/lexer.rb
@@ -23,29 +23,30 @@ class Ripper
end
# Tokenizes the Ruby program and returns an array of an array,
- # which is formatted like <code>[[lineno, column], type, token]</code>.
+ # which is formatted like
+ # <code>[[lineno, column], type, token, state]</code>.
#
# require 'ripper'
# require 'pp'
#
# pp Ripper.lex("def m(a) nil end")
- # #=> [[[1, 0], :on_kw, "def"],
- # [[1, 3], :on_sp, " " ],
- # [[1, 4], :on_ident, "m" ],
- # [[1, 5], :on_lparen, "(" ],
- # [[1, 6], :on_ident, "a" ],
- # [[1, 7], :on_rparen, ")" ],
- # [[1, 8], :on_sp, " " ],
- # [[1, 9], :on_kw, "nil"],
- # [[1, 12], :on_sp, " " ],
- # [[1, 13], :on_kw, "end"]]
+ # #=> [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME ],
+ # [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME ],
+ # [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN ],
+ # [[1, 5], :on_lparen, "(", Ripper::EXPR_LABEL | Ripper::EXPR_BEG],
+ # [[1, 6], :on_ident, "a", Ripper::EXPR_ARG ],
+ # [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN ],
+ # [[1, 8], :on_sp, " ", Ripper::EXPR_BEG ],
+ # [[1, 9], :on_kw, "nil", Ripper::EXPR_END ],
+ # [[1, 12], :on_sp, " ", Ripper::EXPR_END ],
+ # [[1, 13], :on_kw, "end", Ripper::EXPR_END ]]
#
def Ripper.lex(src, filename = '-', lineno = 1)
Lexer.new(src, filename, lineno).lex
end
class Lexer < ::Ripper #:nodoc: internal use only
- Elem = Struct.new(:pos, :event, :tok)
+ Elem = Struct.new(:pos, :event, :tok, :state)
def tokenize
parse().sort_by(&:pos).map(&:tok)
@@ -77,7 +78,7 @@ class Ripper
e.event = :on_ignored_sp
next
end
- ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n])]
+ ignored_sp << [i, Elem.new(e.pos.dup, :on_ignored_sp, tok[0, n], e.state)]
e.pos[1] += n
end
end
@@ -93,16 +94,16 @@ class Ripper
buf = []
@buf << buf
@buf = buf
- @buf.push Elem.new([lineno(), column()], __callee__, tok)
+ @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
end
def on_heredoc_end(tok)
- @buf.push Elem.new([lineno(), column()], __callee__, tok)
+ @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
@buf = @stack.pop
end
def _push_token(tok)
- @buf.push Elem.new([lineno(), column()], __callee__, tok)
+ @buf.push Elem.new([lineno(), column()], __callee__, tok, state())
end
(SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
diff --git a/ext/ripper/tools/preproc.rb b/ext/ripper/tools/preproc.rb
index 16449ec81c5..8b685791647 100755
--- a/ext/ripper/tools/preproc.rb
+++ b/ext/ripper/tools/preproc.rb
@@ -40,6 +40,7 @@ def main
end
def prelude(f, out)
+ @exprs = {}
while line = f.gets
case line
when %r</\*%%%\*/>
@@ -56,6 +57,16 @@ def prelude(f, out)
when /\A%type/
out << line.sub(/<\w+>/, '<val>')
else
+ if (/^enum lex_state_(?:bits|e) \{/ =~ line)..(/^\}/ =~ line)
+ case line
+ when /^\s*(EXPR_\w+),\s+\/\*(.+)\*\//
+ @exprs[$1.chomp("_bit")] = $2.strip
+ when /^\s*(EXPR_\w+)\s+=\s+(.+)$/
+ name = $1
+ val = $2.chomp(",")
+ @exprs[name] = "equals to " + (val.start_with?("(") ? "<tt>#{val}</tt>" : "+#{val}+")
+ end
+ end
out << line
end
end
@@ -84,9 +95,12 @@ def grammar(f, out)
end
def usercode(f, out)
- while line = f.gets
- out << line
- end
+ require 'erb'
+ compiler = ERB::Compiler.new('%-')
+ compiler.put_cmd = compiler.insert_cmd = "out.<<"
+ lineno = f.lineno
+ src, = compiler.compile(f.read)
+ eval(src, binding, f.path, lineno)
end
main
diff --git a/parse.y b/parse.y
index 249aeca3b9b..60d40e2b923 100644
--- a/parse.y
+++ b/parse.y
@@ -11446,6 +11446,25 @@ ripper_lineno(VALUE self)
return INT2NUM(ruby_sourceline);
}
+/*
+ * call-seq:
+ * ripper.state -> Integer
+ *
+ * Return scanner state of current token.
+ */
+static VALUE
+ripper_state(VALUE self)
+{
+ struct parser_params *parser;
+
+ TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
+ if (!ripper_initialized_p(parser)) {
+ rb_raise(rb_eArgError, "method called for uninitialized object");
+ }
+ if (NIL_P(parser->parsing_thread)) return Qnil;
+ return INT2NUM(lex_state);
+}
+
#ifdef RIPPER_DEBUG
/* :nodoc: */
static VALUE
@@ -11493,6 +11512,7 @@ InitVM_ripper(void)
rb_define_method(Ripper, "column", ripper_column, 0);
rb_define_method(Ripper, "filename", ripper_filename, 0);
rb_define_method(Ripper, "lineno", ripper_lineno, 0);
+ rb_define_method(Ripper, "state", ripper_state, 0);
rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0);
rb_define_method(Ripper, "encoding", rb_parser_encoding, 0);
rb_define_method(Ripper, "yydebug", rb_parser_get_yydebug, 0);
@@ -11507,6 +11527,10 @@ InitVM_ripper(void)
rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
+<% @exprs.each do |expr, desc| -%>
+ /* <%=desc%> */
+ rb_define_const(Ripper, "<%=expr%>", INT2NUM(<%=expr%>));
+<% end %>
ripper_init_eventids1_table(Ripper);
ripper_init_eventids2_table(Ripper);
diff --git a/test/ripper/test_filter.rb b/test/ripper/test_filter.rb
index d025cf5a98a..c39820c3217 100644
--- a/test/ripper/test_filter.rb
+++ b/test/ripper/test_filter.rb
@@ -15,6 +15,7 @@ class TestRipper::Filter < Test::Unit::TestCase
data[:filename] = filename rescue nil
data[:lineno] = lineno
data[:column] = column
+ data[:state] = state
data[:token] = token
end
data
@@ -75,6 +76,16 @@ class TestRipper::Filter < Test::Unit::TestCase
assert_equal(last_columns, filter.column)
end
+ def test_filter_state
+ data = {}
+ src = File.read(filename)
+ filter = Filter.new(src)
+ assert_equal(nil, filter.state)
+ filter.parse(data)
+ assert_not_nil(data[:state])
+ assert_not_nil(filter.state)
+ end
+
def test_filter_token
data = {}
filter = Filter.new("begin; puts 1; end")
diff --git a/test/ripper/test_ripper.rb b/test/ripper/test_ripper.rb
index 79cbf88a15f..e7d20a66a09 100644
--- a/test/ripper/test_ripper.rb
+++ b/test/ripper/test_ripper.rb
@@ -17,6 +17,10 @@ class TestRipper::Ripper < Test::Unit::TestCase
assert_nil @ripper.column
end
+ def test_state
+ assert_nil @ripper.state
+ end
+
def test_encoding
assert_equal Encoding::UTF_8, @ripper.encoding
ripper = Ripper.new('# coding: iso-8859-15')
diff --git a/test/ripper/test_scanner_events.rb b/test/ripper/test_scanner_events.rb
index ef49fc18ba5..90fd599a311 100644
--- a/test/ripper/test_scanner_events.rb
+++ b/test/ripper/test_scanner_events.rb
@@ -48,70 +48,70 @@ class TestRipper::ScannerEvents < Test::Unit::TestCase
def test_lex
assert_equal [],
Ripper.lex('')
- assert_equal [[[1,0], :on_ident, "a"]],
+ assert_equal [[[1,0], :on_ident, "a", Ripper::EXPR_CMDARG]],
Ripper.lex('a')
- assert_equal [[[1, 0], :on_kw, "nil"]],
+ assert_equal [[[1, 0], :on_kw, "nil", Ripper::EXPR_END]],
Ripper.lex("nil")
- assert_equal [[[1, 0], :on_kw, "def"],
- [[1, 3], :on_sp, " "],
- [[1, 4], :on_ident, "m"],
- [[1, 5], :on_lparen, "("],
- [[1, 6], :on_ident, "a"],
- [[1, 7], :on_rparen, ")"],
- [[1, 8], :on_kw, "end"]],
+ assert_equal [[[1, 0], :on_kw, "def", Ripper::EXPR_FNAME],
+ [[1, 3], :on_sp, " ", Ripper::EXPR_FNAME],
+ [[1, 4], :on_ident, "m", Ripper::EXPR_ENDFN],
+ [[1, 5], :on_lparen, "(", Ripper::EXPR_BEG | Ripper::EXPR_LABEL],
+ [[1, 6], :on_ident, "a", Ripper::EXPR_ARG],
+ [[1, 7], :on_rparen, ")", Ripper::EXPR_ENDFN],
+ [[1, 8], :on_kw, "end", Ripper::EXPR_END]],
Ripper.lex("def m(a)end")
- assert_equal [[[1, 0], :on_int, "1"],
- [[1, 1], :on_nl, "\n"],
- [[2, 0], :on_int, "2"],
- [[2, 1], :on_nl, "\n"],
- [[3, 0], :on_int, "3"]],
+ assert_equal [[[1, 0], :on_int, "1", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[1, 1], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[2, 0], :on_int, "2", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[2, 1], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[3, 0], :on_int, "3", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
Ripper.lex("1\n2\n3")
- assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
- [[1, 5], :on_nl, "\n"],
- [[2, 0], :on_tstring_content, "heredoc\n"],
- [[3, 0], :on_heredoc_end, "EOS"]],
+ assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
+ [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[2, 0], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
+ [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
Ripper.lex("<<""EOS\nheredoc\nEOS")
- assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS"],
- [[1, 5], :on_nl, "\n"],
- [[2, 0], :on_heredoc_end, "EOS"]],
+ assert_equal [[[1, 0], :on_heredoc_beg, "<<""EOS", Ripper::EXPR_BEG],
+ [[1, 5], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[2, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]],
Ripper.lex("<<""EOS\nEOS"),
"bug#4543"
- assert_equal [[[1, 0], :on_regexp_beg, "/"],
- [[1, 1], :on_tstring_content, "foo\nbar"],
- [[2, 3], :on_regexp_end, "/"]],
+ assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
+ [[1, 1], :on_tstring_content, "foo\nbar", Ripper::EXPR_BEG],
+ [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
Ripper.lex("/foo\nbar/")
- assert_equal [[[1, 0], :on_regexp_beg, "/"],
- [[1, 1], :on_tstring_content, "foo\n\u3020"],
- [[2, 3], :on_regexp_end, "/"]],
+ assert_equal [[[1, 0], :on_regexp_beg, "/", Ripper::EXPR_BEG],
+ [[1, 1], :on_tstring_content, "foo\n\u3020", Ripper::EXPR_BEG],
+ [[2, 3], :on_regexp_end, "/", Ripper::EXPR_BEG]],
Ripper.lex("/foo\n\u3020/")
- assert_equal [[[1, 0], :on_tstring_beg, "'"],
- [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0"],
- [[2, 3], :on_tstring_end, "'"]],
+ assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+ [[1, 1], :on_tstring_content, "foo\n\xe3\x80\xa0", Ripper::EXPR_BEG],
+ [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
Ripper.lex("'foo\n\xe3\x80\xa0'")
- assert_equal [[[1, 0], :on_tstring_beg, "'"],
- [[1, 1], :on_tstring_content, "\u3042\n\u3044"],
- [[2, 3], :on_tstring_end, "'"]],
+ assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+ [[1, 1], :on_tstring_content, "\u3042\n\u3044", Ripper::EXPR_BEG],
+ [[2, 3], :on_tstring_end, "'", Ripper::EXPR_END | Ripper::EXPR_ENDARG]],
Ripper.lex("'\u3042\n\u3044'")
- assert_equal [[[1, 0], :on_rational, "1r"],
- [[1, 2], :on_nl, "\n"],
- [[2, 0], :on_imaginary, "2i"],
- [[2, 2], :on_nl, "\n"],
- [[3, 0], :on_imaginary, "3ri"],
- [[3, 3], :on_nl, "\n"],
- [[4, 0], :on_rational, "4.2r"],
- [[4, 4], :on_nl, "\n"],
- [[5, 0], :on_imaginary, "5.6ri"],
+ assert_equal [[[1, 0], :on_rational, "1r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[1, 2], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[2, 0], :on_imaginary, "2i", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[2, 2], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[3, 0], :on_imaginary, "3ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[3, 3], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[4, 0], :on_rational, "4.2r", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
+ [[4, 4], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[5, 0], :on_imaginary, "5.6ri", Ripper::EXPR_END | Ripper::EXPR_ENDARG],
],
Ripper.lex("1r\n2i\n3ri\n4.2r\n5.6ri")
- assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS"],
- [[1, 6], :on_nl, "\n"],
- [[2, 0], :on_ignored_sp, " "],
- [[2, 2], :on_tstring_content, "heredoc\n"],
- [[3, 0], :on_heredoc_end, "EOS"]
+ assert_equal [[[1, 0], :on_heredoc_beg, "<<~EOS", Ripper::EXPR_BEG],
+ [[1, 6], :on_nl, "\n", Ripper::EXPR_BEG],
+ [[2, 0], :on_ignored_sp, " ", Ripper::EXPR_BEG],
+ [[2, 2], :on_tstring_content, "heredoc\n", Ripper::EXPR_BEG],
+ [[3, 0], :on_heredoc_end, "EOS", Ripper::EXPR_BEG]
],
Ripper.lex("<<~EOS\n heredoc\nEOS")
- assert_equal [[[1, 0], :on_tstring_beg, "'"],
- [[1, 1], :on_tstring_content, "foo"]],
+ assert_equal [[[1, 0], :on_tstring_beg, "'", Ripper::EXPR_BEG],
+ [[1, 1], :on_tstring_content, "foo", Ripper::EXPR_BEG]],
Ripper.lex("'foo")
end