9 files changed, 556 insertions, 104 deletions
diff --git a/NEWS.md b/NEWS.md
index bff9d1e7ab..d090b88f4d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -192,6 +192,14 @@ Note: We're only listing outstanding class updates.
 
 * RubyVM::AbstractSyntaxTree
     * Add `error_tolerant` option for `parse`, `parse_file` and `of`. [[Feature #19013]]
+    * Add `keep_tokens` option for `parse`, `parse_file` and `of`. Add `#tokens` and `#all_tokens`
+      for `RubyVM::AbstractSyntaxTree::Node` [[Feature #19070]]
+
+    ```ruby
+    root = RubyVM::AbstractSyntaxTree.parse("x = 1 + 2", keep_tokens: true)
+    root.tokens # => [[0, :tIDENTIFIER, "x", [1, 0, 1, 1]], [1, :tSP, " ", [1, 1, 1, 2]], ...]
+    root.tokens.map{_1[2]}.join # => "x = 1 + 2"
+    ```
 
 * Set
     * Set is now available as a built-in class without the need for `require "set"`. [[Feature #16989]]
diff --git a/ast.c b/ast.c
index 93c33a1f8d..bf3781d820 100644
--- a/ast.c
+++ b/ast.c
@@ -64,8 +64,8 @@ ast_new_internal(rb_ast_t *ast, const NODE *node)
     return obj;
 }
 
-static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant);
-static VALUE rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant);
+static VALUE rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens);
+static VALUE rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens);
 
 static VALUE
 ast_parse_new(void)
@@ -85,13 +85,13 @@ ast_parse_done(rb_ast_t *ast)
 }
 
 static VALUE
-ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant)
+ast_s_parse(rb_execution_context_t *ec, VALUE module, VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
-    return rb_ast_parse_str(str, keep_script_lines, error_tolerant);
+    return rb_ast_parse_str(str, keep_script_lines, error_tolerant, keep_tokens);
 }
 
 static VALUE
-rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant)
+rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
     rb_ast_t *ast = 0;
 
@@ -99,18 +99,19 @@ rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant)
     VALUE vparser = ast_parse_new();
     if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser);
     if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
+    if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
     ast = rb_parser_compile_string_path(vparser, Qnil, str, 1);
     return ast_parse_done(ast);
 }
 
 static VALUE
-ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE keep_script_lines, VALUE error_tolerant)
+ast_s_parse_file(rb_execution_context_t *ec, VALUE module, VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
-    return rb_ast_parse_file(path, keep_script_lines, error_tolerant);
+    return rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens);
 }
 
 static VALUE
-rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant)
+rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
     VALUE f;
     rb_ast_t *ast = 0;
@@ -122,6 +123,7 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant)
     VALUE vparser = ast_parse_new();
     if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser);
     if (RTEST(error_tolerant))  rb_parser_error_tolerant(vparser);
+    if (RTEST(keep_tokens))  rb_parser_keep_tokens(vparser);
     ast = rb_parser_compile_file_path(vparser, Qnil, f, 1);
     rb_io_close(f);
     return ast_parse_done(ast);
@@ -141,7 +143,7 @@ lex_array(VALUE array, int index)
 }
 
 static VALUE
-rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant)
+rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
     rb_ast_t *ast = 0;
 
@@ -149,6 +151,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant)
     VALUE vparser = ast_parse_new();
     if (RTEST(keep_script_lines)) rb_parser_keep_script_lines(vparser);
     if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
+    if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
     ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
     return ast_parse_done(ast);
 }
@@ -208,7 +211,7 @@ node_id_for_backtrace_location(rb_execution_context_t *ec, VALUE module, VALUE l
 }
 
 static VALUE
-ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script_lines, VALUE error_tolerant)
+ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
 {
     VALUE node, lines = Qnil;
     const rb_iseq_t *iseq;
@@ -247,13 +250,13 @@ ast_s_of(rb_execution_context_t *ec, VALUE module, VALUE body, VALUE keep_script
     }
 
     if (!NIL_P(lines) || !NIL_P(lines = script_lines(path))) {
-        node = rb_ast_parse_array(lines, keep_script_lines, error_tolerant);
+        node = rb_ast_parse_array(lines, keep_script_lines, error_tolerant, keep_tokens);
     }
     else if (e_option) {
-        node = rb_ast_parse_str(rb_e_script, keep_script_lines, error_tolerant);
+        node = rb_ast_parse_str(rb_e_script, keep_script_lines, error_tolerant, keep_tokens);
     }
     else {
-        node = rb_ast_parse_file(path, keep_script_lines, error_tolerant);
+        node = rb_ast_parse_file(path, keep_script_lines, error_tolerant, keep_tokens);
     }
 
     return node_find(node, node_id);
@@ -716,6 +719,15 @@ ast_node_last_column(rb_execution_context_t *ec, VALUE self)
 }
 
 static VALUE
+ast_node_all_tokens(rb_execution_context_t *ec, VALUE self)
+{
+    struct ASTNodeData *data;
+    TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
+
+    return rb_ast_tokens(data->ast);
+}
+
+static VALUE
 ast_node_inspect(rb_execution_context_t *ec, VALUE self)
 {
     VALUE str;
diff --git a/ast.rb b/ast.rb
index 1bc1168b80..24740ebe28 100644
--- a/ast.rb
+++ b/ast.rb
@@ -29,8 +29,8 @@ module RubyVM::AbstractSyntaxTree
   #
   #    RubyVM::AbstractSyntaxTree.parse("x = 1 + 2")
   #    # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-1:9>
-  def self.parse string, keep_script_lines: false, error_tolerant: false
-    Primitive.ast_s_parse string, keep_script_lines, error_tolerant
+  def self.parse string, keep_script_lines: false, error_tolerant: false, keep_tokens: false
+    Primitive.ast_s_parse string, keep_script_lines, error_tolerant, keep_tokens
   end
 
   #  call-seq:
@@ -44,8 +44,8 @@ module RubyVM::AbstractSyntaxTree
   #
   #     RubyVM::AbstractSyntaxTree.parse_file("my-app/app.rb")
   #     # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-31:3>
-  def self.parse_file pathname, keep_script_lines: false, error_tolerant: false
-    Primitive.ast_s_parse_file pathname, keep_script_lines, error_tolerant
+  def self.parse_file pathname, keep_script_lines: false, error_tolerant: false, keep_tokens: false
+    Primitive.ast_s_parse_file pathname, keep_script_lines, error_tolerant, keep_tokens
   end
 
   #  call-seq:
@@ -63,8 +63,8 @@ module RubyVM::AbstractSyntaxTree
   #
   #     RubyVM::AbstractSyntaxTree.of(method(:hello))
   #     # => #<RubyVM::AbstractSyntaxTree::Node:SCOPE@1:0-3:3>
-  def self.of body, keep_script_lines: false, error_tolerant: false
-    Primitive.ast_s_of body, keep_script_lines, error_tolerant
+  def self.of body, keep_script_lines: false, error_tolerant: false, keep_tokens: false
+    Primitive.ast_s_of body, keep_script_lines, error_tolerant, keep_tokens
   end
 
   #  call-seq:
@@ -137,6 +137,46 @@ module RubyVM::AbstractSyntaxTree
     end
 
     #  call-seq:
+    #     node.tokens -> array
+    #
+    # Returns tokens corresponding to the location of the node.
+    # Returns nil if keep_tokens is not enabled when parse method is called.
+    # Token is an array of:
+    #
+    #  - id
+    #  - token type
+    #  - source code text
+    #  - location [first_lineno, first_column, last_lineno, last_column]
+    #
+    #    root = RubyVM::AbstractSyntaxTree.parse("x = 1 + 2", keep_tokens: true)
+    #    root.tokens # => [[0, :tIDENTIFIER, "x", [1, 0, 1, 1]], [1, :tSP, " ", [1, 1, 1, 2]], ...]
+    #    root.tokens.map{_1[2]}.join # => "x = 1 + 2"
+    def tokens
+      return nil unless all_tokens
+
+      all_tokens.each_with_object([]) do |token, a|
+        loc = token.last
+        if ([first_lineno, first_column] <=> [loc[0], loc[1]]) <= 0 &&
+           ([last_lineno, last_column]   <=> [loc[2], loc[3]]) >= 0
+           a << token
+        end
+      end
+    end
+
+    #  call-seq:
+    #     node.all_tokens -> array
+    #
+    # Returns all tokens for the input script regardless the receiver node.
+    # Returns nil if keep_tokens is not enabled when parse method is called.
+    #
+    #    root = RubyVM::AbstractSyntaxTree.parse("x = 1 + 2", keep_tokens: true)
+    #    root.all_tokens # => [[0, :tIDENTIFIER, "x", [1, 0, 1, 1]], [1, :tSP, " ", [1, 1, 1, 2]], ...]
+    #    root.children[-1].all_tokens # => [[0, :tIDENTIFIER, "x", [1, 0, 1, 1]], [1, :tSP, " ", [1, 1, 1, 2]], ...]
+    def all_tokens
+      Primitive.ast_node_all_tokens
+    end
+
+    #  call-seq:
     #     node.children -> array
     #
     #  Returns AST nodes under this one.  Each kind of node
diff --git a/ext/ripper/eventids2.c b/ext/ripper/eventids2.c
index ac38663f2d..05687497ac 100644
--- a/ext/ripper/eventids2.c
+++ b/ext/ripper/eventids2.c
@@ -1,22 +1,3 @@
-enum {
-    tIGNORED_NL  = tLAST_TOKEN + 1,
-# define tIGNORED_NL ((enum yytokentype)tIGNORED_NL)
-    tCOMMENT,
-# define tCOMMENT ((enum yytokentype)tCOMMENT)
-    tEMBDOC_BEG,
-# define tEMBDOC_BEG ((enum yytokentype)tEMBDOC_BEG)
-    tEMBDOC,
-# define tEMBDOC ((enum yytokentype)tEMBDOC)
-    tEMBDOC_END,
-# define tEMBDOC_END ((enum yytokentype)tEMBDOC_END)
-    tHEREDOC_BEG,
-# define tHEREDOC_BEG ((enum yytokentype)tHEREDOC_BEG)
-    tHEREDOC_END,
-# define tHEREDOC_END ((enum yytokentype)tHEREDOC_END)
-    k__END__,
-# define k__END__ ((enum yytokentype)k__END__)
-};
-
 typedef struct {
     ID ripper_id_backref;
     ID ripper_id_backtick;
diff --git a/internal/parse.h b/internal/parse.h
index 37133827f5..f242c384ad 100644
--- a/internal/parse.h
+++ b/internal/parse.h
@@ -16,6 +16,7 @@ VALUE rb_parser_set_yydebug(VALUE, VALUE);
 void *rb_parser_load_file(VALUE parser, VALUE name);
 void rb_parser_keep_script_lines(VALUE vparser);
 void rb_parser_error_tolerant(VALUE vparser);
+void rb_parser_keep_tokens(VALUE vparser);
 
 RUBY_SYMBOL_EXPORT_BEGIN
 VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int);
diff --git a/node.c b/node.c
index c7469151ec..a6cb498778 100644
--- a/node.c
+++ b/node.c
@@ -1161,6 +1161,12 @@ struct node_buffer_struct {
     node_buffer_list_t markable;
     struct rb_ast_local_table_link *local_tables;
     VALUE mark_hash;
+    // - id (sequence number)
+    // - token_type
+    // - text of token
+    // - location info
+    // Array, whose entry is array
+    VALUE tokens;
 };
 
 static void
@@ -1187,6 +1193,7 @@ rb_node_buffer_new(void)
     init_node_buffer_list(&nb->markable, (node_buffer_elem_t*)((size_t)nb->unmarkable.head + bucket_size));
     nb->local_tables = 0;
     nb->mark_hash = Qnil;
+    nb->tokens = Qnil;
     return nb;
 }
 
@@ -1418,7 +1425,10 @@ rb_ast_update_references(rb_ast_t *ast)
 void
 rb_ast_mark(rb_ast_t *ast)
 {
-    if (ast->node_buffer) rb_gc_mark(ast->node_buffer->mark_hash);
+    if (ast->node_buffer) {
+        rb_gc_mark(ast->node_buffer->mark_hash);
+        rb_gc_mark(ast->node_buffer->tokens);
+    }
     if (ast->body.compile_option) rb_gc_mark(ast->body.compile_option);
     if (ast->node_buffer) {
         node_buffer_t *nb = ast->node_buffer;
@@ -1477,3 +1487,15 @@ rb_ast_add_mark_object(rb_ast_t *ast, VALUE obj)
     }
     rb_hash_aset(ast->node_buffer->mark_hash, obj, Qtrue);
 }
+
+VALUE
+rb_ast_tokens(rb_ast_t *ast)
+{
+    return ast->node_buffer->tokens;
+}
+
+void
+rb_ast_set_tokens(rb_ast_t *ast, VALUE tokens)
+{
+    RB_OBJ_WRITE(ast, &ast->node_buffer->tokens, tokens);
+}
diff --git a/node.h b/node.h
index 220de3135e..befb1328fb 100644
--- a/node.h
+++ b/node.h
@@ -421,6 +421,8 @@ void rb_ast_dispose(rb_ast_t*);
 void rb_ast_free(rb_ast_t*);
 size_t rb_ast_memsize(const rb_ast_t*);
 void rb_ast_add_mark_object(rb_ast_t*, VALUE);
+void rb_ast_set_tokens(rb_ast_t*, VALUE);
+VALUE rb_ast_tokens(rb_ast_t *ast);
 NODE *rb_ast_newnode(rb_ast_t*, enum node_type type);
 void rb_ast_delete_node(rb_ast_t*, NODE *n);
 rb_ast_id_table_t *rb_ast_new_local_table(rb_ast_t*, int);
diff --git a/parse.y b/parse.y
index 846136bd03..57cfe7db62 100644
--- a/parse.y
+++ b/parse.y
@@ -124,7 +124,13 @@ RBIMPL_WARNING_POP()
 
 #define RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(Current)			\
     rb_parser_set_location_from_strterm_heredoc(p, &p->lex.strterm->u.heredoc, &(Current))
-#define RUBY_SET_YYLLOC_OF_NONE(Current)					\
+#define RUBY_SET_YYLLOC_OF_DELAYED_TOKEN(Current)			\
+    rb_parser_set_location_of_delayed_token(p, &(Current))
+#define RUBY_SET_YYLLOC_OF_HEREDOC_END(Current)				\
+    rb_parser_set_location_of_heredoc_end(p, &(Current))
+#define RUBY_SET_YYLLOC_OF_DUMMY_END(Current)				\
+    rb_parser_set_location_of_dummy_end(p, &(Current))
+#define RUBY_SET_YYLLOC_OF_NONE(Current)				\
     rb_parser_set_location_of_none(p, &(Current))
 #define RUBY_SET_YYLLOC(Current)					\
     rb_parser_set_location(p, &(Current))
@@ -272,12 +278,12 @@ struct parser_params {
     rb_imemo_tmpbuf_t *heap;
 
     YYSTYPE *lval;
+    YYLTYPE *yylloc;
 
     struct {
 	rb_strterm_t *strterm;
 	VALUE (*gets)(struct parser_params*,VALUE);
 	VALUE input;
-	VALUE prevline;
 	VALUE lastline;
 	VALUE nextline;
 	const char *pbeg;
@@ -320,6 +326,14 @@ struct parser_params {
     VALUE debug_buffer;
     VALUE debug_output;
 
+    struct {
+	VALUE token;
+	int beg_line;
+	int beg_col;
+	int end_line;
+	int end_col;
+    } delayed;
+
     ID cur_arg;
 
     rb_ast_t *ast;
@@ -351,6 +365,7 @@ struct parser_params {
     unsigned int do_split: 1;
     unsigned int keep_script_lines: 1;
     unsigned int error_tolerant: 1;
+    unsigned int keep_tokens: 1;
 
     NODE *eval_tree_begin;
     NODE *eval_tree;
@@ -359,15 +374,13 @@ struct parser_params {
     const struct rb_iseq_struct *parent_iseq;
     /* store specific keyword locations to generate dummy end token */
     VALUE end_expect_token_locations;
+    /* id for terms */
+    int token_id;
+    /* Array for term tokens */
+    VALUE tokens;
 #else
     /* Ripper only */
 
-    struct {
-	VALUE token;
-	int line;
-	int col;
-    } delayed;
-
     VALUE value;
     VALUE result;
     VALUE parsing_thread;
@@ -447,6 +460,177 @@ peek_end_expect_token_locations(struct parser_params *p)
     if(NIL_P(p->end_expect_token_locations)) return Qnil;
     return rb_ary_last(0, 0, p->end_expect_token_locations);
 }
+
+static ID
+parser_token2id(enum yytokentype tok)
+{
+    switch ((int) tok) {
+#define TOKEN2ID(tok) case tok: return rb_intern(#tok);
+#define TOKEN2ID2(tok, name) case tok: return rb_intern(name);
+      TOKEN2ID2(' ', "words_sep")
+      TOKEN2ID2('!', "!")
+      TOKEN2ID2('%', "%");
+      TOKEN2ID2('&', "&");
+      TOKEN2ID2('*', "*");
+      TOKEN2ID2('+', "+");
+      TOKEN2ID2('-', "-");
+      TOKEN2ID2('/', "/");
+      TOKEN2ID2('<', "<");
+      TOKEN2ID2('=', "=");
+      TOKEN2ID2('>', ">");
+      TOKEN2ID2('?', "?");
+      TOKEN2ID2('^', "^");
+      TOKEN2ID2('|', "|");
+      TOKEN2ID2('~', "~");
+      TOKEN2ID2(':', ":");
+      TOKEN2ID2(',', ",");
+      TOKEN2ID2('.', ".");
+      TOKEN2ID2(';', ";");
+      TOKEN2ID2('`', "`");
+      TOKEN2ID2('\n', "nl");
+      TOKEN2ID2('{', "{");
+      TOKEN2ID2('}', "}");
+      TOKEN2ID2('[', "[");
+      TOKEN2ID2(']', "]");
+      TOKEN2ID2('(', "(");
+      TOKEN2ID2(')', ")");
+      TOKEN2ID(keyword_class);
+      TOKEN2ID(keyword_module);
+      TOKEN2ID(keyword_def);
+      TOKEN2ID(keyword_undef);
+      TOKEN2ID(keyword_begin);
+      TOKEN2ID(keyword_rescue);
+      TOKEN2ID(keyword_ensure);
+      TOKEN2ID(keyword_end);
+      TOKEN2ID(keyword_if);
+      TOKEN2ID(keyword_unless);
+      TOKEN2ID(keyword_then);
+      TOKEN2ID(keyword_elsif);
+      TOKEN2ID(keyword_else);
+      TOKEN2ID(keyword_case);
+      TOKEN2ID(keyword_when);
+      TOKEN2ID(keyword_while);
+      TOKEN2ID(keyword_until);
+      TOKEN2ID(keyword_for);
+      TOKEN2ID(keyword_break);
+      TOKEN2ID(keyword_next);
+      TOKEN2ID(keyword_redo);
+      TOKEN2ID(keyword_retry);
+      TOKEN2ID(keyword_in);
+      TOKEN2ID(keyword_do);
+      TOKEN2ID(keyword_do_cond);
+      TOKEN2ID(keyword_do_block);
+      TOKEN2ID(keyword_do_LAMBDA);
+      TOKEN2ID(keyword_return);
+      TOKEN2ID(keyword_yield);
+      TOKEN2ID(keyword_super);
+      TOKEN2ID(keyword_self);
+      TOKEN2ID(keyword_nil);
+      TOKEN2ID(keyword_true);
+      TOKEN2ID(keyword_false);
+      TOKEN2ID(keyword_and);
+      TOKEN2ID(keyword_or);
+      TOKEN2ID(keyword_not);
+      TOKEN2ID(modifier_if);
+      TOKEN2ID(modifier_unless);
+      TOKEN2ID(modifier_while);
+      TOKEN2ID(modifier_until);
+      TOKEN2ID(modifier_rescue);
+      TOKEN2ID(keyword_alias);
+      TOKEN2ID(keyword_defined);
+      TOKEN2ID(keyword_BEGIN);
+      TOKEN2ID(keyword_END);
+      TOKEN2ID(keyword__LINE__);
+      TOKEN2ID(keyword__FILE__);
+      TOKEN2ID(keyword__ENCODING__);
+      TOKEN2ID(tIDENTIFIER);
+      TOKEN2ID(tFID);
+      TOKEN2ID(tGVAR);
+      TOKEN2ID(tIVAR);
+      TOKEN2ID(tCONSTANT);
+      TOKEN2ID(tCVAR);
+      TOKEN2ID(tLABEL);
+      TOKEN2ID(tINTEGER);
+      TOKEN2ID(tFLOAT);
+      TOKEN2ID(tRATIONAL);
+      TOKEN2ID(tIMAGINARY);
+      TOKEN2ID(tCHAR);
+      TOKEN2ID(tNTH_REF);
+      TOKEN2ID(tBACK_REF);
+      TOKEN2ID(tSTRING_CONTENT);
+      TOKEN2ID(tREGEXP_END);
+      TOKEN2ID(tDUMNY_END);
+      TOKEN2ID(tSP);
+      TOKEN2ID(tUPLUS);
+      TOKEN2ID(tUMINUS);
+      TOKEN2ID(tPOW);
+      TOKEN2ID(tCMP);
+      TOKEN2ID(tEQ);
+      TOKEN2ID(tEQQ);
+      TOKEN2ID(tNEQ);
+      TOKEN2ID(tGEQ);
+      TOKEN2ID(tLEQ);
+      TOKEN2ID(tANDOP);
+      TOKEN2ID(tOROP);
+      TOKEN2ID(tMATCH);
+      TOKEN2ID(tNMATCH);
+      TOKEN2ID(tDOT2);
+      TOKEN2ID(tDOT3);
+      TOKEN2ID(tBDOT2);
+      TOKEN2ID(tBDOT3);
+      TOKEN2ID(tAREF);
+      TOKEN2ID(tASET);
+      TOKEN2ID(tLSHFT);
+      TOKEN2ID(tRSHFT);
+      TOKEN2ID(tANDDOT);
+      TOKEN2ID(tCOLON2);
+      TOKEN2ID(tCOLON3);
+      TOKEN2ID(tOP_ASGN);
+      TOKEN2ID(tASSOC);
+      TOKEN2ID(tLPAREN);
+      TOKEN2ID(tLPAREN_ARG);
+      TOKEN2ID(tRPAREN);
+      TOKEN2ID(tLBRACK);
+      TOKEN2ID(tLBRACE);
+      TOKEN2ID(tLBRACE_ARG);
+      TOKEN2ID(tSTAR);
+      TOKEN2ID(tDSTAR);
+      TOKEN2ID(tAMPER);
+      TOKEN2ID(tLAMBDA);
+      TOKEN2ID(tSYMBEG);
+      TOKEN2ID(tSTRING_BEG);
+      TOKEN2ID(tXSTRING_BEG);
+      TOKEN2ID(tREGEXP_BEG);
+      TOKEN2ID(tWORDS_BEG);
+      TOKEN2ID(tQWORDS_BEG);
+      TOKEN2ID(tSYMBOLS_BEG);
+      TOKEN2ID(tQSYMBOLS_BEG);
+      TOKEN2ID(tSTRING_END);
+      TOKEN2ID(tSTRING_DEND);
+      TOKEN2ID(tSTRING_DBEG);
+      TOKEN2ID(tSTRING_DVAR);
+      TOKEN2ID(tLAMBEG);
+      TOKEN2ID(tLABEL_END);
+      TOKEN2ID(tIGNORED_NL);
+      TOKEN2ID(tCOMMENT);
+      TOKEN2ID(tEMBDOC_BEG);
+      TOKEN2ID(tEMBDOC);
+      TOKEN2ID(tEMBDOC_END);
+      TOKEN2ID(tHEREDOC_BEG);
+      TOKEN2ID(tHEREDOC_END);
+      TOKEN2ID(k__END__);
+      TOKEN2ID(tLOWEST);
+      TOKEN2ID(tUMINUS_NUM);
+      TOKEN2ID(tLAST_TOKEN);
+#undef TOKEN2ID
+#undef TOKEN2ID2
+    }
+
+    rb_bug("parser_token2id: unknown token %d", tok);
+
+    UNREACHABLE_RETURN(0);
+}
+
 #endif
 
 RBIMPL_ATTR_NONNULL((1, 2, 3))
@@ -457,6 +641,9 @@ static int parser_yyerror0(struct parser_params*, const char*);
 #define yyerror1(loc, msg) parser_yyerror(p, (loc), (msg))
 #define yyerror(yylloc, p, msg) parser_yyerror(p, yylloc, msg)
 #define token_flush(ptr) ((ptr)->lex.ptok = (ptr)->lex.pcur)
+#define lex_goto_eol(p) ((p)->lex.pcur = (p)->lex.pend)
+#define lex_eol_p(p) ((p)->lex.pcur >= (p)->lex.pend)
+#define lex_eol_n_p(p,n) ((p)->lex.pcur+(n) >= (p)->lex.pend)
 
 static void token_info_setup(token_info *ptinfo, const char *ptr, const rb_code_location_t *loc);
 static void token_info_push(struct parser_params*, const char *token, const rb_code_location_t *loc);
@@ -707,6 +894,9 @@ VALUE rb_parser_lex_state_name(enum lex_state_e state);
 void rb_parser_show_bitstack(struct parser_params *, stack_type, const char *, int);
 PRINTF_ARGS(void rb_parser_fatal(struct parser_params *p, const char *fmt, ...), 2, 3);
 YYLTYPE *rb_parser_set_location_from_strterm_heredoc(struct parser_params *p, rb_strterm_heredoc_t *here, YYLTYPE *yylloc);
+YYLTYPE *rb_parser_set_location_of_delayed_token(struct parser_params *p, YYLTYPE *yylloc);
+YYLTYPE *rb_parser_set_location_of_heredoc_end(struct parser_params *p, YYLTYPE *yylloc);
+YYLTYPE *rb_parser_set_location_of_dummy_end(struct parser_params *p, YYLTYPE *yylloc);
 YYLTYPE *rb_parser_set_location_of_none(struct parser_params *p, YYLTYPE *yylloc);
 YYLTYPE *rb_parser_set_location(struct parser_params *p, YYLTYPE *yylloc);
 RUBY_SYMBOL_EXPORT_END
@@ -1057,6 +1247,8 @@ endless_method_name(struct parser_params *p, NODE *defn, const YYLTYPE *loc)
     token_info_drop(p, "def", loc->beg_pos);
 }
 
+#define debug_token_line(p, name, line) if (p->debug) rb_parser_printf(p, name ":%d (%d: %ld|%ld|%ld)\n", line, p->ruby_sourceline, p->lex.ptok - p->lex.pbeg, p->lex.pcur - p->lex.ptok, p->lex.pend - p->lex.pcur)
+
 #ifndef RIPPER
 # define Qnone 0
 # define Qnull 0
@@ -1356,6 +1548,9 @@ static int looking_at_eol_p(struct parser_params *p);
 %token tSTRING_DEND	"'}'"
 %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
 
+%token tIGNORED_NL tCOMMENT tEMBDOC_BEG tEMBDOC tEMBDOC_END
+%token tHEREDOC_BEG tHEREDOC_END k__END__
+
 /*
  *	precedence table
  */
@@ -3447,7 +3642,7 @@ k_if		: keyword_if
 			token_info_push(p, "if", &@$);
 			if (p->token_info && p->token_info->nonspc &&
 			    p->token_info->next && !strcmp(p->token_info->next->token, "else")) {
-			    const char *tok = p->lex.ptok;
+			    const char *tok = p->lex.ptok - rb_strlen_lit("if");
 			    const char *beg = p->lex.pbeg + p->token_info->next->beg.column;
 			    beg += rb_strlen_lit("else");
 			    while (beg < tok && ISSPACE(*beg)) beg++;
@@ -5906,7 +6101,11 @@ trailer		: opt_nl
 		;
 
 term		: ';' {yyerrok;token_flush(p);}
-		| '\n' {token_flush(p);}
+		| '\n'
+		    {
+			@$.end_pos = @$.beg_pos;
+			token_flush(p);
+		    }
 		;
 
 terms		: term
@@ -5967,12 +6166,91 @@ ripper_yylval_id(struct parser_params *p, ID x)
 #endif
 
 #define set_yylval_noname() set_yylval_id(keyword_nil)
+#define has_delayed_token(p) (!NIL_P(p->delayed.token))
 
 #ifndef RIPPER
 #define literal_flush(p, ptr) ((p)->lex.ptok = (ptr))
-#define dispatch_scan_event(p, t) ((void)0)
-#define dispatch_delayed_token(p, t) ((void)0)
-#define has_delayed_token(p) (0)
+#define dispatch_scan_event(p, t) parser_dispatch_scan_event(p, t, __LINE__)
+
+static bool
+parser_has_token(struct parser_params *p)
+{
+    if (p->keep_tokens && (p->lex.pcur < p->lex.ptok)) rb_bug("lex.pcur < lex.ptok. (line: %d) %ld|%ld|%ld", p->ruby_sourceline, p->lex.ptok - p->lex.pbeg, p->lex.pcur - p->lex.ptok, p->lex.pend - p->lex.pcur);
+    return p->lex.pcur > p->lex.ptok;
+}
+
+static VALUE
+code_loc_to_ary(const rb_code_location_t *loc)
+{
+    VALUE ary = rb_ary_new_from_args(4,
+	INT2NUM(loc->beg_pos.lineno), INT2NUM(loc->beg_pos.column),
+	INT2NUM(loc->end_pos.lineno), INT2NUM(loc->end_pos.column));
+    rb_obj_freeze(ary);
+
+    return ary;
+}
+
+static void
+parser_append_tokens(struct parser_params *p, VALUE str, enum yytokentype t, int line)
+{
+    VALUE ary;
+    int token_id;
+
+    ary = rb_ary_new2(4);
+    token_id = p->token_id;
+    rb_ary_push(ary, INT2FIX(token_id));
+    rb_ary_push(ary, ID2SYM(parser_token2id(t)));
+    rb_ary_push(ary, str);
+    rb_ary_push(ary, code_loc_to_ary(p->yylloc));
+    rb_obj_freeze(ary);
+    rb_ary_push(p->tokens, ary);
+    p->token_id++;
+
+    if (p->debug) {
+	rb_parser_printf(p, "Append tokens (line: %d) %"PRIsVALUE"\n", line, ary);
+    }
+}
+
+static void
+parser_dispatch_scan_event(struct parser_params *p, enum yytokentype t, int line)
+{
+    debug_token_line(p, "parser_dispatch_scan_event", line);
+
+    if (!parser_has_token(p)) return;
+
+    RUBY_SET_YYLLOC(*p->yylloc);
+
+    if (p->keep_tokens) {
+	VALUE str = STR_NEW(p->lex.ptok, p->lex.pcur - p->lex.ptok);
+	parser_append_tokens(p, str, t, line);
+    }
+
+    token_flush(p);
+}
+
+#define dispatch_delayed_token(p, t) parser_dispatch_delayed_token(p, t, __LINE__)
+static void
+parser_dispatch_delayed_token(struct parser_params *p, enum yytokentype t, int line)
+{
+    int saved_line = p->ruby_sourceline;
+    const char *saved_tokp = p->lex.ptok;
+
+    debug_token_line(p, "parser_dispatch_delayed_token", line);
+
+    if (!has_delayed_token(p)) return;
+
+    RUBY_SET_YYLLOC_OF_DELAYED_TOKEN(*p->yylloc);
+
+    if (p->keep_tokens) {
+	p->ruby_sourceline = p->delayed.beg_line;
+	p->lex.ptok = p->lex.pbeg + p->delayed.beg_col;
+	parser_append_tokens(p, p->delayed.token, t, line);
+	p->ruby_sourceline = saved_line;
+	p->lex.ptok = saved_tokp;
+    }
+
+    p->delayed.token = Qnil;
+}
 #else
 #define literal_flush(p, ptr) ((void)(ptr))
 
@@ -5997,6 +6275,7 @@ ripper_scan_event_val(struct parser_params *p, enum yytokentype t)
 {
     VALUE str = STR_NEW(p->lex.ptok, p->lex.pcur - p->lex.ptok);
     VALUE rval = ripper_dispatch1(p, ripper_token2eventid(t), str);
+    RUBY_SET_YYLLOC(*p->yylloc);
     token_flush(p);
     return rval;
 }
@@ -6016,15 +6295,14 @@ ripper_dispatch_delayed_token(struct parser_params *p, enum yytokentype t)
     const char *saved_tokp = p->lex.ptok;
 
     if (NIL_P(p->delayed.token)) return;
-    p->ruby_sourceline = p->delayed.line;
-    p->lex.ptok = p->lex.pbeg + p->delayed.col;
+    p->ruby_sourceline = p->delayed.beg_line;
+    p->lex.ptok = p->lex.pbeg + p->delayed.beg_col;
     add_mark_object(p, yylval_rval = ripper_dispatch1(p, ripper_token2eventid(t), p->delayed.token));
     p->delayed.token = Qnil;
     p->ruby_sourceline = saved_line;
     p->lex.ptok = saved_tokp;
 }
 #define dispatch_delayed_token(p, t) ripper_dispatch_delayed_token(p, t)
-#define has_delayed_token(p) (!NIL_P(p->delayed.token))
 #endif /* RIPPER */
 
 static inline int
@@ -6495,7 +6773,6 @@ yycompile0(VALUE arg)
 
     p->lex.strterm = 0;
     p->lex.pcur = p->lex.pbeg = p->lex.pend = 0;
-    p->lex.prevline = p->lex.lastline = p->lex.nextline = 0;
     if (n || p->error_p) {
 	VALUE mesg = p->error_buffer;
 	if (!mesg) {
@@ -6512,6 +6789,7 @@ yycompile0(VALUE arg)
     }
     else {
 	VALUE opt = p->compile_option;
+	VALUE tokens = p->tokens;
 	NODE *prelude;
 	NODE *body = parser_append_options(p, tree->nd_body);
 	if (!opt) opt = rb_obj_hide(rb_ident_hash_new());
@@ -6519,6 +6797,10 @@ yycompile0(VALUE arg)
 	prelude = block_append(p, p->eval_tree_begin, body);
 	tree->nd_body = prelude;
         RB_OBJ_WRITE(p->ast, &p->ast->body.compile_option, opt);
+	if (p->keep_tokens) {
+	    rb_obj_freeze(tokens);
+	    rb_ast_set_tokens(p->ast, tokens);
+	}
     }
     p->ast->body.root = tree;
     if (!p->ast->body.script_lines) p->ast->body.script_lines = INT2FIX(p->line_count);
@@ -6709,32 +6991,31 @@ parser_str_new(const char *ptr, long len, rb_encoding *enc, int func, rb_encodin
     return str;
 }
 
-#define lex_goto_eol(p) ((p)->lex.pcur = (p)->lex.pend)
-#define lex_eol_p(p) ((p)->lex.pcur >= (p)->lex.pend)
-#define lex_eol_n_p(p,n) ((p)->lex.pcur+(n) >= (p)->lex.pend)
 #define peek(p,c) peek_n(p, (c), 0)
 #define peek_n(p,c,n) (!lex_eol_n_p(p, n) && (c) == (unsigned char)(p)->lex.pcur[n])
 #define peekc(p) peekc_n(p, 0)
 #define peekc_n(p,n) (lex_eol_n_p(p, n) ? -1 : (unsigned char)(p)->lex.pcur[n])
 
-#ifdef RIPPER
 static void
-add_delayed_token(struct parser_params *p, const char *tok, const char *end)
+add_delayed_token(struct parser_params *p, const char *tok, const char *end, int line)
 {
+#ifndef RIPPER
+    debug_token_line(p, "add_delayed_token", line);
+#endif
+
     if (tok < end) {
 	if (!has_delayed_token(p)) {
 	    p->delayed.token = rb_str_buf_new(end - tok);
 	    rb_enc_associate(p->delayed.token, p->enc);
-	    p->delayed.line = p->ruby_sourceline;
-	    p->delayed.col = rb_long2int(tok - p->lex.pbeg);
+	    p->delayed.beg_line = p->ruby_sourceline;
+	    p->delayed.beg_col = rb_long2int(tok - p->lex.pbeg);
 	}
 	rb_str_buf_cat(p->delayed.token, tok, end - tok);
+	p->delayed.end_line = p->ruby_sourceline;
+	p->delayed.end_col = rb_long2int(end - p->lex.pbeg);
 	p->lex.ptok = end;
     }
 }
-#else
-#define add_delayed_token(p, tok, end) ((void)(tok), (void)(end))
-#endif
 
 static int
 nextline(struct parser_params *p, int set_encoding)
@@ -6767,7 +7048,7 @@ nextline(struct parser_params *p, int set_encoding)
 	/* after here-document without terminator */
 	goto end_of_input;
     }
-    add_delayed_token(p, p->lex.ptok, p->lex.pend);
+    add_delayed_token(p, p->lex.ptok, p->lex.pend, __LINE__);
     if (p->heredoc_end > 0) {
 	p->ruby_sourceline = p->heredoc_end;
 	p->heredoc_end = 0;
@@ -6776,7 +7057,6 @@ nextline(struct parser_params *p, int set_encoding)
     p->lex.pbeg = p->lex.pcur = RSTRING_PTR(v);
     p->lex.pend = p->lex.pcur + RSTRING_LEN(v);
     token_flush(p);
-    p->lex.prevline = p->lex.lastline;
     p->lex.lastline = v;
     return 0;
 }
@@ -6929,20 +7209,22 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
 {
     size_t numlen;
     int codepoint = scan_hex(p->lex.pcur, wide ? p->lex.pend - p->lex.pcur : 4, &numlen);
-    literal_flush(p, p->lex.pcur);
     p->lex.pcur += numlen;
     if (p->lex.strterm == NULL ||
         (p->lex.strterm->flags & STRTERM_HEREDOC) ||
         (p->lex.strterm->u.literal.u1.func != str_regexp)) {
         if (wide ? (numlen == 0 || numlen > 6) : (numlen < 4))  {
+            literal_flush(p, p->lex.pcur);
             yyerror0("invalid Unicode escape");
             return wide && numlen > 0;
         }
         if (codepoint > 0x10ffff) {
+            literal_flush(p, p->lex.pcur);
             yyerror0("invalid Unicode codepoint (too large)");
             return wide;
         }
         if ((codepoint & 0xfffff800) == 0xd800) {
+            literal_flush(p, p->lex.pcur);
             yyerror0("invalid Unicode codepoint");
             return wide;
         }
@@ -7363,7 +7645,6 @@ tokadd_string(struct parser_params *p,
 	    }
 	}
 	else if (c == '\\') {
-	    literal_flush(p, p->lex.pcur - 1);
 	    c = nextc(p);
 	    switch (c) {
 	      case '\n':
@@ -7511,7 +7792,21 @@ flush_string_content(struct parser_params *p, rb_encoding *enc)
     yylval.val = content;
 }
 #else
-#define flush_string_content(p, enc) ((void)(enc))
+static void
+flush_string_content(struct parser_params *p, rb_encoding *enc)
+{
+    if (has_delayed_token(p)) {
+	ptrdiff_t len = p->lex.pcur - p->lex.ptok;
+	if (len > 0) {
+	    rb_enc_str_buf_cat(p->delayed.token, p->lex.ptok, len, enc);
+	    p->delayed.end_line = p->ruby_sourceline;
+	    p->delayed.end_col = rb_long2int(p->lex.pcur - p->lex.pbeg);
+	}
+	dispatch_delayed_token(p, tSTRING_CONTENT);
+	p->lex.ptok = p->lex.pcur;
+    }
+    dispatch_scan_event(p, tSTRING_CONTENT);
+}
 #endif
 
 RUBY_FUNC_EXPORTED const unsigned int ruby_global_name_punct_bits[(0x7e - 0x20 + 31) / 32];
@@ -7630,14 +7925,14 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
 	if (func & STR_FUNC_QWORDS) {
 	    quote->u1.func |= STR_FUNC_TERM;
 	    pushback(p, c); /* dispatch the term at tSTRING_END */
-	    add_delayed_token(p, p->lex.ptok, p->lex.pcur);
+	    add_delayed_token(p, p->lex.ptok, p->lex.pcur, __LINE__);
 	    return ' ';
 	}
 	return parser_string_term(p, func);
     }
     if (space) {
 	pushback(p, c);
-	add_delayed_token(p, p->lex.ptok, p->lex.pcur);
+	add_delayed_token(p, p->lex.ptok, p->lex.pcur, __LINE__);
 	return ' ';
     }
     newtok(p);
@@ -7997,12 +8292,29 @@ dispatch_heredoc_end(struct parser_params *p)
 	dispatch_delayed_token(p, tSTRING_CONTENT);
     str = STR_NEW(p->lex.ptok, p->lex.pend - p->lex.ptok);
     ripper_dispatch1(p, ripper_token2eventid(tHEREDOC_END), str);
+    RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(*p->yylloc);
     lex_goto_eol(p);
     token_flush(p);
 }
 
 #else
-#define dispatch_heredoc_end(p) ((void)0)
+#define dispatch_heredoc_end(p) parser_dispatch_heredoc_end(p, __LINE__)
+static void
+parser_dispatch_heredoc_end(struct parser_params *p, int line)
+{
+    if (has_delayed_token(p))
+	dispatch_delayed_token(p, tSTRING_CONTENT);
+
+    if (p->keep_tokens) {
+	VALUE str = STR_NEW(p->lex.ptok, p->lex.pend - p->lex.ptok);
+	RUBY_SET_YYLLOC_OF_HEREDOC_END(*p->yylloc);
+	parser_append_tokens(p, str, tHEREDOC_END, line);
+    }
+
+    RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(*p->yylloc);
+    lex_goto_eol(p);
+    token_flush(p);
+}
 #endif
 
 static enum yytokentype
@@ -9430,6 +9742,16 @@ parse_ident(struct parser_params *p, int c, int cmd_state)
     return result;
 }
 
+static void
+warn_cr(struct parser_params *p)
+{
+    if (!p->cr_seen) {
+	p->cr_seen = TRUE;
+	/* carried over with p->lex.nextline for nextc() */
+	rb_warn0("encountered \\r in middle of line, treated as a mere space");
+    }
+}
+
 static enum yytokentype
 parser_yylex(struct parser_params *p)
 {
@@ -9443,6 +9765,7 @@ parser_yylex(struct parser_params *p)
 
     if (p->lex.strterm) {
 	if (p->lex.strterm->flags & STRTERM_HEREDOC) {
+	    token_flush(p);
 	    return here_document(p, &p->lex.strterm->u.heredoc);
 	}
 	else {
@@ -9453,11 +9776,11 @@ parser_yylex(struct parser_params *p)
     cmd_state = p->command_start;
     p->command_start = FALSE;
     p->token_seen = TRUE;
-  retry:
-    last_state = p->lex.state;
 #ifndef RIPPER
     token_flush(p);
 #endif
+  retry:
+    last_state = p->lex.state;
     switch (c = nextc(p)) {
       case '\0':		/* NUL */
       case '\004':		/* ^D */
@@ -9467,26 +9790,27 @@ parser_yylex(struct parser_params *p)
 #ifndef RIPPER
 	if (!NIL_P(p->end_expect_token_locations) && RARRAY_LEN(p->end_expect_token_locations) > 0) {
 	    pop_end_expect_token_locations(p);
+	    RUBY_SET_YYLLOC_OF_DUMMY_END(*p->yylloc);
 	    return tDUMNY_END;
 	}
 #endif
+	/* Set location for end-of-input because dispatch_scan_event is not called. */
+	RUBY_SET_YYLLOC(*p->yylloc);
 	return 0;
 
 	/* white spaces */
       case '\r':
-	if (!p->cr_seen) {
-	    p->cr_seen = TRUE;
-	    /* carried over with p->lex.nextline for nextc() */
-	    rb_warn0("encountered \\r in middle of line, treated as a mere space");
-	}
+	warn_cr(p);
 	/* fall through */
       case ' ': case '\t': case '\f':
       case '\13': /* '\v' */
 	space_seen = 1;
-#ifdef RIPPER
 	while ((c = nextc(p))) {
 	    switch (c) {
-	      case ' ': case '\t': case '\f': case '\r':
+	      case '\r':
+		warn_cr(p);
+		/* fall through */
+	      case ' ': case '\t': case '\f':
 	      case '\13': /* '\v' */
 		break;
 	      default:
@@ -9496,6 +9820,8 @@ parser_yylex(struct parser_params *p)
       outofloop:
 	pushback(p, c);
 	dispatch_scan_event(p, tSP);
+#ifndef RIPPER
+	token_flush(p);
 #endif
 	goto retry;
 
@@ -9533,7 +9859,10 @@ parser_yylex(struct parser_params *p)
 		break;
 	      case '#':
 		pushback(p, c);
-		if (space_seen) dispatch_scan_event(p, tSP);
+		if (space_seen) {
+		    dispatch_scan_event(p, tSP);
+		    token_flush(p);
+		}
 		goto retry;
 	      case '&':
 	      case '.': {
@@ -9548,18 +9877,10 @@ parser_yylex(struct parser_params *p)
 		p->ruby_sourceline--;
 		p->lex.nextline = p->lex.lastline;
 	      case -1:		/* EOF no decrement*/
-#ifndef RIPPER
-		if (p->lex.prevline && !p->eofp) p->lex.lastline = p->lex.prevline;
-		p->lex.pbeg = RSTRING_PTR(p->lex.lastline);
-		p->lex.pend = p->lex.pcur = p->lex.pbeg + RSTRING_LEN(p->lex.lastline);
-		pushback(p, 1); /* always pushback */
-		p->lex.ptok = p->lex.pcur;
-#else
 		lex_goto_eol(p);
 		if (c != -1) {
 		    p->lex.ptok = p->lex.pcur;
 		}
-#endif
 		goto normal_newline;
 	    }
 	}
@@ -10157,12 +10478,9 @@ yylex(YYSTYPE *lval, YYLTYPE *yylloc, struct parser_params *p)
 
     p->lval = lval;
     lval->val = Qundef;
-    t = parser_yylex(p);
+    p->yylloc = yylloc;
 
-    if (p->lex.strterm && (p->lex.strterm->flags & STRTERM_HEREDOC))
-	RUBY_SET_YYLLOC_FROM_STRTERM_HEREDOC(*yylloc);
-    else
-	RUBY_SET_YYLLOC(*yylloc);
+    t = parser_yylex(p);
 
     if (has_delayed_token(p))
 	dispatch_delayed_token(p, t);
@@ -11054,6 +11372,34 @@ rb_parser_set_location_from_strterm_heredoc(struct parser_params *p, rb_strterm_
 }
 
 YYLTYPE *
+rb_parser_set_location_of_delayed_token(struct parser_params *p, YYLTYPE *yylloc)
+{
+    yylloc->beg_pos.lineno = p->delayed.beg_line;
+    yylloc->beg_pos.column = p->delayed.beg_col;
+    yylloc->end_pos.lineno = p->delayed.end_line;
+    yylloc->end_pos.column = p->delayed.end_col;
+
+    return yylloc;
+}
+
+YYLTYPE *
+rb_parser_set_location_of_heredoc_end(struct parser_params *p, YYLTYPE *yylloc)
+{
+    int sourceline = p->ruby_sourceline;
+    int beg_pos = (int)(p->lex.ptok - p->lex.pbeg);
+    int end_pos = (int)(p->lex.pend - p->lex.pbeg);
+    return rb_parser_set_pos(yylloc, sourceline, beg_pos, end_pos);
+}
+
+YYLTYPE *
+rb_parser_set_location_of_dummy_end(struct parser_params *p, YYLTYPE *yylloc)
+{
+    yylloc->end_pos = yylloc->beg_pos;
+
+    return yylloc;
+}
+
+YYLTYPE *
 rb_parser_set_location_of_none(struct parser_params *p, YYLTYPE *yylloc)
 {
     int sourceline = p->ruby_sourceline;
@@ -13329,13 +13675,15 @@ parser_initialize(struct parser_params *p)
     p->ruby_sourcefile_string = Qnil;
     p->lex.lpar_beg = -1; /* make lambda_beginning_p() == FALSE at first */
     p->node_id = 0;
-#ifdef RIPPER
     p->delayed.token = Qnil;
+#ifdef RIPPER
     p->result = Qnil;
     p->parsing_thread = Qnil;
 #else
     p->error_buffer = Qfalse;
     p->end_expect_token_locations = Qnil;
+    p->token_id = 0;
+    p->tokens = Qnil;
 #endif
     p->debug_buffer = Qnil;
     p->debug_output = rb_ractor_stdout();
@@ -13353,20 +13701,20 @@ parser_mark(void *ptr)
     struct parser_params *p = (struct parser_params*)ptr;
 
     rb_gc_mark(p->lex.input);
-    rb_gc_mark(p->lex.prevline);
     rb_gc_mark(p->lex.lastline);
     rb_gc_mark(p->lex.nextline);
     rb_gc_mark(p->ruby_sourcefile_string);
     rb_gc_mark((VALUE)p->lex.strterm);
     rb_gc_mark((VALUE)p->ast);
     rb_gc_mark(p->case_labels);
+    rb_gc_mark(p->delayed.token);
 #ifndef RIPPER
     rb_gc_mark(p->debug_lines);
     rb_gc_mark(p->compile_option);
     rb_gc_mark(p->error_buffer);
     rb_gc_mark(p->end_expect_token_locations);
+    rb_gc_mark(p->tokens);
 #else
-    rb_gc_mark(p->delayed.token);
     rb_gc_mark(p->value);
     rb_gc_mark(p->result);
     rb_gc_mark(p->parsing_thread);
@@ -13480,6 +13828,16 @@ rb_parser_error_tolerant(VALUE vparser)
     p->end_expect_token_locations = rb_ary_new();
 }
 
+void
+rb_parser_keep_tokens(VALUE vparser)
+{
+    struct parser_params *p;
+
+    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p);
+    p->keep_tokens = 1;
+    p->tokens = rb_ary_new();
+}
+
 #endif
 
 #ifdef RIPPER
diff --git a/test/ruby/test_ast.rb b/test/ruby/test_ast.rb
index acf6722bb7..c3de36b4fb 100644
--- a/test/ruby/test_ast.rb
+++ b/test/ruby/test_ast.rb
@@ -132,6 +132,34 @@ class TestAst < Test::Unit::TestCase
     end
   end
 
+  Dir.glob("test/**/*.rb", base: SRCDIR).each do |path|
+    define_method("test_all_tokens:#{path}") do
+      node = RubyVM::AbstractSyntaxTree.parse_file("#{SRCDIR}/#{path}", keep_tokens: true)
+      tokens = node.all_tokens.sort_by { [_1.last[0], _1.last[1]] }
+      tokens_bytes = tokens.map { _1[2]}.join.bytes
+      source_bytes = File.read("#{SRCDIR}/#{path}").bytes
+
+      assert_equal(source_bytes, tokens_bytes)
+
+      (tokens.count - 1).times do |i|
+        token_0 = tokens[i]
+        token_1 = tokens[i + 1]
+        end_pos = token_0.last[2..3]
+        beg_pos = token_1.last[0..1]
+
+        if end_pos[0] == beg_pos[0]
+          # When both tokens are same line, column should be consecutives
+          assert_equal(beg_pos[1], end_pos[1], "#{token_0}. #{token_1}")
+        else
+          # Line should be next
+          assert_equal(beg_pos[0], end_pos[0] + 1, "#{token_0}. #{token_1}")
+          # It should be on the beginning of the line
+          assert_equal(0, beg_pos[1], "#{token_0}. #{token_1}")
+        end
+      end
+    end
+  end
+
   private def parse(src)
     EnvUtil.suppress_warning {
       RubyVM::AbstractSyntaxTree.parse(src)
@@ -705,11 +733,11 @@ dummy
         a = 1
       else
     STR
-      (SCOPE@1:0-3:5
+      (SCOPE@1:0-3:4
        tbl: [:a]
        args: nil
        body:
-         (IF@1:0-3:5 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1))
+         (IF@1:0-3:4 (VCALL@1:3-1:7 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1))
             (BEGIN@3:4-3:4 nil)))
     EXP
   end
@@ -732,11 +760,11 @@ dummy
         a = 1
       else
     STR
-      (SCOPE@1:0-3:5
+      (SCOPE@1:0-3:4
        tbl: [:a]
        args: nil
        body:
-         (UNLESS@1:0-3:5 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1))
+         (UNLESS@1:0-3:4 (VCALL@1:7-1:11 :cond) (LASGN@2:2-2:7 :a (LIT@2:6-2:7 1))
             (BEGIN@3:4-3:4 nil)))
     EXP
   end