8 files changed, 411 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 87ac93ed51..6a77703461 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Mon Dec  7 23:39:49 2015  Ben Miller  <bjmllr@gmail.com>
+
+	* parse.y: add heredoc <<~ syntax.  [Feature #9098]
+
 Mon Dec  7 23:06:16 2015  Kazuhiro NISHIYAMA  <zn@mbf.nifty.com>
 
 	* prelude.rb (IO#read_nonblock): [DOC] add missing options to
diff --git a/doc/syntax/literals.rdoc b/doc/syntax/literals.rdoc
index cbd18e9d4f..9631575320 100644
--- a/doc/syntax/literals.rdoc
+++ b/doc/syntax/literals.rdoc
@@ -196,6 +196,20 @@ Note that the while the closing identifier may be indented, the content is
 always treated as if it is flush left.  If you indent the content those spaces
 will appear in the output.
 
+To have indented content as well as an indented closing identifier, you can use
+a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
+
+    expected_result = <<~SQUIGGLY_HEREDOC
+      This would contain specially formatted text.
+
+      That might span many lines
+    SQUIGGLY_HEREDOC
+
+The indentation of the least-indented line will be removed from each line of
+the content.  Note that empty lines and lines consisting solely of literal tabs
+and spaces will be ignored for the purposes of determining indentation, but
+escaped tabs and spaces are considered non-indentation characters.
+
 A heredoc allows interpolation and escaped characters.  You may disable
 interpolation and escaping by surrounding the opening identifier with single
 quotes:
diff --git a/ext/ripper/lib/ripper/lexer.rb b/ext/ripper/lib/ripper/lexer.rb
index 586d0807a1..e0460e8b02 100644
--- a/ext/ripper/lib/ripper/lexer.rb
+++ b/ext/ripper/lib/ripper/lexer.rb
@@ -44,28 +44,56 @@ class Ripper
   end
 
   class Lexer < ::Ripper   #:nodoc: internal use only
+    Elem = Struct.new(:pos, :event, :tok)
+
     def tokenize
-      lex().map {|pos, event, tok| tok }
+      parse().sort_by(&:pos).map(&:tok)
     end
 
     def lex
-      parse().sort_by {|pos, event, tok| pos }
+      parse().sort_by(&:pos).map(&:to_a)
     end
 
     def parse
       @buf = []
+      @stack = []
       super
+      @buf.flatten!
       @buf
     end
 
     private
 
+    def on_heredoc_dedent(v, w) # dedent the body tokens of the heredoc just closed by up to +w+ columns; returns +v+ unchanged
+      @buf.last.each do |e| # on_heredoc_end has restored the outer buffer; its newest entry is expected to be this heredoc's token list
+        if e.event == :on_tstring_content
+          if (n = dedent_string(e.tok, w)) > 0
+            e.pos[1] += n # keep the recorded column in step with the prefix dedent_string removed
+          end
+        end
+      end
+      v
+    end
+
+    def on_heredoc_beg(tok)
+      @stack.push @buf
+      buf = []
+      @buf << buf
+      @buf = buf
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+    end
+
+    def on_heredoc_end(tok)
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
+      @buf = @stack.pop
+    end
+
     def _push_token(tok)
-      @buf.push [[lineno(), column()], __callee__, tok]
+      @buf.push Elem.new([lineno(), column()], __callee__, tok)
     end
 
-    SCANNER_EVENTS.each do |event|
-      alias_method "on_#{event}", :_push_token
+    (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
+      alias_method event, :_push_token
     end
   end
 
diff --git a/ext/ripper/lib/ripper/sexp.rb b/ext/ripper/lib/ripper/sexp.rb
index 55b942a1c0..d9f445cfed 100644
--- a/ext/ripper/lib/ripper/sexp.rb
+++ b/ext/ripper/lib/ripper/sexp.rb
@@ -62,7 +62,35 @@ class Ripper
   class SexpBuilder < ::Ripper   #:nodoc:
     private
 
-    PARSER_EVENTS.each do |event|
+    def dedent_element(e, width)
+      if (n = dedent_string(e[1], width)) > 0
+        e[2][1] += n
+      end
+      e
+    end
+
+    def on_heredoc_dedent(val, width)
+      sub = proc do |cont|
+        cont.map! do |e|
+          if Array === e
+            case e[0]
+            when :@tstring_content
+              e = dedent_element(e, width)
+            when /_add\z/
+              e[1] = sub[e[1]]
+            end
+          elsif String === e
+            dedent_string(e, width)
+          end
+          e
+        end
+      end
+      sub[val]
+      val
+    end
+
+    events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
+    (PARSER_EVENTS - events).each do |event|
       module_eval(<<-End, __FILE__, __LINE__ + 1)
         def on_#{event}(*args)
           args.unshift :#{event}
@@ -83,6 +111,19 @@ class Ripper
   class SexpBuilderPP < SexpBuilder #:nodoc:
     private
 
+    def on_heredoc_dedent(val, width)
+      val.map! do |e|
+        next e if Symbol === e and /_content\z/ =~ e
+        if Array === e and e[0] == :@tstring_content
+          e = dedent_element(e, width)
+        elsif String === e
+          dedent_string(e, width)
+        end
+        e
+      end
+      val
+    end
+
     def _dispatch_event_new
       []
     end
diff --git a/parse.y b/parse.y
index bc6de5a3ec..616418327a 100644
--- a/parse.y
+++ b/parse.y
@@ -257,6 +257,8 @@ struct parser_params {
     int toksiz;
     int tokline;
     int heredoc_end;
+    int heredoc_indent;
+    int heredoc_line_indent;
     char *tokenbuf;
     NODE *deferred_nodes;
     struct local_vars *lvtbl;
@@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_params*, const char*);
 #define lex_p			(parser->lex.pcur)
 #define lex_pend		(parser->lex.pend)
 #define heredoc_end		(parser->heredoc_end)
+#define heredoc_indent		(parser->heredoc_indent)
+#define heredoc_line_indent	(parser->heredoc_line_indent)
 #define command_start		(parser->command_start)
 #define deferred_nodes		(parser->deferred_nodes)
 #define lex_gets_ptr		(parser->lex.gets_ptr)
@@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int);
 static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
 #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
 
+static void parser_heredoc_dedent(struct parser_params*,NODE*);
+# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
+
 #define get_id(id) (id)
 #define get_value(val) (val)
 #else
@@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b)
 
 #define new_defined(expr) dispatch1(defined, (expr))
 
+static void parser_heredoc_dedent(struct parser_params*,VALUE);
+# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
+
 #define FIXME 0
 
 #endif /* RIPPER */
@@ -3887,6 +3897,7 @@ strings		: string
 			else {
 			    node = evstr2dstr(node);
 			}
+			heredoc_indent = 0;
 			$$ = node;
 		    /*%
 			$$ = $1;
@@ -3908,6 +3919,7 @@ string		: tCHAR
 
 string1		: tSTRING_BEG string_contents tSTRING_END
 		    {
+			heredoc_dedent($2);
 		    /*%%%*/
 			$$ = $2;
 		    /*%
@@ -3920,6 +3932,10 @@ xstring		: tXSTRING_BEG xstring_contents tSTRING_END
 		    {
 		    /*%%%*/
 			NODE *node = $2;
+		    /*%
+		    %*/
+			heredoc_dedent($2);
+		    /*%%%*/
 			if (!node) {
 			    node = NEW_XSTR(STR_NEW0());
 			}
@@ -4319,6 +4335,10 @@ string_content	: tSTRING_CONTENT
 			$<num>$ = brace_nest;
 			brace_nest = 0;
 		    }
+		    {
+			$<num>$ = heredoc_indent;
+			heredoc_indent = 0;
+		    }
 		  compstmt tSTRING_DEND
 		    {
 			cond_stack = $<val>1;
@@ -4326,11 +4346,13 @@ string_content	: tSTRING_CONTENT
 			lex_strterm = $<node>3;
 			lex_state = $<num>4;
 			brace_nest = $<num>5;
+			heredoc_indent = $<num>6;
+			heredoc_line_indent = -1;
 		    /*%%%*/
-			if ($6) $6->flags &= ~NODE_FL_NEWLINE;
-			$$ = new_evstr($6);
+			if ($7) $7->flags &= ~NODE_FL_NEWLINE;
+			$$ = new_evstr($7);
 		    /*%
-			$$ = dispatch1(string_embexpr, $6);
+			$$ = dispatch1(string_embexpr, $7);
 		    %*/
 		    }
 		;
@@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_params *parser,
     } while (0)
 
     while ((c = nextc()) != -1) {
+	if (heredoc_indent > 0) {
+	    if (heredoc_line_indent == -1) {
+		if (c == '\n') heredoc_line_indent = 0;
+	    }
+	    else {
+		if (c == ' ') {
+		    heredoc_line_indent++;
+		}
+		else if (c == '\t') {
+		    int w = (heredoc_line_indent / TAB_WIDTH) + 1;
+		    heredoc_line_indent = w * TAB_WIDTH;
+		}
+		else if (c != '\n') {
+		    if (heredoc_indent > heredoc_line_indent) {
+			heredoc_indent = heredoc_line_indent;
+		    }
+		    heredoc_line_indent = -1;
+		}
+	    }
+	}
+
 	if (paren && c == paren) {
 	    ++*nest;
 	}
@@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_params *parser)
 	c = nextc();
 	func = STR_FUNC_INDENT;
     }
+    else if (c == '~') {
+	c = nextc();
+	func = STR_FUNC_INDENT;
+	heredoc_indent = INT_MAX;
+	heredoc_line_indent = 0;
+    }
     switch (c) {
       case '\'':
 	func |= str_squote; goto quoted;
@@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_params *parser)
 	if (!parser_is_identchar()) {
 	    pushback(c);
 	    if (func & STR_FUNC_INDENT) {
-		pushback('-');
+		pushback(heredoc_indent > 0 ? '~' : '-');
 	    }
 	    return 0;
 	}
@@ -6535,6 +6584,114 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here)
 }
 
 static int
+dedent_pos(const char *str, long len, int width)
+{
+    int i, col = 0;
+
+    for (i = 0; i < len && col < width; i++) {
+	if (str[i] == ' ') {
+	    col++;
+	}
+	else if (str[i] == '\t') {
+	    int n = TAB_WIDTH * (col / TAB_WIDTH + 1);
+	    if (n > width) break;
+	    col = n;
+	}
+	else {
+	    break;
+	}
+    }
+    return i;
+}
+
+#ifndef RIPPER
+static VALUE
+parser_heredoc_dedent_string(VALUE input, int width, int first)
+{
+    long len;
+    int col;
+    char *str, *p, *out_p, *end, *t;
+
+    RSTRING_GETMEM(input, str, len);
+    end = &str[len];
+
+    p = str;
+    if (!first) {
+	p = memchr(p, '\n', end - p);
+	if (!p) return input;
+	p++;
+    }
+    out_p = p;
+    while (p < end) {
+	col = dedent_pos(p, end - p, width);
+	p += col;
+	if (!(t = memchr(p, '\n', end - p)))
+	    t = end;
+	else
+	    ++t;
+	if (p > out_p) memmove(out_p, p, t - p);
+	out_p += t - p;
+	p = t;
+    }
+    rb_str_set_len(input, out_p - str);
+
+    return input;
+}
+
+static void
+parser_heredoc_dedent(struct parser_params *parser, NODE *root)
+{
+    NODE *node, *str_node;
+    int first = TRUE;
+    int indent = heredoc_indent;
+
+    if (indent <= 0) return;
+
+    node = str_node = root;
+
+    while (str_node) {
+	VALUE lit = str_node->nd_lit;
+	if (NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
+	    compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit);
+	first = FALSE;
+
+	str_node = 0;
+	while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
+	    if ((str_node = node->nd_head) != 0) {
+		enum node_type type = nd_type(str_node);
+		if (type == NODE_STR || type == NODE_DSTR) break;
+	    }
+	}
+    }
+}
+#else /* RIPPER */
+static void
+parser_heredoc_dedent(struct parser_params *parser, VALUE array)
+{
+    if (heredoc_indent <= 0) return;
+
+    dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent));
+}
+
+static VALUE
+parser_dedent_string(VALUE self, VALUE input, VALUE width)
+{
+    char *str;
+    long len;
+    int wid, col;
+
+    StringValue(input);
+    wid = NUM2UINT(width);
+    rb_str_modify(input);
+    RSTRING_GETMEM(input, str, len);
+    col = dedent_pos(str, len, wid);
+    MEMMOVE(str, str + col, char, len - col);
+    rb_str_set_len(input, len - col);
+    return INT2NUM(col);
+}
+#endif
+
+static int
 parser_whole_match_p(struct parser_params *parser,
     const char *eos, long len, int indent)
 {
@@ -6685,7 +6842,15 @@ parser_here_document(struct parser_params *parser, NODE *here)
     }
 
     if (!(func & STR_FUNC_EXPAND)) {
+	int end = 0;
 	do {
+#ifdef RIPPER
+	    if (end && heredoc_indent > 0) {
+		set_yylval_str(str);
+		flush_string_content(enc);
+		return tSTRING_CONTENT;
+	    }
+#endif
 	    p = RSTRING_PTR(lex_lastline);
 	    pend = lex_pend;
 	    if (pend > p) {
@@ -6712,7 +6877,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
 		}
 		goto error;
 	    }
-	} while (!whole_match_p(eos, len, indent));
+	} while (!(end = whole_match_p(eos, len, indent)));
     }
     else {
 	/*	int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
@@ -6730,11 +6895,20 @@ parser_here_document(struct parser_params *parser, NODE *here)
 		goto restore;
 	    }
 	    if (c != '\n') {
+#ifdef RIPPER
+	      flush:
+#endif
 		set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));
 		flush_string_content(enc);
 		return tSTRING_CONTENT;
 	    }
 	    tokadd(nextc());
+#ifdef RIPPER
+	    if (c == '\n' && heredoc_indent > 0) {
+		lex_goto_eol(parser);
+		goto flush;
+	    }
+#endif
 	    /*	    if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/
 	    if ((c = nextc()) == -1) goto error;
 	} while (!whole_match_p(eos, len, indent));
@@ -11294,6 +11468,9 @@ InitVM_ripper(void)
     rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1);
 #endif
 
+    rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
+    rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
+
     ripper_init_eventids1_table(Ripper);
     ripper_init_eventids2_table(Ripper);
 
diff --git a/test/ripper/test_parser_events.rb b/test/ripper/test_parser_events.rb
index 55485b68d0..540d36e4d9 100644
--- a/test/ripper/test_parser_events.rb
+++ b/test/ripper/test_parser_events.rb
@@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::Unit::TestCase
     assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921)
   end
 
+  def test_heredoc_dedent
+    thru_heredoc_dedent = false
+    str = width = nil
+    tree = parse("<""<~EOS\n heredoc\nEOS\n", :on_heredoc_dedent) {|e, s, w|
+      thru_heredoc_dedent = true
+      str = s
+      width = w
+    }
+    assert_equal true, thru_heredoc_dedent
+    assert_match(/string_content\(\), heredoc\n/, tree)
+    assert_equal(1, width)
+  end
+
   def test_massign
     thru_massign = false
     parse("a, b = 1, 2", :on_massign) {thru_massign = true}
diff --git a/test/ripper/test_sexp.rb b/test/ripper/test_sexp.rb
index 8fc17fdd4a..557ae9b423 100644
--- a/test/ripper/test_sexp.rb
+++ b/test/ripper/test_sexp.rb
@@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::TestCase
     assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1]
   end
 
+  def test_squiggly_heredoc
+    sexp = Ripper.sexp("<<~eot\n      asdf\neot")
+    assert_equal "asdf\n", search_sexp(:@tstring_content, sexp)[1]
+  end
+
+  def test_squiggly_heredoc_with_interpolated_expression
+    sexp1 = Ripper.sexp(<<-eos)
+<<-eot
+a\#{1}z
+eot
+    eos
+
+    sexp2 = Ripper.sexp(<<-eos)
+<<~eot
+  a\#{1}z
+eot
+    eos
+
+    assert_equal clear_pos(sexp1), clear_pos(sexp2)
+  end
+
   def search_sexp(sym, sexp)
     return sexp if !sexp or sexp[0] == sym
     sexp.find do |e|
@@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::TestCase
       end
     end
   end
+
+  def clear_pos(sexp)
+    return sexp if !sexp
+    sexp.each do |e|
+      if Array === e
+        if e.size == 3 and Array === (last = e.last) and
+          last.size == 2 and Integer === last[0] and Integer === last[1]
+          last.clear
+        else
+          clear_pos(e)
+        end
+      end
+    end
+  end
 end if ripper_test
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index e2cd389a07..82af7817e7 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -475,6 +475,94 @@ e"
     assert_equal(expected, actual, "#{Bug7559}: ")
   end
 
+  def test_dedented_heredoc_without_indentation
+    assert_equal(" y\nz\n", <<~eos)
+ y
+z
+    eos
+  end
+
+  def test_dedented_heredoc_with_indentation
+    assert_equal(" a\nb\n", <<~eos)
+     a
+    b
+    eos
+  end
+
+  def test_dedented_heredoc_with_blank_less_indented_line
+    # the blank line has two leading spaces
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "  \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("a\n\nb\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_less_indented_line_escaped
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "\\ \\ \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("    a\n  \n    b\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_more_indented_line
+    # the blank line has six leading spaces
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "      \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("a\n  \nb\n", result)
+  end
+
+  def test_dedented_heredoc_with_blank_more_indented_line_escaped
+    result = eval("<<~eos\n" \
+                  "    a\n" \
+                  "\\ \\ \\ \\ \\ \\ \n" \
+                  "    b\n" \
+                  "    eos\n")
+    assert_equal("    a\n      \n    b\n", result)
+  end
+
+  def test_dedented_heredoc_with_empty_line # an empty line must not be taken as the least-indented line
+    result = eval("<<~eos\n" \
+                  "      This would contain specially formatted text.\n" \
+                  "\n" \
+                  "      That might span many lines\n" \
+                  "    eos\n")
+    assert_equal(<<-eos, result)
+This would contain specially formatted text.
+
+That might span many lines
+    eos
+  end
+
+  def test_dedented_heredoc_with_interpolated_expression # interpolation is escaped so it is evaluated inside the squiggly heredoc, not while building the eval source
+    result = eval(" <<~eos\n" \
+                  "  \#{1}a\n" \
+                  " zy\n" \
+                  "      eos\n")
+    assert_equal(<<-eos, result)
+ #{1}a
+zy
+    eos
+  end
+
+  def test_dedented_heredoc_with_interpolated_string
+    w = ""
+    result = eval("<<~eos\n" \
+                  " \#{w} a\n" \
+                  "  zy\n" \
+                  "    eos\n")
+    assert_equal(<<-eos, result)
+#{w} a
+ zy
+    eos
+  end
+
   def test_lineno_after_heredoc
     bug7559 = '[ruby-dev:46737]'
     expected, _, actual = __LINE__, <<eom, __LINE__