summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog21
-rwxr-xr-xbin/erb27
-rw-r--r--lib/erb.rb51
-rw-r--r--man/erb.1158
-rw-r--r--test/erb/test_erb_m17n.rb123
5 files changed, 370 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 3c06fa8..bda8ff2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+Mon Dec 29 17:25:17 2008 Yuki Sonoda (Yugui) <yugui@yugui.jp>
+
+ * lib/erb.rb (ERB): m17n of ERB. adds rdoc.
+ fixes #712. c.f. [ruby-dev:37516].
+
+ * lib/erb.rb (ERB::Compiler#compile): recognizes magic comments.
+ returns a pair of compiled script and its script encoding.
+
+ * lib/erb.rb (ERB#set_eoutvar): make generated scripts return a
+ string in correct encoding.
+
+ * lib/erb.rb (ERB#def_method): use Kernel#eval for encoding-awareness
+ of the evaluated string.
+
+ * bin/erb (ERB::Main.run): adds -E and -U options.
+ String is no longer Enumerable.
+
+ * man/erb.1: new manpage.
+
+ * test/erb/test_erb_m17n.rb: new test case for m17n features.
+
Mon Dec 29 18:02:45 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* ext/io/wait/wait.c (io_nread): returns number of bytes available
diff --git a/bin/erb b/bin/erb
index 8541437..d265641 100755
--- a/bin/erb
+++ b/bin/erb
@@ -72,6 +72,11 @@ class ERB
end
raise "invalid trim mode #{arg.dump}" unless arg =~ /^[0-2]$/
trim_mode = arg.to_i
+ when '-E', '--encoding'
+ arg = ARGV.req_arg
+ set_encoding(*arg.split(/:/, 2))
+ when '-U'
+ set_encoding(Encoding::UTF_8, Encoding::UTF_8)
when '-P'
disable_percent = true
when '--help'
@@ -91,12 +96,15 @@ class ERB
-d set $DEBUG to true
-r [library] load a library
-S [safe_level] set $SAFE (0..4)
+ -E ex[:in] set default external/internal encodings
+ -U set default encoding to UTF-8.
-T [trim_mode] specify trim_mode (0..2, -)
-P ignore lines which start with "%"
EOU
exit 1
end
+ $<.set_encoding(Encoding::ASCII_8BIT, nil)
src = $<.read
filename = $FILENAME
exit 2 unless src
@@ -105,10 +113,8 @@ EOU
erb.filename = filename
if output
if number
- l = 1
- for line in erb.src
- puts "%3d %s"%[l, line]
- l += 1
+ erb.src.each_line.with_index do |line, l|
+ puts "%3d %s"%[l+1, line]
end
else
puts erb.src
@@ -118,6 +124,19 @@ EOU
end
end
module_function :run
+
+ def set_encoding(extern, intern = nil)
+ verbose, $VERBOSE = $VERBOSE, nil
+ Encoding.default_external = extern unless extern.nil? || extern.empty?
+ Encoding.default_internal = intern unless intern.nil? || intern.empty?
+ [$stdin, $stdout, $stderr].each do |io|
+ io.set_encoding(extern, intern)
+ end
+ ensure
+ $VERBOSE = verbose
+ end
+ module_function :set_encoding
+ class << self; private :set_encoding; end
end
end
diff --git a/lib/erb.rb b/lib/erb.rb
index c879941..c519481 100644
--- a/lib/erb.rb
+++ b/lib/erb.rb
@@ -58,6 +58,24 @@
#
# See the ERB.new and ERB#result methods for more detail.
#
+# == Character encodings
+#
+# ERB (or ruby code generated by ERB) returns a string in the same
+# character encoding as the input string. When the input string has
+# a magic comment, however, it returns a string in the encoding specified
+# by the magic comment.
+#
+# # -*- coding: UTF-8 -*-
+# require 'erb'
+#
+# template = ERB.new <<EOF
+# <%#-*- coding: Big5 -*-%>
+# \_\_ENCODING\_\_ is <%= \_\_ENCODING\_\_ %>.
+# EOF
+# puts template.result
+#
+# <em>Prints:</em> \_\_ENCODING\_\_ is Big5.
+#
#
# == Examples
#
@@ -517,10 +535,14 @@ class ERB
end
def compile(s)
- out = Buffer.new(self, s.encoding)
+ enc = s.encoding
+ raise ArgumentError, "#{enc} is not ASCII compatible" if enc.dummy?
+ s = s.dup.force_encoding("ASCII-8BIT") # don't use constant Enoding::ASCII_8BIT for miniruby
+ enc = detect_magic_comment(s) || enc
+ out = Buffer.new(self, enc)
content = ''
- scanner = make_scanner(s.dup.force_encoding("ASCII-8BIT"))
+ scanner = make_scanner(s)
scanner.scan do |token|
if scanner.stag.nil?
case token
@@ -560,6 +582,7 @@ class ERB
when '<%='
out.push("#{@insert_cmd}((#{content}).to_s)")
when '<%#'
+ # content = content.force_encoding(@enc)
# out.push("# #{content.dump}")
end
scanner.stag = nil
@@ -573,7 +596,7 @@ class ERB
end
out.push("#{@put_cmd} #{content.dump}") if content.size > 0
out.close
- out.script
+ return out.script, enc
end
def prepare_trim_mode(mode)
@@ -613,6 +636,18 @@ class ERB
end
attr_reader :percent, :trim_mode
attr_accessor :put_cmd, :insert_cmd, :pre_cmd, :post_cmd
+
+ private
+ def detect_magic_comment(s)
+ if /\A<%#(.*)%>/ =~ s or (@percent and /\A%#(.*)/ =~ s)
+ comment = $1
+ comment = $1 if comment[/-\*-\s*(.*?)\s*-*-$/]
+ if %r"coding\s*[=:]\s*([[:alnum:]\-_]+)" =~ comment
+ enc = $1.sub(/-(?:mac|dos|unix)/i, '')
+ enc = Encoding.find(enc)
+ end
+ end
+ end
end
end
@@ -688,7 +723,7 @@ class ERB
@safe_level = safe_level
compiler = ERB::Compiler.new(trim_mode)
set_eoutvar(compiler, eoutvar)
- @src = compiler.compile(str)
+ @src, @enc = *compiler.compile(str)
@filename = nil
end
@@ -714,7 +749,7 @@ class ERB
compiler.pre_cmd = cmd
cmd = []
- cmd.push(eoutvar)
+ cmd.push("#{eoutvar}.force_encoding(__ENCODING__)")
compiler.post_cmd = cmd
end
@@ -745,7 +780,11 @@ class ERB
end
def def_method(mod, methodname, fname='(ERB)') # :nodoc:
- mod.module_eval("def #{methodname}\n" + self.src + "\nend\n", fname, -1)
+ src = self.src
+ magic_comment = "#coding:#{@enc}\n"
+ mod.module_eval do
+ eval(magic_comment + "def #{methodname}\n" + src + "\nend\n", binding, fname, -2)
+ end
end
def def_module(methodname='erb') # :nodoc:
diff --git a/man/erb.1 b/man/erb.1
new file mode 100644
index 0000000..87a0a6f
--- /dev/null
+++ b/man/erb.1
@@ -0,0 +1,158 @@
+.\"Ruby is copyrighted by Yukihiro Matsumoto <matz@netlab.jp>.
+.Dd December 27, 2008
+.Dt ERB(1) "" "Ruby Programmers Reference Guide"
+.Os UNIX
+.Sh NAME
+.Nm erb
+.Nd Ruby Templating
+.Sh SYNOPSIS
+.Nm
+.Op Fl -version
+.Op Fl UPdnvx
+.Op Fl E Ar ext Ns Op Ns : Ns int
+.Op Fl S Ar level
+.Op Fl T Ar mode
+.Op Fl r Ar library
+.Op Fl -
+.Op file ...
+.Pp
+.Sh DESCRIPTION
+.Nm
+is a command line front-end for
+.Li "ERB"
+library, which is an implementation of eRuby.
+
+eRuby provides an easy to use but powerful templating system for Ruby.
+Using eRuby, actual Ruby code can be added to any plain text document for the
+purposes of generating document information details and/or flow control.
+
+.Nm
+is a part of
+.Nm Ruby .
+.Pp
+.Sh OPTIONS
+.Bl -tag -width "1234567890123" -compact
+.Pp
+.It Fl -version
+Prints the version of
+.Nm .
+.Pp
+.It Fl E Ar external Ns Op : Ns Ar internal
+.It Fl -encoding Ar external Ns Op : Ns Ar internal
+Specifies the default value(s) for the external and internal encodings. Values should be separated with a colon (:).
+
+You can omit the one for the internal encoding; in that case the value
+.Pf ( Li "Encoding.default_internal" ) will be nil.
+.Pp
+.It Fl P
+Disables the feature that evaluates lines starting with
+.Li "%"
+as Ruby code and removes the trailing EOLs.
+.Pp
+.It Fl S Ar level
+Specifies the safe level in which eRuby script will run.
+.Pp
+.It Fl T Ar mode
+Specifies trim mode (default 0).
+.Ar mode
+can be one of
+.Bl -hang -offset indent
+.It Sy 0
+EOL remains after the embedded ruby script is evaluated.
+.Pp
+.It Sy 1
+EOL is removed if the line ends with
+.Li "%>" .
+.Pp
+.It Sy 2
+EOL is removed if the line starts with
+.Li "<%"
+and ends with
+.Li "%>" .
+.Pp
+.It Sy -
+EOL is removed if the line ends with
+.Li "-%>" .
+And leading whitespaces are removed if the erb directive starts with
+.Li "<%-" .
+.Pp
+.El
+.Pp
+.It Fl U
+Sets the default value for internal encodings
+.Pf ( Li "Encoding.default_internal" ) to UTF-8.
+.Pp
+.It Fl d
+.It Fl -debug
+Turns on debug mode.
+.Li "$DEBUG"
+will be set to true.
+.Pp
+.It Fl h
+.It Fl -help
+Prints a summary of the options.
+.Pp
+.It Fl n
+Used with
+.Fl x .
+Prepends the line number to each line in the output.
+.Pp
+.It Fl v
+Enables verbose mode.
+.Li "$VERBOSE"
+will be set to true.
+.Pp
+.It Fl x
+Converts the eRuby script into Ruby script and prints it without line numbers.
+.Pp
+.El
+.Pp
+.Sh EXAMPLES
+Here is an eRuby script
+.Bd -literal -offset indent
+<?xml version="1.0" ?>
+<% require 'prime' -%>
+<erb-example>
+ <calc><%= 1+1 %></calc>
+ <var><%= __FILE__ %></var>
+ <library><%= Prime.each(10).to_a.join(", ") %></library>
+</erb-example>
+.Ed
+
+Command
+.Dl "% erb -T - example.erb"
+prints
+.Bd -literal -offset indent
+<?xml version="1.0" ?>
+<erb-example>
+ <calc>2</calc>
+ <var>example.erb</var>
+ <library>2, 3, 5, 7</library>
+</erb-example>
+.Ed
+.Pp
+.Sh SEE ALSO
+.Xr ruby 1 .
+
+And see
+.Xr ri 1
+documentation for
+.Li "ERB"
+class.
+.El
+.Pp
+.Sh REPORTING BUGS
+.Bl -bullet
+.Li Security vulnerabilities should be reported via an email to
+.Aq security@ruby-lang.org Ns
+.Li .
+Reported problems will be published after being fixed.
+.Pp
+.Li And you can report other bugs and feature requests via the
+Ruby Issue Tracking System (http://redmine.ruby-lang.org).
+Do not report security vulnerabilities
+via the system because it publishes the vulnerabilities immediately.
+.El
+.Sh AUTHORS
+Written by Masatoshi SEKI.
diff --git a/test/erb/test_erb_m17n.rb b/test/erb/test_erb_m17n.rb
new file mode 100644
index 0000000..432cb4f
--- /dev/null
+++ b/test/erb/test_erb_m17n.rb
@@ -0,0 +1,123 @@
+# -*- coding: UTF-8 -*-
+require 'test/unit'
+require 'erb'
+
+class TestERB < Test::Unit::TestCase
+ def test_result_encoding
+ erb = ERB.new("hello")
+ assert_equal __ENCODING__, erb.result.encoding
+
+ erb = ERB.new("こんにちは".encode("EUC-JP"))
+ assert_equal Encoding::EUC_JP, erb.result.encoding
+
+ erb = ERB.new("\xC4\xE3\xBA\xC3".force_encoding("EUC-CN"))
+ assert_equal Encoding::EUC_CN, erb.result.encoding
+
+ erb = ERB.new("γεια σας".encode("ISO-8859-7"))
+ assert_equal Encoding::ISO_8859_7, erb.result.encoding
+
+ assert_raise(ArgumentError, /ASCII compatible/) {
+ ERB.new("こんにちは".force_encoding("ISO-2022-JP")) # dummy encoding
+ }
+ end
+
+ def test_generate_magic_comment
+ erb = ERB.new("hello")
+ assert_match /#coding:UTF-8/, erb.src
+
+ erb = ERB.new("hello".force_encoding("EUC-JP"))
+ assert_match /#coding:EUC-JP/, erb.src
+
+ erb = ERB.new("hello".force_encoding("ISO-8859-9"))
+ assert_match /#coding:ISO-8859-9/, erb.src
+ end
+
+ def test_literal_encoding
+ erb = ERB.new("literal encoding is <%= 'hello'.encoding %>")
+ assert_match /literal encoding is UTF-8/, erb.result
+
+ erb = ERB.new("literal encoding is <%= 'こんにちは'.encoding %>".encode("EUC-JP"))
+ assert_match /literal encoding is EUC-JP/, erb.result
+
+ erb = ERB.new("literal encoding is <%= '\xC4\xE3\xBA\xC3'.encoding %>".force_encoding("EUC-CN"))
+ assert_match /literal encoding is GB2312/, erb.result
+ end
+
+ def test___ENCODING__
+ erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>")
+ assert_match /__ENCODING__ is UTF-8/, erb.result
+
+ erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>".force_encoding("EUC-JP"))
+ assert_match /__ENCODING__ is EUC-JP/, erb.result
+
+ erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>".force_encoding("Big5"))
+ assert_match /__ENCODING__ is Big5/, erb.result
+ end
+
+ def test_recognize_magic_comment
+ erb = ERB.new(<<-EOS.encode("EUC-KR"))
+<%# -*- coding: EUC-KR -*- %>
+안녕하세요
+ EOS
+ assert_match /#coding:EUC-KR/, erb.src
+ assert_equal Encoding::EUC_KR, erb.result.encoding
+
+ erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT"))
+<%#-*- coding: EUC-KR -*-%>
+안녕하세요
+ EOS
+ assert_match /#coding:EUC-KR/, erb.src
+ assert_equal Encoding::EUC_KR, erb.result.encoding
+
+ erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT"))
+<%# vim: tabsize=8 encoding=EUC-KR shiftwidth=2 expandtab %>
+안녕하세요
+ EOS
+ assert_match /#coding:EUC-KR/, erb.src
+ assert_equal Encoding::EUC_KR, erb.result.encoding
+
+ erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT"))
+<%#coding:EUC-KR %>
+안녕하세요
+ EOS
+ assert_match /#coding:EUC-KR/, erb.src
+ assert_equal Encoding::EUC_KR, erb.result.encoding
+
+ erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("EUC-JP"))
+<%#coding:EUC-KR %>
+안녕하세요
+ EOS
+ assert_match /#coding:EUC-KR/, erb.src
+ assert_equal Encoding::EUC_KR, erb.result.encoding
+ end
+
+ module M; end
+ def test_method_with_encoding
+ obj = Object.new
+ obj.extend(M)
+
+ erb = ERB.new(<<-EOS.encode("EUC-JP").force_encoding("ASCII-8BIT"))
+<%#coding:EUC-JP %>
+literal encoding is <%= 'こんにちは'.encoding %>
+__ENCODING__ is <%= __ENCODING__ %>
+ EOS
+ erb.def_method(M, :m_from_magic_comment)
+
+ result = obj.m_from_magic_comment
+ assert_equal Encoding::EUC_JP, result.encoding
+ assert_match /literal encoding is EUC-JP/, result
+ assert_match /__ENCODING__ is EUC-JP/, result
+
+ erb = ERB.new(<<-EOS.encode("EUC-KR"))
+literal encoding is <%= '안녕하세요'.encoding %>
+__ENCODING__ is <%= __ENCODING__ %>
+EOS
+ erb.def_method(M, :m_from_eval_encoding)
+ result = obj.m_from_eval_encoding
+ assert_equal Encoding::EUC_KR, result.encoding
+ assert_match /literal encoding is EUC-KR/, result
+ assert_match /__ENCODING__ is EUC-KR/, result
+ end
+end
+
+# vim:fileencoding=UTF-8