1 files changed, 660 insertions, 266 deletions
diff --git a/sprintf.c b/sprintf.c
index ba1c40022b..60d54f49bf 100644
--- a/sprintf.c
+++ b/sprintf.c
@@ -3,33 +3,35 @@
   sprintf.c -
 
   $Author$
-  $Date$
   created at: Fri Oct 15 10:39:26 JST 1993
 
-  Copyright (C) 1993-2003 Yukihiro Matsumoto
+  Copyright (C) 1993-2007 Yukihiro Matsumoto
   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
   Copyright (C) 2000  Information-technology Promotion Agency, Japan
 
 **********************************************************************/
 
-#include "ruby.h"
-#include "re.h"
-#include <ctype.h>
+#include "ruby/ruby.h"
+#include "ruby/re.h"
+#include "ruby/encoding.h"
 #include <math.h>
+#include <stdarg.h>
+
+#ifdef HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
 
 #define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log2(10) =~ 146/485 */
 #define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT)
 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
 
-static void fmt_setup _((char*,int,int,int,int));
+static void fmt_setup(char*,size_t,int,int,int,int);
 
 static char*
-remove_sign_bits(str, base)
-    char *str;
-    int base;
+remove_sign_bits(char *str, int base)
 {
     char *s, *t;
-    
+
     s = t = str;
 
     if (base == 16) {
@@ -48,18 +50,12 @@ remove_sign_bits(str, base)
 	    t++;
 	}
     }
-    if (t > s) {
-	while (*t) *s++ = *t++;
-	*s = '\0';
-    }
 
-    return str;
+    return t;
 }
 
 static char
-sign_bits(base, p)
-    int base;
-    const char *p;
+sign_bits(int base, const char *p)
 {
     char c = '.';
 
@@ -87,11 +83,13 @@ sign_bits(base, p)
 #define FPREC0 128
 
 #define CHECK(l) do {\
+    int cr = ENC_CODERANGE(result);\
     while (blen + (l) >= bsiz) {\
 	bsiz*=2;\
     }\
     rb_str_resize(result, bsiz);\
-    buf = RSTRING(result)->ptr;\
+    ENC_CODERANGE_SET(result, cr);\
+    buf = RSTRING_PTR(result);\
 } while (0)
 
 #define PUSH(s, l) do { \
@@ -100,21 +98,38 @@ sign_bits(base, p)
     blen += (l);\
 } while (0)
 
+#define FILL(c, l) do { \
+    CHECK(l);\
+    memset(&buf[blen], c, l);\
+    blen += (l);\
+} while (0)
+
 #define GETARG() (nextvalue != Qundef ? nextvalue : \
-    posarg < 0 ? \
+    posarg == -1 ? \
     (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \
+    posarg == -2 ? \
+    (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \
     (posarg = nextarg++, GETNTHARG(posarg)))
 
 #define GETPOSARG(n) (posarg > 0 ? \
     (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \
+    posarg == -2 ? \
+    (rb_raise(rb_eArgError, "numbered(%d) after named", n), 0) : \
     ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \
 	       (posarg = -1, GETNTHARG(n))))
 
 #define GETNTHARG(nth) \
     ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
 
+#define GETNAMEARG(id, name, len) ( \
+    posarg > 0 ? \
+    (rb_raise(rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \
+    posarg == -1 ? \
+    (rb_raise(rb_eArgError, "named%.*s after numbered", (len), (name)), 0) :	\
+    (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), id, Qundef)))
+
 #define GETNUM(n, val) \
-    for (; p < end && ISDIGIT(*p); p++) { \
+    for (; p < end && rb_enc_isdigit(*p, enc); p++) {	\
 	int next_n = 10 * n + (*p - '0'); \
         if (next_n / 10 != n) {\
 	    rb_raise(rb_eArgError, #val " too big"); \
@@ -139,101 +154,264 @@ sign_bits(base, p)
     val = NUM2INT(tmp); \
 } while (0)
 
+static VALUE
+get_hash(volatile VALUE *hash, int argc, const VALUE *argv)
+{
+    VALUE tmp;
+
+    if (*hash != Qundef) return *hash;
+    if (argc != 2) {
+	rb_raise(rb_eArgError, "one hash required");
+    }
+    tmp = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
+    if (NIL_P(tmp)) {
+	rb_raise(rb_eArgError, "one hash required");
+    }
+    return (*hash = tmp);
+}
 
 /*
  *  call-seq:
- *     format(format_string [, arguments...] )   => string
- *     sprintf(format_string [, arguments...] )  => string
- *  
+ *     format(format_string [, arguments...] )   -> string
+ *     sprintf(format_string [, arguments...] )  -> string
+ *
  *  Returns the string resulting from applying <i>format_string</i> to
- *  any additional arguments. Within the format string, any characters
- *  other than format sequences are copied to the result. A format
+ *  any additional arguments.  Within the format string, any characters
+ *  other than format sequences are copied to the result.
+ *
+ *  The syntax of a format sequence is as follows.
+ *
+ *    %[flags][width][.precision]type
+ *
+ *  A format
  *  sequence consists of a percent sign, followed by optional flags,
  *  width, and precision indicators, then terminated with a field type
- *  character. The field type controls how the corresponding
+ *  character.  The field type controls how the corresponding
  *  <code>sprintf</code> argument is to be interpreted, while the flags
- *  modify that interpretation. The field type characters are listed
- *  in the table at the end of this section. The flag characters are:
- *
- *    Flag     | Applies to   | Meaning
- *    ---------+--------------+-----------------------------------------
- *    space    | bdeEfgGiouxX | Leave a space at the start of 
- *             |              | positive numbers.
- *    ---------+--------------+-----------------------------------------
- *    (digit)$ | all          | Specifies the absolute argument number
- *             |              | for this field. Absolute and relative
- *             |              | argument numbers cannot be mixed in a
- *             |              | sprintf string.
- *    ---------+--------------+-----------------------------------------
- *     #       | beEfgGoxX    | Use an alternative format. For the
- *             |              | conversions `o', `x', `X', and `b', 
- *             |              | prefix the result with ``0'', ``0x'', ``0X'',
- *             |              |  and ``0b'', respectively. For `e',
- *             |              | `E', `f', `g', and 'G', force a decimal
- *             |              | point to be added, even if no digits follow.
- *             |              | For `g' and 'G', do not remove trailing zeros.
- *    ---------+--------------+-----------------------------------------
- *    +        | bdeEfgGiouxX | Add a leading plus sign to positive numbers.
- *    ---------+--------------+-----------------------------------------
- *    -        | all          | Left-justify the result of this conversion.
- *    ---------+--------------+-----------------------------------------
- *    0 (zero) | bdeEfgGiouxX | Pad with zeros, not spaces.
- *    ---------+--------------+-----------------------------------------
- *    *        | all          | Use the next argument as the field width. 
- *             |              | If negative, left-justify the result. If the
- *             |              | asterisk is followed by a number and a dollar 
- *             |              | sign, use the indicated argument as the width.
- *
- *     
- *  The field width is an optional integer, followed optionally by a
- *  period and a precision. The width specifies the minimum number of
- *  characters that will be written to the result for this field. For
- *  numeric fields, the precision controls the number of decimal places
- *  displayed. For string fields, the precision determines the maximum
- *  number of characters to be copied from the string. (Thus, the format
- *  sequence <code>%10.10s</code> will always contribute exactly ten
- *  characters to the result.)
+ *  modify that interpretation.
  *
- *  The field types are:
+ *  The field type characters are:
  *
- *      Field |  Conversion
+ *      Field |  Integer Format
  *      ------+--------------------------------------------------------------
  *        b   | Convert argument as a binary number.
- *        c   | Argument is the numeric code for a single character.
+ *            | Negative numbers will be displayed as a two's complement
+ *            | prefixed with `..1'.
+ *        B   | Equivalent to `b', but uses an uppercase 0B for prefix
+ *            | in the alternative format by #.
  *        d   | Convert argument as a decimal number.
+ *        i   | Identical to `d'.
+ *        o   | Convert argument as an octal number.
+ *            | Negative numbers will be displayed as a two's complement
+ *            | prefixed with `..7'.
+ *        u   | Identical to `d'.
+ *        x   | Convert argument as a hexadecimal number.
+ *            | Negative numbers will be displayed as a two's complement
+ *            | prefixed with `..f' (representing an infinite string of
+ *            | leading 'ff's).
+ *        X   | Equivalent to `x', but uses uppercase letters.
+ *
+ *      Field |  Float Format
+ *      ------+--------------------------------------------------------------
+ *        e   | Convert floating point argument into exponential notation
+ *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
+ *            | The precision specifies the number of digits after the decimal
+ *            | point (defaulting to six).
  *        E   | Equivalent to `e', but uses an uppercase E to indicate
  *            | the exponent.
- *        e   | Convert floating point argument into exponential notation 
- *            | with one digit before the decimal point. The precision
- *            | determines the number of fractional digits (defaulting to six).
- *        f   | Convert floating point argument as [-]ddd.ddd, 
- *            |  where the precision determines the number of digits after
+ *        f   | Convert floating point argument as [-]ddd.dddddd,
+ *            | where the precision specifies the number of digits after
  *            | the decimal point.
- *        G   | Equivalent to `g', but use an uppercase `E' in exponent form.
  *        g   | Convert a floating point number using exponential form
  *            | if the exponent is less than -4 or greater than or
- *            | equal to the precision, or in d.dddd form otherwise.
- *        i   | Identical to `d'.
- *        o   | Convert argument as an octal number.
+ *            | equal to the precision, or in dd.dddd form otherwise.
+ *            | The precision specifies the number of significant digits.
+ *        G   | Equivalent to `g', but use an uppercase `E' in exponent form.
+ *        a   | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
+ *            | which consists of an optional sign, "0x", fraction part
+ *            | as hexadecimal, "p", and exponential part as decimal.
+ *        A   | Equivalent to `a', but use uppercase `X' and `P'.
+ *
+ *      Field |  Other Format
+ *      ------+--------------------------------------------------------------
+ *        c   | Argument is the numeric code for a single character or
+ *            | a single character string itself.
  *        p   | The valuing of argument.inspect.
- *        s   | Argument is a string to be substituted. If the format
+ *        s   | Argument is a string to be substituted.  If the format
  *            | sequence contains a precision, at most that many characters
  *            | will be copied.
- *        u   | Treat argument as an unsigned decimal number. Negative integers
- *            | are displayed as a 32 bit two's complement plus one for the
- *            | underlying architecture; that is, 2 ** 32 + n.  However, since
- *            | Ruby has no inherent limit on bits used to represent the
- *            | integer, this value is preceded by two dots (..) in order to
- *            | indicate a infinite number of leading sign bits.
- *        X   | Convert argument as a hexadecimal number using uppercase
- *            | letters. Negative numbers will be displayed with two
- *            | leading periods (representing an infinite string of
- *            | leading 'FF's.
- *        x   | Convert argument as a hexadecimal number.
- *            | Negative numbers will be displayed with two
- *            | leading periods (representing an infinite string of
- *            | leading 'ff's.
- *     
+ *        %   | A percent sign itself will be displayed.  No argument taken.
+ *
+ *  The flags modify the behavior of the formats.
+ *  The flag characters are:
+ *
+ *    Flag     | Applies to    | Meaning
+ *    ---------+---------------+-----------------------------------------
+ *    space    | bBdiouxX      | Leave a space at the start of
+ *             | aAeEfgG       | non-negative numbers.
+ *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
+ *             |               | a minus sign with absolute value for
+ *             |               | negative values.
+ *    ---------+---------------+-----------------------------------------
+ *    (digit)$ | all           | Specifies the absolute argument number
+ *             |               | for this field.  Absolute and relative
+ *             |               | argument numbers cannot be mixed in a
+ *             |               | sprintf string.
+ *    ---------+---------------+-----------------------------------------
+ *     #       | bBoxX         | Use an alternative format.
+ *             | aAeEfgG       | For the conversions `o', increase the precision
+ *             |               | until the first digit will be `0' if
+ *             |               | it is not formatted as complements.
+ *             |               | For the conversions `x', `X', `b' and `B'
+ *             |               | on non-zero, prefix the result with ``0x'',
+ *             |               | ``0X'', ``0b'' and ``0B'', respectively.
+ *             |               | For `a', `A', `e', `E', `f', `g', and 'G',
+ *             |               | force a decimal point to be added,
+ *             |               | even if no digits follow.
+ *             |               | For `g' and 'G', do not remove trailing zeros.
+ *    ---------+---------------+-----------------------------------------
+ *    +        | bBdiouxX      | Add a leading plus sign to non-negative
+ *             | aAeEfgG       | numbers.
+ *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
+ *             |               | a minus sign with absolute value for
+ *             |               | negative values.
+ *    ---------+---------------+-----------------------------------------
+ *    -        | all           | Left-justify the result of this conversion.
+ *    ---------+---------------+-----------------------------------------
+ *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
+ *             | aAeEfgG       | For `o', `x', `X', `b' and `B', radix-1
+ *             | (numeric fmt) | is used for negative numbers formatted as
+ *             |               | complements.
+ *    ---------+---------------+-----------------------------------------
+ *    *        | all           | Use the next argument as the field width.
+ *             |               | If negative, left-justify the result. If the
+ *             |               | asterisk is followed by a number and a dollar
+ *             |               | sign, use the indicated argument as the width.
+ *
+ *  Examples of flags:
+ *
+ *   # `+' and space flags specify the sign of non-negative numbers.
+ *   sprintf("%d", 123)  #=> "123"
+ *   sprintf("%+d", 123) #=> "+123"
+ *   sprintf("% d", 123) #=> " 123"
+ *
+ *   # `#' flag for `o' increases number of digits to show `0'.
+ *   # `+' and space flag changes format of negative numbers.
+ *   sprintf("%o", 123)   #=> "173"
+ *   sprintf("%#o", 123)  #=> "0173"
+ *   sprintf("%+o", -123) #=> "-173"
+ *   sprintf("%o", -123)  #=> "..7605"
+ *   sprintf("%#o", -123) #=> "..7605"
+ *
+ *   # `#' flag for `x' adds a prefix `0x' for non-zero numbers.
+ *   # `+' and space flag disables complements for negative numbers.
+ *   sprintf("%x", 123)   #=> "7b"
+ *   sprintf("%#x", 123)  #=> "0x7b"
+ *   sprintf("%+x", -123) #=> "-7b"
+ *   sprintf("%x", -123)  #=> "..f85"
+ *   sprintf("%#x", -123) #=> "0x..f85"
+ *   sprintf("%#x", 0)    #=> "0"
+ *
+ *   # `#' for `X' uses the prefix `0X'.
+ *   sprintf("%X", 123)  #=> "7B"
+ *   sprintf("%#X", 123) #=> "0X7B"
+ *
+ *   # `#' flag for `b' adds a prefix `0b' for non-zero numbers.
+ *   # `+' and space flag disables complements for negative numbers.
+ *   sprintf("%b", 123)   #=> "1111011"
+ *   sprintf("%#b", 123)  #=> "0b1111011"
+ *   sprintf("%+b", -123) #=> "-1111011"
+ *   sprintf("%b", -123)  #=> "..10000101"
+ *   sprintf("%#b", -123) #=> "0b..10000101"
+ *   sprintf("%#b", 0)    #=> "0"
+ *
+ *   # `#' for `B' uses the prefix `0B'.
+ *   sprintf("%B", 123)  #=> "1111011"
+ *   sprintf("%#B", 123) #=> "0B1111011"
+ *
+ *   # `#' for `e' forces to show the decimal point.
+ *   sprintf("%.0e", 1)  #=> "1e+00"
+ *   sprintf("%#.0e", 1) #=> "1.e+00"
+ *
+ *   # `#' for `f' forces to show the decimal point.
+ *   sprintf("%.0f", 1234)  #=> "1234"
+ *   sprintf("%#.0f", 1234) #=> "1234."
+ *
+ *   # `#' for `g' forces to show the decimal point.
+ *   # It also disables stripping lowest zeros.
+ *   sprintf("%g", 123.4)   #=> "123.4"
+ *   sprintf("%#g", 123.4)  #=> "123.400"
+ *   sprintf("%g", 123456)  #=> "123456"
+ *   sprintf("%#g", 123456) #=> "123456."
+ *
+ *  The field width is an optional integer, followed optionally by a
+ *  period and a precision.  The width specifies the minimum number of
+ *  characters that will be written to the result for this field.
+ *
+ *  Examples of width:
+ *
+ *   # padding is done by spaces,       width=20
+ *   # 0 or radix-1.             <------------------>
+ *   sprintf("%20d", 123)   #=> "                 123"
+ *   sprintf("%+20d", 123)  #=> "                +123"
+ *   sprintf("%020d", 123)  #=> "00000000000000000123"
+ *   sprintf("%+020d", 123) #=> "+0000000000000000123"
+ *   sprintf("% 020d", 123) #=> " 0000000000000000123"
+ *   sprintf("%-20d", 123)  #=> "123                 "
+ *   sprintf("%-+20d", 123) #=> "+123                "
+ *   sprintf("%- 20d", 123) #=> " 123                "
+ *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
+ *
+ *  For
+ *  numeric fields, the precision controls the number of decimal places
+ *  displayed.  For string fields, the precision determines the maximum
+ *  number of characters to be copied from the string.  (Thus, the format
+ *  sequence <code>%10.10s</code> will always contribute exactly ten
+ *  characters to the result.)
+ *
+ *  Examples of precisions:
+ *
+ *   # precision for `d', 'o', 'x' and 'b' is
+ *   # minimum number of digits               <------>
+ *   sprintf("%20.8d", 123)  #=> "            00000123"
+ *   sprintf("%20.8o", 123)  #=> "            00000173"
+ *   sprintf("%20.8x", 123)  #=> "            0000007b"
+ *   sprintf("%20.8b", 123)  #=> "            01111011"
+ *   sprintf("%20.8d", -123) #=> "           -00000123"
+ *   sprintf("%20.8o", -123) #=> "            ..777605"
+ *   sprintf("%20.8x", -123) #=> "            ..ffff85"
+ *   sprintf("%20.8b", -11)  #=> "            ..110101"
+ *
+ *   # "0x" and "0b" for `#x' and `#b' is not counted for
+ *   # precision but "0" for `#o' is counted.  <------>
+ *   sprintf("%#20.8d", 123)  #=> "            00000123"
+ *   sprintf("%#20.8o", 123)  #=> "            00000173"
+ *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
+ *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
+ *   sprintf("%#20.8d", -123) #=> "           -00000123"
+ *   sprintf("%#20.8o", -123) #=> "            ..777605"
+ *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
+ *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
+ *
+ *   # precision for `e' is number of
+ *   # digits after the decimal point           <------>
+ *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
+ *
+ *   # precision for `f' is number of
+ *   # digits after the decimal point               <------>
+ *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
+ *
+ *   # precision for `g' is number of
+ *   # significant digits                          <------->
+ *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
+ *
+ *   #                                         <------->
+ *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
+ *
+ *   # precision for `s' is
+ *   # maximum number of characters                    <------>
+ *   sprintf("%20.8s", "string test") #=> "            string t"
+ *
  *  Examples:
  *
  *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
@@ -241,28 +419,35 @@ sign_bits(base, p)
  *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
  *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
  *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
- *     sprintf("%u", -123)                        #=> "..4294967173"
+ *     sprintf("%u", -123)                        #=> "-123"
+ *
+ *  For more complex formatting, Ruby supports a reference by name.
+ *  %<name>s style uses format style, but %{name} style doesn't.
+ *
+ *  Examples:
+ *    sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
+ *      #=> 1 : 2.000000
+ *    sprintf("%{foo}f", { :foo => 1 })
+ *      # => "1f"
  */
 
 VALUE
-rb_f_sprintf(argc, argv)
-    int argc;
-    VALUE *argv;
+rb_f_sprintf(int argc, const VALUE *argv)
 {
     return rb_str_format(argc - 1, argv + 1, GETNTHARG(0));
 }
 
 VALUE
-rb_str_format(argc, argv, fmt)
-    int argc;
-    VALUE *argv;
-    VALUE fmt;
+rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 {
+    rb_encoding *enc;
     const char *p, *end;
     char *buf;
-    int blen, bsiz;
+    long blen, bsiz;
     VALUE result;
 
+    long scanned = 0;
+    int coderange = ENC_CODERANGE_7BIT;
     int width, prec, flags = FNONE;
     int nextarg = 1;
     int posarg = 0;
@@ -270,6 +455,7 @@ rb_str_format(argc, argv, fmt)
     VALUE nextvalue;
     VALUE tmp;
     VALUE str;
+    volatile VALUE hash = Qundef;
 
 #define CHECK_FOR_WIDTH(f)				 \
     if ((f) & FWIDTH) {					 \
@@ -290,20 +476,29 @@ rb_str_format(argc, argv, fmt)
     --argv;
     if (OBJ_TAINTED(fmt)) tainted = 1;
     StringValue(fmt);
+    enc = rb_enc_get(fmt);
     fmt = rb_str_new4(fmt);
-    p = RSTRING(fmt)->ptr;
-    end = p + RSTRING(fmt)->len;
+    p = RSTRING_PTR(fmt);
+    end = p + RSTRING_LEN(fmt);
     blen = 0;
     bsiz = 120;
     result = rb_str_buf_new(bsiz);
-    buf = RSTRING(result)->ptr;
+    rb_enc_copy(result, fmt);
+    buf = RSTRING_PTR(result);
+    memset(buf, 0, bsiz);
+    ENC_CODERANGE_SET(result, coderange);
 
     for (; p < end; p++) {
 	const char *t;
 	int n;
+	ID id = 0;
 
 	for (t = p; t < end && *t != '%'; t++) ;
 	PUSH(p, t - p);
+	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
+	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
+	    ENC_CODERANGE_SET(result, coderange);
+	}
 	if (t >= end) {
 	    /* end of fmt string */
 	    goto sprint_exit;
@@ -315,7 +510,7 @@ rb_str_format(argc, argv, fmt)
       retry:
 	switch (*p) {
 	  default:
-	    if (ISPRINT(*p))
+	    if (rb_enc_isprint(*p, enc))
 		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
 	    else
 		rb_raise(rb_eArgError, "malformed format string");
@@ -368,6 +563,32 @@ rb_str_format(argc, argv, fmt)
 	    flags |= FWIDTH;
 	    goto retry;
 
+	  case '<':
+	  case '{':
+	    {
+		const char *start = p;
+		char term = (*p == '<') ? '>' : '}';
+
+		for (; p < end && *p != term; ) {
+		    p += rb_enc_mbclen(p, end, enc);
+		}
+		if (p >= end) {
+		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
+		}
+		if (id) {
+		    rb_raise(rb_eArgError, "name%.*s after <%s>",
+			     (int)(p - start + 1), start, rb_id2name(id));
+		}
+		id = rb_intern3(start + 1, p - start - 1, enc);
+		nextvalue = GETNAMEARG(ID2SYM(id), start, (int)(p - start + 1));
+		if (nextvalue == Qundef) {
+		    rb_raise(rb_eKeyError, "key%.*s not found", (int)(p - start + 1), start);
+		}
+		if (term == '}') goto format_s;
+		p++;
+		goto retry;
+	    }
+
 	  case '*':
 	    CHECK_FOR_WIDTH(flags);
 	    flags |= FWIDTH;
@@ -404,7 +625,7 @@ rb_str_format(argc, argv, fmt)
 	    p--;
 	  case '%':
 	    if (flags != FNONE) {
-		rb_raise(rb_eArgError, "illegal format character - %%");
+		rb_raise(rb_eArgError, "invalid format character - %%");
 	    }
 	    PUSH("%", 1);
 	    break;
@@ -412,54 +633,99 @@ rb_str_format(argc, argv, fmt)
 	  case 'c':
 	    {
 		VALUE val = GETARG();
-		char c;
-
-		if (!(flags & FMINUS))
-		    while (--width > 0)
-			PUSH(" ", 1);
-		c = NUM2INT(val) & 0xff;
-		PUSH(&c, 1);
-		while (--width > 0)
-		    PUSH(" ", 1);
+		VALUE tmp;
+		unsigned int c;
+		int n;
+
+		tmp = rb_check_string_type(val);
+		if (!NIL_P(tmp)) {
+		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
+			rb_raise(rb_eArgError, "%%c requires a character");
+		    }
+		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
+		}
+		else {
+		    c = NUM2INT(val);
+		    n = rb_enc_codelen(c, enc);
+		}
+		if (n <= 0) {
+		    rb_raise(rb_eArgError, "invalid character");
+		}
+		if (!(flags & FWIDTH)) {
+		    CHECK(n);
+		    rb_enc_mbcput(c, &buf[blen], enc);
+		    blen += n;
+		}
+		else if ((flags & FMINUS)) {
+		    CHECK(n);
+		    rb_enc_mbcput(c, &buf[blen], enc);
+		    blen += n;
+		    FILL(' ', width-1);
+		}
+		else {
+		    FILL(' ', width-1);
+		    CHECK(n);
+		    rb_enc_mbcput(c, &buf[blen], enc);
+		    blen += n;
+		}
 	    }
 	    break;
 
 	  case 's':
 	  case 'p':
+	  format_s:
 	    {
 		VALUE arg = GETARG();
-		long len;
+		long len, slen;
 
 		if (*p == 'p') arg = rb_inspect(arg);
 		str = rb_obj_as_string(arg);
 		if (OBJ_TAINTED(str)) tainted = 1;
-		len = RSTRING(str)->len;
-		if (flags&FPREC) {
-		    if (prec < len) {
-			len = prec;
-		    }
+		len = RSTRING_LEN(str);
+		rb_str_set_len(result, blen);
+		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
+		    int cr = coderange;
+		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
+		    ENC_CODERANGE_SET(result,
+				      (cr == ENC_CODERANGE_UNKNOWN ?
+				       ENC_CODERANGE_BROKEN : (coderange = cr)));
 		}
-		/* need to adjust multi-byte string pos */
-		if (flags&FWIDTH) {
-		    if (width > len) {
-			CHECK(width);
-			width -= len;
+		enc = rb_enc_check(result, str);
+		if (flags&(FPREC|FWIDTH)) {
+		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
+		    if (slen < 0) {
+			rb_raise(rb_eArgError, "invalid mbstring sequence");
+		    }
+		    if ((flags&FPREC) && (prec < slen)) {
+			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
+					     prec, enc);
+			slen = prec;
+			len = p - RSTRING_PTR(str);
+		    }
+		    /* need to adjust multi-byte string pos */
+		    if ((flags&FWIDTH) && (width > slen)) {
+			width -= (int)slen;
 			if (!(flags&FMINUS)) {
+			    CHECK(width);
 			    while (width--) {
 				buf[blen++] = ' ';
 			    }
 			}
+			CHECK(len);
 			memcpy(&buf[blen], RSTRING_PTR(str), len);
 			blen += len;
 			if (flags&FMINUS) {
+			    CHECK(width);
 			    while (width--) {
 				buf[blen++] = ' ';
 			    }
 			}
+			rb_enc_associate(result, enc);
 			break;
 		    }
 		}
-		PUSH(RSTRING(str)->ptr, len);
+		PUSH(RSTRING_PTR(str), len);
+		rb_enc_associate(result, enc);
 	    }
 	    break;
 
@@ -472,28 +738,26 @@ rb_str_format(argc, argv, fmt)
 	  case 'B':
 	  case 'u':
 	    {
+		volatile VALUE tmp1;
 		volatile VALUE val = GETARG();
-		char fbuf[32], nbuf[64], *s, *t;
+		char fbuf[32], nbuf[64], *s;
 		const char *prefix = 0;
-		int sign = 0;
+		int sign = 0, dots = 0;
 		char sc = 0;
 		long v = 0;
 		int base, bignum = 0;
 		int len, pos;
-		volatile VALUE tmp;
-                volatile VALUE tmp1;
 
 		switch (*p) {
 		  case 'd':
 		  case 'i':
+		  case 'u':
 		    sign = 1; break;
 		  case 'o':
 		  case 'x':
 		  case 'X':
 		  case 'b':
 		  case 'B':
-		  case 'u':
-		  default:
 		    if (flags&(FPLUS|FSPACE)) sign = 1;
 		    break;
 		}
@@ -510,20 +774,21 @@ rb_str_format(argc, argv, fmt)
 		      case 'B':
 			prefix = "0B"; break;
 		    }
-		    if (prefix) {
-			width -= strlen(prefix);
-		    }
 		}
 
 	      bin_retry:
 		switch (TYPE(val)) {
 		  case T_FLOAT:
-		    val = rb_dbl2big(RFLOAT(val)->value);
+		    if (FIXABLE(RFLOAT_VALUE(val))) {
+			val = LONG2FIX((long)RFLOAT_VALUE(val));
+			goto bin_retry;
+		    }
+		    val = rb_dbl2big(RFLOAT_VALUE(val));
 		    if (FIXNUM_P(val)) goto bin_retry;
 		    bignum = 1;
 		    break;
 		  case T_STRING:
-		    val = rb_str_to_inum(val, 0, Qtrue);
+		    val = rb_str_to_inum(val, 0, TRUE);
 		    goto bin_retry;
 		  case T_BIGNUM:
 		    bignum = 1;
@@ -573,107 +838,119 @@ rb_str_format(argc, argv, fmt)
 			    sc = ' ';
 			    width--;
 			}
-			sprintf(fbuf, "%%l%c", c);
-			sprintf(nbuf, fbuf, v);
+			snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
+			snprintf(nbuf, sizeof(nbuf), fbuf, v);
 			s = nbuf;
-			goto format_integer;
 		    }
-		    s = nbuf;
-		    if (v < 0) {
-			if (base == 10) {
-			    rb_warning("negative number for %%u specifier");
+		    else {
+			s = nbuf;
+			if (v < 0) {
+			    dots = 1;
 			}
-			if (!(flags&(FPREC|FZERO))) {
-			    strcpy(s, "..");
-			    s += 2;
+			snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
+			snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
+			if (v < 0) {
+			    char d = 0;
+
+			    s = remove_sign_bits(s, base);
+			    switch (base) {
+			      case 16:
+				d = 'f'; break;
+			      case 8:
+				d = '7'; break;
+			    }
+			    if (d && *s != d) {
+				*--s = d;
+			    }
 			}
 		    }
-		    sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p);
-		    sprintf(s, fbuf, v);
-		    if (v < 0) {
-			char d = 0;
-
-			remove_sign_bits(s, base);
-			switch (base) {
-			  case 16:
-			    d = 'f'; break;
-			  case 8:
-			    d = '7'; break;
+		    len = (int)strlen(s);
+		}
+		else {
+		    if (sign) {
+			tmp = rb_big2str(val, base);
+			s = RSTRING_PTR(tmp);
+			if (s[0] == '-') {
+			    s++;
+			    sc = '-';
+			    width--;
 			}
-			if (d && *s != d) {
-			    memmove(s+1, s, strlen(s)+1);
-			    *s = d;
+			else if (flags & FPLUS) {
+			    sc = '+';
+			    width--;
+			}
+			else if (flags & FSPACE) {
+			    sc = ' ';
+			    width--;
 			}
 		    }
-		    s = nbuf;
-		    goto format_integer;
-		}
-
-		if (sign) {
-		    tmp = rb_big2str(val, base);
-		    s = RSTRING(tmp)->ptr;
-		    if (s[0] == '-') {
-			s++;
-			sc = '-';
-                        width--;
-		    }
-		    else if (flags & FPLUS) {
-			sc = '+';
-                        width--;
-		    }
-		    else if (flags & FSPACE) {
-			sc = ' ';
-                        width--;
-		    }
-		    goto format_integer;
-		}
-		if (!RBIGNUM(val)->sign) {
-		    val = rb_big_clone(val);
-		    rb_big_2comp(val);
-		}
-		tmp1 = tmp = rb_big2str0(val, base, RBIGNUM(val)->sign);
-		s = RSTRING(tmp)->ptr;
-		if (*s == '-') {
-		    if (base == 10) {
-			rb_warning("negative number for %%u specifier");
-		    }
-		    remove_sign_bits(++s, base);
-		    tmp = rb_str_new(0, 3+strlen(s));
-		    t = RSTRING(tmp)->ptr;
-		    if (!(flags&(FPREC|FZERO))) {
-			strcpy(t, "..");
-			t += 2;
-		    }
-		    switch (base) {
-		      case 16:
-			if (s[0] != 'f') strcpy(t++, "f"); break;
-		      case 8:
-			if (s[0] != '7') strcpy(t++, "7"); break;
-		      case 2:
-			if (s[0] != '1') strcpy(t++, "1"); break;
+		    else {
+			if (!RBIGNUM_SIGN(val)) {
+			    val = rb_big_clone(val);
+			    rb_big_2comp(val);
+			}
+			tmp1 = tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
+			s = RSTRING_PTR(tmp);
+			if (*s == '-') {
+			    dots = 1;
+			    if (base == 10) {
+				rb_warning("negative number for %%u specifier");
+			    }
+			    s = remove_sign_bits(++s, base);
+			    switch (base) {
+			      case 16:
+				if (s[0] != 'f') *--s = 'f'; break;
+			      case 8:
+				if (s[0] != '7') *--s = '7'; break;
+			      case 2:
+				if (s[0] != '1') *--s = '1'; break;
+			    }
+			}
 		    }
-		    strcpy(t, s);
-		    bignum = 2;
+		    len = rb_long2int(RSTRING_END(tmp) - s);
 		}
-		s = RSTRING(tmp)->ptr;
 
-	      format_integer:
 		pos = -1;
-		len = strlen(s);
+		if (dots) {
+		    prec -= 2;
+		    width -= 2;
+		}
 
 		if (*p == 'X') {
 		    char *pp = s;
-		    while (*pp) {
-			*pp = toupper(*pp);
+		    int c;
+		    while ((c = (int)(unsigned char)*pp) != 0) {
+			*pp = rb_enc_toupper(c, enc);
 			pp++;
 		    }
 		}
-		if ((flags&(FZERO|FPREC)) == FZERO) {
+		if (prefix && !prefix[1]) { /* octal */
+		    if (dots) {
+			prefix = 0;
+		    }
+		    else if (len == 1 && *s == '0') {
+			len = 0;
+			if (flags & FPREC) prec--;
+		    }
+		    else if ((flags & FPREC) && (prec > len)) {
+			prefix = 0;
+		    }
+		}
+		else if (len == 1 && *s == '0') {
+		    prefix = 0;
+		}
+		if (prefix) {
+		    width -= (int)strlen(prefix);
+		}
+		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
 		    prec = width;
 		    width = 0;
 		}
 		else {
-		    if (prec < len) prec = len;
+		    if (prec < len) {
+			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
+			prec = len;
+		    }
 		    width -= prec;
 		}
 		if (!(flags&FMINUS)) {
@@ -684,20 +961,21 @@ rb_str_format(argc, argv, fmt)
 		}
 		if (sc) PUSH(&sc, 1);
 		if (prefix) {
-		    int plen = strlen(prefix);
+		    int plen = (int)strlen(prefix);
 		    PUSH(prefix, plen);
 		}
 		CHECK(prec - len);
+		if (dots) PUSH("..", 2);
 		if (!bignum && v < 0) {
 		    char c = sign_bits(base, p);
 		    while (len < prec--) {
 			buf[blen++] = c;
 		    }
 		}
-		else {
+		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
 		    char c;
 
-		    if (!sign && bignum && !RBIGNUM(val)->sign)
+		    if (!sign && bignum && !RBIGNUM_SIGN(val))
 			c = sign_bits(base, p);
 		    else
 			c = '0';
@@ -718,33 +996,32 @@ rb_str_format(argc, argv, fmt)
 	  case 'G':
 	  case 'e':
 	  case 'E':
+	  case 'a':
+	  case 'A':
 	    {
 		VALUE val = GETARG();
 		double fval;
 		int i, need = 6;
 		char fbuf[32];
 
-		fval = RFLOAT(rb_Float(val))->value;
-#if defined(_WIN32) && !defined(__BORLANDC__)
+		fval = RFLOAT_VALUE(rb_Float(val));
 		if (isnan(fval) || isinf(fval)) {
-		    char *expr;
+		    const char *expr;
 
-		    if  (isnan(fval)) {
+		    if (isnan(fval)) {
 			expr = "NaN";
 		    }
 		    else {
 			expr = "Inf";
 		    }
-		    need = strlen(expr);
+		    need = (int)strlen(expr);
 		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
 			need++;
-		    else if (flags & FSPACE)
-			need++;
 		    if ((flags & FWIDTH) && need < width)
 			need = width;
 
-		    CHECK(need);
-		    sprintf(&buf[blen], "%*s", need, "");
+		    CHECK(need + 1);
+		    snprintf(&buf[blen], need + 1, "%*s", need, "");
 		    if (flags & FMINUS) {
 			if (!isnan(fval) && fval < 0.0)
 			    buf[blen++] = '-';
@@ -752,39 +1029,23 @@ rb_str_format(argc, argv, fmt)
 			    buf[blen++] = '+';
 			else if (flags & FSPACE)
 			    blen++;
-			strncpy(&buf[blen], expr, strlen(expr));
-		    }
-		    else if (flags & FZERO) {
-			if (!isnan(fval) && fval < 0.0) {
-			    buf[blen++] = '-';
-			    need--;
-			}
-			else if (flags & FPLUS) {
-			    buf[blen++] = '+';
-			    need--;
-			}
-			else if (flags & FSPACE) {
-			    blen++;
-			    need--;
-			}
-			while (need-- - strlen(expr) > 0) {
-			    buf[blen++] = '0';
-			}
-			strncpy(&buf[blen], expr, strlen(expr));
+			memcpy(&buf[blen], expr, strlen(expr));
 		    }
 		    else {
 			if (!isnan(fval) && fval < 0.0)
 			    buf[blen + need - strlen(expr) - 1] = '-';
 			else if (flags & FPLUS)
 			    buf[blen + need - strlen(expr) - 1] = '+';
-			strncpy(&buf[blen + need - strlen(expr)], expr,
-				strlen(expr));
+			else if ((flags & FSPACE) && need > width)
+			    blen++;
+			memcpy(&buf[blen + need - strlen(expr)], expr,
+			       strlen(expr));
 		    }
 		    blen += strlen(&buf[blen]);
 		    break;
 		}
-#endif	/* defined(_WIN32) && !defined(__BORLANDC__) */
-		fmt_setup(fbuf, *p, flags, width, prec);
+
+		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
 		need = 0;
 		if (*p != 'e' && *p != 'E') {
 		    i = INT_MIN;
@@ -798,7 +1059,7 @@ rb_str_format(argc, argv, fmt)
 		need += 20;
 
 		CHECK(need);
-		sprintf(&buf[blen], fbuf, fval);
+		snprintf(&buf[blen], need, fbuf, fval);
 		blen += strlen(&buf[blen]);
 	    }
 	    break;
@@ -807,12 +1068,12 @@ rb_str_format(argc, argv, fmt)
     }
 
   sprint_exit:
-    /* XXX - We cannot validiate the number of arguments if (digit)$ style used.
+    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
      */
     if (posarg >= 0 && nextarg < argc) {
 	const char *mesg = "too many arguments for format string";
-	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, mesg);
-	if (RTEST(ruby_verbose)) rb_warn(mesg);
+	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
+	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
     }
     rb_str_resize(result, blen);
 
@@ -821,11 +1082,9 @@ rb_str_format(argc, argv, fmt)
 }
 
 static void
-fmt_setup(buf, c, flags, width, prec)
-    char *buf;
-    int c;
-    int flags, width, prec;
+fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec) /* build a printf format spec in buf */
 {
+    char *end = buf + size; /* one past the end of buf; bounds the snprintf calls below */
     *buf++ = '%';
     if (flags & FSHARP) *buf++ = '#';
     if (flags & FPLUS)  *buf++ = '+';
@@ -834,15 +1093,150 @@ fmt_setup(buf, c, flags, width, prec)
     if (flags & FSPACE) *buf++ = ' ';
 
     if (flags & FWIDTH) {
-	sprintf(buf, "%d", width);
+	snprintf(buf, end - buf, "%d", width);
 	buf += strlen(buf);
     }
 
     if (flags & FPREC) {
-	sprintf(buf, ".%d", prec);
+	snprintf(buf, end - buf, ".%d", prec);
 	buf += strlen(buf);
     }
 
     *buf++ = c;
     *buf = '\0';
 }
+
+#undef FILE
+#define FILE rb_printf_buffer /* FILE in the included vsnprintf.c expands to rb_printf_buffer */
+#define __sbuf rb_printf_sbuf
+#define __sFILE rb_printf_sfile
+#undef feof
+#undef ferror
+#undef clearerr
+#undef fileno
+#if SIZEOF_LONG < SIZEOF_VOIDP /* long is narrower than a pointer */
+# if  SIZEOF_LONG_LONG == SIZEOF_VOIDP /* ...and LONG_LONG is pointer-sized */
+#  define _HAVE_SANE_QUAD_
+#  define _HAVE_LLP64_
+#  define quad_t LONG_LONG
+#  define u_quad_t unsigned LONG_LONG
+# endif
+#endif /* SIZEOF_LONG < SIZEOF_VOIDP */
+#define FLOATING_POINT 1
+#define BSD__dtoa ruby_dtoa /* NOTE(review): presumably routes vsnprintf.c's float conversion to ruby_dtoa - confirm */
+#include "vsnprintf.c" /* textually included, so the macros above apply to it */
+
+static int
+ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
+{
+    struct __siov *iov;
+    VALUE result = (VALUE)fp->_bf._base; /* destination string; callers store the VALUE in _bf._base */
+    char *buf = (char*)fp->_p; /* current write position inside result */
+    size_t len, n;
+    size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w; /* bytes written so far; buffer size kept in _w */
+    /* Write callback installed as f.vwrite: copies every chunk of uio to the end of result. */
+    if (RBASIC(result)->klass) { /* callers zero klass for the duration of formatting */
+	rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
+    }
+    if ((len = uio->uio_resid) == 0)
+	return 0;
+    CHECK(len); /* grows bsiz as needed, resizes result, and resets buf to the string start */
+    buf += blen; /* move back to the write position */
+    fp->_w = bsiz;
+    for (iov = uio->uio_iov; len > 0; ++iov) {
+	MEMCPY(buf, iov->iov_base, char, n = iov->iov_len);
+	buf += n;
+	len -= n;
+    }
+    fp->_p = (unsigned char *)buf;
+    return 0;
+}
+
+VALUE
+rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap)
+{
+    rb_printf_buffer f;
+    VALUE result;
+    /* Format fmt/ap through BSD_vfprintf into a new string; enc, if non-NULL, is associated with it. */
+    f._flags = __SWR | __SSTR;
+    f._bf._size = 0;
+    f._w = 120; /* initial buffer capacity */
+    result = rb_str_buf_new(f._w);
+    if (enc) rb_enc_associate(result, enc);
+    f._bf._base = (unsigned char *)result; /* hands the VALUE to ruby__sfvwrite */
+    f._p = (unsigned char *)RSTRING_PTR(result);
+    RBASIC(result)->klass = 0; /* ruby__sfvwrite raises if klass is non-zero */
+    f.vwrite = ruby__sfvwrite;
+    BSD_vfprintf(&f, fmt, ap);
+    RBASIC(result)->klass = rb_cString;
+    rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); /* set length to the bytes written */
+
+    return result;
+}
+
+VALUE
+rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
+{
+    VALUE result;
+    va_list ap;
+    /* Variadic front end: forwards enc, format and the argument list to rb_enc_vsprintf. */
+    va_start(ap, format);
+    result = rb_enc_vsprintf(enc, format, ap);
+    va_end(ap);
+
+    return result;
+}
+
+VALUE
+rb_vsprintf(const char *fmt, va_list ap)
+{
+    return rb_enc_vsprintf(NULL, fmt, ap); /* NULL enc: rb_enc_vsprintf skips rb_enc_associate */
+}
+
+VALUE
+rb_sprintf(const char *format, ...)
+{
+    VALUE result;
+    va_list ap;
+    /* Variadic front end: forwards format and the argument list to rb_vsprintf. */
+    va_start(ap, format);
+    result = rb_vsprintf(format, ap);
+    va_end(ap);
+
+    return result;
+}
+
+VALUE
+rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
+{
+    rb_printf_buffer f;
+    VALUE klass;
+    /* Append the output of formatting fmt/ap to str itself and return str. */
+    StringValue(str);
+    rb_str_modify(str);
+    f._flags = __SWR | __SSTR;
+    f._bf._size = 0;
+    f._w = rb_str_capacity(str);
+    f._bf._base = (unsigned char *)str; /* hands the VALUE to ruby__sfvwrite */
+    f._p = (unsigned char *)RSTRING_END(str); /* start writing at the current end of str */
+    klass = RBASIC(str)->klass; /* saved so it can be restored after formatting */
+    RBASIC(str)->klass = 0; /* ruby__sfvwrite raises if klass is non-zero */
+    f.vwrite = ruby__sfvwrite;
+    BSD_vfprintf(&f, fmt, ap);
+    RBASIC(str)->klass = klass;
+    rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); /* set length to old content plus bytes written */
+
+    return str;
+}
+
+VALUE
+rb_str_catf(VALUE str, const char *format, ...)
+{
+    va_list ap;
+    /* Variadic front end: forwards str, format and the argument list to rb_str_vcatf. */
+    va_start(ap, format);
+    str = rb_str_vcatf(str, format, ap);
+    va_end(ap);
+
+    return str;
+}