diff options
Diffstat (limited to 'sprintf.c')
| -rw-r--r-- | sprintf.c | 1819 |
1 files changed, 1058 insertions, 761 deletions
@@ -3,7 +3,6 @@ sprintf.c - $Author$ - $Date$ created at: Fri Oct 15 10:39:26 JST 1993 Copyright (C) 1993-2007 Yukihiro Matsumoto @@ -12,46 +11,31 @@ **********************************************************************/ -#include "ruby/ruby.h" -#include "ruby/re.h" -#include "ruby/encoding.h" +#include "ruby/internal/config.h" + #include <math.h> #include <stdarg.h> -#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ +#ifdef HAVE_IEEEFP_H +# include <ieeefp.h> +#endif -static void fmt_setup(char*,int,int,int,int); +#include "id.h" +#include "internal.h" +#include "internal/error.h" +#include "internal/hash.h" +#include "internal/numeric.h" +#include "internal/object.h" +#include "internal/sanitizers.h" +#include "internal/symbol.h" +#include "ruby/encoding.h" +#include "ruby/re.h" +#include "ruby/util.h" -static char* -remove_sign_bits(char *str, int base) -{ - char *s, *t; - - s = t = str; - - if (base == 16) { - while (*t == 'f') { - t++; - } - } - else if (base == 8) { - if (*t == '3') t++; - while (*t == '7') { - t++; - } - } - else if (base == 2) { - while (*t == '1') { - t++; - } - } - if (t > s) { - while (*t) *s++ = *t++; - *s = '\0'; - } +#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ - return str; -} +static char *fmt_setup(char*,size_t,int,int,int,int); +static char *ruby_ultoa(unsigned long val, char *endp, int base, int octzero); static char sign_bits(int base, const char *p) @@ -60,13 +44,13 @@ sign_bits(int base, const char *p) switch (base) { case 16: - if (*p == 'X') c = 'F'; - else c = 'f'; - break; + if (*p == 'X') c = 'F'; + else c = 'f'; + break; case 8: - c = '7'; break; + c = '7'; break; case 2: - c = '1'; break; + c = '1'; break; } return c; } @@ -82,168 +66,144 @@ sign_bits(int base, const char *p) #define FPREC0 128 #define CHECK(l) do {\ - while (blen + (l) >= bsiz) {\ - bsiz*=2;\ + int cr = ENC_CODERANGE(result);\ + RUBY_ASSERT(bsiz >= blen); \ + while ((l) > bsiz - blen) {\ + bsiz*=2;\ + if (bsiz<0) rb_raise(rb_eArgError, "too big specifier");\ }\ rb_str_resize(result, bsiz);\ + ENC_CODERANGE_SET(result, cr);\ buf = RSTRING_PTR(result);\ } while (0) #define PUSH(s, l) do { \ CHECK(l);\ - memcpy(&buf[blen], s, l);\ + PUSH_(s, l);\ +} while (0) + +#define PUSH_(s, l) do { \ + memcpy(&buf[blen], (s), (l));\ blen += (l);\ } while (0) #define FILL(c, l) do { \ + if ((l) <= 0) break;\ CHECK(l);\ - memset(&buf[blen], c, l);\ + FILL_(c, l);\ +} while (0) + +#define FILL_(c, l) do { \ + memset(&buf[blen], (c), (l));\ blen += (l);\ } while (0) -#define GETARG() (nextvalue != Qundef ? nextvalue : \ - posarg < 0 ? \ - (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \ +#define GETARG() (!UNDEF_P(nextvalue) ? nextvalue : \ + GETNEXTARG()) + +#define GETNEXTARG() ( \ + check_next_arg(posarg, nextarg), \ (posarg = nextarg++, GETNTHARG(posarg))) -#define GETPOSARG(n) (posarg > 0 ? \ - (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \ - ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \ - (posarg = -1, GETNTHARG(n)))) +#define GETPOSARG(n) ( \ + check_pos_arg(posarg, (n)), \ + (posarg = -1, GETNTHARG(n))) #define GETNTHARG(nth) \ - ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth]) + (((nth) >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[(nth)]) + +#define CHECKNAMEARG(name, len, enc) ( \ + check_name_arg(posarg, name, len, enc), \ + posarg = -2) #define GETNUM(n, val) \ - for (; p < end && rb_enc_isdigit(*p, enc); p++) { \ - int next_n = 10 * n + (*p - '0'); \ - if (next_n / 10 != n) {\ - rb_raise(rb_eArgError, #val " too big"); \ - } \ - n = next_n; \ - } \ - if (p >= end) { \ - rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \ - } + (!(p = get_num(p, end, enc, &(n))) ? \ + rb_raise(rb_eArgError, #val " too big") : (void)0) #define GETASTER(val) do { \ t = p++; \ n = 0; \ GETNUM(n, val); \ if (*p == '$') { \ - tmp = GETPOSARG(n); \ + tmp = GETPOSARG(n); \ } \ else { \ - tmp = GETARG(); \ - p = t; \ + tmp = GETNEXTARG(); \ + p = t; \ } \ - val = NUM2INT(tmp); \ + (val) = NUM2INT(tmp); \ } while (0) +static const char * +get_num(const char *p, const char *end, rb_encoding *enc, int *valp) +{ + int next_n = *valp; + for (; p < end && rb_enc_isdigit(*p, enc); p++) { + if (MUL_OVERFLOW_INT_P(10, next_n)) + return NULL; + next_n *= 10; + if (INT_MAX - (*p - '0') < next_n) + return NULL; + next_n += *p - '0'; + } + if (p >= end) { + rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); + } + *valp = next_n; + return p; +} + +static void +check_next_arg(int posarg, int nextarg) +{ + switch (posarg) { + case -1: + rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg); + case -2: + rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg); + } +} + +static void +check_pos_arg(int posarg, int n) +{ + if (posarg > 0) { + rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg); + } + if (posarg == -2) { + rb_raise(rb_eArgError, "numbered(%d) after named", n); + } + if (n < 1) { + rb_raise(rb_eArgError, "invalid index - %d$", n); + } +} + +static void +check_name_arg(int posarg, const char *name, int len, rb_encoding *enc) +{ + if (posarg > 0) { + rb_enc_raise(enc, rb_eArgError, "named%.*s after unnumbered(%d)", len, name, posarg); + } + if (posarg == -1) { + rb_enc_raise(enc, rb_eArgError, "named%.*s after numbered", len, name); + } +} + +static VALUE +get_hash(volatile VALUE *hash, int argc, const VALUE *argv) +{ + VALUE tmp; -/* - * call-seq: - * format(format_string [, arguments...] ) => string - * sprintf(format_string [, arguments...] ) => string - * - * Returns the string resulting from applying <i>format_string</i> to - * any additional arguments. Within the format string, any characters - * other than format sequences are copied to the result. A format - * sequence consists of a percent sign, followed by optional flags, - * width, and precision indicators, then terminated with a field type - * character. The field type controls how the corresponding - * <code>sprintf</code> argument is to be interpreted, while the flags - * modify that interpretation. The field type characters are listed - * in the table at the end of this section. The flag characters are: - * - * Flag | Applies to | Meaning - * ---------+--------------+----------------------------------------- - * space | bdeEfgGiouxX | Leave a space at the start of - * | | positive numbers. - * ---------+--------------+----------------------------------------- - * (digit)$ | all | Specifies the absolute argument number - * | | for this field. Absolute and relative - * | | argument numbers cannot be mixed in a - * | | sprintf string. - * ---------+--------------+----------------------------------------- - * # | beEfgGoxX | Use an alternative format. For the - * | | conversions `o', `x', `X', and `b', - * | | prefix the result with ``0'', ``0x'', ``0X'', - * | | and ``0b'', respectively. For `e', - * | | `E', `f', `g', and 'G', force a decimal - * | | point to be added, even if no digits follow. - * | | For `g' and 'G', do not remove trailing zeros. - * ---------+--------------+----------------------------------------- - * + | bdeEfgGiouxX | Add a leading plus sign to positive numbers. - * ---------+--------------+----------------------------------------- - * - | all | Left-justify the result of this conversion. - * ---------+--------------+----------------------------------------- - * 0 (zero) | bdeEfgGiouxX | Pad with zeros, not spaces. - * ---------+--------------+----------------------------------------- - * * | all | Use the next argument as the field width. - * | | If negative, left-justify the result. If the - * | | asterisk is followed by a number and a dollar - * | | sign, use the indicated argument as the width. - * - * - * The field width is an optional integer, followed optionally by a - * period and a precision. The width specifies the minimum number of - * characters that will be written to the result for this field. For - * numeric fields, the precision controls the number of decimal places - * displayed. For string fields, the precision determines the maximum - * number of characters to be copied from the string. (Thus, the format - * sequence <code>%10.10s</code> will always contribute exactly ten - * characters to the result.) - * - * The field types are: - * - * Field | Conversion - * ------+-------------------------------------------------------------- - * b | Convert argument as a binary number. - * c | Argument is the numeric code for a single character. - * d | Convert argument as a decimal number. - * E | Equivalent to `e', but uses an uppercase E to indicate - * | the exponent. - * e | Convert floating point argument into exponential notation - * | with one digit before the decimal point. The precision - * | determines the number of fractional digits (defaulting to six). - * f | Convert floating point argument as [-]ddd.ddd, - * | where the precision determines the number of digits after - * | the decimal point. - * G | Equivalent to `g', but use an uppercase `E' in exponent form. - * g | Convert a floating point number using exponential form - * | if the exponent is less than -4 or greater than or - * | equal to the precision, or in d.dddd form otherwise. - * i | Identical to `d'. - * o | Convert argument as an octal number. - * p | The valuing of argument.inspect. - * s | Argument is a string to be substituted. If the format - * | sequence contains a precision, at most that many characters - * | will be copied. - * u | Treat argument as an unsigned decimal number. Negative integers - * | are displayed as a 32 bit two's complement plus one for the - * | underlying architecture; that is, 2 ** 32 + n. However, since - * | Ruby has no inherent limit on bits used to represent the - * | integer, this value is preceded by two dots (..) in order to - * | indicate a infinite number of leading sign bits. - * X | Convert argument as a hexadecimal number using uppercase - * | letters. Negative numbers will be displayed with two - * | leading periods (representing an infinite string of - * | leading 'FF's. - * x | Convert argument as a hexadecimal number. - * | Negative numbers will be displayed with two - * | leading periods (representing an infinite string of - * | leading 'ff's. - * - * Examples: - * - * sprintf("%d %04x", 123, 123) #=> "123 007b" - * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" - * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" - * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" - * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" - * sprintf("%u", -123) #=> "..4294967173" - */ + if (!UNDEF_P(*hash)) return *hash; + if (argc != 2) { + rb_raise(rb_eArgError, "one hash required"); + } + tmp = rb_check_hash_type(argv[1]); + if (NIL_P(tmp)) { + rb_raise(rb_eArgError, "one hash required"); + } + return (*hash = tmp); +} VALUE rb_f_sprintf(int argc, const VALUE *argv) @@ -254,623 +214,764 @@ rb_f_sprintf(int argc, const VALUE *argv) VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt) { + enum {default_float_precision = 6}; rb_encoding *enc; const char *p, *end; char *buf; - int blen, bsiz; + long blen, bsiz; VALUE result; + long scanned = 0; + enum ruby_coderange_type coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; - int tainted = 0; VALUE nextvalue; VALUE tmp; + VALUE orig; VALUE str; + volatile VALUE hash = Qundef; #define CHECK_FOR_WIDTH(f) \ if ((f) & FWIDTH) { \ - rb_raise(rb_eArgError, "width given twice"); \ + rb_raise(rb_eArgError, "width given twice"); \ } \ if ((f) & FPREC0) { \ - rb_raise(rb_eArgError, "width after precision"); \ + rb_raise(rb_eArgError, "width after precision"); \ } #define CHECK_FOR_FLAGS(f) \ if ((f) & FWIDTH) { \ - rb_raise(rb_eArgError, "flag after width"); \ + rb_raise(rb_eArgError, "flag after width"); \ } \ if ((f) & FPREC0) { \ - rb_raise(rb_eArgError, "flag after precision"); \ + rb_raise(rb_eArgError, "flag after precision"); \ } +#define update_coderange(partial) do { \ + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { \ + int cr = coderange; \ + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); \ + ENC_CODERANGE_SET(result, \ + (partial && cr == ENC_CODERANGE_UNKNOWN ? \ + ENC_CODERANGE_BROKEN : (coderange = cr))); \ + } \ + } while (0) ++argc; --argv; - if (OBJ_TAINTED(fmt)) tainted = 1; StringValue(fmt); enc = rb_enc_get(fmt); - fmt = rb_str_new4(fmt); + rb_must_asciicompat(fmt); + orig = fmt; + fmt = rb_str_tmp_frozen_acquire(fmt); p = RSTRING_PTR(fmt); end = p + RSTRING_LEN(fmt); blen = 0; bsiz = 120; result = rb_str_buf_new(bsiz); + rb_enc_associate(result, enc); buf = RSTRING_PTR(result); + memset(buf, 0, bsiz); + ENC_CODERANGE_SET(result, coderange); for (; p < end; p++) { - const char *t; - int n; - - for (t = p; t < end && *t != '%'; t++) ; - PUSH(p, t - p); - if (t >= end) { - /* end of fmt string */ - goto sprint_exit; - } - p = t + 1; /* skip `%' */ - - width = prec = -1; - nextvalue = Qundef; + const char *t; + int n; + VALUE sym = Qnil; + + for (t = p; t < end && *t != '%'; t++) ; + if (t + 1 == end) { + rb_raise(rb_eArgError, "incomplete format specifier; use %%%% (double %%) instead"); + } + PUSH(p, t - p); + update_coderange(FALSE); + if (t >= end) { + /* end of fmt string */ + goto sprint_exit; + } + p = t + 1; /* skip `%' */ + + width = prec = -1; + nextvalue = Qundef; retry: - switch (*p) { - default: - if (rb_enc_isprint(*p, enc)) - rb_raise(rb_eArgError, "malformed format string - %%%c", *p); - else - rb_raise(rb_eArgError, "malformed format string"); - break; - - case ' ': - CHECK_FOR_FLAGS(flags); - flags |= FSPACE; - p++; - goto retry; - - case '#': - CHECK_FOR_FLAGS(flags); - flags |= FSHARP; - p++; - goto retry; - - case '+': - CHECK_FOR_FLAGS(flags); - flags |= FPLUS; - p++; - goto retry; - - case '-': - CHECK_FOR_FLAGS(flags); - flags |= FMINUS; - p++; - goto retry; - - case '0': - CHECK_FOR_FLAGS(flags); - flags |= FZERO; - p++; - goto retry; - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - n = 0; - GETNUM(n, width); - if (*p == '$') { - if (nextvalue != Qundef) { - rb_raise(rb_eArgError, "value given twice - %d$", n); - } - nextvalue = GETPOSARG(n); - p++; - goto retry; - } - CHECK_FOR_WIDTH(flags); - width = n; - flags |= FWIDTH; - goto retry; - - case '*': - CHECK_FOR_WIDTH(flags); - flags |= FWIDTH; - GETASTER(width); - if (width < 0) { - flags |= FMINUS; - width = -width; - } - p++; - goto retry; - - case '.': - if (flags & FPREC0) { - rb_raise(rb_eArgError, "precision given twice"); - } - flags |= FPREC|FPREC0; - - prec = 0; - p++; - if (*p == '*') { - GETASTER(prec); - if (prec < 0) { /* ignore negative precision */ - flags &= ~FPREC; - } - p++; - goto retry; - } - - GETNUM(prec, precision); - goto retry; - - case '\n': - case '\0': - p--; - case '%': - if (flags != FNONE) { - rb_raise(rb_eArgError, "illegal format character - %%"); - } - PUSH("%", 1); - break; - - case 'c': - { - VALUE val = GETARG(); - VALUE tmp; - int c, n; - - tmp = rb_check_string_type(val); - if (!NIL_P(tmp)) { - if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { - rb_raise(rb_eArgError, "%%c requires a character"); - } - c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc); - } - else { - c = NUM2INT(val); - } - n = rb_enc_codelen(c, enc); - if (n == 0) { - rb_raise(rb_eArgError, "invalid character"); - } - if (!(flags & FWIDTH)) { - CHECK(n); - rb_enc_mbcput(c, &buf[blen], enc); - blen += n; - } - else if ((flags & FMINUS)) { - CHECK(n); - rb_enc_mbcput(c, &buf[blen], enc); - blen += n; - FILL(' ', width-1); - } - else { - FILL(' ', width-1); - CHECK(n); - rb_enc_mbcput(c, &buf[blen], enc); - blen += n; - } - } - break; - - case 's': - case 'p': - { - VALUE arg = GETARG(); - long len, slen; - - if (*p == 'p') arg = rb_inspect(arg); - str = rb_obj_as_string(arg); - if (OBJ_TAINTED(str)) tainted = 1; - len = RSTRING_LEN(str); - enc = rb_enc_check(fmt, str); - if (flags&(FPREC|FWIDTH)) { - slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); - if (slen < 0) { - rb_raise(rb_eArgError, "invalid mbstring sequence"); - } - } - if (flags&FPREC) { - if (prec < slen) { - char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), - prec, enc); - slen = prec; - len = p - RSTRING_PTR(str); - } - } - /* need to adjust multi-byte string pos */ - if (flags&FWIDTH) { - if (width > slen) { - width -= slen; - if (!(flags&FMINUS)) { - CHECK(width); - while (width--) { - buf[blen++] = ' '; - } - } - CHECK(len); - memcpy(&buf[blen], RSTRING_PTR(str), len); - blen += len; - if (flags&FMINUS) { - CHECK(width); - while (width--) { - buf[blen++] = ' '; - } - } - break; - } - } - PUSH(RSTRING_PTR(str), len); - } - break; - - case 'd': - case 'i': - case 'o': - case 'x': - case 'X': - case 'b': - case 'B': - case 'u': - { - volatile VALUE val = GETARG(); - char fbuf[32], nbuf[64], *s, *t; - const char *prefix = 0; - int sign = 0; - char sc = 0; - long v = 0; - int base, bignum = 0; - int len, pos; - - switch (*p) { - case 'd': - case 'i': - case 'u': - sign = 1; break; - case 'o': - case 'x': - case 'X': - case 'b': - case 'B': - if (flags&(FPLUS|FSPACE)) sign = 1; - break; - } - if (flags & FSHARP) { - switch (*p) { - case 'o': - prefix = "0"; break; - case 'x': - prefix = "0x"; break; - case 'X': - prefix = "0X"; break; - case 'b': - prefix = "0b"; break; - case 'B': - prefix = "0B"; break; - } - if (prefix) { - width -= strlen(prefix); - } - } - - bin_retry: - switch (TYPE(val)) { - case T_FLOAT: - if (FIXABLE((long)RFLOAT_VALUE(val))) { - val = LONG2FIX((long)RFLOAT_VALUE(val)); - goto bin_retry; - } - val = rb_dbl2big(RFLOAT_VALUE(val)); - if (FIXNUM_P(val)) goto bin_retry; - bignum = 1; - break; - case T_STRING: - val = rb_str_to_inum(val, 0, Qtrue); - goto bin_retry; - case T_BIGNUM: - bignum = 1; - break; - case T_FIXNUM: - v = FIX2LONG(val); - break; - default: - val = rb_Integer(val); - goto bin_retry; - } - - switch (*p) { - case 'o': - base = 8; break; - case 'x': - case 'X': - base = 16; break; - case 'b': - case 'B': - base = 2; break; - case 'u': - case 'd': - case 'i': - default: - base = 10; break; - } - - if (!bignum) { - if (base == 2) { - val = rb_int2big(v); - goto bin_retry; - } - if (sign) { - char c = *p; - if (c == 'i') c = 'd'; /* %d and %i are identical */ - if (v < 0) { - v = -v; - sc = '-'; - width--; - } - else if (flags & FPLUS) { - sc = '+'; - width--; - } - else if (flags & FSPACE) { - sc = ' '; - width--; - } - sprintf(fbuf, "%%l%c", c); - sprintf(nbuf, fbuf, v); - } - else { - s = nbuf; - if (v < 0) { - strcpy(s, ".."); - s += 2; - } - sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p); - sprintf(s, fbuf, v); - if (v < 0) { - char d = 0; - - remove_sign_bits(s, base); - switch (base) { - case 16: - d = 'f'; break; - case 8: - d = '7'; break; - } - if (d && *s != d) { - memmove(s+1, s, strlen(s)+1); - *s = d; - } - } - } - s = nbuf; - } - else { - if (sign) { - tmp = rb_big2str(val, base); - s = RSTRING_PTR(tmp); - if (s[0] == '-') { - s++; - sc = '-'; - width--; - } - else if (flags & FPLUS) { - sc = '+'; - width--; - } - else if (flags & FSPACE) { - sc = ' '; - width--; - } - } - else { - volatile VALUE tmp1; - if (!RBIGNUM_SIGN(val)) { - val = rb_big_clone(val); - rb_big_2comp(val); - } - tmp1 = tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val)); - s = RSTRING_PTR(tmp); - if (*s == '-') { - if (base == 10) { - rb_warning("negative number for %%u specifier"); - } - remove_sign_bits(++s, base); - tmp = rb_str_new(0, 3+strlen(s)); - t = RSTRING_PTR(tmp); - if (!(flags&(FPREC|FZERO))) { - strcpy(t, ".."); - t += 2; - } - switch (base) { - case 16: - if (s[0] != 'f') strcpy(t++, "f"); break; - case 8: - if (s[0] != '7') strcpy(t++, "7"); break; - case 2: - if (s[0] != '1') strcpy(t++, "1"); break; - } - strcpy(t, s); - s = RSTRING_PTR(tmp); - } - } - } - - pos = -1; - len = strlen(s); - - if (*p == 'X') { - char *pp = s; - int c; - while (c = (int)*pp) { - *pp = rb_enc_toupper(c, enc); - pp++; - } - } - if ((flags&(FZERO|FPREC)) == FZERO) { - prec = width; - width = 0; - } - else { - if (prec < len) prec = len; - width -= prec; - } - if (!(flags&FMINUS)) { - CHECK(width); - while (width-- > 0) { - buf[blen++] = ' '; - } - } - if (sc) PUSH(&sc, 1); - if (prefix) { - int plen = strlen(prefix); - PUSH(prefix, plen); - } - CHECK(prec - len); - if (!bignum && v < 0) { - char c = sign_bits(base, p); - while (len < prec--) { - buf[blen++] = c; - } - } - else { - char c; - - if (!sign && bignum && !RBIGNUM_SIGN(val)) - c = sign_bits(base, p); - else - c = '0'; - while (len < prec--) { - buf[blen++] = c; - } - } - PUSH(s, len); - CHECK(width); - while (width-- > 0) { - buf[blen++] = ' '; - } - } - break; - - case 'f': - case 'g': - case 'G': - case 'e': - case 'E': - { - VALUE val = GETARG(); - double fval; - int i, need = 6; - char fbuf[32]; - - fval = RFLOAT_VALUE(rb_Float(val)); - if (isnan(fval) || isinf(fval)) { - const char *expr; - - if (isnan(fval)) { - expr = "NaN"; - } - else { - expr = "Inf"; - } - need = strlen(expr); - if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) - need++; - if ((flags & FWIDTH) && need < width) - need = width; - - CHECK(need); - sprintf(&buf[blen], "%*s", need, ""); - if (flags & FMINUS) { - if (!isnan(fval) && fval < 0.0) - buf[blen++] = '-'; - else if (flags & FPLUS) - buf[blen++] = '+'; - else if (flags & FSPACE) - blen++; - strncpy(&buf[blen], expr, strlen(expr)); - } - else if (flags & FZERO) { - if (!isnan(fval) && fval < 0.0) { - buf[blen++] = '-'; - need--; - } - else if (flags & FPLUS) { - buf[blen++] = '+'; - need--; - } - else if (flags & FSPACE) { - blen++; - need--; - } - while (need-- - strlen(expr) > 0) { - buf[blen++] = '0'; - } - strncpy(&buf[blen], expr, strlen(expr)); - } - else { - if (!isnan(fval) && fval < 0.0) - buf[blen + need - strlen(expr) - 1] = '-'; - else if (flags & FPLUS) - buf[blen + need - strlen(expr) - 1] = '+'; - strncpy(&buf[blen + need - strlen(expr)], expr, - strlen(expr)); - } - blen += strlen(&buf[blen]); - break; - } - - fmt_setup(fbuf, *p, flags, width, prec); - need = 0; - if (*p != 'e' && *p != 'E') { - i = INT_MIN; - frexp(fval, &i); - if (i > 0) - need = BIT_DIGITS(i); - } - need += (flags&FPREC) ? prec : 6; - if ((flags&FWIDTH) && need < width) - need = width; - need += 20; - - CHECK(need); - sprintf(&buf[blen], fbuf, fval); - blen += strlen(&buf[blen]); - } - break; - } - flags = FNONE; + switch (*p) { + default: + if (rb_enc_isprint(*p, enc)) + rb_raise(rb_eArgError, "malformed format string - %%%c", *p); + else + rb_raise(rb_eArgError, "malformed format string"); + break; + + case ' ': + CHECK_FOR_FLAGS(flags); + flags |= FSPACE; + p++; + goto retry; + + case '#': + CHECK_FOR_FLAGS(flags); + flags |= FSHARP; + p++; + goto retry; + + case '+': + CHECK_FOR_FLAGS(flags); + flags |= FPLUS; + p++; + goto retry; + + case '-': + CHECK_FOR_FLAGS(flags); + flags |= FMINUS; + p++; + goto retry; + + case '0': + CHECK_FOR_FLAGS(flags); + flags |= FZERO; + p++; + goto retry; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = 0; + GETNUM(n, width); + if (*p == '$') { + if (!UNDEF_P(nextvalue)) { + rb_raise(rb_eArgError, "value given twice - %d$", n); + } + nextvalue = GETPOSARG(n); + p++; + goto retry; + } + CHECK_FOR_WIDTH(flags); + width = n; + flags |= FWIDTH; + goto retry; + + case '<': + case '{': + { + const char *start = p; + char term = (*p == '<') ? '>' : '}'; + int len; + + for (; p < end && *p != term; ) { + p += rb_enc_mbclen(p, end, enc); + } + if (p >= end) { + rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); + } +#if SIZEOF_INT < SIZEOF_SIZE_T + if ((size_t)(p - start) >= INT_MAX) { + const int message_limit = 20; + len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start); + rb_enc_raise(enc, rb_eArgError, + "too long name (%"PRIuSIZE" bytes) - %.*s...%c", + (size_t)(p - start - 2), len, start, term); + } +#endif + len = (int)(p - start + 1); /* including parenthesis */ + if (sym != Qnil) { + rb_enc_raise(enc, rb_eArgError, "named%.*s after <%"PRIsVALUE">", + len, start, rb_sym2str(sym)); + } + CHECKNAMEARG(start, len, enc); + get_hash(&hash, argc, argv); + sym = rb_check_symbol_cstr(start + 1, + len - 2 /* without parenthesis */, + enc); + if (!NIL_P(sym)) nextvalue = rb_hash_lookup2(hash, sym, Qundef); + if (UNDEF_P(nextvalue)) { + if (NIL_P(sym)) { + sym = rb_sym_intern(start + 1, + len - 2 /* without parenthesis */, + enc); + } + nextvalue = rb_hash_default_value(hash, sym); + if (NIL_P(nextvalue)) { + rb_key_err_raise(rb_enc_sprintf(enc, "key%.*s not found", len, start), hash, sym); + } + } + if (term == '}') goto format_s; + p++; + goto retry; + } + + case '*': + CHECK_FOR_WIDTH(flags); + flags |= FWIDTH; + GETASTER(width); + if (width < 0) { + flags |= FMINUS; + width = -width; + if (width < 0) rb_raise(rb_eArgError, "width too big"); + } + p++; + goto retry; + + case '.': + if (flags & FPREC0) { + rb_raise(rb_eArgError, "precision given twice"); + } + flags |= FPREC|FPREC0; + + prec = 0; + p++; + if (*p == '*') { + GETASTER(prec); + if (prec < 0) { /* ignore negative precision */ + flags &= ~FPREC; + } + p++; + goto retry; + } + + GETNUM(prec, precision); + goto retry; + + case '%': + if (flags != FNONE) { + rb_raise(rb_eArgError, "invalid format character - %%"); + } + PUSH("%", 1); + break; + + case 'c': + { + VALUE val = GETARG(); + VALUE tmp; + unsigned int c; + int n, encidx; + + tmp = rb_check_string_type(val); + if (!NIL_P(tmp)) { + flags |= FPREC; + prec = 1; + str = tmp; + goto format_s1; + } + n = NUM2INT(val); + if (n >= 0) { + n = rb_enc_codelen((c = n), enc); + encidx = rb_ascii8bit_appendable_encoding_index(enc, c); + } + if (n <= 0) { + rb_raise(rb_eArgError, "invalid character"); + } + if (encidx >= 0 && encidx != rb_enc_to_index(enc)) { + /* special case */ + rb_enc_associate_index(result, encidx); + enc = rb_enc_from_index(encidx); + coderange = ENC_CODERANGE_VALID; + } + if (!(flags & FWIDTH)) { + CHECK(n); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + else if ((flags & FMINUS)) { + --width; + CHECK(n + (width > 0 ? width : 0)); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + if (width > 0) FILL_(' ', width); + } + else { + --width; + CHECK(n + (width > 0 ? width : 0)); + if (width > 0) FILL_(' ', width); + rb_enc_mbcput(c, &buf[blen], enc); + blen += n; + } + } + break; + + case 's': + case 'p': + format_s: + { + VALUE arg = GETARG(); + long len, slen; + + if (*p == 'p') { + str = rb_inspect(arg); + } + else { + str = rb_obj_as_string(arg); + } + format_s1: + len = RSTRING_LEN(str); + rb_str_set_len(result, blen); + update_coderange(TRUE); + enc = rb_enc_check(result, str); + if (flags&(FPREC|FWIDTH)) { + slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); + if (slen < 0) { + rb_raise(rb_eArgError, "invalid mbstring sequence"); + } + if ((flags&FPREC) && (prec < slen)) { + char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), + prec, enc); + slen = prec; + len = p - RSTRING_PTR(str); + } + /* need to adjust multi-byte string pos */ + if ((flags&FWIDTH) && (width > slen)) { + width -= (int)slen; + CHECK(len + width); + if (!(flags&FMINUS)) { + FILL_(' ', width); + width = 0; + } + memcpy(&buf[blen], RSTRING_PTR(str), len); + RB_GC_GUARD(str); + blen += len; + if (flags&FMINUS) { + FILL_(' ', width); + } + rb_enc_associate(result, enc); + break; + } + } + PUSH(RSTRING_PTR(str), len); + RB_GC_GUARD(str); + rb_enc_associate(result, enc); + } + break; + + case 'd': + case 'i': + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + case 'u': + { + volatile VALUE val = GETARG(); + int valsign; + char nbuf[BIT_DIGITS(SIZEOF_LONG*CHAR_BIT)+2], *s; + const char *prefix = 0; + int sign = 0, dots = 0; + char sc = 0; + long v = 0; + int base, bignum = 0; + int len; + + switch (*p) { + case 'd': + case 'i': + case 'u': + sign = 1; break; + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + if (flags&(FPLUS|FSPACE)) sign = 1; + break; + } + if (flags & FSHARP) { + switch (*p) { + case 'o': + prefix = "0"; break; + case 'x': + prefix = "0x"; break; + case 'X': + prefix = "0X"; break; + case 'b': + prefix = "0b"; break; + case 'B': + prefix = "0B"; break; + } + } + + bin_retry: + switch (TYPE(val)) { + case T_FLOAT: + if (FIXABLE(RFLOAT_VALUE(val))) { + val = LONG2FIX((long)RFLOAT_VALUE(val)); + goto bin_retry; + } + val = rb_dbl2big(RFLOAT_VALUE(val)); + if (FIXNUM_P(val)) goto bin_retry; + bignum = 1; + break; + case T_STRING: + val = rb_str_to_inum(val, 0, TRUE); + goto bin_retry; + case T_BIGNUM: + bignum = 1; + break; + case T_FIXNUM: + v = FIX2LONG(val); + break; + default: + val = rb_Integer(val); + goto bin_retry; + } + + switch (*p) { + case 'o': + base = 8; break; + case 'x': + case 'X': + base = 16; break; + case 'b': + case 'B': + base = 2; break; + case 'u': + case 'd': + case 'i': + default: + base = 10; break; + } + + if (base != 10) { + int numbits = ffs(base)-1; + size_t abs_nlz_bits; + size_t numdigits = rb_absint_numwords(val, numbits, &abs_nlz_bits); + long i; + if (INT_MAX-1 < numdigits) /* INT_MAX is used because rb_long2int is used later. */ + rb_raise(rb_eArgError, "size too big"); + if (sign) { + if (numdigits == 0) + numdigits = 1; + tmp = rb_str_new(NULL, numdigits); + valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp), + 1, CHAR_BIT-numbits, INTEGER_PACK_BIG_ENDIAN); + for (i = 0; i < RSTRING_LEN(tmp); i++) + RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]]; + s = RSTRING_PTR(tmp); + if (valsign < 0) { + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + } + else { + /* Following conditional "numdigits++" guarantees the + * most significant digit as + * - '1'(bin), '7'(oct) or 'f'(hex) for negative numbers + * - '0' for zero + * - not '0' for positive numbers. + * + * It also guarantees the most significant two + * digits will not be '11'(bin), '77'(oct), 'ff'(hex) + * or '00'. */ + if (numdigits == 0 || + ((abs_nlz_bits != (size_t)(numbits-1) || + !rb_absint_singlebit_p(val)) && + (!bignum ? v < 0 : BIGNUM_NEGATIVE_P(val)))) + numdigits++; + tmp = rb_str_new(NULL, numdigits); + valsign = rb_integer_pack(val, RSTRING_PTR(tmp), RSTRING_LEN(tmp), + 1, CHAR_BIT-numbits, INTEGER_PACK_2COMP | INTEGER_PACK_BIG_ENDIAN); + for (i = 0; i < RSTRING_LEN(tmp); i++) + RSTRING_PTR(tmp)[i] = ruby_digitmap[((unsigned char *)RSTRING_PTR(tmp))[i]]; + s = RSTRING_PTR(tmp); + dots = valsign < 0; + } + len = rb_long2int(RSTRING_END(tmp) - s); + } + else if (!bignum) { + valsign = 1; + if (v < 0) { + v = -v; + sc = '-'; + width--; + valsign = -1; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + s = ruby_ultoa((unsigned long)v, nbuf + sizeof(nbuf), 10, 0); + len = (int)(nbuf + sizeof(nbuf) - s); + } + else { + tmp = rb_big2str(val, 10); + s = RSTRING_PTR(tmp); + valsign = 1; + if (s[0] == '-') { + s++; + sc = '-'; + width--; + valsign = -1; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + len = rb_long2int(RSTRING_END(tmp) - s); + } + + if (dots) { + prec -= 2; + width -= 2; + } + + if (*p == 'X') { + char *pp = s; + int c; + while ((c = (int)(unsigned char)*pp) != 0) { + *pp = rb_enc_toupper(c, enc); + pp++; + } + } + if (prefix && !prefix[1]) { /* octal */ + if (dots) { + prefix = 0; + } + else if (len == 1 && *s == '0') { + len = 0; + if (flags & FPREC) prec--; + } + else if ((flags & FPREC) && (prec > len)) { + prefix = 0; + } + } + else if (len == 1 && *s == '0') { + prefix = 0; + } + if (prefix) { + width -= (int)strlen(prefix); + } + if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { + prec = width; + width = 0; + } + else { + if (prec < len) { + if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; + prec = len; + } + width -= prec; + } + if (!(flags&FMINUS)) { + FILL(' ', width); + width = 0; + } + if (sc) PUSH(&sc, 1); + if (prefix) { + int plen = (int)strlen(prefix); + PUSH(prefix, plen); + } + if (dots) PUSH("..", 2); + if (prec > len) { + CHECK(prec - len); + if (!sign && valsign < 0) { + char c = sign_bits(base, p); + FILL_(c, prec - len); + } + else if ((flags & (FMINUS|FPREC)) != FMINUS) { + FILL_('0', prec - len); + } + } + PUSH(s, len); + RB_GC_GUARD(tmp); + FILL(' ', width); + } + break; + + case 'f': + { + VALUE val = GETARG(), num, den; + int sign = (flags&FPLUS) ? 1 : 0, zero = 0; + long len, fill; + if (RB_INTEGER_TYPE_P(val)) { + den = INT2FIX(1); + num = val; + } + else if (RB_TYPE_P(val, T_RATIONAL)) { + den = rb_rational_den(val); + num = rb_rational_num(val); + } + else { + nextvalue = val; + goto float_value; + } + if (!(flags&FPREC)) prec = default_float_precision; + if (FIXNUM_P(num)) { + if ((SIGNED_VALUE)num < 0) { + long n = -FIX2LONG(num); + num = LONG2NUM(n); + sign = -1; + } + } + else if (BIGNUM_NEGATIVE_P(num)) { + sign = -1; + num = rb_big_uminus(num); + } + if (den != INT2FIX(1)) { + num = rb_int_mul(num, rb_int_positive_pow(10, prec)); + num = rb_int_plus(num, rb_int_idiv(den, INT2FIX(2))); + num = rb_int_idiv(num, den); + } + else if (prec >= 0) { + zero = prec; + } + val = rb_int2str(num, 10); + len = RSTRING_LEN(val) + zero; + if (prec >= len) len = prec + 1; /* integer part 0 */ + if (sign || (flags&FSPACE)) ++len; + if (prec > 0) ++len; /* period */ + fill = width > len ? width - len : 0; + CHECK(fill + len); + if (fill && !(flags&(FMINUS|FZERO))) { + FILL_(' ', fill); + } + if (sign || (flags&FSPACE)) { + buf[blen++] = sign > 0 ? '+' : sign < 0 ? '-' : ' '; + } + if (fill && (flags&(FMINUS|FZERO)) == FZERO) { + FILL_('0', fill); + } + len = RSTRING_LEN(val) + zero; + t = RSTRING_PTR(val); + if (len > prec) { + PUSH_(t, len - prec); + } + else { + buf[blen++] = '0'; + } + if (prec > 0) { + buf[blen++] = '.'; + } + if (zero) { + FILL_('0', zero); + } + else if (prec > len) { + FILL_('0', prec - len); + PUSH_(t, len); + } + else if (prec > 0) { + PUSH_(t + len - prec, prec); + } + if (fill && (flags&FMINUS)) { + FILL_(' ', fill); + } + RB_GC_GUARD(val); + break; + } + case 'g': + case 'G': + case 'e': + case 'E': + /* TODO: rational support */ + case 'a': + case 'A': + float_value: + { + VALUE val = GETARG(); + double fval; + + fval = RFLOAT_VALUE(rb_Float(val)); + if (!isfinite(fval)) { + const char *expr; + int need; + int elen; + char sign = '\0'; + + if (isnan(fval)) { + expr = "NaN"; + } + else { + expr = "Inf"; + } + need = (int)strlen(expr); + elen = need; + if (!isnan(fval) && fval < 0.0) + sign = '-'; + else if (flags & (FPLUS|FSPACE)) + sign = (flags & FPLUS) ? '+' : ' '; + if (sign) + ++need; + if ((flags & FWIDTH) && need < width) + need = width; + + FILL(' ', need); + if (flags & FMINUS) { + if (sign) + buf[blen - need--] = sign; + memcpy(&buf[blen - need], expr, elen); + } + else { + if (sign) + buf[blen - elen - 1] = sign; + memcpy(&buf[blen - elen], expr, elen); + } + break; + } + else { + int cr = ENC_CODERANGE(result); + char fbuf[2*BIT_DIGITS(SIZEOF_INT*CHAR_BIT)+10]; + char *fmt = fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); + rb_str_set_len(result, blen); + rb_str_catf(result, fmt, fval); + ENC_CODERANGE_SET(result, cr); + bsiz = rb_str_capacity(result); + RSTRING_GETMEM(result, buf, blen); + } + } + break; + } + flags = FNONE; } + update_coderange(FALSE); sprint_exit: - /* XXX - We cannot validiate the number of arguments if (digit)$ style used. + rb_str_tmp_frozen_release(orig, fmt); + /* XXX - We cannot validate the number of arguments if (digit)$ style used. */ - if (posarg >= 0 && nextarg < argc) { - const char *mesg = "too many arguments for format string"; - if (RTEST(ruby_debug)) rb_raise(rb_eArgError, mesg); - if (RTEST(ruby_verbose)) rb_warn(mesg); + if (posarg >= 0 && nextarg < argc && !(argc == 2 && RB_TYPE_P(argv[1], T_HASH))) { + const char *mesg = "too many arguments for format string"; + if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); + if (RTEST(ruby_verbose)) rb_warn("%s", mesg); } rb_str_resize(result, blen); - if (tainted) OBJ_TAINT(result); return result; } -static void -fmt_setup(char *buf, int c, int flags, int width, int prec) +static char * +fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec) { - *buf++ = '%'; - if (flags & FSHARP) *buf++ = '#'; - if (flags & FPLUS) *buf++ = '+'; - if (flags & FMINUS) *buf++ = '-'; - if (flags & FZERO) *buf++ = '0'; - if (flags & FSPACE) *buf++ = ' '; + buf += size; + *--buf = '\0'; + *--buf = c; - if (flags & FWIDTH) { - sprintf(buf, "%d", width); - buf += strlen(buf); + if (flags & FPREC) { + buf = ruby_ultoa(prec, buf, 10, 0); + *--buf = '.'; } - if (flags & FPREC) { - sprintf(buf, ".%d", prec); - buf += strlen(buf); + if (flags & FWIDTH) { + buf = ruby_ultoa(width, buf, 10, 0); } - *buf++ = c; - *buf = '\0'; + if (flags & FSPACE) *--buf = ' '; + if (flags & FZERO) *--buf = '0'; + if (flags & FMINUS) *--buf = '-'; + if (flags & FPLUS) *--buf = '+'; + if (flags & FSHARP) *--buf = '#'; + *--buf = '%'; + return buf; } #undef FILE @@ -881,17 +982,82 @@ fmt_setup(char *buf, int c, int flags, int width, int prec) #undef ferror #undef clearerr #undef fileno -#if SIZEOF_LONG < SIZEOF_VOIDP -# if SIZEOF_LONG_LONG == SIZEOF_VOIDP -# define _HAVE_SANE_QUAD_ -# define _HAVE_LLP64_ -# define quad_t LONG_LONG -# define u_quad_t unsigned LONG_LONG +#if SIZEOF_LONG < SIZEOF_LONG_LONG +# if SIZEOF_LONG_LONG == SIZEOF_VOIDP +/* actually this doesn't mean a pointer is strictly 64bit, but just + * quad_t size */ +# define _HAVE_LLP64_ # endif +# define _HAVE_SANE_QUAD_ +# define quad_t LONG_LONG +# define u_quad_t unsigned LONG_LONG +#endif +#define FLOATING_POINT 1 +#define BSD__dtoa ruby_dtoa +#define BSD__hdtoa ruby_hdtoa +#ifdef RUBY_PRI_VALUE_MARK +# define PRI_EXTRA_MARK RUBY_PRI_VALUE_MARK +#endif +#define lower_hexdigits (ruby_hexdigits+0) +#define upper_hexdigits (ruby_hexdigits+16) +#include "vsnprintf.c" + +static char * +ruby_ultoa(unsigned long val, char *endp, int base, int flags) +{ + const char *xdigs = lower_hexdigits; + int octzero = flags & FSHARP; + return BSD__ultoa(val, endp, base, octzero, xdigs); +} + +static int ruby_do_vsnprintf(char *str, size_t n, const char *fmt, va_list ap); + +int +ruby_vsnprintf(char *str, size_t n, const char *fmt, va_list ap) +{ + if (str && (ssize_t)n < 1) + return (EOF); + return ruby_do_vsnprintf(str, n, fmt, ap); +} + +static int +ruby_do_vsnprintf(char *str, size_t n, const char *fmt, va_list ap) +{ + ssize_t ret; + rb_printf_buffer f; + + f._flags = __SWR | __SSTR; + f._bf._base = f._p = (unsigned char *)str; + f._bf._size = f._w = str ? (n - 1) : 0; + f.vwrite = BSD__sfvwrite; + f.vextra = 0; + ret = BSD_vfprintf(&f, fmt, ap); + if (str) *f._p = 0; +#if SIZEOF_SIZE_T > SIZEOF_INT + if (n > INT_MAX) return INT_MAX; #endif -#undef vsnprintf -#undef snprintf -#include "missing/vsnprintf.c" + return (int)ret; +} + +int +ruby_snprintf(char *str, size_t n, char const *fmt, ...) +{ + int ret; + va_list ap; + + if (str && (ssize_t)n < 1) + return (EOF); + + va_start(ap, fmt); + ret = ruby_do_vsnprintf(str, n, fmt, ap); + va_end(ap); + return ret; +} + +typedef struct { + rb_printf_buffer base; + volatile VALUE value; +} rb_printf_buffer_extra; static int ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) @@ -899,48 +1065,157 @@ ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) struct __siov *iov; VALUE result = (VALUE)fp->_bf._base; char *buf = (char*)fp->_p; - size_t len, n; - int blen = buf - RSTRING_PTR(result), bsiz = fp->_w; + long len, n; + long blen = buf - RSTRING_PTR(result), bsiz = fp->_w; if (RBASIC(result)->klass) { - rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); + rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); } - if ((len = uio->uio_resid) == 0) - return 0; + if (uio->uio_resid == 0) + return 0; +#if SIZE_MAX > LONG_MAX + if (uio->uio_resid >= LONG_MAX) + rb_raise(rb_eRuntimeError, "too big string"); +#endif + len = (long)uio->uio_resid; CHECK(len); buf += blen; fp->_w = bsiz; for (iov = uio->uio_iov; len > 0; ++iov) { - MEMCPY(buf, iov->iov_base, char, n = iov->iov_len); - buf += n; - len -= n; + MEMCPY(buf, iov->iov_base, char, n = iov->iov_len); + buf += n; + len -= n; } fp->_p = (unsigned char *)buf; + rb_str_set_len(result, buf - RSTRING_PTR(result)); return 0; } -VALUE -rb_vsprintf(const char *fmt, va_list ap) +static const char * +ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int sign) { - rb_printf_buffer f; - VALUE result; + VALUE value, result = (VALUE)fp->_bf._base; + rb_encoding *enc; + char *cp; + + if (valsize != sizeof(VALUE)) return 0; + value = *(VALUE *)valp; + if (RBASIC(result)->klass) { + rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); + } + if (sign == '+') { +# define LITERAL(str) (*sz = rb_strlen_lit(str), str) + /* optimize special const cases */ + switch (value) { +# define LITERAL_CASE(x) case Q##x: return LITERAL(#x) + LITERAL_CASE(nil); + LITERAL_CASE(true); + LITERAL_CASE(false); +# undef LITERAL_CASE + } +# undef LITERAL + value = rb_inspect(value); + } + else if (SYMBOL_P(value)) { + value = rb_sym2str(value); + if (sign == ' ' && !rb_str_symname_p(value)) { + value = rb_str_escape(value); + } + } + else { + value = rb_obj_as_string(value); + if (sign == ' ') value = QUOTE(value); + } + enc = rb_enc_compatible(result, value); + if (enc) { + rb_enc_associate(result, enc); + } + else { + enc = rb_enc_get(result); + value = rb_str_conv_enc_opts(value, rb_enc_get(value), enc, + ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE, + Qnil); + *(volatile VALUE *)valp = value; + } + StringValueCStr(value); + RSTRING_GETMEM(value, cp, *sz); + ((rb_printf_buffer_extra *)fp)->value = value; + return cp; +} + +static void +ruby_vsprintf0(VALUE result, char *p, const char *fmt, va_list ap) +{ + rb_printf_buffer_extra buffer; +#define f buffer.base + VALUE klass = RBASIC(result)->klass; + int coderange = ENC_CODERANGE(result); + long scanned = 0; + + if (coderange != ENC_CODERANGE_UNKNOWN) scanned = p - RSTRING_PTR(result); f._flags = __SWR | __SSTR; f._bf._size = 0; - f._w = 120; - result = rb_str_buf_new(f._w); + f._w = rb_str_capacity(result); f._bf._base = (unsigned char *)result; - f._p = (unsigned char *)RSTRING_PTR(result); - RBASIC(result)->klass = 0; + f._p = (unsigned char *)p; + RBASIC_CLEAR_CLASS(result); f.vwrite = ruby__sfvwrite; + f.vextra = ruby__sfvextra; + buffer.value = 0; BSD_vfprintf(&f, fmt, ap); - RBASIC(result)->klass = rb_cString; - rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); + RBASIC_SET_CLASS_RAW(result, klass); + p = RSTRING_PTR(result); + long blen = (char *)f._p - p; + + coderange = ENC_CODERANGE(result); + if (coderange != ENC_CODERANGE_UNKNOWN && scanned < blen) { + rb_str_coderange_scan_restartable(p + scanned, p + blen, rb_enc_get(result), &coderange); + ENC_CODERANGE_SET(result, coderange); + } + rb_str_resize(result, blen); +#undef f +} + +VALUE +rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) +{ + const int initial_len = 120; + VALUE result; + + result = rb_str_buf_new(initial_len); + if (enc) { + if (rb_enc_mbminlen(enc) > 1) { + /* the implementation deeply depends on plain char */ + rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", + rb_enc_name(enc)); + } + rb_enc_associate(result, enc); + } + ruby_vsprintf0(result, RSTRING_PTR(result), fmt, ap); + return result; +} + +VALUE +rb_enc_sprintf(rb_encoding *enc, const char *format, ...) +{ + VALUE result; + va_list ap; + + va_start(ap, format); + result = rb_enc_vsprintf(enc, format, ap); + va_end(ap); return result; } VALUE +rb_vsprintf(const char *fmt, va_list ap) +{ + return rb_enc_vsprintf(NULL, fmt, ap); +} + +VALUE rb_sprintf(const char *format, ...) { VALUE result; @@ -952,3 +1227,25 @@ rb_sprintf(const char *format, ...) return result; } + +VALUE +rb_str_vcatf(VALUE str, const char *fmt, va_list ap) +{ + StringValue(str); + rb_str_modify(str); + ruby_vsprintf0(str, RSTRING_END(str), fmt, ap); + + return str; +} + +VALUE +rb_str_catf(VALUE str, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + str = rb_str_vcatf(str, format, ap); + va_end(ap); + + return str; +} |
