diff options
Diffstat (limited to 'ext/json/generator')
| -rw-r--r-- | ext/json/generator/depend | 171 | ||||
| -rw-r--r-- | ext/json/generator/extconf.rb | 19 | ||||
| -rw-r--r-- | ext/json/generator/generator.c | 2472 | ||||
| -rw-r--r-- | ext/json/generator/generator.h | 171 |
4 files changed, 1697 insertions, 1136 deletions
diff --git a/ext/json/generator/depend b/ext/json/generator/depend index 54e8ae3eb4..3ba4acfdd2 100644 --- a/ext/json/generator/depend +++ b/ext/json/generator/depend @@ -1,13 +1,174 @@ $(OBJS): $(ruby_headers) -generator.o: generator.c generator.h $(srcdir)/../fbuffer/fbuffer.h +generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h # AUTOGENERATED DEPENDENCIES START generator.o: $(RUBY_EXTCONF_H) generator.o: $(arch_hdrdir)/ruby/config.h +generator.o: $(hdrdir)/ruby.h +generator.o: $(hdrdir)/ruby/assert.h generator.o: $(hdrdir)/ruby/backward.h +generator.o: $(hdrdir)/ruby/backward/2/assume.h +generator.o: $(hdrdir)/ruby/backward/2/attributes.h +generator.o: $(hdrdir)/ruby/backward/2/bool.h +generator.o: $(hdrdir)/ruby/backward/2/inttypes.h +generator.o: $(hdrdir)/ruby/backward/2/limits.h +generator.o: $(hdrdir)/ruby/backward/2/long_long.h +generator.o: $(hdrdir)/ruby/backward/2/stdalign.h +generator.o: $(hdrdir)/ruby/backward/2/stdarg.h generator.o: $(hdrdir)/ruby/defines.h generator.o: $(hdrdir)/ruby/encoding.h generator.o: $(hdrdir)/ruby/intern.h +generator.o: $(hdrdir)/ruby/internal/abi.h +generator.o: $(hdrdir)/ruby/internal/anyargs.h +generator.o: $(hdrdir)/ruby/internal/arithmetic.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/char.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/double.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/int.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/long.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/short.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +generator.o: $(hdrdir)/ruby/internal/assume.h +generator.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +generator.o: $(hdrdir)/ruby/internal/attr/artificial.h +generator.o: $(hdrdir)/ruby/internal/attr/cold.h +generator.o: $(hdrdir)/ruby/internal/attr/const.h +generator.o: $(hdrdir)/ruby/internal/attr/constexpr.h +generator.o: $(hdrdir)/ruby/internal/attr/deprecated.h +generator.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +generator.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +generator.o: $(hdrdir)/ruby/internal/attr/error.h +generator.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +generator.o: $(hdrdir)/ruby/internal/attr/forceinline.h +generator.o: $(hdrdir)/ruby/internal/attr/format.h +generator.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +generator.o: $(hdrdir)/ruby/internal/attr/noalias.h +generator.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +generator.o: $(hdrdir)/ruby/internal/attr/noexcept.h +generator.o: $(hdrdir)/ruby/internal/attr/noinline.h +generator.o: $(hdrdir)/ruby/internal/attr/nonnull.h +generator.o: $(hdrdir)/ruby/internal/attr/noreturn.h +generator.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +generator.o: $(hdrdir)/ruby/internal/attr/pure.h +generator.o: $(hdrdir)/ruby/internal/attr/restrict.h +generator.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +generator.o: $(hdrdir)/ruby/internal/attr/warning.h +generator.o: $(hdrdir)/ruby/internal/attr/weakref.h +generator.o: $(hdrdir)/ruby/internal/cast.h +generator.o: $(hdrdir)/ruby/internal/compiler_is.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +generator.o: $(hdrdir)/ruby/internal/compiler_since.h +generator.o: $(hdrdir)/ruby/internal/config.h +generator.o: $(hdrdir)/ruby/internal/constant_p.h +generator.o: $(hdrdir)/ruby/internal/core.h +generator.o: $(hdrdir)/ruby/internal/core/rarray.h +generator.o: $(hdrdir)/ruby/internal/core/rbasic.h +generator.o: $(hdrdir)/ruby/internal/core/rbignum.h +generator.o: $(hdrdir)/ruby/internal/core/rclass.h +generator.o: $(hdrdir)/ruby/internal/core/rdata.h +generator.o: $(hdrdir)/ruby/internal/core/rfile.h +generator.o: $(hdrdir)/ruby/internal/core/rhash.h +generator.o: $(hdrdir)/ruby/internal/core/rmatch.h +generator.o: $(hdrdir)/ruby/internal/core/robject.h +generator.o: $(hdrdir)/ruby/internal/core/rregexp.h +generator.o: $(hdrdir)/ruby/internal/core/rstring.h +generator.o: $(hdrdir)/ruby/internal/core/rstruct.h +generator.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +generator.o: $(hdrdir)/ruby/internal/ctype.h +generator.o: $(hdrdir)/ruby/internal/dllexport.h +generator.o: $(hdrdir)/ruby/internal/dosish.h +generator.o: $(hdrdir)/ruby/internal/encoding/coderange.h +generator.o: $(hdrdir)/ruby/internal/encoding/ctype.h +generator.o: $(hdrdir)/ruby/internal/encoding/encoding.h +generator.o: $(hdrdir)/ruby/internal/encoding/pathname.h +generator.o: $(hdrdir)/ruby/internal/encoding/re.h +generator.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +generator.o: $(hdrdir)/ruby/internal/encoding/string.h +generator.o: $(hdrdir)/ruby/internal/encoding/symbol.h +generator.o: $(hdrdir)/ruby/internal/encoding/transcode.h +generator.o: $(hdrdir)/ruby/internal/error.h +generator.o: $(hdrdir)/ruby/internal/eval.h +generator.o: $(hdrdir)/ruby/internal/event.h +generator.o: $(hdrdir)/ruby/internal/fl_type.h +generator.o: $(hdrdir)/ruby/internal/gc.h +generator.o: $(hdrdir)/ruby/internal/glob.h +generator.o: $(hdrdir)/ruby/internal/globals.h +generator.o: $(hdrdir)/ruby/internal/has/attribute.h +generator.o: $(hdrdir)/ruby/internal/has/builtin.h +generator.o: $(hdrdir)/ruby/internal/has/c_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/extension.h +generator.o: $(hdrdir)/ruby/internal/has/feature.h +generator.o: $(hdrdir)/ruby/internal/has/warning.h +generator.o: $(hdrdir)/ruby/internal/intern/array.h +generator.o: $(hdrdir)/ruby/internal/intern/bignum.h +generator.o: $(hdrdir)/ruby/internal/intern/class.h +generator.o: $(hdrdir)/ruby/internal/intern/compar.h +generator.o: $(hdrdir)/ruby/internal/intern/complex.h +generator.o: $(hdrdir)/ruby/internal/intern/cont.h +generator.o: $(hdrdir)/ruby/internal/intern/dir.h +generator.o: $(hdrdir)/ruby/internal/intern/enum.h +generator.o: $(hdrdir)/ruby/internal/intern/enumerator.h +generator.o: $(hdrdir)/ruby/internal/intern/error.h +generator.o: $(hdrdir)/ruby/internal/intern/eval.h +generator.o: $(hdrdir)/ruby/internal/intern/file.h +generator.o: $(hdrdir)/ruby/internal/intern/hash.h +generator.o: $(hdrdir)/ruby/internal/intern/io.h +generator.o: $(hdrdir)/ruby/internal/intern/load.h +generator.o: $(hdrdir)/ruby/internal/intern/marshal.h +generator.o: $(hdrdir)/ruby/internal/intern/numeric.h +generator.o: $(hdrdir)/ruby/internal/intern/object.h +generator.o: $(hdrdir)/ruby/internal/intern/parse.h +generator.o: $(hdrdir)/ruby/internal/intern/proc.h +generator.o: $(hdrdir)/ruby/internal/intern/process.h +generator.o: $(hdrdir)/ruby/internal/intern/random.h +generator.o: $(hdrdir)/ruby/internal/intern/range.h +generator.o: $(hdrdir)/ruby/internal/intern/rational.h +generator.o: $(hdrdir)/ruby/internal/intern/re.h +generator.o: $(hdrdir)/ruby/internal/intern/ruby.h +generator.o: $(hdrdir)/ruby/internal/intern/select.h +generator.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +generator.o: $(hdrdir)/ruby/internal/intern/set.h +generator.o: $(hdrdir)/ruby/internal/intern/signal.h +generator.o: $(hdrdir)/ruby/internal/intern/sprintf.h +generator.o: $(hdrdir)/ruby/internal/intern/string.h +generator.o: $(hdrdir)/ruby/internal/intern/struct.h +generator.o: $(hdrdir)/ruby/internal/intern/thread.h +generator.o: $(hdrdir)/ruby/internal/intern/time.h +generator.o: $(hdrdir)/ruby/internal/intern/variable.h +generator.o: $(hdrdir)/ruby/internal/intern/vm.h +generator.o: $(hdrdir)/ruby/internal/interpreter.h +generator.o: $(hdrdir)/ruby/internal/iterator.h +generator.o: $(hdrdir)/ruby/internal/memory.h +generator.o: $(hdrdir)/ruby/internal/method.h +generator.o: $(hdrdir)/ruby/internal/module.h +generator.o: $(hdrdir)/ruby/internal/newobj.h +generator.o: $(hdrdir)/ruby/internal/scan_args.h +generator.o: $(hdrdir)/ruby/internal/special_consts.h +generator.o: $(hdrdir)/ruby/internal/static_assert.h +generator.o: $(hdrdir)/ruby/internal/stdalign.h +generator.o: $(hdrdir)/ruby/internal/stdbool.h +generator.o: $(hdrdir)/ruby/internal/stdckdint.h +generator.o: $(hdrdir)/ruby/internal/symbol.h +generator.o: $(hdrdir)/ruby/internal/value.h +generator.o: $(hdrdir)/ruby/internal/value_type.h +generator.o: $(hdrdir)/ruby/internal/variable.h +generator.o: $(hdrdir)/ruby/internal/warning_push.h +generator.o: $(hdrdir)/ruby/internal/xmalloc.h generator.o: $(hdrdir)/ruby/missing.h generator.o: $(hdrdir)/ruby/onigmo.h generator.o: $(hdrdir)/ruby/oniguruma.h @@ -16,8 +177,10 @@ generator.o: $(hdrdir)/ruby/regex.h generator.o: $(hdrdir)/ruby/ruby.h generator.o: $(hdrdir)/ruby/st.h generator.o: $(hdrdir)/ruby/subst.h -generator.o: $(top_srcdir)/ext/json/fbuffer/fbuffer.h -generator.o: $(top_srcdir)/include/ruby.h +generator.o: $(srcdir)/../fbuffer/fbuffer.h +generator.o: $(srcdir)/../json.h +generator.o: $(srcdir)/../simd/simd.h +generator.o: $(srcdir)/../vendor/fpconv.c +generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h generator.o: generator.c -generator.o: generator.h # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb index 8627c5f4bd..33af03ea30 100644 --- a/ext/json/generator/extconf.rb +++ b/ext/json/generator/extconf.rb @@ -1,4 +1,19 @@ require 'mkmf' -$defs << "-DJSON_GENERATOR" -create_makefile 'json/ext/generator' +if RUBY_ENGINE == 'truffleruby' + # The pure-Ruby generator is faster on TruffleRuby, so skip compiling the generator extension + File.write('Makefile', dummy_makefile("").join) +else + append_cflags("-std=c99") + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 + have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + + $defs << "-DJSON_GENERATOR" + $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" + + if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" + end + + create_makefile 'json/ext/generator' +end diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 2bf8074562..110b5f6b32 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,986 +1,1353 @@ +#include "../json.h" #include "../fbuffer/fbuffer.h" -#include "generator.h" +#include "../vendor/fpconv.c" -#ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_UTF_8; -static ID i_encoding, i_encode; +#include <math.h> +#include <ctype.h> + +#include "../simd/simd.h" + +/* ruby api and some helpers */ + +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + VALUE as_json; + + long max_nesting; + long depth; + long buffer_initial_length; + + enum duplicate_key_action on_duplicate_key; + + bool as_json_single_arg; + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; +} JSON_Generator_State; + +static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; + +static ID i_to_s, i_to_json, i_new, i_encode; +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; + + +#define GET_STATE_TO(self, state) \ + TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + GET_STATE_TO(self, state) + +struct generate_json_data; + +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj); + +struct generate_json_data { + FBuffer *buffer; + VALUE vstate; + JSON_Generator_State *state; + VALUE obj; + generator_func func; + long depth; +}; + +static SIMD_Implementation simd_impl; + +static VALUE cState_from_state_s(VALUE self, VALUE opts); +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj); + +static int usascii_encindex, utf8_encindex, binary_encindex; + +NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str) +{ + rb_enc_associate_index(str, utf8_encindex); + VALUE exc = rb_exc_new_str(eGeneratorError, str); + rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object); + rb_exc_raise(exc); +} + +#ifdef RBIMPL_ATTR_FORMAT +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) #endif +NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + VALUE str = rb_vsprintf(fmt, args); + va_end(args); + raise_generator_error_str(invalid_object, str); +} + +// 0 - single byte char that don't need to be escaped. +// (x | 8) - char that needs to be escaped. +static const unsigned char CHAR_LENGTH_MASK = 7; +static const unsigned char ESCAPE_MASK = 8; + +typedef struct _search_state { + const char *ptr; + const char *end; + const char *cursor; + FBuffer *buffer; -static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject, - mHash, mArray, -#ifdef RUBY_INTEGER_UNIFICATION - mInteger, +#ifdef HAVE_SIMD + const char *chunk_base; + const char *chunk_end; + bool has_matches; + +#if defined(HAVE_SIMD_NEON) + uint64_t matches_mask; +#elif defined(HAVE_SIMD_SSE2) + int matches_mask; #else - mFixnum, mBignum, -#endif - mFloat, mString, mString_Extend, - mTrueClass, mFalseClass, mNilClass, eGeneratorError, - eNestingError, CRegexp_MULTILINE, CJSON_SAFE_STATE_PROTOTYPE, - i_SAFE_STATE_PROTOTYPE; +#error "Unknown SIMD Implementation." +#endif /* HAVE_SIMD_NEON */ +#endif /* HAVE_SIMD */ +} search_state; -static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, - i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only, - i_pack, i_unpack, i_create_id, i_extend, i_key_p, - i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth, - i_buffer_initial_length, i_dup; +ALWAYS_INLINE(static) void search_flush(search_state *search) +{ + // Do not remove this conditional without profiling, specifically escape-heavy text. + // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). + // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method + // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the + // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if + // nothing needs to be flushed, we can save a few memory references with this conditional. + if (search->ptr > search->cursor) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } +} -/* - * Copyright 2001-2004 Unicode, Inc. +static const unsigned char escape_table_basic[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static inline unsigned char search_escape_basic(search_state *search) +{ + while (search->ptr < search->end) { + if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) { + search_flush(search); + return 1; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + search->ptr++; + search->cursor = search->ptr; +} + +/* Converts in_string to a JSON string (without the wrapping '"' + * characters) in FBuffer out_buffer. + * + * Character are JSON-escaped according to: * - * Disclaimer + * - Always: ASCII control characters (0x00-0x1F), dquote, and + * backslash. * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. + * - If out_ascii_only: non-ASCII characters (>0x7F) * - * Limitations on Rights to Redistribute This Code + * - If script_safe: forwardslash (/), line separator (U+2028), and + * paragraph separator (U+2029) * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. + * Everything else (should be UTF-8) is just passed through and + * appended to the result. */ -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. - */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; +#if defined(HAVE_SIMD_NEON) +static inline unsigned char search_escape_basic_neon(search_state *search); +#elif defined(HAVE_SIMD_SSE2) +static inline unsigned char search_escape_basic_sse2(search_state *search); +#endif -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns 0. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. - */ -static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length) -{ - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return 0; - /* Everything else falls through when "1"... */ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 2: if ((a = (*--srcptr)) > 0xBF) return 0; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return 0; break; - case 0xED: if (a > 0x9F) return 0; break; - case 0xF0: if (a < 0x90) return 0; break; - case 0xF4: if (a > 0x8F) return 0; break; - default: if (a < 0x80) return 0; - } +static inline unsigned char search_escape_basic(search_state *search); - case 1: if (*source >= 0x80 && *source < 0xC2) return 0; +static inline void convert_UTF8_to_JSON(search_state *search) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + while (search_escape_basic_neon(search)) { + escape_UTF8_char_basic(search); } - if (*source > 0xF4) return 0; - return 1; +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + while (search_escape_basic_sse2(search)) { + escape_UTF8_char_basic(search); + } + return; + } + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif +#else + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif /* HAVE_SIMD */ } -/* Escapes the UTF16 character and stores the result in the buffer buf. */ -static void unicode_escape(char *buf, UTF16 character) +static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) { - const char *digits = "0123456789abcdef"; - - buf[2] = digits[character >> 12]; - buf[3] = digits[(character >> 8) & 0xf]; - buf[4] = digits[(character >> 4) & 0xf]; - buf[5] = digits[character & 0xf]; + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + break; + } + case 3: { + if (search->ptr[2] & 1) { + fbuffer_append(search->buffer, "\\u2029", 6); + } else { + fbuffer_append(search->buffer, "\\u2028", 6); + } + break; + } + } + search->cursor = (search->ptr += ch_len); } -/* Escapes the UTF16 character and stores the result in the buffer buf, then - * the buffer buf is appended to the FBuffer buffer. */ -static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 - character) +#ifdef HAVE_SIMD + +ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) { - unicode_escape(buf, character); - fbuffer_append(buffer, buf, 6); + RBIMPL_ASSERT_OR_ASSUME(len < vec_len); + + // Flush the buffer so everything up until the last 'len' characters are unflushed. + search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, vec_len); + + char *s = (buf->ptr + buf->len); + + // Pad the buffer with dummy characters that won't need escaping. + // This seem wasteful at first sight, but memset of vector length is very fast. + // This is a space as it can be directly represented as an immediate on AArch64. + memset(s, ' ', vec_len); + + // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + if (vec_len == 16) { + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); + json_fast_memcpy16(s, search->ptr, len); + } else { + MEMCPY(s, search->ptr, char, len); + } + + return s; } -/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII - * and control characters are JSON escaped. */ -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string) +#ifdef HAVE_SIMD_NEON + +ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search) { - const UTF8 *source = (UTF8 *) RSTRING_PTR(string); - const UTF8 *sourceEnd = source + RSTRING_LEN(string); - char buf[6] = { '\\', 'u' }; + uint64_t mask = search->matches_mask; + uint32_t index = trailing_zeros64(mask) >> 2; + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8(source, extraBytesToRead+1)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return neon_next_match(search); + } else { + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + search->ptr = search->chunk_end; } - /* - * The cases all fall through. See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; + } + + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" (the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector insructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers. + * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of + * the values in the vector. This computes how many bytes need to be escaped within this chunk. + * + * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then, + * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we + * have at least one byte that needs to be escaped. + */ + + if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(uint8x16_t); + return neon_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); + + uint64_t mask = compute_chunk_mask_neon(s); + + if (!mask) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); + + search->matches_mask = mask; + search->has_matches = true; + search->chunk_end = search->end; + search->chunk_base = search->ptr; + return neon_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ + +#ifdef HAVE_SIMD_SSE2 + +ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search) +{ + int mask = search->matches_mask; + int index = trailing_zeros(mask); + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) #else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); +#define TARGET_SSE2 #endif + +ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return sse2_next_match(search); + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { + search->ptr = search->end; } else { - /* normal case */ - if (ch >= 0x20 && ch <= 0x7f) { - switch (ch) { - case '\\': - fbuffer_append(buffer, "\\\\", 2); - break; - case '"': - fbuffer_append(buffer, "\\\"", 2); - break; - default: - fbuffer_append_char(buffer, (char)ch); - break; - } - } else { - switch (ch) { - case '\n': - fbuffer_append(buffer, "\\n", 2); - break; - case '\r': - fbuffer_append(buffer, "\\r", 2); - break; - case '\t': - fbuffer_append(buffer, "\\t", 2); - break; - case '\f': - fbuffer_append(buffer, "\\f", 2); - break; - case '\b': - fbuffer_append(buffer, "\\b", 2); - break; - default: - unicode_escape_to_buffer(buffer, buf, (UTF16) ch); - break; + search->ptr = search->chunk_base + sizeof(__m128i); + } + } + } + + if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(__m128i); + return sse2_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); + + int needs_escape_mask = compute_chunk_mask_sse2(s); + + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} + +#endif /* HAVE_SIMD_SSE2 */ + +#endif /* HAVE_SIMD */ + +static const unsigned char script_safe_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; + +static inline unsigned char search_script_safe_escape(search_state *search) +{ + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = script_safe_escape_table[ch]; + + if (RB_UNLIKELY(ch_len)) { + if (ch_len & ESCAPE_MASK) { + if (RB_UNLIKELY(ch_len == 11)) { + const unsigned char *uptr = (const unsigned char *)search->ptr; + if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) { + search->ptr += 3; + continue; } } + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr += ch_len; } - } else if (ch > UNI_MAX_UTF16) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the start */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - ch -= halfBase; - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); + search->ptr++; } } - RB_GC_GUARD(string); -} - -/* Converts string to a JSON string in FBuffer buffer, where only the - * characters required by the JSON standard are JSON escaped. The remaining - * characters (should be UTF8) are just passed through and appended to the - * result. */ -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) -{ - const char *ptr = RSTRING_PTR(string), *p; - unsigned long len = RSTRING_LEN(string), start = 0, end = 0; - const char *escape = NULL; - int escape_len; - unsigned char c; - char buf[6] = { '\\', 'u' }; - - for (start = 0, end = 0; end < len;) { - p = ptr + end; - c = (unsigned char) *p; - if (c < 0x20) { - switch (c) { - case '\n': - escape = "\\n"; - escape_len = 2; - break; - case '\r': - escape = "\\r"; - escape_len = 2; - break; - case '\t': - escape = "\\t"; - escape_len = 2; - break; - case '\f': - escape = "\\f"; - escape_len = 2; - break; - case '\b': - escape = "\\b"; - escape_len = 2; - break; - default: - unicode_escape(buf, (UTF16) *p); - escape = buf; - escape_len = 6; + search_flush(search); + return 0; +} + +static void convert_UTF8_to_script_safe_JSON(search_state *search) +{ + unsigned char ch_len; + while ((ch_len = search_script_safe_escape(search))) { + escape_UTF8_char(search, ch_len); + } +} + +static const unsigned char ascii_only_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; + +static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256]) +{ + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = escape_table[ch]; + + if (RB_UNLIKELY(ch_len)) { + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); break; + } } - } else { - switch (c) { - case '\\': - escape = "\\\\"; - escape_len = 2; + break; + } + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; + + uint32_t wchar = 0; + + switch (ch_len) { + case 2: + wchar = ch & 0x1F; break; - case '"': - escape = "\\\""; - escape_len = 2; + case 3: + wchar = ch & 0x0F; break; - default: - { - unsigned short clen = trailingBytesForUTF8[c] + 1; - if (end + clen > len) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8((UTF8 *) p, clen)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); - } - end += clen; - } - continue; + case 4: + wchar = ch & 0x07; break; } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (search->ptr[i] & 0x3F); + } + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + } else { + uint16_t hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (uint16_t)(wchar >> 10); + lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + fbuffer_append(search->buffer, scratch, 12); + } + + break; } - fbuffer_append(buffer, ptr + start, end - start); - fbuffer_append(buffer, escape, escape_len); - start = ++end; - escape = NULL; } - fbuffer_append(buffer, ptr + start, end - start); + search->cursor = (search->ptr += ch_len); } -static char *fstrndup(const char *ptr, unsigned long len) { - char *result; - if (len <= 0) return NULL; - result = ALLOC_N(char, len); - memcpy(result, ptr, len); - return result; +static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256]) +{ + unsigned char ch_len; + while ((ch_len = search_ascii_only_escape(search, escape_table))) { + full_escape_UTF8_char(search, ch_len); + } } -/* - * Document-module: JSON::Ext::Generator - * - * This is the JSON generator implemented as a C extension. It can be - * configured to be used by setting - * - * JSON.generator = JSON::Ext::Generator - * - * with the method generator= in JSON. - * - */ - -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON object, that is generated from - * this Hash instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) +static void State_mark(void *ptr) { - GENERATE_JSON(object); + JSON_Generator_State *state = ptr; + rb_gc_mark_movable(state->indent); + rb_gc_mark_movable(state->space); + rb_gc_mark_movable(state->space_before); + rb_gc_mark_movable(state->object_nl); + rb_gc_mark_movable(state->array_nl); + rb_gc_mark_movable(state->as_json); } -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON array, that is generated from - * this Array instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(array); +static void State_compact(void *ptr) +{ + JSON_Generator_State *state = ptr; + state->indent = rb_gc_location(state->indent); + state->space = rb_gc_location(state->space); + state->space_before = rb_gc_location(state->space_before); + state->object_nl = rb_gc_location(state->object_nl); + state->array_nl = rb_gc_location(state->array_nl); + state->as_json = rb_gc_location(state->as_json); } -#ifdef RUBY_INTEGER_UNIFICATION -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) +static size_t State_memsize(const void *ptr) { - GENERATE_JSON(integer); + return sizeof(JSON_Generator_State); } -#else -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) +static const rb_data_type_t JSON_Generator_State_type = { + .wrap_struct_name = "JSON/Generator/State", + .function = { + .dmark = State_mark, + .dfree = RUBY_DEFAULT_FREE, + .dsize = State_memsize, + .dcompact = State_compact, + }, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, +}; + +static void state_init(JSON_Generator_State *state) { - GENERATE_JSON(fixnum); + state->max_nesting = 100; + state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) +static VALUE cState_s_allocate(VALUE klass) { - GENERATE_JSON(bignum); + JSON_Generator_State *state; + VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state); + state_init(state); + return obj; } -#endif -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Float number. - */ -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) +static void vstate_spill(struct generate_json_data *data) +{ + VALUE vstate = cState_s_allocate(cState); + GET_STATE(vstate); + MEMCPY(state, data->state, JSON_Generator_State, 1); + data->state = state; + data->vstate = vstate; + RB_OBJ_WRITTEN(vstate, Qundef, state->indent); + RB_OBJ_WRITTEN(vstate, Qundef, state->space); + RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); + RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); +} + +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) { - GENERATE_JSON(float); + if (RB_UNLIKELY(!data->vstate)) { + vstate_spill(data); + } + GET_STATE(data->vstate); + state->depth = data->depth; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; } -/* - * call-seq: String.included(modul) - * - * Extends _modul_ with the String::Extend module. - */ -static VALUE mString_included_s(VALUE self, VALUE modul) { - VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); - return result; +static VALUE +json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key) +{ + VALUE proc_args[2] = {object, is_key}; + return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil); } -/* - * call-seq: to_json(*) - * - * This string should be encoded with UTF-8 A call to this method - * returns a JSON string encoded with UTF16 big endian characters as - * \u????. - */ -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) +static VALUE +convert_string_subclass(VALUE key) { - GENERATE_JSON(string); + VALUE key_to_s = rb_funcall(key, i_to_s, 0); + + if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) { + VALUE cname = rb_obj_class(key); + rb_raise(rb_eTypeError, + "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")", + cname, "String", cname, "to_s", rb_obj_class(key_to_s)); + } + + return key_to_s; } -/* - * call-seq: to_json_raw_object() - * - * This method creates a raw object hash, that can be nested into - * other data structures and will be generated as a raw string. This - * method should be used, if you want to convert raw strings to JSON - * instead of UTF-8 strings, e. g. binary data. - */ -static VALUE mString_to_json_raw_object(VALUE self) +static bool enc_utf8_compatible_p(int enc_idx) { - VALUE ary; - VALUE result = rb_hash_new(); - rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); - ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); - return result; + if (enc_idx == usascii_encindex) return true; + if (enc_idx == utf8_encindex) return true; + return false; } -/* - * call-seq: to_json_raw(*args) - * - * This method creates a JSON text from the result of a call to - * to_json_raw_object of this String. - */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) +static VALUE encode_json_string_try(VALUE str) { - VALUE obj = mString_to_json_raw_object(self); - Check_Type(obj, T_HASH); - return mHash_to_json(argc, argv, obj); + return rb_funcall(str, i_encode, 1, Encoding_UTF_8); } -/* - * call-seq: json_create(o) - * - * Raw Strings are JSON Objects (the raw bytes are stored in an array for the - * key "raw"). The Ruby String can be created by this module method. - */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) +static VALUE encode_json_string_rescue(VALUE str, VALUE exception) { - VALUE ary; - Check_Type(o, T_HASH); - ary = rb_hash_aref(o, rb_str_new2("raw")); - return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); + raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); + return Qundef; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string for true: 'true'. - */ -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) -{ - GENERATE_JSON(true); +static inline int json_str_coderange(VALUE str) { + int coderange = RB_ENC_CODERANGE(str); + if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) { + coderange = rb_enc_str_coderange(str); + } + return coderange; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string for false: 'false'. - */ -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) +static inline bool valid_json_string_p(VALUE str) { - GENERATE_JSON(false); + int coderange = json_str_coderange(str); + + if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) { + return true; + } + + if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) { + return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str)); + } + + return false; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string for nil: 'null'. - */ -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) +NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) { - GENERATE_JSON(null); + if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) { + VALUE coerced_str = json_call_as_json(data->state, str, Qfalse); + if (coerced_str != str) { + if (RB_TYPE_P(coerced_str, T_STRING)) { + if (!valid_json_string_p(coerced_str)) { + raise_generator_error(str, "source sequence is illegal/malformed utf-8"); + } + } else { + // as_json could return another type than T_STRING + if (is_key) { + raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str)); + } + } + + return coerced_str; + } + } + + if (RB_ENCODING_GET_INLINED(str) == binary_encindex) { + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + return utf8_string; + case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8_string; + break; + } + } + + return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); } -/* - * call-seq: to_json(*) - * - * Converts this object to a string (calling #to_s), converts - * it to a JSON string, and returns the result. This is a fallback, if no - * special method #to_json was defined for some object. - */ -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) +ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) { - VALUE state; - VALUE string = rb_funcall(self, i_to_s, 0); - rb_scan_args(argc, argv, "01", &state); - Check_Type(string, T_STRING); - state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string); + if (RB_LIKELY(valid_json_string_p(str))) { + return str; + } + else { + return convert_invalid_encoding(data, str, as_json_called, is_key); + } } -static void State_free(void *ptr) +static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - JSON_Generator_State *state = ptr; - if (state->indent) ruby_xfree(state->indent); - if (state->space) ruby_xfree(state->space); - if (state->space_before) ruby_xfree(state->space_before); - if (state->object_nl) ruby_xfree(state->object_nl); - if (state->array_nl) ruby_xfree(state->array_nl); - if (state->array_delim) fbuffer_free(state->array_delim); - if (state->object_delim) fbuffer_free(state->object_delim); - if (state->object_delim2) fbuffer_free(state->object_delim2); - ruby_xfree(state); + fbuffer_append_char(buffer, '"'); + + long len; + search_state search; + search.buffer = buffer; + RSTRING_GETMEM(obj, search.ptr, len); + search.cursor = search.ptr; + search.end = search.ptr + len; + +#ifdef HAVE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; + search.chunk_end = NULL; +#endif /* HAVE_SIMD */ + + switch (json_str_coderange(obj)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { + convert_UTF8_to_script_safe_JSON(&search); + } else { + convert_UTF8_to_JSON(&search); + } + break; + default: + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + break; + } + fbuffer_append_char(buffer, '"'); } -static size_t State_memsize(const void *ptr) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - const JSON_Generator_State *state = ptr; - size_t size = sizeof(*state); - if (state->indent) size += state->indent_len + 1; - if (state->space) size += state->space_len + 1; - if (state->space_before) size += state->space_before_len + 1; - if (state->object_nl) size += state->object_nl_len + 1; - if (state->array_nl) size += state->array_nl_len + 1; - if (state->array_delim) size += FBUFFER_CAPA(state->array_delim); - if (state->object_delim) size += FBUFFER_CAPA(state->object_delim); - if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2); - return size; + obj = ensure_valid_encoding(data, obj, false, false); + raw_generate_json_string(buffer, data, obj); } -#ifdef NEW_TYPEDDATA_WRAPPER -static const rb_data_type_t JSON_Generator_State_type = { - "JSON/Generator/State", - {NULL, State_free, State_memsize,}, -#ifdef RUBY_TYPED_FREE_IMMEDIATELY - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -#endif +struct hash_foreach_arg { + VALUE hash; + struct generate_json_data *data; + int first_key_type; + bool first; + bool mixed_keys_encountered; }; -#endif -static VALUE cState_s_allocate(VALUE klass) +NOINLINE(static) void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) { - JSON_Generator_State *state; - return TypedData_Make_Struct(klass, JSON_Generator_State, - &JSON_Generator_State_type, state); + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); + } } -/* - * call-seq: configure(opts) - * - * Configure this State instance with the Hash _opts_, and return - * itself. - */ -static VALUE cState_configure(VALUE self, VALUE opts) +static int +json_object_i(VALUE key, VALUE val, VALUE _arg) { - VALUE tmp; - GET_STATE(self); - tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h"); - opts = tmp; - tmp = rb_hash_aref(opts, ID2SYM(i_indent)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->indent = fstrndup(RSTRING_PTR(tmp), len + 1); - state->indent_len = len; - } - tmp = rb_hash_aref(opts, ID2SYM(i_space)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->space = fstrndup(RSTRING_PTR(tmp), len + 1); - state->space_len = len; - } - tmp = rb_hash_aref(opts, ID2SYM(i_space_before)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1); - state->space_before_len = len; - } - tmp = rb_hash_aref(opts, ID2SYM(i_array_nl)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1); - state->array_nl_len = len; + struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg; + struct generate_json_data *data = arg->data; + + FBuffer *buffer = data->buffer; + JSON_Generator_State *state = data->state; + + long depth = data->depth; + int key_type = rb_type(key); + + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; } - tmp = rb_hash_aref(opts, ID2SYM(i_object_nl)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1); - state->object_nl_len = len; + else { + fbuffer_append_char(buffer, ','); } - tmp = ID2SYM(i_max_nesting); - state->max_nesting = 100; - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - state->max_nesting = FIX2LONG(max_nesting); - } else { - state->max_nesting = 0; - } + + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); } - tmp = ID2SYM(i_depth); - state->depth = 0; - if (option_given_p(opts, tmp)) { - VALUE depth = rb_hash_aref(opts, tmp); - if (RTEST(depth)) { - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); - } else { - state->depth = 0; - } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } - tmp = ID2SYM(i_buffer_initial_length); - if (option_given_p(opts, tmp)) { - VALUE buffer_initial_length = rb_hash_aref(opts, tmp); - if (RTEST(buffer_initial_length)) { - long initial_length; - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) state->buffer_initial_length = initial_length; - } + + VALUE key_to_s; + bool as_json_called = false; + + start: + switch (key_type) { + case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { + key_to_s = key; + } else { + key_to_s = convert_string_subclass(key); + } + break; + case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + + key_to_s = rb_sym2str(key); + break; + default: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + key = json_call_as_json(data->state, key, Qtrue); + key_type = rb_type(key); + as_json_called = true; + goto start; + } else { + raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key)); + } + } + key_to_s = rb_convert_type(key, T_STRING, "String", "to_s"); + break; } - tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan)); - state->allow_nan = RTEST(tmp); - tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only)); - state->ascii_only = RTEST(tmp); - return self; -} -static void set_state_ivars(VALUE hash, VALUE state) -{ - VALUE ivars = rb_obj_instance_variables(state); - int i = 0; - for (i = 0; i < RARRAY_LEN(ivars); i++) { - VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0); - long key_len = RSTRING_LEN(key); - VALUE value = rb_iv_get(state, StringValueCStr(key)); - rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value); + key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true); + + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { + raw_generate_json_string(buffer, data, key_to_s); + } else { + generate_json(buffer, data, key_to_s); } -} + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before); + fbuffer_append_char(buffer, ':'); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); + generate_json(buffer, data, val); -/* - * call-seq: to_h - * - * Returns the configuration instance variables as a hash, that can be - * passed to the configure method. - */ -static VALUE cState_to_h(VALUE self) -{ - VALUE result = rb_hash_new(); - GET_STATE(self); - set_state_ivars(result, self); - rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len)); - rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len)); - rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len)); - rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len)); - rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len)); - rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting)); - rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth)); - rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length)); - return result; + return ST_CONTINUE; } -/* -* call-seq: [](name) -* -* Returns the value returned by method +name+. -*/ -static VALUE cState_aref(VALUE self, VALUE name) +static inline long increase_depth(struct generate_json_data *data) { - name = rb_funcall(name, i_to_s, 0); - if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) { - return rb_funcall(self, i_send, 1, name); - } else { - return rb_ivar_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name))); + JSON_Generator_State *state = data->state; + long depth = ++data->depth; + if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { + rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth); } + return depth; } -/* -* call-seq: []=(name, value) -* -* Sets the attribute name to value. -*/ -static VALUE cState_aset(VALUE self, VALUE name, VALUE value) +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE name_writer; + long depth = increase_depth(data); - name = rb_funcall(name, i_to_s, 0); - name_writer = rb_str_cat2(rb_str_dup(name), "="); - if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) { - return rb_funcall(self, i_send, 2, name_writer, value); - } else { - rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value); + if (RHASH_SIZE(obj) == 0) { + fbuffer_append(buffer, "{}", 2); + --data->depth; + return; } - return Qnil; -} -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) -{ - char *object_nl = state->object_nl; - long object_nl_len = state->object_nl_len; - char *indent = state->indent; - long indent_len = state->indent_len; - long max_nesting = state->max_nesting; - char *delim = FBUFFER_PTR(state->object_delim); - long delim_len = FBUFFER_LEN(state->object_delim); - char *delim2 = FBUFFER_PTR(state->object_delim2); - long delim2_len = FBUFFER_LEN(state->object_delim2); - long depth = ++state->depth; - int i, j; - VALUE key, key_to_s, keys; - if (max_nesting != 0 && depth > max_nesting) { - fbuffer_free(buffer); - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); - } fbuffer_append_char(buffer, '{'); - keys = rb_funcall(obj, i_keys, 0); - for(i = 0; i < RARRAY_LEN(keys); i++) { - if (i > 0) fbuffer_append(buffer, delim, delim_len); - if (object_nl) { - fbuffer_append(buffer, object_nl, object_nl_len); - } - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } - } - key = rb_ary_entry(keys, i); - key_to_s = rb_funcall(key, i_to_s, 0); - Check_Type(key_to_s, T_STRING); - generate_json(buffer, Vstate, state, key_to_s); - fbuffer_append(buffer, delim2, delim2_len); - generate_json(buffer, Vstate, state, rb_hash_aref(obj, key)); - } - depth = --state->depth; - if (object_nl) { - fbuffer_append(buffer, object_nl, object_nl_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + + struct hash_foreach_arg arg = { + .hash = obj, + .data = data, + .first = true, + }; + rb_hash_foreach(obj, json_object_i, (VALUE)&arg); + + depth = --data->depth; + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) -{ - char *array_nl = state->array_nl; - long array_nl_len = state->array_nl_len; - char *indent = state->indent; - long indent_len = state->indent_len; - long max_nesting = state->max_nesting; - char *delim = FBUFFER_PTR(state->array_delim); - long delim_len = FBUFFER_LEN(state->array_delim); - long depth = ++state->depth; - int i, j; - if (max_nesting != 0 && depth > max_nesting) { - fbuffer_free(buffer); - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + long depth = increase_depth(data); + + if (RARRAY_LEN(obj) == 0) { + fbuffer_append(buffer, "[]", 2); + --data->depth; + return; } + fbuffer_append_char(buffer, '['); - if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len); - for(i = 0; i < RARRAY_LEN(obj); i++) { - if (i > 0) fbuffer_append(buffer, delim, delim_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + for (int i = 0; i < RARRAY_LEN(obj); i++) { + if (i > 0) { + fbuffer_append_char(buffer, ','); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } - generate_json(buffer, Vstate, state, rb_ary_entry(obj, i)); + generate_json(buffer, data, RARRAY_AREF(obj, i)); } - state->depth = --depth; - if (array_nl) { - fbuffer_append(buffer, array_nl, array_nl_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + data->depth = --depth; + if (RB_UNLIKELY(data->state->array_nl)) { + fbuffer_append_str(buffer, data->state->array_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, ']'); } -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - fbuffer_append_char(buffer, '"'); -#ifdef HAVE_RUBY_ENCODING_H - obj = rb_funcall(obj, i_encode, 1, CEncoding_UTF_8); -#endif - if (state->ascii_only) { - convert_UTF8_to_JSON_ASCII(buffer, obj); + VALUE tmp; + if (rb_respond_to(obj, i_to_json)) { + tmp = json_call_to_json(data, obj); + Check_Type(tmp, T_STRING); + fbuffer_append_str(buffer, tmp); } else { - convert_UTF8_to_JSON(buffer, obj); + tmp = rb_funcall(obj, i_to_s, 0); + Check_Type(tmp, T_STRING); + generate_json_string(buffer, data, tmp); } - fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + if (data->state->strict) { + generate_json_string(buffer, data, rb_sym2str(obj)); + } else { + generate_json_fallback(buffer, data, obj); + } +} + +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); - fbuffer_append_str(buffer, tmp); + fbuffer_append_str(buffer, StringValue(tmp)); } -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) -{ - if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, Vstate, state, obj); - else - generate_json_bignum(buffer, Vstate, state, obj); -} -#endif -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); - char allow_nan = state->allow_nan; - VALUE tmp = rb_funcall(obj, i_to_s, 0); - if (!allow_nan) { - if (isinf(value)) { - fbuffer_free(buffer); - rb_raise(eGeneratorError, "%u: %"PRIsVALUE" not allowed in JSON", __LINE__, RB_OBJ_STRING(tmp)); - } else if (isnan(value)) { - fbuffer_free(buffer); - rb_raise(eGeneratorError, "%u: %"PRIsVALUE" not allowed in JSON", __LINE__, RB_OBJ_STRING(tmp)); + char allow_nan = data->state->allow_nan; + if (isinf(value) || isnan(value)) { + /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */ + if (!allow_nan) { + if (data->state->strict && data->state->as_json) { + VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse); + if (casted_obj != obj) { + increase_depth(data); + generate_json(buffer, data, casted_obj); + data->depth--; + return; + } + } + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0)); } + + VALUE tmp = rb_funcall(obj, i_to_s, 0); + fbuffer_append_str(buffer, tmp); + return; } - fbuffer_append_str(buffer, tmp); + + /* This implementation writes directly into the buffer. We reserve + * the 32 characters that fpconv_dtoa states as its maximum. + */ + fbuffer_inc_capa(buffer, 32); + char* d = buffer->ptr + buffer->len; + int len = fpconv_dtoa(value, d); + /* fpconv_dtoa converts a float to its shortest string representation, + * but it adds a ".0" if this is a plain integer. + */ + fbuffer_consumed(buffer, len); } -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE tmp; - VALUE klass = CLASS_OF(obj); - if (klass == rb_cHash) { - generate_json_object(buffer, Vstate, state, obj); - } else if (klass == rb_cArray) { - generate_json_array(buffer, Vstate, state, obj); - } else if (klass == rb_cString) { - generate_json_string(buffer, Vstate, state, obj); - } else if (obj == Qnil) { - generate_json_null(buffer, Vstate, state, obj); + VALUE fragment = RSTRUCT_GET(obj, 0); + Check_Type(fragment, T_STRING); + fbuffer_append_str(buffer, fragment); +} + +static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback) +{ + bool as_json_called = false; +start: + if (obj == Qnil) { + generate_json_null(buffer, data, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, Vstate, state, obj); + generate_json_false(buffer, data, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, Vstate, state, obj); - } else if (FIXNUM_P(obj)) { - generate_json_fixnum(buffer, Vstate, state, obj); - } else if (RB_TYPE_P(obj, T_BIGNUM)) { - generate_json_bignum(buffer, Vstate, state, obj); - } else if (klass == rb_cFloat) { - generate_json_float(buffer, Vstate, state, obj); - } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, Vstate); - Check_Type(tmp, T_STRING); - fbuffer_append_str(buffer, tmp); + generate_json_true(buffer, data, obj); + } else if (RB_SPECIAL_CONST_P(obj)) { + if (RB_FIXNUM_P(obj)) { + generate_json_fixnum(buffer, data, obj); + } else if (RB_FLONUM_P(obj)) { + generate_json_float(buffer, data, obj); + } else if (RB_STATIC_SYM_P(obj)) { + generate_json_symbol(buffer, data, obj); + } else { + goto general; + } } else { - tmp = rb_funcall(obj, i_to_s, 0); - Check_Type(tmp, T_STRING); - generate_json_string(buffer, Vstate, state, tmp); + VALUE klass = RBASIC_CLASS(obj); + switch (RB_BUILTIN_TYPE(obj)) { + case T_BIGNUM: + generate_json_bignum(buffer, data, obj); + break; + case T_HASH: + if (fallback && klass != rb_cHash) goto general; + generate_json_object(buffer, data, obj); + break; + case T_ARRAY: + if (fallback && klass != rb_cArray) goto general; + generate_json_array(buffer, data, obj); + break; + case T_STRING: + if (fallback && klass != rb_cString) goto general; + + if (RB_LIKELY(valid_json_string_p(obj))) { + raw_generate_json_string(buffer, data, obj); + } else if (as_json_called) { + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + } else { + obj = ensure_valid_encoding(data, obj, false, false); + as_json_called = true; + goto start; + } + break; + case T_SYMBOL: + generate_json_symbol(buffer, data, obj); + break; + case T_FLOAT: + if (fallback && klass != rb_cFloat) goto general; + generate_json_float(buffer, data, obj); + break; + case T_STRUCT: + if (klass != cFragment) goto general; + generate_json_fragment(buffer, data, obj); + break; + default: + general: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + obj = json_call_as_json(data->state, obj, Qfalse); + as_json_called = true; + goto start; + } else { + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); + } + } else { + generate_json_fallback(buffer, data, obj); + } + } } } -static FBuffer *cState_prepare_buffer(VALUE self) +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - FBuffer *buffer; - GET_STATE(self); - buffer = fbuffer_alloc(state->buffer_initial_length); + generate_json_general(buffer, data, obj, true); +} - if (state->object_delim) { - fbuffer_clear(state->object_delim); - } else { - state->object_delim = fbuffer_alloc(16); - } - fbuffer_append_char(state->object_delim, ','); - if (state->object_delim2) { - fbuffer_clear(state->object_delim2); - } else { - state->object_delim2 = fbuffer_alloc(16); - } - if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len); - fbuffer_append_char(state->object_delim2, ':'); - if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len); +static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, false); +} - if (state->array_delim) { - fbuffer_clear(state->array_delim); - } else { - state->array_delim = fbuffer_alloc(16); - } - fbuffer_append_char(state->array_delim, ','); - if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len); - return buffer; +static VALUE generate_json_try(VALUE d) +{ + struct generate_json_data *data = (struct generate_json_data *)d; + + data->func(data->buffer, data, data->obj); + + return fbuffer_finalize(data->buffer); } -static VALUE cState_partial_generate(VALUE self, VALUE obj) +static VALUE generate_json_ensure(VALUE d) { - FBuffer *buffer = cState_prepare_buffer(self); - GET_STATE(self); - generate_json(buffer, self, state, obj); - return fbuffer_to_s(buffer); + struct generate_json_data *data = (struct generate_json_data *)d; + fbuffer_free(data->buffer); + + return Qundef; } -/* - * call-seq: generate(obj) +static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) +{ + GET_STATE(self); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; + fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json + .state = state, + .depth = state->depth, + .obj = obj, + .func = func + }; + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} + +/* call-seq: + * generate(obj) -> String + * generate(obj, anIO) -> anIO * * Generates a valid JSON document from object +obj+ and returns the * result. If no valid JSON document can be created this method raises a * GeneratorError exception. */ -static VALUE cState_generate(VALUE self, VALUE obj) +static VALUE cState_generate(int argc, VALUE *argv, VALUE self) { - VALUE result = cState_partial_generate(self, obj); - GET_STATE(self); - (void)state; - return result; + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json, io); +} + +/* :nodoc: */ +static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json_no_fallback, io); } -/* - * call-seq: new(opts = {}) - * - * Instantiates a new State object, configured by _opts_. - * - * _opts_ can have the following keys: - * - * * *indent*: a string used to indent levels (default: ''), - * * *space*: a string that is put after, a : or , delimiter (default: ''), - * * *space_before*: a string that is put before a : pair delimiter (default: ''), - * * *object_nl*: a string that is put at the end of a JSON object (default: ''), - * * *array_nl*: a string that is put at the end of a JSON array (default: ''), - * * *allow_nan*: true if NaN, Infinity, and -Infinity should be - * generated, otherwise an exception is thrown, if these values are - * encountered. This options defaults to false. - * * *buffer_initial_length*: sets the initial length of the generator's - * internal buffer. - */ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) { - VALUE opts; - GET_STATE(self); - state->max_nesting = 100; - state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - rb_scan_args(argc, argv, "01", &opts); - if (!NIL_P(opts)) cState_configure(self, opts); + rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`"); return self; } @@ -1000,14 +1367,14 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = fstrndup(origState->indent, origState->indent_len); - objState->space = fstrndup(origState->space, origState->space_len); - objState->space_before = fstrndup(origState->space_before, origState->space_before_len); - objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len); - objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len); - if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim); - if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim); - if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2); + + RB_OBJ_WRITTEN(obj, Qundef, objState->indent); + RB_OBJ_WRITTEN(obj, Qundef, objState->space); + RB_OBJ_WRITTEN(obj, Qundef, objState->space_before); + RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->as_json); + return obj; } @@ -1025,10 +1392,7 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) } else if (rb_obj_is_kind_of(opts, rb_cHash)) { return rb_funcall(self, i_new, 1, opts); } else { - if (NIL_P(CJSON_SAFE_STATE_PROTOTYPE)) { - CJSON_SAFE_STATE_PROTOTYPE = rb_const_get(mJSON, i_SAFE_STATE_PROTOTYPE); - } - return rb_funcall(CJSON_SAFE_STATE_PROTOTYPE, i_dup, 0); + return rb_class_new_instance(0, NULL, cState); } } @@ -1040,7 +1404,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) static VALUE cState_indent(VALUE self) { GET_STATE(self); - return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2(""); + return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0)); +} + +static VALUE string_config(VALUE config) +{ + if (RTEST(config)) { + Check_Type(config, T_STRING); + if (RSTRING_LEN(config)) { + return rb_str_new_frozen(config); + } + } + return Qfalse; } /* @@ -1050,21 +1425,9 @@ static VALUE cState_indent(VALUE self) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(indent, T_STRING); - len = RSTRING_LEN(indent); - if (len == 0) { - if (state->indent) { - ruby_xfree(state->indent); - state->indent = NULL; - state->indent_len = 0; - } - } else { - if (state->indent) ruby_xfree(state->indent); - state->indent = fstrndup(RSTRING_PTR(indent), len); - state->indent_len = len; - } + RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; } @@ -1077,7 +1440,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent) static VALUE cState_space(VALUE self) { GET_STATE(self); - return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2(""); + return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1088,21 +1451,9 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(space, T_STRING); - len = RSTRING_LEN(space); - if (len == 0) { - if (state->space) { - ruby_xfree(state->space); - state->space = NULL; - state->space_len = 0; - } - } else { - if (state->space) ruby_xfree(state->space); - state->space = fstrndup(RSTRING_PTR(space), len); - state->space_len = len; - } + RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; } @@ -1114,7 +1465,7 @@ static VALUE cState_space_set(VALUE self, VALUE space) static VALUE cState_space_before(VALUE self) { GET_STATE(self); - return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2(""); + return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1124,21 +1475,9 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(space_before, T_STRING); - len = RSTRING_LEN(space_before); - if (len == 0) { - if (state->space_before) { - ruby_xfree(state->space_before); - state->space_before = NULL; - state->space_before_len = 0; - } - } else { - if (state->space_before) ruby_xfree(state->space_before); - state->space_before = fstrndup(RSTRING_PTR(space_before), len); - state->space_before_len = len; - } + RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; } @@ -1151,7 +1490,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before) static VALUE cState_object_nl(VALUE self) { GET_STATE(self); - return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2(""); + return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1162,20 +1501,9 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(object_nl, T_STRING); - len = RSTRING_LEN(object_nl); - if (len == 0) { - if (state->object_nl) { - ruby_xfree(state->object_nl); - state->object_nl = NULL; - } - } else { - if (state->object_nl) ruby_xfree(state->object_nl); - state->object_nl = fstrndup(RSTRING_PTR(object_nl), len); - state->object_nl_len = len; - } + RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; } @@ -1187,7 +1515,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) static VALUE cState_array_nl(VALUE self) { GET_STATE(self); - return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2(""); + return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1197,23 +1525,35 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(array_nl, T_STRING); - len = RSTRING_LEN(array_nl); - if (len == 0) { - if (state->array_nl) { - ruby_xfree(state->array_nl); - state->array_nl = NULL; - } - } else { - if (state->array_nl) ruby_xfree(state->array_nl); - state->array_nl = fstrndup(RSTRING_PTR(array_nl), len); - state->array_nl_len = len; - } + RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } +/* + * call-seq: as_json() + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_as_json(VALUE self) +{ + GET_STATE(self); + return state->as_json; +} + +/* + * call-seq: as_json=(as_json) + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_as_json_set(VALUE self, VALUE as_json) +{ + rb_check_frozen(self); + GET_STATE(self); + RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc")); + return Qnil; +} /* * call-seq: check_circular? @@ -1239,6 +1579,25 @@ static VALUE cState_max_nesting(VALUE self) return LONG2FIX(state->max_nesting); } +static long long_config(VALUE num) +{ + return RTEST(num) ? NUM2LONG(num) : 0; +} + +// depth must never be negative; reject early with a clear error. +static long depth_config(VALUE num) +{ + if (!RTEST(num)) return 0; + long d = NUM2LONG(num); + if (RB_UNLIKELY(d < 0)) { + rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d); + } + if (RB_UNLIKELY(d > INT_MAX)) { + rb_raise(rb_eArgError, "depth is too large (got %ld)", d); + } + return d; +} + /* * call-seq: max_nesting=(depth) * @@ -1247,9 +1606,68 @@ static VALUE cState_max_nesting(VALUE self) */ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - Check_Type(depth, T_FIXNUM); - return state->max_nesting = FIX2LONG(depth); + state->max_nesting = long_config(depth); + return Qnil; +} + +/* + * call-seq: script_safe + * + * If this boolean is true, the forward slashes will be escaped in + * the json output. + */ +static VALUE cState_script_safe(VALUE self) +{ + GET_STATE(self); + return state->script_safe ? Qtrue : Qfalse; +} + +/* + * call-seq: script_safe=(enable) + * + * This sets whether or not the forward slashes will be escaped in + * the json output. + */ +static VALUE cState_script_safe_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->script_safe = RTEST(enable); + return Qnil; +} + +/* + * call-seq: strict + * + * If this boolean is false, types unsupported by the JSON format will + * be serialized as strings. + * If this boolean is true, types unsupported by the JSON format will + * raise a JSON::GeneratorError. + */ +static VALUE cState_strict(VALUE self) +{ + GET_STATE(self); + return state->strict ? Qtrue : Qfalse; +} + +/* + * call-seq: strict=(enable) + * + * This sets whether or not to serialize types unsupported by the + * JSON format as strings. + * If this boolean is false, types unsupported by the JSON format will + * be serialized as strings. + * If this boolean is true, types unsupported by the JSON format will + * raise a JSON::GeneratorError. + */ +static VALUE cState_strict_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->strict = RTEST(enable); + return Qnil; } /* @@ -1265,9 +1683,22 @@ static VALUE cState_allow_nan_p(VALUE self) } /* + * call-seq: allow_nan=(enable) + * + * This sets whether or not to serialize NaN, Infinity, and -Infinity + */ +static VALUE cState_allow_nan_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->allow_nan = RTEST(enable); + return Qnil; +} + +/* * call-seq: ascii_only? * - * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise + * Returns true, if only ASCII characters should be generated. Otherwise * returns false. */ static VALUE cState_ascii_only_p(VALUE self) @@ -1277,6 +1708,32 @@ static VALUE cState_ascii_only_p(VALUE self) } /* + * call-seq: ascii_only=(enable) + * + * This sets whether only ASCII characters should be generated. + */ +static VALUE cState_ascii_only_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->ascii_only = RTEST(enable); + return Qnil; +} + +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + default: + return Qfalse; + } +} + +/* * call-seq: depth * * This integer returns the current depth of data structure nesting. @@ -1295,9 +1752,9 @@ static VALUE cState_depth(VALUE self) */ static VALUE cState_depth_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); + state->depth = depth_config(depth); return Qnil; } @@ -1312,6 +1769,15 @@ static VALUE cState_buffer_initial_length(VALUE self) return LONG2FIX(state->buffer_initial_length); } +static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length) +{ + Check_Type(buffer_initial_length, T_FIXNUM); + long initial_length = FIX2LONG(buffer_initial_length); + if (initial_length > 0) { + state->buffer_initial_length = initial_length; + } +} + /* * call-seq: buffer_initial_length=(length) * @@ -1320,34 +1786,142 @@ static VALUE cState_buffer_initial_length(VALUE self) */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { - long initial_length; + rb_check_frozen(self); GET_STATE(self); - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) { - state->buffer_initial_length = initial_length; - } + buffer_initial_length_set(state, buffer_initial_length); return Qnil; } -/* - * - */ +struct configure_state_data { + JSON_Generator_State *state; + VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated +}; + +static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value) +{ + if (RTEST(data->vstate)) { + RB_OBJ_WRITE(data->vstate, field, value); + } else { + *field = value; + } +} + +static int configure_state_i(VALUE key, VALUE val, VALUE _arg) +{ + struct configure_state_data *data = (struct configure_state_data *)_arg; + JSON_Generator_State *state = data->state; + + if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); } + else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); } + else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); } + else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); } + else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); } + else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } + else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } + else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } + else if (key == sym_depth) { state->depth = depth_config(val); } + else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } + else if (key == sym_script_safe) { state->script_safe = RTEST(val); } + else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } + else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_as_json) { + VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; + state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; + state_write_value(data, &state->as_json, proc); + } + return ST_CONTINUE; +} + +static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config) +{ + if (!RTEST(config)) return; + + Check_Type(config, T_HASH); + + if (!RHASH_SIZE(config)) return; + + struct configure_state_data data = { + .state = state, + .vstate = vstate + }; + + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(config, configure_state_i, (VALUE)&data); +} + +static VALUE cState_configure(VALUE self, VALUE opts) +{ + rb_check_frozen(self); + GET_STATE(self); + configure_state(state, self, opts); + return self; +} + +static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func) +{ + JSON_Generator_State state = {0}; + state_init(&state); + configure_state(&state, Qfalse, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; + fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, + .state = &state, + .depth = state.depth, + .obj = obj, + .func = func, + }; + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} + +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json); +} + +static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback); +} + void Init_generator(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + +#undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - mGenerator = rb_define_module_under(mExt, "Generator"); + rb_global_variable(&cFragment); + cFragment = rb_const_get(mJSON, rb_intern("Fragment")); + + VALUE mExt = rb_define_module_under(mJSON, "Ext"); + VALUE mGenerator = rb_define_module_under(mExt, "Generator"); + + rb_global_variable(&eGeneratorError); eGeneratorError = rb_path2class("JSON::GeneratorError"); + + rb_global_variable(&eNestingError); eNestingError = rb_path2class("JSON::NestingError"); cState = rb_define_class_under(mGenerator, "State", rb_cObject); rb_define_alloc_func(cState, cState_s_allocate); rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); rb_define_method(cState, "initialize", cState_initialize, -1); + rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings + rb_define_private_method(cState, "_configure", cState_configure, 1); + rb_define_method(cState, "initialize_copy", cState_init_copy, 1); rb_define_method(cState, "indent", cState_indent, 0); rb_define_method(cState, "indent=", cState_indent_set, 1); @@ -1359,85 +1933,65 @@ void Init_generator(void) rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); rb_define_method(cState, "array_nl", cState_array_nl, 0); rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "as_json", cState_as_json, 0); + rb_define_method(cState, "as_json=", cState_as_json_set, 1); rb_define_method(cState, "max_nesting", cState_max_nesting, 0); rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); + rb_define_method(cState, "script_safe", cState_script_safe, 0); + rb_define_method(cState, "script_safe?", cState_script_safe, 0); + rb_define_method(cState, "script_safe=", cState_script_safe_set, 1); + rb_define_alias(cState, "escape_slash", "script_safe"); + rb_define_alias(cState, "escape_slash?", "script_safe?"); + rb_define_alias(cState, "escape_slash=", "script_safe="); + rb_define_method(cState, "strict", cState_strict, 0); + rb_define_method(cState, "strict?", cState_strict, 0); + rb_define_method(cState, "strict=", cState_strict_set, 1); rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0); + rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1); rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0); + rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1); rb_define_method(cState, "depth", cState_depth, 0); rb_define_method(cState, "depth=", cState_depth_set, 1); rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); - rb_define_method(cState, "configure", cState_configure, 1); - rb_define_alias(cState, "merge", "configure"); - rb_define_method(cState, "to_h", cState_to_h, 0); - rb_define_alias(cState, "to_hash", "to_h"); - rb_define_method(cState, "[]", cState_aref, 1); - rb_define_method(cState, "[]=", cState_aset, 2); - rb_define_method(cState, "generate", cState_generate, 1); - - mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - mObject = rb_define_module_under(mGeneratorMethods, "Object"); - rb_define_method(mObject, "to_json", mObject_to_json, -1); - mHash = rb_define_module_under(mGeneratorMethods, "Hash"); - rb_define_method(mHash, "to_json", mHash_to_json, -1); - mArray = rb_define_module_under(mGeneratorMethods, "Array"); - rb_define_method(mArray, "to_json", mArray_to_json, -1); -#ifdef RUBY_INTEGER_UNIFICATION - mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); - rb_define_method(mInteger, "to_json", mInteger_to_json, -1); -#else - mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); - rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1); - mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); - rb_define_method(mBignum, "to_json", mBignum_to_json, -1); -#endif - mFloat = rb_define_module_under(mGeneratorMethods, "Float"); - rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_singleton_method(mString, "included", mString_included_s, 1); - rb_define_method(mString, "to_json", mString_to_json, -1); - rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); - rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); - mString_Extend = rb_define_module_under(mString, "Extend"); - rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); - mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); - rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); - rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); - rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); - - CRegexp_MULTILINE = rb_const_get(rb_cRegexp, rb_intern("MULTILINE")); + rb_define_method(cState, "generate", cState_generate, -1); + rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1); + + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); + + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); + rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3); + + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + i_to_s = rb_intern("to_s"); i_to_json = rb_intern("to_json"); i_new = rb_intern("new"); - i_indent = rb_intern("indent"); - i_space = rb_intern("space"); - i_space_before = rb_intern("space_before"); - i_object_nl = rb_intern("object_nl"); - i_array_nl = rb_intern("array_nl"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_ascii_only = rb_intern("ascii_only"); - i_depth = rb_intern("depth"); - i_buffer_initial_length = rb_intern("buffer_initial_length"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); - i_key_p = rb_intern("key?"); - i_aref = rb_intern("[]"); - i_send = rb_intern("__send__"); - i_respond_to_p = rb_intern("respond_to?"); - i_match = rb_intern("match"); - i_keys = rb_intern("keys"); - i_dup = rb_intern("dup"); -#ifdef HAVE_RUBY_ENCODING_H - CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); - i_encoding = rb_intern("encoding"); i_encode = rb_intern("encode"); -#endif - i_SAFE_STATE_PROTOTYPE = rb_intern("SAFE_STATE_PROTOTYPE"); - CJSON_SAFE_STATE_PROTOTYPE = Qnil; + + sym_indent = ID2SYM(rb_intern("indent")); + sym_space = ID2SYM(rb_intern("space")); + sym_space_before = ID2SYM(rb_intern("space_before")); + sym_object_nl = ID2SYM(rb_intern("object_nl")); + sym_array_nl = ID2SYM(rb_intern("array_nl")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_ascii_only = ID2SYM(rb_intern("ascii_only")); + sym_depth = ID2SYM(rb_intern("depth")); + sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length")); + sym_script_safe = ID2SYM(rb_intern("script_safe")); + sym_escape_slash = ID2SYM(rb_intern("escape_slash")); + sym_strict = ID2SYM(rb_intern("strict")); + sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + + usascii_encindex = rb_usascii_encindex(); + utf8_encindex = rb_utf8_encindex(); + binary_encindex = rb_ascii8bit_encindex(); + + rb_require("json/ext/generator/state"); + + simd_impl = find_simd_implementation(); } diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h deleted file mode 100644 index c367a6209a..0000000000 --- a/ext/json/generator/generator.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef _GENERATOR_H_ -#define _GENERATOR_H_ - -#include <math.h> -#include <ctype.h> - -#include "ruby.h" - -#ifdef HAVE_RUBY_RE_H -#include "ruby/re.h" -#else -#include "re.h" -#endif - -#ifndef rb_intern_str -#define rb_intern_str(string) SYM2ID(rb_str_intern(string)) -#endif - -#ifndef rb_obj_instance_variables -#define rb_obj_instance_variables(object) rb_funcall(object, rb_intern("instance_variables"), 0) -#endif - -#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) - -/* unicode definitions */ - -#define UNI_STRICT_CONVERSION 1 - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length); -static void unicode_escape(char *buf, UTF16 character); -static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string); -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string); -static char *fstrndup(const char *ptr, unsigned long len); - -/* ruby api and some helpers */ - -typedef struct JSON_Generator_StateStruct { - char *indent; - long indent_len; - char *space; - long space_len; - char *space_before; - long space_before_len; - char *object_nl; - long object_nl_len; - char *array_nl; - long array_nl_len; - FBuffer *array_delim; - FBuffer *object_delim; - FBuffer *object_delim2; - long max_nesting; - char allow_nan; - char ascii_only; - long depth; - long buffer_initial_length; -} JSON_Generator_State; - -#define GET_STATE_TO(self, state) \ - TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - GET_STATE_TO(self, state) - -#define GENERATE_JSON(type) \ - FBuffer *buffer; \ - VALUE Vstate; \ - JSON_Generator_State *state; \ - \ - rb_scan_args(argc, argv, "01", &Vstate); \ - Vstate = cState_from_state_s(cState, Vstate); \ - TypedData_Get_Struct(Vstate, JSON_Generator_State, &JSON_Generator_State_type, state); \ - buffer = cState_prepare_buffer(Vstate); \ - generate_json_##type(buffer, Vstate, state, self); \ - return fbuffer_to_s(buffer) - -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); -#ifdef RUBY_INTEGER_UNIFICATION -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); -#else -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self); -#endif -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_included_s(VALUE self, VALUE modul); -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_to_json_raw_object(VALUE self); -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); -static VALUE mString_Extend_json_create(VALUE self, VALUE o); -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); -static void State_free(void *state); -static VALUE cState_s_allocate(VALUE klass); -static VALUE cState_configure(VALUE self, VALUE opts); -static VALUE cState_to_h(VALUE self); -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static VALUE cState_partial_generate(VALUE self, VALUE obj); -static VALUE cState_generate(VALUE self, VALUE obj); -static VALUE cState_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_indent(VALUE self); -static VALUE cState_indent_set(VALUE self, VALUE indent); -static VALUE cState_space(VALUE self); -static VALUE cState_space_set(VALUE self, VALUE space); -static VALUE cState_space_before(VALUE self); -static VALUE cState_space_before_set(VALUE self, VALUE space_before); -static VALUE cState_object_nl(VALUE self); -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); -static VALUE cState_array_nl(VALUE self); -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); -static VALUE cState_max_nesting(VALUE self); -static VALUE cState_max_nesting_set(VALUE self, VALUE depth); -static VALUE cState_allow_nan_p(VALUE self); -static VALUE cState_ascii_only_p(VALUE self); -static VALUE cState_depth(VALUE self); -static VALUE cState_depth_set(VALUE self, VALUE depth); -static FBuffer *cState_prepare_buffer(VALUE self); -#ifndef ZALLOC -#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type))) -static inline void *ruby_zalloc(size_t n) -{ - void *p = ruby_xmalloc(n); - memset(p, 0, n); - return p; -} -#endif -#ifdef TypedData_Make_Struct -static const rb_data_type_t JSON_Generator_State_type; -#define NEW_TYPEDDATA_WRAPPER 1 -#else -#define TypedData_Make_Struct(klass, type, ignore, json) Data_Make_Struct(klass, type, NULL, State_free, json) -#define TypedData_Get_Struct(self, JSON_Generator_State, ignore, json) Data_Get_Struct(self, JSON_Generator_State, json) -#endif - -#endif |
