summaryrefslogtreecommitdiff
path: root/ext/json/ext/generator
diff options
context:
space:
mode:
authorusa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-04-26 04:34:36 +0000
committerusa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-04-26 04:34:36 +0000
commit27be3056dc0e9c313f25b430ca90b240a5e44160 (patch)
treeb339421d40fd87a888691ded57a8cc00fbc2d2f2 /ext/json/ext/generator
parentb0018f68590c8071bb0a242f75cc4fa048396e78 (diff)
* ext/json: revert r27493. came again after canceling gcc-ism.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27500 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/json/ext/generator')
-rw-r--r--ext/json/ext/generator/extconf.rb9
-rw-r--r--ext/json/ext/generator/generator.c919
-rw-r--r--ext/json/ext/generator/unicode.c180
-rw-r--r--ext/json/ext/generator/unicode.h53
4 files changed, 1161 insertions, 0 deletions
diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb
new file mode 100644
index 0000000000..fc267420f1
--- /dev/null
+++ b/ext/json/ext/generator/extconf.rb
@@ -0,0 +1,9 @@
+require 'mkmf'
+require 'rbconfig'
+
+if CONFIG['GCC'] == 'yes'
+ $CFLAGS += ' -Wall'
+ #$CFLAGS += ' -O0 -ggdb'
+end
+
+create_makefile 'json/ext/generator'
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
new file mode 100644
index 0000000000..482938ac47
--- /dev/null
+++ b/ext/json/ext/generator/generator.c
@@ -0,0 +1,919 @@
+#include <string.h>
+#include "ruby.h"
+#if HAVE_RUBY_ST_H
+#include "ruby/st.h"
+#endif
+#if HAVE_ST_H
+#include "st.h"
+#endif
+#include "unicode.h"
+#include <math.h>
+
+#ifndef RHASH_TBL
+#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
+#endif
+
+#ifndef RHASH_SIZE
+#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
+#endif
+
+#ifndef RFLOAT_VALUE
+#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
+#endif
+
+#ifdef HAVE_RUBY_ENCODING_H
+#include "ruby/encoding.h"
+#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
+#else
+#define FORCE_UTF8(obj)
+#endif
+
+/*
+ * Raise JSON::NestingError when entering one more level below _depth_ would
+ * exceed state->max_nesting. A max_nesting of 0 disables the check.
+ * Expands to exactly one statement; the caller supplies the trailing
+ * semicolon, so the macro is safe inside unbraced if/else bodies.
+ */
+#define check_max_nesting(state, depth) do { \
+    long current_nesting = 1 + (depth); \
+    if ((state)->max_nesting != 0 && current_nesting > (state)->max_nesting) \
+        rb_raise(eNestingError, "nesting of %ld is too deep", current_nesting); \
+} while (0)
+
+static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
+ mHash, mArray, mInteger, mFloat, mString, mString_Extend,
+ mTrueClass, mFalseClass, mNilClass, eGeneratorError,
+ eCircularDatastructure, eNestingError;
+
+static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
+ i_object_nl, i_array_nl, i_check_circular, i_max_nesting,
+ i_allow_nan, i_pack, i_unpack, i_create_id, i_extend;
+
+typedef struct JSON_Generator_StateStruct { /* configuration and scratch data wrapped by a State object */
+ VALUE indent; /* string repeated once per nesting level in front of members */
+ VALUE space; /* string appended after the ':' of an object pair */
+ VALUE space_before; /* string appended before the ':' of an object pair */
+ VALUE object_nl; /* string appended after '{', after each ',' and before '}' */
+ VALUE array_nl; /* string appended after '[', after each ',' and before ']' */
+ int check_circular; /* non-zero: track containers in seen and raise on cycles */
+ VALUE seen; /* Hash keyed by object id of containers currently being generated */
+ VALUE memo; /* output buffer of the Hash currently being iterated */
+ VALUE depth; /* Fixnum depth of the members of the Hash currently being iterated */
+ long max_nesting; /* maximum allowed nesting depth; 0 disables the check */
+ int flag; /* non-zero once a pair was written, so the next one is preceded by ',' */
+ int allow_nan; /* non-zero: NaN and +/-Infinity floats are generated instead of raising */
+} JSON_Generator_State;
+
+#define GET_STATE(self) /* declares a local named state and unwraps it from self; it expands to a declaration, so place it last among a block's declarations */ \
+ JSON_Generator_State *state; \
+ Data_Get_Struct(self, JSON_Generator_State, state);
+
+/*
+ * Document-module: JSON::Ext::Generator
+ *
+ * This is the JSON generator implemented as a C extension. It can be
+ * configured to be used by setting
+ *
+ * JSON.generator = JSON::Ext::Generator
+ *
+ * with the method generator= in JSON.
+ *
+ */
+
+static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate)
+{ /*
+ * rb_hash_foreach callback used by mHash_json_transfrom: appends one
+ * "key:value" pair to the buffer held in state->memo, inserting the
+ * formatting strings configured in the State. Always returns
+ * ST_CONTINUE so iteration proceeds over all pairs.
+ */
+ VALUE json, buf, Vdepth;
+ GET_STATE(Vstate);
+ buf = state->memo; /* output buffer set up by mHash_json_transfrom */
+ Vdepth = state->depth; /* Fixnum depth of this Hash's members */
+
+ if (key == Qundef) return ST_CONTINUE;
+ if (state->flag) { /* a pair was already written: emit the separator first */
+ state->flag = 0;
+ rb_str_buf_cat2(buf, ",");
+ if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(buf, state->object_nl);
+ }
+ if (RSTRING_LEN(state->object_nl)) { /* indentation is only emitted when object_nl is non-empty */
+ rb_str_buf_append(buf, rb_str_times(state->indent, Vdepth));
+ }
+ json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, Vdepth); /* keys are converted with to_s before being serialized */
+ Check_Type(json, T_STRING);
+ rb_str_buf_append(buf, json);
+ OBJ_INFECT(buf, json);
+ if (RSTRING_LEN(state->space_before)) {
+ rb_str_buf_append(buf, state->space_before);
+ }
+ rb_str_buf_cat2(buf, ":");
+ if (RSTRING_LEN(state->space)) rb_str_buf_append(buf, state->space);
+ json = rb_funcall(value, i_to_json, 2, Vstate, Vdepth);
+ Check_Type(json, T_STRING);
+ state->flag = 1; /* set after the nested call, since a nested Hash resets flag to 0 */
+ rb_str_buf_append(buf, json);
+ OBJ_INFECT(buf, json);
+ state->depth = Vdepth; /* restore: a nested Hash overwrites depth and memo in the shared State */
+ state->memo = buf;
+ return ST_CONTINUE;
+}
+
+/*
+ * Builds the JSON object text for Hash _self_ with the formatting options of
+ * _Vstate_. _Vdepth_ is the Fixnum depth of _self_; its members are generated
+ * one level deeper by hash_to_json_state_i.
+ */
+inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
+    long size = RHASH_SIZE(self);
+    long member_depth;
+    VALUE out;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(Vstate, JSON_Generator_State, state);
+    member_depth = FIX2LONG(Vdepth) + 1;
+    out = rb_str_buf_new(size);
+
+    /* Hand the buffer, depth and "first pair" marker to the iterator. */
+    state->memo = out;
+    state->depth = LONG2FIX(member_depth);
+    state->flag = 0;
+
+    rb_str_buf_cat2(out, "{");
+    if (RSTRING_LEN(state->object_nl)) {
+        rb_str_buf_append(out, state->object_nl);
+    }
+    rb_hash_foreach(self, hash_to_json_state_i, Vstate);
+    if (RSTRING_LEN(state->object_nl)) {
+        /* Closing brace goes on its own line, indented to this Hash's depth. */
+        rb_str_buf_append(out, state->object_nl);
+        rb_str_buf_append(out, rb_str_times(state->indent, Vdepth));
+    }
+    rb_str_buf_cat2(out, "}");
+    return out;
+}
+
+/*
+ * rb_hash_foreach callback for the state-less case: appends "key:value" to
+ * _buf_ without any formatting. A comma is written first whenever the buffer
+ * already holds more than the opening brace.
+ */
+static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
+{
+    VALUE key_json, value_json;
+
+    if (key == Qundef) return ST_CONTINUE;
+
+    if (RSTRING_LEN(buf) > 1) {
+        rb_str_buf_cat2(buf, ",");
+    }
+
+    key_json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0);
+    Check_Type(key_json, T_STRING);
+    rb_str_buf_append(buf, key_json);
+    OBJ_INFECT(buf, key_json);
+
+    rb_str_buf_cat2(buf, ":");
+
+    value_json = rb_funcall(value, i_to_json, 0);
+    Check_Type(value_json, T_STRING);
+    rb_str_buf_append(buf, value_json);
+    OBJ_INFECT(buf, value_json);
+
+    return ST_CONTINUE;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON object, that is unparsed from
+ * this Hash instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ */
+static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE Vstate, Vdepth, result;
+    long depth;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+
+    if (!NIL_P(Vstate)) {
+        int track_cycles;
+        VALUE self_id = Qnil;
+        GET_STATE(Vstate);
+
+        check_max_nesting(state, depth);
+        track_cycles = state->check_circular;
+        if (track_cycles) {
+            /* Register this Hash as "in progress" unless it already is. */
+            self_id = rb_obj_id(self);
+            if (RTEST(rb_hash_aref(state->seen, self_id))) {
+                rb_raise(eCircularDatastructure,
+                    "circular data structures not supported!");
+            }
+            rb_hash_aset(state->seen, self_id, Qtrue);
+        }
+        result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth));
+        if (track_cycles) {
+            rb_hash_delete(state->seen, self_id);
+        }
+    } else {
+        /* No State given: compact output without formatting or checks. */
+        result = rb_str_buf_new(RHASH_SIZE(self));
+        rb_str_buf_cat2(result, "{");
+        rb_hash_foreach(self, hash_to_json_i, result);
+        rb_str_buf_cat2(result, "}");
+    }
+
+    OBJ_INFECT(result, self);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * Generates the JSON text for Array _self_ using the formatting options in
+ * _Vstate_. _Vdepth_ is the nesting depth of _self_ (nil counts as 0).
+ * Raises JSON::NestingError when max_nesting is exceeded and, with
+ * check_circular enabled, JSON::CircularDatastructure when an element is
+ * already being generated.
+ */
+inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
+    long i;
+    VALUE shift, result;
+    VALUE self_id = Qnil;
+    long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+    VALUE delim = rb_str_new2(",");
+    int circular;
+    GET_STATE(Vstate);
+
+    check_max_nesting(state, depth);
+
+    /* Decide once whether cycle tracking applies to this array. */
+    circular = state->check_circular;
+    if (circular) {
+        self_id = rb_obj_id(self);
+        rb_hash_aset(state->seen, self_id, Qtrue);
+    }
+
+    result = rb_str_buf_new(RARRAY_LEN(self));
+    OBJ_INFECT(result, self);
+    if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl);
+    shift = rb_str_times(state->indent, LONG2FIX(depth + 1));
+
+    rb_str_buf_cat2(result, "[");
+    rb_str_buf_append(result, state->array_nl);
+    /*
+     * Re-read the length on every pass: an element's to_json calls back
+     * into Ruby and may shrink the array, so a cached length could index
+     * past its end.
+     */
+    for (i = 0; i < RARRAY_LEN(self); i++) {
+        VALUE element = RARRAY_PTR(self)[i];
+        if (circular && RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) {
+            rb_raise(eCircularDatastructure,
+                "circular data structures not supported!");
+        }
+        OBJ_INFECT(result, element);
+        if (i > 0) rb_str_buf_append(result, delim);
+        rb_str_buf_append(result, shift);
+        element = rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1));
+        Check_Type(element, T_STRING);
+        rb_str_buf_append(result, element);
+    }
+    if (RSTRING_LEN(state->array_nl)) {
+        /* Closing bracket goes on its own line, indented to this depth. */
+        rb_str_buf_append(result, state->array_nl);
+        rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth)));
+    }
+    rb_str_buf_cat2(result, "]");
+
+    if (circular) {
+        rb_hash_delete(state->seen, self_id);
+    }
+    return result;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON array, that is unparsed from
+ * this Array instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ */
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
+    VALUE Vstate, Vdepth, result;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    if (NIL_P(Vstate)) {
+        /* No State given: compact output without formatting or checks. */
+        long i;
+        result = rb_str_buf_new(2 + 2 * RARRAY_LEN(self));
+        rb_str_buf_cat2(result, "[");
+        OBJ_INFECT(result, self);
+        /*
+         * Re-read the length on every pass: an element's to_json calls
+         * back into Ruby and may shrink the array, so a cached length
+         * could index past its end.
+         */
+        for (i = 0; i < RARRAY_LEN(self); i++) {
+            VALUE element = RARRAY_PTR(self)[i];
+            OBJ_INFECT(result, element);
+            if (i > 0) rb_str_buf_cat2(result, ",");
+            element = rb_funcall(element, i_to_json, 0);
+            Check_Type(element, T_STRING);
+            rb_str_buf_append(result, element);
+        }
+        rb_str_buf_cat2(result, "]");
+    } else {
+        result = mArray_json_transfrom(self, Vstate, Vdepth);
+    }
+    OBJ_INFECT(result, self);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Integer number.
+ */
+static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE digits;
+
+    /* The decimal text produced by to_s is used as the JSON number. */
+    digits = rb_funcall(self, i_to_s, 0);
+    FORCE_UTF8(digits);
+    return digits;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Float number.
+ */
+static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
+{
+    JSON_Generator_State *state = NULL;
+    VALUE Vstate, rest, result;
+    double value = RFLOAT_VALUE(self);
+
+    rb_scan_args(argc, argv, "01*", &Vstate, &rest);
+    if (!NIL_P(Vstate)) Data_Get_Struct(Vstate, JSON_Generator_State, state);
+
+    /* Every outcome is based on the to_s text, so compute it once. */
+    result = rb_funcall(self, i_to_s, 0);
+
+    /*
+     * Infinity and NaN are rejected only when a State was passed and it
+     * does not allow them; without a State they are emitted as-is.
+     */
+    if ((isinf(value) || isnan(value)) && state && !state->allow_nan) {
+        /* __LINE__ is an int; cast it to match the %u conversion. */
+        rb_raise(eGeneratorError, "%u: %s not allowed in JSON",
+            (unsigned int) __LINE__, StringValueCStr(result));
+    }
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: String.included(modul)
+ *
+ * Extends _modul_ with the String::Extend module.
+ */
+static VALUE mString_included_s(VALUE self, VALUE modul) {
+    /*
+     * extend returns its receiver (the including module), not a String, so
+     * no encoding must be associated with the result.
+     */
+    return rb_funcall(modul, i_extend, 1, mString_Extend);
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * This string should be encoded with UTF-8. A call to this method
+ * returns a JSON string encoded with UTF16 big endian characters as
+ * \u????.
+ */
+static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE quoted;
+
+    /* Wrap the converted contents in double quotes. */
+    quoted = rb_str_buf_new(RSTRING_LEN(self));
+    rb_str_buf_cat2(quoted, "\"");
+    JSON_convert_UTF8_to_JSON(quoted, self, strictConversion);
+    rb_str_buf_cat2(quoted, "\"");
+
+    FORCE_UTF8(quoted);
+    return quoted;
+}
+
+/*
+ * call-seq: to_json_raw_object()
+ *
+ * This method creates a raw object hash, that can be nested into
+ * other data structures and will be unparsed as a raw string. This
+ * method should be used, if you want to convert raw strings to JSON
+ * instead of UTF-8 strings, e. g. binary data.
+ */
+static VALUE mString_to_json_raw_object(VALUE self) {
+    VALUE ary;
+    VALUE result = rb_hash_new();
+
+    /* JSON.create_id => class name, so the parser can find json_create. */
+    rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
+    /* "raw" => the string's bytes as an array of integers. */
+    ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
+    rb_hash_aset(result, rb_str_new2("raw"), ary);
+    /* The result is a Hash, not a String: no encoding is associated with it. */
+    return result;
+}
+
+/*
+ * call-seq: to_json_raw(*args)
+ *
+ * This method creates a JSON text from the result of a call to
+ * to_json_raw_object of this String.
+ */
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
+    VALUE raw_hash, json;
+
+    /* Build the raw-object Hash, then serialize it like any other Hash. */
+    raw_hash = mString_to_json_raw_object(self);
+    Check_Type(raw_hash, T_HASH);
+    json = mHash_to_json(argc, argv, raw_hash);
+    FORCE_UTF8(json);
+    return json;
+}
+
+/*
+ * call-seq: json_create(o)
+ *
+ * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
+ * key "raw"). The Ruby String can be created by this module method.
+ */
+static VALUE mString_Extend_json_create(VALUE self, VALUE o) {
+    VALUE bytes, format;
+
+    Check_Type(o, T_HASH);
+    /* Fetch the byte array stored under "raw" and pack it back into a String. */
+    bytes = rb_hash_aref(o, rb_str_new2("raw"));
+    format = rb_str_new2("C*");
+    return rb_funcall(bytes, i_pack, 1, format);
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for true: 'true'.
+ */
+static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE literal;
+
+    literal = rb_str_new2("true");
+    FORCE_UTF8(literal);
+    return literal;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for false: 'false'.
+ */
+static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE literal;
+
+    literal = rb_str_new2("false");
+    FORCE_UTF8(literal);
+    return literal;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for nil: 'null'.
+ */
+static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE literal;
+
+    literal = rb_str_new2("null");
+    FORCE_UTF8(literal);
+    return literal;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Converts this object to a string (calling #to_s), converts
+ * it to a JSON string, and returns the result. This is a fallback, if no
+ * special method #to_json was defined for some object.
+ */
+static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE text, json;
+
+    /* Stringify with to_s, then reuse the String generator on the result. */
+    text = rb_funcall(self, i_to_s, 0);
+    Check_Type(text, T_STRING);
+    json = mString_to_json(argc, argv, text);
+    FORCE_UTF8(json);
+    return json;
+}
+
+/*
+ * Document-class: JSON::Ext::Generator::State
+ *
+ * This class is used to create State instances, that are used to hold data
+ * while generating a JSON text from a Ruby data structure.
+ */
+
+static void State_mark(JSON_Generator_State *state)
+{ /*
+ * GC mark callback registered by cState_s_allocate: marks every VALUE
+ * field of the State so the objects it references are kept alive. The
+ * int/long fields hold no object references and are skipped.
+ */
+ rb_gc_mark_maybe(state->indent);
+ rb_gc_mark_maybe(state->space);
+ rb_gc_mark_maybe(state->space_before);
+ rb_gc_mark_maybe(state->object_nl);
+ rb_gc_mark_maybe(state->array_nl);
+ rb_gc_mark_maybe(state->seen);
+ rb_gc_mark_maybe(state->memo);
+ rb_gc_mark_maybe(state->depth);
+}
+
+/*
+ * Allocates a zero-filled JSON_Generator_State. Zeroing guarantees that
+ * every VALUE field reads as Qfalse until initialize has run, so neither
+ * the GC mark function nor an accessor called on an allocated but not yet
+ * initialized State ever sees indeterminate bits.
+ */
+static JSON_Generator_State *State_allocate(void)
+{
+    JSON_Generator_State *state = ALLOC(JSON_Generator_State);
+    MEMZERO(state, JSON_Generator_State, 1);
+    return state;
+}
+
+/*
+ * Allocator for State: wraps a fresh struct in a Ruby object of _klass_,
+ * using State_mark as mark function and -1 as free function.
+ */
+static VALUE cState_s_allocate(VALUE klass)
+{
+    JSON_Generator_State *fresh;
+
+    fresh = State_allocate();
+    return Data_Wrap_Struct(klass, State_mark, -1, fresh);
+}
+
+/*
+ * call-seq: configure(opts)
+ *
+ * Configure this State instance with the Hash _opts_, and return
+ * itself.
+ */
+/*
+ * Stores opts[key] into *slot when the option is set (neither nil nor
+ * false). Raises TypeError unless the value is a String.
+ */
+static void State_configure_string(VALUE opts, ID key, VALUE *slot)
+{
+    VALUE value = rb_hash_aref(opts, ID2SYM(key));
+    if (RTEST(value)) {
+        Check_Type(value, T_STRING);
+        *slot = value;
+    }
+}
+
+static VALUE cState_configure(VALUE self, VALUE opts)
+{
+    VALUE tmp;
+    GET_STATE(self);
+    /*
+     * rb_convert_type raises TypeError instead of returning nil, so the
+     * to_hash attempt has to use the non-raising rb_check_convert_type;
+     * otherwise the to_h fallback below could never be reached.
+     */
+    tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
+    if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
+    if (NIL_P(tmp)) {
+        rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash");
+    }
+    opts = tmp;
+
+    /* Formatting strings: only overwritten when present in opts. */
+    State_configure_string(opts, i_indent, &state->indent);
+    State_configure_string(opts, i_space, &state->space);
+    State_configure_string(opts, i_space_before, &state->space_before);
+    State_configure_string(opts, i_array_nl, &state->array_nl);
+    State_configure_string(opts, i_object_nl, &state->object_nl);
+
+    /* check_circular: defaults to on when the key is absent. */
+    tmp = ID2SYM(i_check_circular);
+    if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
+        tmp = rb_hash_aref(opts, ID2SYM(i_check_circular));
+        state->check_circular = RTEST(tmp);
+    } else {
+        state->check_circular = 1;
+    }
+
+    /*
+     * max_nesting: 19 when the key is absent; an explicit nil/false
+     * disables the limit (stored as 0).
+     */
+    tmp = ID2SYM(i_max_nesting);
+    state->max_nesting = 19;
+    if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
+        VALUE max_nesting = rb_hash_aref(opts, tmp);
+        if (RTEST(max_nesting)) {
+            Check_Type(max_nesting, T_FIXNUM);
+            state->max_nesting = FIX2LONG(max_nesting);
+        } else {
+            state->max_nesting = 0;
+        }
+    }
+
+    /* allow_nan: off unless opts sets it to a true value. */
+    tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
+    state->allow_nan = RTEST(tmp);
+    return self;
+}
+
+/*
+ * call-seq: to_h
+ *
+ * Returns the configuration instance variables as a hash, that can be
+ * passed to the configure method.
+ */
+static VALUE cState_to_h(VALUE self)
+{
+    VALUE config = rb_hash_new();
+    VALUE circular, nan_allowed;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+
+    /* Formatting strings are exported as stored. */
+    rb_hash_aset(config, ID2SYM(i_indent), state->indent);
+    rb_hash_aset(config, ID2SYM(i_space), state->space);
+    rb_hash_aset(config, ID2SYM(i_space_before), state->space_before);
+    rb_hash_aset(config, ID2SYM(i_object_nl), state->object_nl);
+    rb_hash_aset(config, ID2SYM(i_array_nl), state->array_nl);
+
+    /* C flags become Ruby booleans, the nesting limit a Fixnum. */
+    circular = state->check_circular ? Qtrue : Qfalse;
+    rb_hash_aset(config, ID2SYM(i_check_circular), circular);
+    nan_allowed = state->allow_nan ? Qtrue : Qfalse;
+    rb_hash_aset(config, ID2SYM(i_allow_nan), nan_allowed);
+    rb_hash_aset(config, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
+    return config;
+}
+
+
+/*
+ * call-seq: new(opts = {})
+ *
+ * Instantiates a new State object, configured by _opts_.
+ *
+ * _opts_ can have the following keys:
+ *
+ * * *indent*: a string used to indent levels (default: ''),
+ * * *space*: a string that is put after a : pair delimiter (default: ''),
+ * * *space_before*: a string that is put before a : pair delimiter (default: ''),
+ * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
+ * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
+ * * *check_circular*: true (the default) if checking for circular data
+ * structures should be done, false otherwise.
+ * * *max_nesting*: the maximum depth of nesting allowed in the data
+ * structures from which JSON is generated (default: 19). A false or nil
+ * value, or 0, disables the depth check.
+ * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
+ * generated, otherwise an exception is thrown, if these values are
+ * encountered. This options defaults to false.
+ */
+static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
+{
+ VALUE opts;
+ GET_STATE(self);
+
+ rb_scan_args(argc, argv, "01", &opts);
+ state->indent = rb_str_new2(""); /* string defaults are set first so configure only overrides what opts provides */
+ state->space = rb_str_new2("");
+ state->space_before = rb_str_new2("");
+ state->array_nl = rb_str_new2("");
+ state->object_nl = rb_str_new2("");
+ if (NIL_P(opts)) { /* no opts: same defaults cState_configure applies for absent keys */
+ state->check_circular = 1;
+ state->allow_nan = 0;
+ state->max_nesting = 19;
+ } else {
+ cState_configure(self, opts);
+ }
+ state->seen = rb_hash_new(); /* per-State table used for circular-structure detection */
+ state->memo = Qnil;
+ state->depth = INT2FIX(0);
+ return self;
+}
+
+/*
+ * call-seq: from_state(opts)
+ *
+ * Creates a State object from _opts_, which ought to be Hash to create a
+ * new State instance configured by _opts_, something else to create an
+ * unconfigured instance. If _opts_ is a State object, it is just returned.
+ */
+static VALUE cState_from_state_s(VALUE self, VALUE opts)
+{
+    /* Already a State (or subclass instance): hand it back untouched. */
+    if (rb_obj_is_kind_of(opts, self)) {
+        return opts;
+    }
+    /* A Hash configures a new instance. */
+    if (rb_obj_is_kind_of(opts, rb_cHash)) {
+        return rb_funcall(self, i_new, 1, opts);
+    }
+    /* Anything else yields an instance with default settings. */
+    return rb_funcall(self, i_new, 0);
+}
+
+/*
+ * call-seq: indent()
+ *
+ * This string is used to indent levels in the JSON text.
+ */
+static VALUE cState_indent(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return state->indent;
+}
+
+/*
+ * call-seq: indent=(indent)
+ *
+ * This string is used to indent levels in the JSON text.
+ */
+static VALUE cState_indent_set(VALUE self, VALUE indent)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(indent, T_STRING);
+    state->indent = indent;
+    return indent;
+}
+
+/*
+ * call-seq: space()
+ *
+ * This string is used to insert a space between the tokens in a JSON
+ * string.
+ */
+static VALUE cState_space(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return state->space;
+}
+
+/*
+ * call-seq: space=(space)
+ *
+ * This string is used to insert a space between the tokens in a JSON
+ * string.
+ */
+static VALUE cState_space_set(VALUE self, VALUE space)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(space, T_STRING);
+    state->space = space;
+    return space;
+}
+
+/*
+ * call-seq: space_before()
+ *
+ * This string is used to insert a space before the ':' in JSON objects.
+ */
+static VALUE cState_space_before(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return state->space_before;
+}
+
+/*
+ * call-seq: space_before=(space_before)
+ *
+ * This string is used to insert a space before the ':' in JSON objects.
+ */
+static VALUE cState_space_before_set(VALUE self, VALUE space_before)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(space_before, T_STRING);
+    state->space_before = space_before;
+    return space_before;
+}
+
+/*
+ * call-seq: object_nl()
+ *
+ * This string is put at the end of a line that holds a JSON object (or
+ * Hash).
+ */
+static VALUE cState_object_nl(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return state->object_nl;
+}
+
+/*
+ * call-seq: object_nl=(object_nl)
+ *
+ * This string is put at the end of a line that holds a JSON object (or
+ * Hash).
+ */
+static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(object_nl, T_STRING);
+    state->object_nl = object_nl;
+    return object_nl;
+}
+
+/*
+ * call-seq: array_nl()
+ *
+ * This string is put at the end of a line that holds a JSON array.
+ */
+static VALUE cState_array_nl(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return state->array_nl;
+}
+
+/*
+ * call-seq: array_nl=(array_nl)
+ *
+ * This string is put at the end of a line that holds a JSON array.
+ */
+static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(array_nl, T_STRING);
+    state->array_nl = array_nl;
+    return array_nl;
+}
+
+/*
+ * call-seq: check_circular?
+ *
+ * Returns true, if circular data structures should be checked,
+ * otherwise returns false.
+ */
+static VALUE cState_check_circular_p(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    if (state->check_circular) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * call-seq: max_nesting
+ *
+ * This integer returns the maximum level of data structure nesting in
+ * the generated JSON, max_nesting = 0 if no maximum is checked.
+ */
+static VALUE cState_max_nesting(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    return LONG2FIX(state->max_nesting);
+}
+
+/*
+ * call-seq: max_nesting=(depth)
+ *
+ * This sets the maximum level of data structure nesting in the generated JSON
+ * to the integer depth, max_nesting = 0 if no maximum should be checked.
+ */
+static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
+{
+    long limit;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    Check_Type(depth, T_FIXNUM);
+    limit = FIX2LONG(depth);
+    state->max_nesting = limit;
+    /* Unlike the string setters, this one returns nil. */
+    return Qnil;
+}
+
+/*
+ * call-seq: allow_nan?
+ *
+ * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise
+ * returns false.
+ */
+static VALUE cState_allow_nan_p(VALUE self)
+{
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    if (state->allow_nan) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * call-seq: seen?(object)
+ *
+ * Returns _true_, if _object_ was already seen during this generating run.
+ */
+static VALUE cState_seen_p(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    /* The seen table is keyed by object id; the stored value is returned. */
+    object_id = rb_obj_id(object);
+    return rb_hash_aref(state->seen, object_id);
+}
+
+/*
+ * call-seq: remember(object)
+ *
+ * Remember _object_, to find out if it was already encountered (if a cyclic
+ * data structure is rendered).
+ */
+static VALUE cState_remember(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    /* Record the object's id in the seen table. */
+    object_id = rb_obj_id(object);
+    return rb_hash_aset(state->seen, object_id, Qtrue);
+}
+
+/*
+ * call-seq: forget(object)
+ *
+ * Forget _object_ for this generating run.
+ */
+static VALUE cState_forget(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    JSON_Generator_State *state;
+
+    Data_Get_Struct(self, JSON_Generator_State, state);
+    /* Drop the object's id from the seen table. */
+    object_id = rb_obj_id(object);
+    return rb_hash_delete(state->seen, object_id);
+}
+
+/*
+ * Extension entry point. Requires json/common, defines
+ * JSON::Ext::Generator with its State class and the GeneratorMethods
+ * mixin modules (one per core class, each providing to_json), and interns
+ * the method and option names used throughout this file.
+ */
+void Init_generator()
+{
+ rb_require("json/common"); /* presumably defines the JSON error classes resolved by path below */
+ mJSON = rb_define_module("JSON");
+ mExt = rb_define_module_under(mJSON, "Ext");
+ mGenerator = rb_define_module_under(mExt, "Generator");
+ eGeneratorError = rb_path2class("JSON::GeneratorError");
+ eCircularDatastructure = rb_path2class("JSON::CircularDatastructure");
+ eNestingError = rb_path2class("JSON::NestingError");
+ cState = rb_define_class_under(mGenerator, "State", rb_cObject);
+ rb_define_alloc_func(cState, cState_s_allocate);
+ rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
+ rb_define_method(cState, "initialize", cState_initialize, -1);
+
+ rb_define_method(cState, "indent", cState_indent, 0);
+ rb_define_method(cState, "indent=", cState_indent_set, 1);
+ rb_define_method(cState, "space", cState_space, 0);
+ rb_define_method(cState, "space=", cState_space_set, 1);
+ rb_define_method(cState, "space_before", cState_space_before, 0);
+ rb_define_method(cState, "space_before=", cState_space_before_set, 1);
+ rb_define_method(cState, "object_nl", cState_object_nl, 0);
+ rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
+ rb_define_method(cState, "array_nl", cState_array_nl, 0);
+ rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
+ rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
+ rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
+ rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
+ rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
+ rb_define_method(cState, "seen?", cState_seen_p, 1);
+ rb_define_method(cState, "remember", cState_remember, 1);
+ rb_define_method(cState, "forget", cState_forget, 1);
+ rb_define_method(cState, "configure", cState_configure, 1);
+ rb_define_method(cState, "to_h", cState_to_h, 0);
+
+ mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); /* namespace for the per-class to_json mixins */
+ mObject = rb_define_module_under(mGeneratorMethods, "Object");
+ rb_define_method(mObject, "to_json", mObject_to_json, -1);
+ mHash = rb_define_module_under(mGeneratorMethods, "Hash");
+ rb_define_method(mHash, "to_json", mHash_to_json, -1);
+ mArray = rb_define_module_under(mGeneratorMethods, "Array");
+ rb_define_method(mArray, "to_json", mArray_to_json, -1);
+ mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
+ rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
+ mFloat = rb_define_module_under(mGeneratorMethods, "Float");
+ rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
+ mString = rb_define_module_under(mGeneratorMethods, "String");
+ rb_define_singleton_method(mString, "included", mString_included_s, 1); /* hook that extends the including module with String::Extend */
+ rb_define_method(mString, "to_json", mString_to_json, -1);
+ rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
+ rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
+ mString_Extend = rb_define_module_under(mString, "Extend");
+ rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
+ mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
+ rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
+ mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
+ rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
+ mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
+ rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
+
+ i_to_s = rb_intern("to_s"); /* IDs are cached once here and reused by the generator functions */
+ i_to_json = rb_intern("to_json");
+ i_new = rb_intern("new");
+ i_indent = rb_intern("indent");
+ i_space = rb_intern("space");
+ i_space_before = rb_intern("space_before");
+ i_object_nl = rb_intern("object_nl");
+ i_array_nl = rb_intern("array_nl");
+ i_check_circular = rb_intern("check_circular");
+ i_max_nesting = rb_intern("max_nesting");
+ i_allow_nan = rb_intern("allow_nan");
+ i_pack = rb_intern("pack");
+ i_unpack = rb_intern("unpack");
+ i_create_id = rb_intern("create_id");
+ i_extend = rb_intern("extend");
+}
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
new file mode 100644
index 0000000000..947e2d6bda
--- /dev/null
+++ b/ext/json/ext/generator/unicode.c
@@ -0,0 +1,180 @@
+#include "unicode.h"
+
+#define unicode_escape(buffer, character) \
+ snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \
+ rb_str_buf_cat(buffer, buf, 6);
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes (the
+ * entries for lead bytes 0xF8-0xFF). The table is left as-is for anyone who
+ * may want to do such conversion, which was allowed in earlier algorithms.
+ * Bytes 0x80-0xBF map to 0 here; they are rejected as lead bytes by
+ * isLegalUTF8() rather than by this table.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence, and is indexed by the number of trailing bytes.
+ *
+ * The decoder adds the raw bytes together, shifting left by 6 between
+ * bytes, without masking off the marker bits. Each entry is the sum of those
+ * marker bits in their shifted positions, e.g. for one trailing byte
+ * (0xC0 << 6) + 0x80 == 0x3080, so a single subtraction yields the code point.
+ * The last two entries belong to the 5- and 6-byte forms, which
+ * isLegalUTF8() rejects.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types
+ * (i.e., one-byte sequence, two-byte sequence, etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ *
+ * NOTE(review): nothing in the visible part of this file reads this table
+ * (only UTF-8 decoding is performed here) -- confirm whether it is still
+ * needed.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * Returns 1 when the `length` bytes starting at `source` are legal and 0
+ * otherwise.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length outside 1..4, this returns 0. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
+{
+ UTF8 a;
+ /* srcptr starts one past the last byte and is walked backwards. */
+ const UTF8 *srcptr = source+length;
+ switch (length) {
+ default: return 0;
+ /* Everything else falls through when the byte just checked is acceptable. */
+ /* Bytes 4 and 3 must be continuation bytes (0x80..0xBF). */
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ /* After this line `a` is the second byte; only its upper bound is checked
+  * here, the lower bound depends on the lead byte and is checked below. */
+ case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
+
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ /* 0xE0: second byte below 0xA0 would be an overlong 3-byte form */
+ case 0xE0: if (a < 0xA0) return 0; break;
+ /* 0xED: second byte above 0x9F would encode a surrogate (U+D800..U+DFFF) */
+ case 0xED: if (a > 0x9F) return 0; break;
+ /* 0xF0: second byte below 0x90 would be an overlong 4-byte form */
+ case 0xF0: if (a < 0x90) return 0; break;
+ /* 0xF4: second byte above 0x8F would exceed U+10FFFF */
+ case 0xF4: if (a > 0x8F) return 0; break;
+ default: if (a < 0x80) return 0;
+ }
+
+ /* Lead bytes 0x80..0xC1 are never valid: continuation bytes or overlong
+  * two-byte leads. */
+ case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
+ }
+ /* Lead bytes above 0xF4 would encode values beyond U+10FFFF. */
+ if (*source > 0xF4) return 0;
+ return 1;
+}
+
+/*
+ * Append the bytes of `string`, interpreted as UTF-8, to the Ruby string
+ * `buffer` in JSON-escaped form. The surrounding double quotes are not
+ * emitted here.
+ *
+ * Printable ASCII (0x20..0x7f) is copied through, except that the double
+ * quote and the backslash get a backslash prefix. Newline, carriage return,
+ * tab, form feed and backspace use their short escapes. Every other code
+ * point up to 0xFFFF becomes one four-hex-digit unicode escape, and code
+ * points above 0xFFFF become a UTF-16 surrogate pair of two such escapes.
+ *
+ * Raises JSON::GeneratorError when the input ends in the middle of a
+ * character or contains an illegal byte sequence. For a decoded surrogate
+ * value or a value above UNI_MAX_UTF16, `flags == strictConversion` raises
+ * the same error; any other flag value emits the escape for
+ * UNI_REPLACEMENT_CHAR instead.
+ */
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
+{
+    char buf[7]; /* scratch space written by unicode_escape() */
+    const UTF8 *cursor = (UTF8 *) RSTRING_PTR(string);
+    const UTF8 *const limit = cursor + RSTRING_LEN(string);
+
+    while (cursor < limit) {
+        const unsigned short trailing = trailingBytesForUTF8[*cursor];
+        unsigned short pending;
+        UTF32 ch = 0;
+
+        if (cursor + trailing >= limit) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "partial character in source, but hit end");
+        }
+        if (!isLegalUTF8(cursor, trailing + 1)) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed");
+        }
+
+        /*
+         * Sum the raw bytes, shifting by six bits before each following
+         * byte; the marker bits are removed by the subtraction afterwards.
+         */
+        for (pending = trailing; pending > 0; pending--) {
+            ch += *cursor++;
+            ch <<= 6;
+        }
+        ch += *cursor++;
+        ch -= offsetsFromUTF8[trailing];
+
+        if (ch > UNI_MAX_UTF16
+                || (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)) {
+            /* Out of range, or a UTF-16 surrogate value (illegal in UTF-32). */
+            if (flags == strictConversion) {
+                rb_raise(rb_path2class("JSON::GeneratorError"),
+                        "source sequence is illegal/malformed");
+            } else {
+                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+            }
+        } else if (ch > UNI_MAX_BMP) {
+            /* Above 0xFFFF: emit a high/low surrogate pair. */
+            ch -= halfBase;
+            unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
+            unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
+        } else {
+            switch (ch) {
+                case '"':
+                    rb_str_buf_cat2(buffer, "\\\"");
+                    break;
+                case '\\':
+                    rb_str_buf_cat2(buffer, "\\\\");
+                    break;
+                case '\n':
+                    rb_str_buf_cat2(buffer, "\\n");
+                    break;
+                case '\r':
+                    rb_str_buf_cat2(buffer, "\\r");
+                    break;
+                case '\t':
+                    rb_str_buf_cat2(buffer, "\\t");
+                    break;
+                case '\f':
+                    rb_str_buf_cat2(buffer, "\\f");
+                    break;
+                case '\b':
+                    rb_str_buf_cat2(buffer, "\\b");
+                    break;
+                default:
+                    if (ch >= 0x20 && ch <= 0x7f) {
+                        /* Copy the single source byte just consumed. */
+                        rb_str_buf_cat(buffer, (char *) cursor - 1, 1);
+                    } else {
+                        unicode_escape(buffer, (UTF16) ch);
+                    }
+                    break;
+            }
+        }
+    }
+}
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
new file mode 100644
index 0000000000..841474bcea
--- /dev/null
+++ b/ext/json/ext/generator/unicode.h
@@ -0,0 +1,53 @@
+#include "ruby.h"
+
+#ifndef _GENERATOR_UNICODE_H_
+#define _GENERATOR_UNICODE_H_
+
+/*
+ * Status codes describing the outcome of a conversion.
+ * NOTE(review): JSON_convert_UTF8_to_JSON() declared in this header returns
+ * void and reports failures by raising, so it does not return these values --
+ * confirm whether any other code still uses this type.
+ */
+typedef enum {
+ conversionOK = 0, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted, /* insuff. room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+} ConversionResult;
+
+/*
+ * Selects how JSON_convert_UTF8_to_JSON() treats a decoded surrogate value or
+ * a value above UNI_MAX_UTF16.
+ */
+typedef enum {
+ strictConversion = 0, /* raise JSON::GeneratorError */
+ lenientConversion /* emit the escape for UNI_REPLACEMENT_CHAR instead */
+} ConversionFlags;
+
+/*
+ * Integer types for code units. These are minimum widths only; in particular
+ * unsigned long may be wider than 32 bits on some platforms, so code must not
+ * rely on UTF32 arithmetic wrapping at 32 bits.
+ */
+typedef unsigned long UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+
+/*
+ * Code point limits and the replacement character, all typed as UTF32.
+ * Each expansion is fully parenthesized so that it stays a single operand
+ * whatever operator appears next to it at the use site.
+ */
+#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD)
+#define UNI_MAX_BMP ((UTF32)0x0000FFFF)
+#define UNI_MAX_UTF16 ((UTF32)0x0010FFFF)
+#define UNI_MAX_UTF32 ((UTF32)0x7FFFFFFF)
+#define UNI_MAX_LEGAL_UTF32 ((UTF32)0x0010FFFF)
+
+/* Boundaries of the UTF-16 high and low surrogate ranges. */
+#define UNI_SUR_HIGH_START ((UTF32)0xD800)
+#define UNI_SUR_HIGH_END ((UTF32)0xDBFF)
+#define UNI_SUR_LOW_START ((UTF32)0xDC00)
+#define UNI_SUR_LOW_END ((UTF32)0xDFFF)
+
+/*
+ * Constants used to split a code point above UNI_MAX_BMP into a UTF-16
+ * surrogate pair: subtract halfBase, then the bits above halfShift select the
+ * high surrogate and the bits under halfMask select the low surrogate.
+ * Being static, each translation unit that includes this header gets its own
+ * copy of these objects.
+ */
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL; /* 0x10000, first code point above the BMP */
+static const UTF32 halfMask = 0x3FFUL; /* low 10 bits */
+
+/*
+ * Appends the UTF-8 bytes of `string` to `buffer` in JSON-escaped form.
+ * Raises JSON::GeneratorError on truncated or malformed input; `flags`
+ * selects between raising and substituting UNI_REPLACEMENT_CHAR for
+ * surrogate or out-of-range values.
+ */
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
+
+/*
+ * Fallback accessor macros, defined only when the included Ruby headers do
+ * not already provide them. They read the struct fields directly.
+ */
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#endif