From 27be3056dc0e9c313f25b430ca90b240a5e44160 Mon Sep 17 00:00:00 2001
From: usa
Date: Mon, 26 Apr 2010 04:34:36 +0000
Subject: * ext/jason: revert r27493. came again after canceling gcc-ism.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27500 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 ext/json/ext/generator/extconf.rb  |   9 +
 ext/json/ext/generator/generator.c | 919 +++++++++++++++++++++++++++++++++++++
 ext/json/ext/generator/unicode.c   | 180 ++++++++
 ext/json/ext/generator/unicode.h   |  53 +++
 4 files changed, 1161 insertions(+)
 create mode 100644 ext/json/ext/generator/extconf.rb
 create mode 100644 ext/json/ext/generator/generator.c
 create mode 100644 ext/json/ext/generator/unicode.c
 create mode 100644 ext/json/ext/generator/unicode.h

(limited to 'ext/json/ext/generator')

diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb
new file mode 100644
index 0000000000..fc267420f1
--- /dev/null
+++ b/ext/json/ext/generator/extconf.rb
@@ -0,0 +1,9 @@
+require 'mkmf'
+require 'rbconfig'
+
+if CONFIG['GCC'] == 'yes'
+  $CFLAGS += ' -Wall'
+  #$CFLAGS += ' -O0 -ggdb'
+end
+
+create_makefile 'json/ext/generator'
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
new file mode 100644
index 0000000000..482938ac47
--- /dev/null
+++ b/ext/json/ext/generator/generator.c
@@ -0,0 +1,919 @@
+#include <string.h>
+#include "ruby.h"
+#if HAVE_RUBY_ST_H
+#include "ruby/st.h"
+#endif
+#if HAVE_ST_H
+#include "st.h"
+#endif
+#include "unicode.h"
+#include <math.h>
+
+/* Fallback accessors, defined only when the Ruby headers do not provide them. */
+#ifndef RHASH_TBL
+#define RHASH_TBL(hsh) (RHASH(hsh)->tbl)
+#endif
+
+#ifndef RHASH_SIZE
+#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
+#endif
+
+#ifndef RFLOAT_VALUE
+#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
+#endif
+
+/*
+ * FORCE_UTF8 tags a String as UTF-8 when the encoding API is available and
+ * expands to nothing otherwise.
+ */
+#ifdef HAVE_RUBY_ENCODING_H
+#include "ruby/encoding.h"
+#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
+#else
+#define FORCE_UTF8(obj)
+#endif
+
+/*
+ * Raises eNestingError when entering one more level than _depth_ would
+ * exceed state->max_nesting. A max_nesting of 0 disables the check.
+ * Use it as a statement: check_max_nesting(state, depth);
+ */
+#define check_max_nesting(state, depth) do {                                    \
+    long current_nesting = 1 + (depth);                                         \
+    if ((state)->max_nesting != 0 && current_nesting > (state)->max_nesting)    \
+        rb_raise(eNestingError, "nesting of %ld is too deep", current_nesting); \
+} while (0)
+
+static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
+             mHash, mArray, mInteger, mFloat, mString, mString_Extend,
+             mTrueClass, mFalseClass, mNilClass, eGeneratorError,
+             eCircularDatastructure, eNestingError;
+
+static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
+          i_object_nl, i_array_nl, i_check_circular, i_max_nesting,
+          i_allow_nan, i_pack, i_unpack, i_create_id, i_extend;
+
+/* Generator configuration plus the scratch data used while generating. */
+typedef struct JSON_Generator_StateStruct {
+    VALUE indent;        /* String repeated once per nesting level */
+    VALUE space;         /* String written after the ':' of a pair */
+    VALUE space_before;  /* String written before the ':' of a pair */
+    VALUE object_nl;     /* String written after '{', after ',' and before '}' */
+    VALUE array_nl;      /* String written after '[', after ',' and before ']' */
+    int check_circular;  /* non-zero: track containers in `seen` */
+    VALUE seen;          /* Hash of object ids of containers being generated */
+    VALUE memo;          /* output buffer of the Hash currently iterated */
+    VALUE depth;         /* Fixnum depth of the Hash currently iterated */
+    long max_nesting;    /* maximum nesting depth, 0 means unlimited */
+    int flag;            /* non-zero once a pair was written to `memo` */
+    int allow_nan;       /* non-zero: NaN and +/-Infinity are generated */
+} JSON_Generator_State;
+
+/* Declares a local `state` and loads it from the wrapped State object. */
+#define GET_STATE(self)                       \
+    JSON_Generator_State *state;              \
+    Data_Get_Struct(self, JSON_Generator_State, state);
+
+/*
+ * Document-module: JSON::Ext::Generator
+ *
+ * This is the JSON generator implemented as a C extension. It can be
+ * configured to be used by setting
+ *
+ *  JSON.generator = JSON::Ext::Generator
+ *
+ * with the method generator= in JSON.
+ *
+ */
+
+/*
+ * rb_hash_foreach callback used by mHash_json_transfrom. Appends one
+ * "key:value" pair to the buffer stored in state->memo, preceded by a comma
+ * when state->flag says that a pair was already written.
+ */
+static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate)
+{
+    VALUE json, buf, Vdepth;
+    GET_STATE(Vstate);
+    buf = state->memo;
+    Vdepth = state->depth;
+
+    if (key == Qundef) return ST_CONTINUE;
+    if (state->flag) {
+        state->flag = 0;
+        rb_str_buf_cat2(buf, ",");
+        if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(buf, state->object_nl);
+    }
+    if (RSTRING_LEN(state->object_nl)) {
+        rb_str_buf_append(buf, rb_str_times(state->indent, Vdepth));
+    }
+    json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, Vdepth);
+    Check_Type(json, T_STRING);
+    rb_str_buf_append(buf, json);
+    OBJ_INFECT(buf, json);
+    if (RSTRING_LEN(state->space_before)) {
+        rb_str_buf_append(buf, state->space_before);
+    }
+    rb_str_buf_cat2(buf, ":");
+    if (RSTRING_LEN(state->space)) rb_str_buf_append(buf, state->space);
+    json = rb_funcall(value, i_to_json, 2, Vstate, Vdepth);
+    Check_Type(json, T_STRING);
+    /*
+     * A nested Hash overwrites flag, depth and memo in the shared state, so
+     * they are set again here, after the value has been generated.
+     */
+    state->flag = 1;
+    rb_str_buf_append(buf, json);
+    OBJ_INFECT(buf, json);
+    state->depth = Vdepth;
+    state->memo = buf;
+    return ST_CONTINUE;
+}
+
+/*
+ * Generates the JSON object for the Hash _self_ using the settings in
+ * _Vstate_. _Vdepth_ is the Fixnum depth of the Hash itself; its pairs are
+ * indented one level deeper.
+ */
+inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
+    long depth, len = RHASH_SIZE(self);
+    VALUE result;
+    GET_STATE(Vstate);
+
+    depth = 1 + FIX2LONG(Vdepth);
+    result = rb_str_buf_new(len);
+    state->memo = result;
+    state->depth = LONG2FIX(depth);
+    state->flag = 0;
+    rb_str_buf_cat2(result, "{");
+    if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl);
+    rb_hash_foreach(self, hash_to_json_state_i, Vstate);
+    if (RSTRING_LEN(state->object_nl)) rb_str_buf_append(result, state->object_nl);
+    if (RSTRING_LEN(state->object_nl)) {
+        rb_str_buf_append(result, rb_str_times(state->indent, Vdepth));
+    }
+    rb_str_buf_cat2(result, "}");
+    return result;
+}
+
+/*
+ * rb_hash_foreach callback for the stateless case: appends "key:value" to
+ * _buf_. The buffer already holds "{", so a length above 1 means that a
+ * pair was written before and a comma is needed.
+ */
+static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
+{
+    VALUE tmp;
+
+    if (key == Qundef) return ST_CONTINUE;
+    if (RSTRING_LEN(buf) > 1) rb_str_buf_cat2(buf, ",");
+    tmp = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0);
+    Check_Type(tmp, T_STRING);
+    rb_str_buf_append(buf, tmp);
+    OBJ_INFECT(buf, tmp);
+    rb_str_buf_cat2(buf, ":");
+    tmp = rb_funcall(value, i_to_json, 0);
+    Check_Type(tmp, T_STRING);
+    rb_str_buf_append(buf, tmp);
+    OBJ_INFECT(buf, tmp);
+
+    return ST_CONTINUE;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON object, that is unparsed from
+ * this Hash instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ */
+static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE Vstate, Vdepth, result;
+    long depth;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+    if (NIL_P(Vstate)) {
+        long len = RHASH_SIZE(self);
+        result = rb_str_buf_new(len);
+        rb_str_buf_cat2(result, "{");
+        rb_hash_foreach(self, hash_to_json_i, result);
+        rb_str_buf_cat2(result, "}");
+    } else {
+        GET_STATE(Vstate);
+        check_max_nesting(state, depth);
+        if (state->check_circular) {
+            VALUE self_id = rb_obj_id(self);
+            if (RTEST(rb_hash_aref(state->seen, self_id))) {
+                rb_raise(eCircularDatastructure,
+                        "circular data structures not supported!");
+            }
+            /* Remember this Hash only while its own pairs are generated. */
+            rb_hash_aset(state->seen, self_id, Qtrue);
+            result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth));
+            rb_hash_delete(state->seen, self_id);
+        } else {
+            result = mHash_json_transfrom(self, Vstate, LONG2FIX(depth));
+        }
+    }
+    OBJ_INFECT(result, self);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * Generates the JSON array for the Array _self_ using the settings in
+ * _Vstate_. _Vdepth_ is the Fixnum depth of the Array itself (nil counts
+ * as 0); elements are generated at depth + 1.
+ *
+ * Raises eNestingError when the nesting limit is exceeded and, with
+ * check_circular enabled, eCircularDatastructure when an element is a
+ * container that is currently being generated.
+ */
+inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth) {
+    long i, len = RARRAY_LEN(self);
+    VALUE shift, result, self_id = Qnil;
+    long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+    VALUE delim = rb_str_new2(",");
+    int check_circular;
+    GET_STATE(Vstate);
+
+    check_max_nesting(state, depth);
+    /* Read once so that the whole array is generated in a single mode. */
+    check_circular = state->check_circular;
+    if (check_circular) {
+        self_id = rb_obj_id(self);
+        rb_hash_aset(state->seen, self_id, Qtrue);
+    }
+    result = rb_str_buf_new(len);
+    OBJ_INFECT(result, self);
+    if (RSTRING_LEN(state->array_nl)) rb_str_append(delim, state->array_nl);
+    shift = rb_str_times(state->indent, LONG2FIX(depth + 1));
+
+    rb_str_buf_cat2(result, "[");
+    rb_str_buf_append(result, state->array_nl);
+    /*
+     * to_json of an element is arbitrary Ruby code and may shrink the
+     * array, so the length is read again on every iteration.
+     */
+    for (i = 0; i < RARRAY_LEN(self); i++) {
+        VALUE element = RARRAY_PTR(self)[i];
+        if (check_circular &&
+                RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) {
+            rb_raise(eCircularDatastructure,
+                    "circular data structures not supported!");
+        }
+        OBJ_INFECT(result, element);
+        if (i > 0) rb_str_buf_append(result, delim);
+        rb_str_buf_append(result, shift);
+        element = rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1));
+        Check_Type(element, T_STRING);
+        rb_str_buf_append(result, element);
+    }
+    if (RSTRING_LEN(state->array_nl)) {
+        rb_str_buf_append(result, state->array_nl);
+        rb_str_buf_append(result, rb_str_times(state->indent, LONG2FIX(depth)));
+    }
+    rb_str_buf_cat2(result, "]");
+    if (check_circular) rb_hash_delete(state->seen, self_id);
+    return result;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON array, that is unparsed from
+ * this Array instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ */
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
+    VALUE Vstate, Vdepth, result;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    if (NIL_P(Vstate)) {
+        long i, len = RARRAY_LEN(self);
+        result = rb_str_buf_new(2 + 2 * len);
+        rb_str_buf_cat2(result, "[");
+        OBJ_INFECT(result, self);
+        /* The length is read again each time: to_json may shrink the array. */
+        for (i = 0; i < RARRAY_LEN(self); i++) {
+            VALUE element = RARRAY_PTR(self)[i];
+            OBJ_INFECT(result, element);
+            if (i > 0) rb_str_buf_cat2(result, ",");
+            element = rb_funcall(element, i_to_json, 0);
+            Check_Type(element, T_STRING);
+            rb_str_buf_append(result, element);
+        }
+        rb_str_buf_cat2(result, "]");
+    } else {
+        result = mArray_json_transfrom(self, Vstate, Vdepth);
+    }
+    OBJ_INFECT(result, self);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Integer number.
+ */
+static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result = rb_funcall(self, i_to_s, 0);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Float number.
+ */
+static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
+{
+    JSON_Generator_State *state = NULL;
+    VALUE Vstate, rest, result;
+    double value = RFLOAT_VALUE(self);
+
+    /* Only the first optional argument (the state) is looked at. */
+    rb_scan_args(argc, argv, "01*", &Vstate, &rest);
+    if (!NIL_P(Vstate)) Data_Get_Struct(Vstate, JSON_Generator_State, state);
+    result = rb_funcall(self, i_to_s, 0);
+    /*
+     * Infinity and NaN are rejected only when a state was given and that
+     * state does not allow them; without a state they are passed through.
+     */
+    if ((isinf(value) || isnan(value)) && state && !state->allow_nan) {
+        rb_raise(eGeneratorError, "%d: %s not allowed in JSON", __LINE__, StringValueCStr(result));
+    }
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: String.included(modul)
+ *
+ * Extends _modul_ with the String::Extend module.
+ */
+static VALUE mString_included_s(VALUE self, VALUE modul) {
+    /* The result of #extend is not a String, so no encoding is set on it. */
+    return rb_funcall(modul, i_extend, 1, mString_Extend);
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * This string should be encoded with UTF-8. A call to this method
+ * returns a JSON string in which every character outside of printable
+ * ASCII is written as a \u???? escape (UTF-16 code units).
+ */
+static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result = rb_str_buf_new(RSTRING_LEN(self));
+    rb_str_buf_cat2(result, "\"");
+    JSON_convert_UTF8_to_JSON(result, self, strictConversion);
+    rb_str_buf_cat2(result, "\"");
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json_raw_object()
+ *
+ * This method creates a raw object hash, that can be nested into
+ * other data structures and will be unparsed as a raw string. This
+ * method should be used, if you want to convert raw strings to JSON
+ * instead of UTF-8 strings, e. g. binary data.
+ */
+static VALUE mString_to_json_raw_object(VALUE self) {
+    VALUE ary;
+    VALUE result = rb_hash_new();
+    rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
+    /* unpack("C*") turns the string into an Array of its byte values. */
+    ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
+    rb_hash_aset(result, rb_str_new2("raw"), ary);
+    /* The result is a Hash, so no string encoding is set on it. */
+    return result;
+}
+
+/*
+ * call-seq: to_json_raw(*args)
+ *
+ * This method creates a JSON text from the result of a call to
+ * to_json_raw_object of this String.
+ */
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) {
+    VALUE result, obj = mString_to_json_raw_object(self);
+    Check_Type(obj, T_HASH);
+    result = mHash_to_json(argc, argv, obj);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: json_create(o)
+ *
+ * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
+ * key "raw"). The Ruby String can be created by this module method.
+ */
+static VALUE mString_Extend_json_create(VALUE self, VALUE o) {
+    VALUE ary;
+    Check_Type(o, T_HASH);
+    ary = rb_hash_aref(o, rb_str_new2("raw"));
+    return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for true: 'true'.
+ */
+static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result = rb_str_new2("true");
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for false: 'false'.
+ */
+static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result = rb_str_new2("false");
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string for nil: 'null'.
+ */
+static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result = rb_str_new2("null");
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Converts this object to a string (calling #to_s), converts
+ * it to a JSON string, and returns the result.
+ * This is a fallback, if no
+ * special method #to_json was defined for some object.
+ */
+static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result, string = rb_funcall(self, i_to_s, 0);
+    Check_Type(string, T_STRING);
+    result = mString_to_json(argc, argv, string);
+    FORCE_UTF8(result);
+    return result;
+}
+
+/*
+ * Document-class: JSON::Ext::Generator::State
+ *
+ * This class is used to create State instances, that are used to hold data
+ * while generating a JSON text from a Ruby data structure.
+ */
+
+/* GC mark function: marks every VALUE stored in the state struct. */
+static void State_mark(JSON_Generator_State *state)
+{
+    rb_gc_mark_maybe(state->indent);
+    rb_gc_mark_maybe(state->space);
+    rb_gc_mark_maybe(state->space_before);
+    rb_gc_mark_maybe(state->object_nl);
+    rb_gc_mark_maybe(state->array_nl);
+    rb_gc_mark_maybe(state->seen);
+    rb_gc_mark_maybe(state->memo);
+    rb_gc_mark_maybe(state->depth);
+}
+
+/*
+ * Allocates a state struct with every field zeroed, so that no field holds
+ * an indeterminate value before #initialize has filled it in.
+ */
+static JSON_Generator_State *State_allocate(void)
+{
+    JSON_Generator_State *state = ALLOC(JSON_Generator_State);
+    MEMZERO(state, JSON_Generator_State, 1);
+    return state;
+}
+
+/* Allocation function of State: wraps a fresh state struct in _klass_. */
+static VALUE cState_s_allocate(VALUE klass)
+{
+    JSON_Generator_State *state = State_allocate();
+    /* -1 as the free function selects Ruby's default deallocator. */
+    return Data_Wrap_Struct(klass, State_mark, -1, state);
+}
+
+/*
+ * call-seq: configure(opts)
+ *
+ * Configure this State instance with the Hash _opts_, and return
+ * itself.
+ */
+static VALUE cState_configure(VALUE self, VALUE opts)
+{
+    VALUE tmp;
+    GET_STATE(self);
+    /*
+     * rb_check_convert_type returns nil when _opts_ has no to_hash, which
+     * lets the to_h fallback run. rb_convert_type raises a TypeError by
+     * itself when that conversion is impossible as well.
+     */
+    tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
+    if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
+    if (NIL_P(tmp)) {
+        rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash");
+    }
+    opts = tmp;
+    /* The string options are only replaced by true (non nil/false) values. */
+    tmp = rb_hash_aref(opts, ID2SYM(i_indent));
+    if (RTEST(tmp)) {
+        Check_Type(tmp, T_STRING);
+        state->indent = tmp;
+    }
+    tmp = rb_hash_aref(opts, ID2SYM(i_space));
+    if (RTEST(tmp)) {
+        Check_Type(tmp, T_STRING);
+        state->space = tmp;
+    }
+    tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
+    if (RTEST(tmp)) {
+        Check_Type(tmp, T_STRING);
+        state->space_before = tmp;
+    }
+    tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
+    if (RTEST(tmp)) {
+        Check_Type(tmp, T_STRING);
+        state->array_nl = tmp;
+    }
+    tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
+    if (RTEST(tmp)) {
+        Check_Type(tmp, T_STRING);
+        state->object_nl = tmp;
+    }
+    /*
+     * check_circular distinguishes "key absent" (enabled) from an explicit
+     * false or nil (disabled), hence the lookup in the hash table.
+     */
+    tmp = ID2SYM(i_check_circular);
+    if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
+        tmp = rb_hash_aref(opts, ID2SYM(i_check_circular));
+        state->check_circular = RTEST(tmp);
+    } else {
+        state->check_circular = 1;
+    }
+    /* max_nesting: 19 when the key is absent, 0 (no limit) for false or nil. */
+    tmp = ID2SYM(i_max_nesting);
+    state->max_nesting = 19;
+    if (st_lookup(RHASH_TBL(opts), tmp, 0)) {
+        VALUE max_nesting = rb_hash_aref(opts, tmp);
+        if (RTEST(max_nesting)) {
+            Check_Type(max_nesting, T_FIXNUM);
+            state->max_nesting = FIX2LONG(max_nesting);
+        } else {
+            state->max_nesting = 0;
+        }
+    }
+    tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
+    state->allow_nan = RTEST(tmp);
+    return self;
+}
+
+/*
+ * call-seq: to_h
+ *
+ * Returns the configuration instance variables as a hash, that can be
+ * passed to the configure method.
+ */
+static VALUE cState_to_h(VALUE self)
+{
+    VALUE result = rb_hash_new();
+    GET_STATE(self);
+    /* The keys are the same symbols that cState_configure reads. */
+    rb_hash_aset(result, ID2SYM(i_indent), state->indent);
+    rb_hash_aset(result, ID2SYM(i_space), state->space);
+    rb_hash_aset(result, ID2SYM(i_space_before), state->space_before);
+    rb_hash_aset(result, ID2SYM(i_object_nl), state->object_nl);
+    rb_hash_aset(result, ID2SYM(i_array_nl), state->array_nl);
+    rb_hash_aset(result, ID2SYM(i_check_circular), state->check_circular ? Qtrue : Qfalse);
+    rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
+    rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
+    return result;
+}
+
+
+/*
+ * call-seq: new(opts = {})
+ *
+ * Instantiates a new State object, configured by _opts_.
+ *
+ * _opts_ can have the following keys:
+ *
+ * * *indent*: a string used to indent levels (default: ''),
+ * * *space*: a string that is put after a : pair delimiter (default: ''),
+ * * *space_before*: a string that is put before a : pair delimiter (default: ''),
+ * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
+ * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
+ * * *check_circular*: true (the default) if checking for circular data
+ *   structures should be done, false otherwise.
+ * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
+ *   generated, otherwise an exception is thrown, if these values are
+ *   encountered. This option defaults to false.
+ * * *max_nesting*: the maximum depth of nesting allowed in the generated
+ *   data structures (default: 19). A false or nil value, or 0, disables
+ *   the check.
+ */
+static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
+{
+    VALUE opts;
+    GET_STATE(self);
+
+    rb_scan_args(argc, argv, "01", &opts);
+
+    /* Scratch data of a generating run. */
+    state->seen = rb_hash_new();
+    state->memo = Qnil;
+    state->depth = INT2FIX(0);
+
+    /* Every formatting string starts out empty. */
+    state->indent = rb_str_new2("");
+    state->space = rb_str_new2("");
+    state->space_before = rb_str_new2("");
+    state->array_nl = rb_str_new2("");
+    state->object_nl = rb_str_new2("");
+
+    if (!NIL_P(opts)) {
+        cState_configure(self, opts);
+        return self;
+    }
+    /* Defaults used when no options were given. */
+    state->check_circular = 1;
+    state->allow_nan = 0;
+    state->max_nesting = 19;
+    return self;
+}
+
+/*
+ * call-seq: from_state(opts)
+ *
+ * Creates a State object from _opts_, which ought to be Hash to create a
+ * new State instance configured by _opts_, something else to create an
+ * unconfigured instance. If _opts_ is a State object, it is just returned.
+ */
+static VALUE cState_from_state_s(VALUE self, VALUE opts)
+{
+    /* Already an instance of the receiving class: hand it back unchanged. */
+    if (rb_obj_is_kind_of(opts, self)) {
+        return opts;
+    }
+    if (rb_obj_is_kind_of(opts, rb_cHash)) {
+        return rb_funcall(self, i_new, 1, opts);
+    }
+    return rb_funcall(self, i_new, 0);
+}
+
+/*
+ * call-seq: indent()
+ *
+ * This string is used to indent levels in the JSON text.
+ */
+static VALUE cState_indent(VALUE self)
+{
+    GET_STATE(self);
+    return state->indent;
+}
+
+/*
+ * call-seq: indent=(indent)
+ *
+ * This string is used to indent levels in the JSON text.
+ */
+static VALUE cState_indent_set(VALUE self, VALUE indent)
+{
+    GET_STATE(self);
+    Check_Type(indent, T_STRING);
+    state->indent = indent;
+    return indent;
+}
+
+/*
+ * call-seq: space()
+ *
+ * This string is used to insert a space between the tokens in a JSON
+ * string.
+ */
+static VALUE cState_space(VALUE self)
+{
+    GET_STATE(self);
+    return state->space;
+}
+
+/*
+ * call-seq: space=(space)
+ *
+ * This string is used to insert a space between the tokens in a JSON
+ * string.
+ */
+static VALUE cState_space_set(VALUE self, VALUE space)
+{
+    GET_STATE(self);
+    Check_Type(space, T_STRING);
+    state->space = space;
+    return space;
+}
+
+/*
+ * call-seq: space_before()
+ *
+ * This string is used to insert a space before the ':' in JSON objects.
+ */
+static VALUE cState_space_before(VALUE self)
+{
+    GET_STATE(self);
+    return state->space_before;
+}
+
+/*
+ * call-seq: space_before=(space_before)
+ *
+ * This string is used to insert a space before the ':' in JSON objects.
+ */
+static VALUE cState_space_before_set(VALUE self, VALUE space_before)
+{
+    GET_STATE(self);
+    Check_Type(space_before, T_STRING);
+    state->space_before = space_before;
+    return space_before;
+}
+
+/*
+ * call-seq: object_nl()
+ *
+ * This string is put at the end of a line that holds a JSON object (or
+ * Hash).
+ */
+static VALUE cState_object_nl(VALUE self)
+{
+    GET_STATE(self);
+    return state->object_nl;
+}
+
+/*
+ * call-seq: object_nl=(object_nl)
+ *
+ * This string is put at the end of a line that holds a JSON object (or
+ * Hash).
+ */
+static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
+{
+    GET_STATE(self);
+    Check_Type(object_nl, T_STRING);
+    state->object_nl = object_nl;
+    return object_nl;
+}
+
+/*
+ * call-seq: array_nl()
+ *
+ * This string is put at the end of a line that holds a JSON array.
+ */
+static VALUE cState_array_nl(VALUE self)
+{
+    GET_STATE(self);
+    return state->array_nl;
+}
+
+/*
+ * call-seq: array_nl=(array_nl)
+ *
+ * This string is put at the end of a line that holds a JSON array.
+ */
+static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
+{
+    GET_STATE(self);
+    Check_Type(array_nl, T_STRING);
+    state->array_nl = array_nl;
+    return array_nl;
+}
+
+/*
+ * call-seq: check_circular?
+ *
+ * Returns true, if circular data structures should be checked,
+ * otherwise returns false.
+ */
+static VALUE cState_check_circular_p(VALUE self)
+{
+    GET_STATE(self);
+    if (state->check_circular) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * call-seq: max_nesting
+ *
+ * This integer returns the maximum level of data structure nesting in
+ * the generated JSON, max_nesting = 0 if no maximum is checked.
+ */
+static VALUE cState_max_nesting(VALUE self)
+{
+    GET_STATE(self);
+    return LONG2FIX(state->max_nesting);
+}
+
+/*
+ * call-seq: max_nesting=(depth)
+ *
+ * This sets the maximum level of data structure nesting in the generated JSON
+ * to the integer depth, max_nesting = 0 if no maximum should be checked.
+ */
+static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
+{
+    GET_STATE(self);
+    Check_Type(depth, T_FIXNUM);
+    state->max_nesting = FIX2LONG(depth);
+    return Qnil;
+}
+
+/*
+ * call-seq: allow_nan?
+ *
+ * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise
+ * returns false.
+ */
+static VALUE cState_allow_nan_p(VALUE self)
+{
+    GET_STATE(self);
+    if (state->allow_nan) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * call-seq: seen?(object)
+ *
+ * Returns _true_, if _object_ was already seen during this generating run.
+ */
+static VALUE cState_seen_p(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    GET_STATE(self);
+    /* The seen hash is keyed by object id. */
+    object_id = rb_obj_id(object);
+    return rb_hash_aref(state->seen, object_id);
+}
+
+/*
+ * call-seq: remember(object)
+ *
+ * Remember _object_, to find out if it was already encountered (if a cyclic
+ * data structure is rendered).
+ */
+static VALUE cState_remember(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    GET_STATE(self);
+    object_id = rb_obj_id(object);
+    return rb_hash_aset(state->seen, object_id, Qtrue);
+}
+
+/*
+ * call-seq: forget(object)
+ *
+ * Forget _object_ for this generating run.
+ */
+static VALUE cState_forget(VALUE self, VALUE object)
+{
+    GET_STATE(self);
+    return rb_hash_delete(state->seen, rb_obj_id(object));
+}
+
+/*
+ * Entry point of the extension: requires json/common, looks up the JSON
+ * exception classes, defines JSON::Ext::Generator with its State class and
+ * its GeneratorMethods modules, and interns the method and option names
+ * used by this file.
+ */
+void Init_generator(void)
+{
+    rb_require("json/common");
+    mJSON = rb_define_module("JSON");
+    mExt = rb_define_module_under(mJSON, "Ext");
+    mGenerator = rb_define_module_under(mExt, "Generator");
+    eGeneratorError = rb_path2class("JSON::GeneratorError");
+    eCircularDatastructure = rb_path2class("JSON::CircularDatastructure");
+    eNestingError = rb_path2class("JSON::NestingError");
+    cState = rb_define_class_under(mGenerator, "State", rb_cObject);
+    rb_define_alloc_func(cState, cState_s_allocate);
+    rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
+    rb_define_method(cState, "initialize", cState_initialize, -1);
+
+    rb_define_method(cState, "indent", cState_indent, 0);
+    rb_define_method(cState, "indent=", cState_indent_set, 1);
+    rb_define_method(cState, "space", cState_space, 0);
+    rb_define_method(cState, "space=", cState_space_set, 1);
+    rb_define_method(cState, "space_before", cState_space_before, 0);
+    rb_define_method(cState, "space_before=", cState_space_before_set, 1);
+    rb_define_method(cState, "object_nl", cState_object_nl, 0);
+    rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
+    rb_define_method(cState, "array_nl", cState_array_nl, 0);
+    rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
+    rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
+    rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
+    rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
+    rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
+    rb_define_method(cState, "seen?", cState_seen_p, 1);
+    rb_define_method(cState, "remember", cState_remember, 1);
+    rb_define_method(cState, "forget", cState_forget, 1);
+    rb_define_method(cState, "configure", cState_configure, 1);
+    rb_define_method(cState, "to_h", cState_to_h, 0);
+
+    mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
+    mObject = rb_define_module_under(mGeneratorMethods, "Object");
+    rb_define_method(mObject, "to_json", mObject_to_json, -1);
+    mHash = rb_define_module_under(mGeneratorMethods, "Hash");
+    rb_define_method(mHash, "to_json", mHash_to_json, -1);
+    mArray = rb_define_module_under(mGeneratorMethods, "Array");
+    rb_define_method(mArray, "to_json", mArray_to_json, -1);
+    mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
+    rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
+    mFloat = rb_define_module_under(mGeneratorMethods, "Float");
+    rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
+    mString = rb_define_module_under(mGeneratorMethods, "String");
+    rb_define_singleton_method(mString, "included", mString_included_s, 1);
+    rb_define_method(mString, "to_json", mString_to_json, -1);
+    rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
+    rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
+    mString_Extend = rb_define_module_under(mString, "Extend");
+    rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
+    mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
+    rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
+    mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
+    rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
+    mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
+    rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
+
+    i_to_s = rb_intern("to_s");
+    i_to_json = rb_intern("to_json");
+    i_new = rb_intern("new");
+    i_indent = rb_intern("indent");
+    i_space = rb_intern("space");
+    i_space_before = rb_intern("space_before");
+    i_object_nl = rb_intern("object_nl");
+    i_array_nl = rb_intern("array_nl");
+    i_check_circular = rb_intern("check_circular");
+    i_max_nesting = rb_intern("max_nesting");
+    i_allow_nan = rb_intern("allow_nan");
+    i_pack = rb_intern("pack");
+    i_unpack = rb_intern("unpack");
+    i_create_id = rb_intern("create_id");
+    i_extend = rb_intern("extend");
+}
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
new file mode 100644
index 0000000000..947e2d6bda
--- /dev/null
+++ b/ext/json/ext/generator/unicode.c
@@ -0,0 +1,180 @@
+#include "unicode.h"
+
+/*
+ * Appends the six character escape \uXXXX for _character_ to the Ruby
+ * string _buffer_. The macro formats into a local `char buf[7]` that the
+ * calling function has to declare. Use it as a single statement:
+ * unicode_escape(buffer, ch);
+ */
+#define unicode_escape(buffer, character) do {                    \
+    snprintf(buf, 7, "\\u%04x", (unsigned int) (character));      \
+    rb_str_buf_cat((buffer), buf, 6);                             \
+} while (0)
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+    0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ *  length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns 0. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
+{
+    UTF8 a;
+    const UTF8 *srcptr = source+length;
+    /* The trailing bytes are checked from the last one back to the second. */
+    switch (length) {
+        default: return 0;
+                 /* Everything else falls through when "1"... */
+        case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+                /* fall through */
+        case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+                /* fall through */
+        case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
+
+                    /* `a` is now the second byte; its range depends on the first. */
+                    switch (*source) {
+                        /* no fall-through in this inner switch */
+                        case 0xE0: if (a < 0xA0) return 0; break;
+                        case 0xED: if (a > 0x9F) return 0; break;
+                        case 0xF0: if (a < 0x90) return 0; break;
+                        case 0xF4: if (a > 0x8F) return 0; break;
+                        default:   if (a < 0x80) return 0;
+                    }
+
+                /* fall through */
+        case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
+    }
+    if (*source > 0xF4) return 0;
+    return 1;
+}
+
+/*
+ * Appends the contents of the Ruby string _string_, read as UTF-8, to the
+ * Ruby string _buffer_ in JSON string syntax (without the surrounding
+ * quotes). Bytes 0x20..0x7f are copied, except that '"' and '\' are
+ * backslash escaped. \n, \r, \t, \f and \b get their short escapes. Every
+ * other character is written as \uXXXX, characters above U+FFFF as a
+ * surrogate pair.
+ *
+ * Raises JSON::GeneratorError for truncated or malformed UTF-8. A surrogate
+ * code point or a value above U+10FFFF raises as well when _flags_ is
+ * strictConversion and is otherwise replaced by U+FFFD.
+ */
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
+{
+    char buf[7]; /* scratch space used by the unicode_escape macro */
+    const UTF8* source = (UTF8 *) RSTRING_PTR(string);
+    const UTF8* sourceEnd = source + RSTRING_LEN(string);
+
+    while (source < sourceEnd) {
+        UTF32 ch = 0;
+        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+        if (source + extraBytesToRead >= sourceEnd) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "partial character in source, but hit end");
+        }
+        if (!isLegalUTF8(source, extraBytesToRead+1)) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed");
+        }
+        /*
+         * The cases all fall through: each one adds one byte and shifts,
+         * so that entering at case N consumes N + 1 bytes in total.
+         */
+        switch (extraBytesToRead) {
+            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+            case 3: ch += *source++; ch <<= 6;
+            case 2: ch += *source++; ch <<= 6;
+            case 1: ch += *source++; ch <<= 6;
+            case 0: ch += *source++;
+        }
+        ch -= offsetsFromUTF8[extraBytesToRead];
+
+        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+                if (flags == strictConversion) {
+                    rb_raise(rb_path2class("JSON::GeneratorError"),
+                            "source sequence is illegal/malformed");
+                } else {
+                    unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+                }
+            } else {
+                /* normal case */
+                if (ch == '"') {
+                    rb_str_buf_cat2(buffer, "\\\"");
+                } else if (ch == '\\') {
+                    rb_str_buf_cat2(buffer, "\\\\");
+                } else if (ch >= 0x20 && ch <= 0x7f) {
+                    /* single byte character: copy the byte just consumed */
+                    rb_str_buf_cat(buffer, (char *) source - 1, 1);
+                } else if (ch == '\n') {
+                    rb_str_buf_cat2(buffer, "\\n");
+                } else if (ch == '\r') {
+                    rb_str_buf_cat2(buffer, "\\r");
+                } else if (ch == '\t') {
+                    rb_str_buf_cat2(buffer, "\\t");
+                } else if (ch == '\f') {
+                    rb_str_buf_cat2(buffer, "\\f");
+                } else if (ch == '\b') {
+                    rb_str_buf_cat2(buffer, "\\b");
+                } else {
+                    /* other control characters and everything above 0x7f */
+                    unicode_escape(buffer, (UTF16) ch);
+                }
+            }
+        } else if (ch > UNI_MAX_UTF16) {
+            if (flags == strictConversion) {
+                rb_raise(rb_path2class("JSON::GeneratorError"),
+                        "source sequence is illegal/malformed");
+            } else {
+                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+            }
+        } else {
+            /* target is a character in range 0xFFFF - 0x10FFFF. */
+            ch -= halfBase;
+            unicode_escape(buffer, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
+            unicode_escape(buffer, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
+        }
+    }
+}
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
new file mode 100644
index 0000000000..841474bcea
--- /dev/null
+++ b/ext/json/ext/generator/unicode.h
@@ -0,0 +1,53 @@
+#include "ruby.h"
+
+#ifndef _GENERATOR_UNICODE_H_
+#define _GENERATOR_UNICODE_H_
+
+typedef enum {
+    conversionOK = 0,   /* conversion successful */
+    sourceExhausted,    /* partial character in source, but hit end */
+    targetExhausted,    /* insuff. room in target for conversion */
+    sourceIllegal       /* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum {
+    strictConversion = 0,
+    lenientConversion
+} ConversionFlags;
+
+typedef unsigned long  UTF32;  /* at least 32 bits */
+typedef unsigned short UTF16;  /* at least 16 bits */
+typedef unsigned char  UTF8;   /* typically 8 bits */
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_SUR_HIGH_START  (UTF32)0xD800
+#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
+#define UNI_SUR_LOW_START   (UTF32)0xDC00
+#define UNI_SUR_LOW_END     (UTF32)0xDFFF
+
+static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
+
+/* Fallback accessors, defined only when the Ruby headers lack them. */
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#endif
-- 
cgit v1.2.3