summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/json/ext/generator/extconf.h3
-rw-r--r--ext/json/ext/generator/extconf.rb9
-rwxr-xr-xext/json/ext/generator/generator.c728
-rw-r--r--ext/json/ext/generator/unicode.c184
-rwxr-xr-xext/json/ext/generator/unicode.h53
-rw-r--r--ext/json/ext/parser/extconf.h3
-rw-r--r--ext/json/ext/parser/extconf.rb9
-rw-r--r--ext/json/ext/parser/parser.c1601
-rw-r--r--ext/json/ext/parser/parser.rl569
-rw-r--r--ext/json/ext/parser/unicode.c156
-rwxr-xr-xext/json/ext/parser/unicode.h58
-rw-r--r--ext/nkf/nkf-utf8/nkf.c198
-rw-r--r--ext/nkf/nkf-utf8/utf8tbl.c247
-rw-r--r--ext/nkf/nkf-utf8/utf8tbl.h3
14 files changed, 3751 insertions, 70 deletions
diff --git a/ext/json/ext/generator/extconf.h b/ext/json/ext/generator/extconf.h
new file mode 100644
index 0000000000..cda0cc8ea5
--- /dev/null
+++ b/ext/json/ext/generator/extconf.h
@@ -0,0 +1,3 @@
+#ifndef EXTCONF_H
+#define EXTCONF_H
+#endif
diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb
new file mode 100644
index 0000000000..db721a92f8
--- /dev/null
+++ b/ext/json/ext/generator/extconf.rb
@@ -0,0 +1,9 @@
+require 'mkmf'
+require 'rbconfig'
+
+if CONFIG['CC'] =~ /gcc/
+ CONFIG['CC'] += ' -Wall -ggdb'
+ #CONFIG['CC'] += ' -Wall'
+end
+
+create_makefile 'json/ext/generator'
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
new file mode 100755
index 0000000000..60e09355f0
--- /dev/null
+++ b/ext/json/ext/generator/generator.c
@@ -0,0 +1,728 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include <string.h>
+#include "ruby.h"
+#include "st.h"
+#include "unicode.h"
+
+static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
+ mHash, mArray, mInteger, mFloat, mString, mString_Extend,
+ mTrueClass, mFalseClass, mNilClass, eGeneratorError,
+ eCircularDatastructure;
+
+static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
+ i_object_nl, i_array_nl, i_check_circular, i_pack, i_unpack,
+ i_create_id, i_extend;
+
+typedef struct JSON_Generator_StateStruct { /* per-State configuration plus scratch data used while generating */
+ VALUE indent; /* String repeated once per nesting level to indent output */
+ VALUE space; /* String appended after the ':' of a hash pair */
+ VALUE space_before; /* String appended before the ':' of a hash pair */
+ VALUE object_nl; /* String appended after '{', after each ',' and before '}' of an object */
+ VALUE array_nl; /* String appended after '[', after each ',' and before ']' of an array */
+ int check_circular; /* non-zero: consult/update `seen` to detect circular structures */
+ VALUE seen; /* Hash mapping object ids to true for containers currently being generated */
+ VALUE memo; /* output buffer of the Hash currently being generated */
+ VALUE depth; /* Fixnum nesting depth used for the pairs of the current Hash */
+ int flag; /* non-zero once a pair was written, so the next pair is preceded by ',' */
+} JSON_Generator_State;
+
+#define GET_STATE(self) \
+ JSON_Generator_State *state; \
+ Data_Get_Struct(self, JSON_Generator_State, state);
+
+/*
+ * Document-module: JSON::Ext::Generator
+ *
+ * This is the JSON generator implemented as a C extension. It can be
+ * configured to be used by setting
+ *
+ * JSON.generator = JSON::Ext::Generator
+ *
+ * with the method generator= in JSON.
+ *
+ */
+
+/*
+ * rb_hash_foreach callback used when a State object is in play. Appends one
+ * key/value pair to the buffer stored in state->memo, using the separators
+ * and indentation configured in the State. state->flag tells whether a pair
+ * was already written, in which case a "," (plus object_nl) comes first.
+ * Nested to_json calls may overwrite state->memo and state->depth, so both
+ * are written back before returning. Always returns ST_CONTINUE.
+ */
+static int hash_to_json_state_i(VALUE key, VALUE value, VALUE Vstate)
+{
+    VALUE out, level, piece;
+    GET_STATE(Vstate);
+
+    out = state->memo;
+    level = state->depth;
+    if (key == Qundef) return ST_CONTINUE;
+
+    /* Separator between this pair and the previous one. */
+    if (state->flag) {
+        state->flag = 0;
+        rb_str_buf_cat2(out, ",");
+        if (RSTRING_LEN(state->object_nl) != 0) {
+            rb_str_buf_append(out, state->object_nl);
+        }
+    }
+    /* Indentation is only emitted when objects are spread over lines. */
+    if (RSTRING_LEN(state->object_nl) != 0) {
+        rb_str_buf_append(out, rb_str_times(state->indent, level));
+    }
+
+    /* The key is always rendered through its String representation. */
+    piece = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 2, Vstate, level);
+    rb_str_buf_append(out, piece);
+    OBJ_INFECT(out, piece);
+
+    if (RSTRING_LEN(state->space_before) != 0) {
+        rb_str_buf_append(out, state->space_before);
+    }
+    rb_str_buf_cat2(out, ":");
+    if (RSTRING_LEN(state->space) != 0) {
+        rb_str_buf_append(out, state->space);
+    }
+
+    piece = rb_funcall(value, i_to_json, 2, Vstate, level);
+    state->flag = 1;
+    rb_str_buf_append(out, piece);
+    OBJ_INFECT(out, piece);
+
+    /* Restore what nested generation may have replaced. */
+    state->depth = level;
+    state->memo = out;
+    return ST_CONTINUE;
+}
+
+/*
+ * Generates the JSON object text for the Hash _self_ with the formatting
+ * configured in _Vstate_. _Vdepth_ is the Fixnum nesting depth of the Hash
+ * itself; its pairs are rendered one level deeper. The State's memo, depth
+ * and flag fields are reset here for use by hash_to_json_state_i.
+ */
+inline static VALUE mHash_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth)
+{
+    long entries = RHASH(self)->tbl->num_entries;
+    long inner_depth;
+    VALUE out;
+    GET_STATE(Vstate);
+
+    inner_depth = FIX2LONG(Vdepth) + 1;
+    out = rb_str_buf_new(entries);
+    state->memo = out;
+    state->depth = LONG2FIX(inner_depth);
+    state->flag = 0;
+
+    rb_str_buf_cat2(out, "{");
+    if (RSTRING_LEN(state->object_nl) != 0) {
+        rb_str_buf_append(out, state->object_nl);
+    }
+    rb_hash_foreach(self, hash_to_json_state_i, Vstate);
+    /* Closing brace goes on its own line, indented like the Hash itself. */
+    if (RSTRING_LEN(state->object_nl) != 0) {
+        rb_str_buf_append(out, state->object_nl);
+        rb_str_buf_append(out, rb_str_times(state->indent, Vdepth));
+    }
+    rb_str_buf_cat2(out, "}");
+    return out;
+}
+
+/*
+ * rb_hash_foreach callback for the stateless case: appends "key:value" to
+ * _buf_ in compact form. A "," is written first whenever _buf_ already
+ * holds more than the opening "{". Always returns ST_CONTINUE.
+ */
+static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
+{
+    VALUE key_json, value_json;
+
+    if (key == Qundef) return ST_CONTINUE;
+
+    if (RSTRING_LEN(buf) > 1) {
+        rb_str_buf_cat2(buf, ",");
+    }
+
+    key_json = rb_funcall(rb_funcall(key, i_to_s, 0), i_to_json, 0);
+    rb_str_buf_append(buf, key_json);
+    OBJ_INFECT(buf, key_json);
+
+    rb_str_buf_cat2(buf, ":");
+
+    value_json = rb_funcall(value, i_to_json, 0);
+    rb_str_buf_append(buf, value_json);
+    OBJ_INFECT(buf, value_json);
+
+    return ST_CONTINUE;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON object, that is unparsed from
+ * this Hash instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further; without it a compact text is built.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ * If the State checks for circular structures and this Hash is already
+ * being generated, JSON::CircularDatastructure is raised.
+ */
+static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE Vstate, Vdepth, json;
+    long level;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    level = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+
+    if (NIL_P(Vstate)) {
+        /* No State: compact output without any whitespace. */
+        long entries = RHASH(self)->tbl->num_entries;
+        json = rb_str_buf_new(entries);
+        rb_str_buf_cat2(json, "{");
+        rb_hash_foreach(self, hash_to_json_i, json);
+        rb_str_buf_cat2(json, "}");
+    } else {
+        GET_STATE(Vstate);
+        if (!state->check_circular) {
+            json = mHash_json_transfrom(self, Vstate, LONG2FIX(level));
+        } else {
+            /* Mark this Hash as in progress while its pairs are generated. */
+            VALUE self_id = rb_obj_id(self);
+            if (RTEST(rb_hash_aref(state->seen, self_id))) {
+                rb_raise(eCircularDatastructure,
+                        "circular data structures not supported!");
+            }
+            rb_hash_aset(state->seen, self_id, Qtrue);
+            json = mHash_json_transfrom(self, Vstate, LONG2FIX(level));
+            rb_hash_delete(state->seen, self_id);
+        }
+    }
+    OBJ_INFECT(json, self);
+    return json;
+}
+
+/*
+ * Generates the JSON array text for the Array _self_ with the formatting
+ * configured in _Vstate_. _Vdepth_ is the nesting depth of the Array (nil
+ * counts as 0); elements are rendered one level deeper.
+ *
+ * When the State checks for circular structures, the Array is registered in
+ * state->seen for the duration of the call and every element that is already
+ * registered there raises JSON::CircularDatastructure.
+ */
+inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth)
+{
+    long i, len = RARRAY_LEN(self);
+    long depth = NIL_P(Vdepth) ? 0 : FIX2LONG(Vdepth);
+    VALUE separator = rb_str_new2(",");
+    VALUE self_id = Qnil;
+    VALUE element_indent, out;
+    int guard;
+    GET_STATE(Vstate);
+
+    /* Decide once whether circular checking applies to this whole call. */
+    guard = state->check_circular;
+    if (guard) {
+        self_id = rb_obj_id(self);
+        rb_hash_aset(state->seen, self_id, Qtrue);
+    }
+
+    out = rb_str_buf_new(len);
+    if (RSTRING_LEN(state->array_nl)) rb_str_append(separator, state->array_nl);
+    element_indent = rb_str_times(state->indent, LONG2FIX(depth + 1));
+
+    rb_str_buf_cat2(out, "[");
+    rb_str_buf_append(out, state->array_nl);
+    for (i = 0; i < len; i++) {
+        VALUE element = RARRAY_PTR(self)[i];
+        if (guard && RTEST(rb_hash_aref(state->seen, rb_obj_id(element)))) {
+            rb_raise(eCircularDatastructure,
+                    "circular data structures not supported!");
+        }
+        OBJ_INFECT(out, element);
+        if (i > 0) rb_str_buf_append(out, separator);
+        rb_str_buf_append(out, element_indent);
+        rb_str_buf_append(out, rb_funcall(element, i_to_json, 2, Vstate, LONG2FIX(depth + 1)));
+    }
+    /* Closing bracket goes on its own line, indented like the Array itself. */
+    if (RSTRING_LEN(state->array_nl)) {
+        rb_str_buf_append(out, state->array_nl);
+        rb_str_buf_append(out, rb_str_times(state->indent, LONG2FIX(depth)));
+    }
+    rb_str_buf_cat2(out, "]");
+
+    if (guard) {
+        rb_hash_delete(state->seen, self_id);
+    }
+    return out;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string containing a JSON array, that is unparsed from
+ * this Array instance.
+ * _state_ is a JSON::State object, that can also be used to configure the
+ * produced JSON string output further; without it a compact text is built.
+ * _depth_ is used to find out nesting depth, to indent accordingly.
+ */
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE Vstate, Vdepth, json;
+
+    rb_scan_args(argc, argv, "02", &Vstate, &Vdepth);
+    if (!NIL_P(Vstate)) {
+        json = mArray_json_transfrom(self, Vstate, Vdepth);
+    } else {
+        /* No State: compact output, elements separated by a bare comma. */
+        long idx;
+        long count = RARRAY_LEN(self);
+
+        json = rb_str_buf_new(2 + 2 * count);
+        rb_str_buf_cat2(json, "[");
+        for (idx = 0; idx < count; idx++) {
+            VALUE element = RARRAY_PTR(self)[idx];
+            OBJ_INFECT(json, element);
+            if (idx > 0) {
+                rb_str_buf_cat2(json, ",");
+            }
+            rb_str_buf_append(json, rb_funcall(element, i_to_json, 0));
+        }
+        rb_str_buf_cat2(json, "]");
+    }
+    OBJ_INFECT(json, self);
+    return json;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Integer number, which is
+ * the result of calling #to_s on it. All arguments are ignored.
+ */
+static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE digits = rb_funcall(self, i_to_s, 0);
+    return digits;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Returns a JSON string representation for this Float number, which is
+ * the result of calling #to_s on it. All arguments are ignored.
+ *
+ * NOTE(review): no special handling of non-finite values is done here, so
+ * whatever #to_s returns for them ends up in the JSON text - confirm that
+ * callers are fine with this.
+ */
+static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE text = rb_funcall(self, i_to_s, 0);
+    return text;
+}
+
+/*
+ * call-seq: String.included(modul)
+ *
+ * Hook that extends _modul_ with the String::Extend module and returns
+ * the result of that #extend call.
+ */
+static VALUE mString_included_s(VALUE self, VALUE modul)
+{
+    VALUE extended = rb_funcall(modul, i_extend, 1, mString_Extend);
+    return extended;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * This string should be encoded with UTF-8. A call to this method
+ * returns a JSON string (including the surrounding double quotes) in which
+ * non-ASCII characters are encoded as UTF16 big endian \u???? escapes.
+ * The conversion is strict. All arguments are ignored.
+ */
+static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE quoted = rb_str_buf_new(RSTRING_LEN(self));
+
+    rb_str_buf_cat2(quoted, "\"");
+    JSON_convert_UTF8_to_JSON(quoted, self, strictConversion);
+    rb_str_buf_cat2(quoted, "\"");
+    return quoted;
+}
+
+/*
+ * call-seq: to_json_raw_object()
+ *
+ * This method creates a raw object hash, that can be nested into
+ * other data structures and will be unparsed as a raw string. This
+ * method should be used, if you want to convert raw strings to JSON
+ * instead of UTF-8 strings, e. g. binary data.
+ *
+ * The hash maps JSON.create_id to this object's class name and "raw" to
+ * the result of unpack("C*") on this String.
+ */
+static VALUE mString_to_json_raw_object(VALUE self)
+{
+    VALUE raw_object = rb_hash_new();
+    VALUE id_key, class_name, bytes;
+
+    id_key = rb_funcall(mJSON, i_create_id, 0);
+    class_name = rb_class_name(rb_obj_class(self));
+    rb_hash_aset(raw_object, id_key, class_name);
+
+    bytes = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
+    rb_hash_aset(raw_object, rb_str_new2("raw"), bytes);
+    return raw_object;
+}
+
+/*
+ * call-seq: to_json_raw(*args)
+ *
+ * This method creates a JSON text from the result of a call to
+ * to_json_raw_object of this String. _args_ are passed on unchanged to the
+ * Hash generator.
+ */
+static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
+{
+    VALUE raw_object = mString_to_json_raw_object(self);
+    VALUE json;
+
+    Check_Type(raw_object, T_HASH);
+    json = mHash_to_json(argc, argv, raw_object);
+    return json;
+}
+
+/*
+ * call-seq: json_create(o)
+ *
+ * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
+ * key "raw"). The Ruby String can be created by this module method: it
+ * returns the result of pack("C*") on the value stored under "raw".
+ * _o_ has to be a Hash.
+ */
+static VALUE mString_Extend_json_create(VALUE self, VALUE o)
+{
+    VALUE bytes, packed;
+
+    Check_Type(o, T_HASH);
+    bytes = rb_hash_aref(o, rb_str_new2("raw"));
+    packed = rb_funcall(bytes, i_pack, 1, rb_str_new2("C*"));
+    return packed;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string for true: 'true'. Both arguments are ignored.
+ */
+static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE json = rb_str_new2("true");
+    return json;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string for false: 'false'. Both arguments are ignored.
+ */
+static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE json = rb_str_new2("false");
+    return json;
+}
+
+/*
+ * call-seq: to_json(state = nil, depth = 0)
+ *
+ * Returns a JSON string for nil: 'null'. Both arguments are ignored.
+ */
+static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE json = rb_str_new2("null");
+    return json;
+}
+
+/*
+ * call-seq: to_json(*)
+ *
+ * Converts this object to a string (calling #to_s), converts
+ * it to a JSON string, and returns the result. This is a fallback, if no
+ * special method #to_json was defined for some object. A TypeError is
+ * raised if #to_s does not return a String.
+ */
+static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
+{
+    VALUE text = rb_funcall(self, i_to_s, 0);
+    VALUE json;
+
+    Check_Type(text, T_STRING);
+    json = mString_to_json(argc, argv, text);
+    return json;
+}
+
+/*
+ * Document-class: JSON::Ext::Generator::State
+ *
+ * This class is used to create State instances, that are used to hold data
+ * while generating a JSON text from a Ruby data structure.
+ */
+
+/*
+ * GC mark function of State objects: marks every VALUE field held in the
+ * struct. rb_gc_mark_maybe is used for all of them - presumably because the
+ * fields may not hold valid references before initialize has run.
+ */
+static void State_mark(JSON_Generator_State *state)
+{
+    VALUE *slots[8];
+    size_t i;
+
+    slots[0] = &state->indent;
+    slots[1] = &state->space;
+    slots[2] = &state->space_before;
+    slots[3] = &state->object_nl;
+    slots[4] = &state->array_nl;
+    slots[5] = &state->seen;
+    slots[6] = &state->memo;
+    slots[7] = &state->depth;
+
+    for (i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
+        rb_gc_mark_maybe(*slots[i]);
+    }
+}
+
+/*
+ * Allocates a JSON_Generator_State on the heap and returns it with all
+ * bytes set to zero. ALLOC alone returns uninitialised memory; zeroing it
+ * makes sure that a State whose initialize method has not (yet) run never
+ * exposes garbage VALUEs to the accessors or to the GC mark function.
+ */
+static JSON_Generator_State *State_allocate(void)
+{
+    JSON_Generator_State *state = ALLOC(JSON_Generator_State);
+    MEMZERO(state, JSON_Generator_State, 1);
+    return state;
+}
+
+/*
+ * Allocation function of the State class: wraps a freshly allocated
+ * JSON_Generator_State into a Data object of class _klass_, with State_mark
+ * as its mark function and -1 as its free function.
+ */
+static VALUE cState_s_allocate(VALUE klass)
+{
+    JSON_Generator_State *fresh = State_allocate();
+    VALUE wrapped = Data_Wrap_Struct(klass, State_mark, -1, fresh);
+    return wrapped;
+}
+
+/*
+ * Fetches the option stored under the Symbol for _name_ in the Hash _opts_.
+ * A nil or false value yields a new empty String; any other value has to be
+ * a String (TypeError otherwise) and is returned as it is.
+ */
+static VALUE State_string_option(VALUE opts, ID name)
+{
+    VALUE value = rb_hash_aref(opts, ID2SYM(name));
+
+    if (!RTEST(value)) {
+        return rb_str_new2("");
+    }
+    Check_Type(value, T_STRING);
+    return value;
+}
+
+/*
+ * call-seq: new(opts = {})
+ *
+ * Instantiates a new State object, configured by _opts_.
+ *
+ * _opts_ can have the following keys:
+ *
+ * * *indent*: a string used to indent levels (default: ''),
+ * * *space*: a string that is put after a : or , delimiter (default: ''),
+ * * *space_before*: a string that is put before a : pair delimiter (default: ''),
+ * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
+ * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
+ * * *check_circular*: true if checking for circular data structures
+ * should be done, false (the default) otherwise.
+ *
+ * _opts_ is converted with to_hash if it is not a Hash already. The
+ * bookkeeping fields (seen, memo, depth) are reset in every case.
+ */
+static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
+{
+    VALUE opts;
+    GET_STATE(self);
+
+    rb_scan_args(argc, argv, "01", &opts);
+    if (NIL_P(opts)) {
+        /* No options at all: every formatting string is empty. */
+        state->indent = rb_str_new2("");
+        state->space = rb_str_new2("");
+        state->space_before = rb_str_new2("");
+        state->array_nl = rb_str_new2("");
+        state->object_nl = rb_str_new2("");
+        state->check_circular = 0;
+    } else {
+        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+        state->indent = State_string_option(opts, i_indent);
+        state->space = State_string_option(opts, i_space);
+        state->space_before = State_string_option(opts, i_space_before);
+        state->array_nl = State_string_option(opts, i_array_nl);
+        state->object_nl = State_string_option(opts, i_object_nl);
+        state->check_circular = RTEST(rb_hash_aref(opts, ID2SYM(i_check_circular)));
+    }
+    state->seen = rb_hash_new();
+    state->memo = Qnil;
+    state->depth = INT2FIX(0);
+    return self;
+}
+
+/*
+ * call-seq: from_state(opts)
+ *
+ * Creates a State object from _opts_, which ought to be Hash to create a
+ * new State instance configured by _opts_, something else to create an
+ * unconfigured instance. If _opts_ is a State object, it is just returned.
+ */
+static VALUE cState_from_state_s(VALUE self, VALUE opts)
+{
+    /* Already an instance of this class (or a subclass): hand it back. */
+    if (rb_obj_is_kind_of(opts, self)) {
+        return opts;
+    }
+    if (rb_obj_is_kind_of(opts, rb_cHash)) {
+        return rb_funcall(self, i_new, 1, opts);
+    }
+    return rb_funcall(self, i_new, 0);
+}
+
+/*
+ * call-seq: indent()
+ *
+ * Returns the string that is used to indent levels in the JSON text.
+ */
+static VALUE cState_indent(VALUE self)
+{
+    VALUE current;
+    GET_STATE(self);
+
+    current = state->indent;
+    return current;
+}
+
+/*
+ * call-seq: indent=(indent)
+ *
+ * Sets the string that is used to indent levels in the JSON text.
+ * _indent_ has to be a String; it is also the return value.
+ */
+static VALUE cState_indent_set(VALUE self, VALUE indent)
+{
+    GET_STATE(self);
+
+    Check_Type(indent, T_STRING);
+    state->indent = indent;
+    return indent;
+}
+
+/*
+ * call-seq: space()
+ *
+ * Returns the string that is used to insert a space between the tokens in
+ * a JSON string.
+ */
+static VALUE cState_space(VALUE self)
+{
+    VALUE current;
+    GET_STATE(self);
+
+    current = state->space;
+    return current;
+}
+
+/*
+ * call-seq: space=(space)
+ *
+ * Sets the string that is used to insert a space between the tokens in a
+ * JSON string. _space_ has to be a String; it is also the return value.
+ */
+static VALUE cState_space_set(VALUE self, VALUE space)
+{
+    GET_STATE(self);
+
+    Check_Type(space, T_STRING);
+    state->space = space;
+    return space;
+}
+
+/*
+ * call-seq: space_before()
+ *
+ * Returns the string that is used to insert a space before the ':' in
+ * JSON objects.
+ */
+static VALUE cState_space_before(VALUE self)
+{
+    VALUE current;
+    GET_STATE(self);
+
+    current = state->space_before;
+    return current;
+}
+
+/*
+ * call-seq: space_before=(space_before)
+ *
+ * Sets the string that is used to insert a space before the ':' in JSON
+ * objects. _space_before_ has to be a String; it is also the return value.
+ */
+static VALUE cState_space_before_set(VALUE self, VALUE space_before)
+{
+    GET_STATE(self);
+
+    Check_Type(space_before, T_STRING);
+    state->space_before = space_before;
+    return space_before;
+}
+
+/*
+ * call-seq: object_nl()
+ *
+ * Returns the string that is put at the end of a line that holds a JSON
+ * object (or Hash).
+ */
+static VALUE cState_object_nl(VALUE self)
+{
+    VALUE current;
+    GET_STATE(self);
+
+    current = state->object_nl;
+    return current;
+}
+
+/*
+ * call-seq: object_nl=(object_nl)
+ *
+ * Sets the string that is put at the end of a line that holds a JSON
+ * object (or Hash). _object_nl_ has to be a String; it is also the return
+ * value.
+ */
+static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
+{
+    GET_STATE(self);
+
+    Check_Type(object_nl, T_STRING);
+    state->object_nl = object_nl;
+    return object_nl;
+}
+
+/*
+ * call-seq: array_nl()
+ *
+ * Returns the string that is put at the end of a line that holds a JSON
+ * array.
+ */
+static VALUE cState_array_nl(VALUE self)
+{
+    VALUE current;
+    GET_STATE(self);
+
+    current = state->array_nl;
+    return current;
+}
+
+/*
+ * call-seq: array_nl=(array_nl)
+ *
+ * Sets the string that is put at the end of a line that holds a JSON
+ * array. _array_nl_ has to be a String; it is also the return value.
+ */
+static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
+{
+    GET_STATE(self);
+
+    Check_Type(array_nl, T_STRING);
+    state->array_nl = array_nl;
+    return array_nl;
+}
+
+/*
+ * call-seq: check_circular?()
+ *
+ * Returns true, if circular data structures should be checked,
+ * otherwise returns false.
+ */
+static VALUE cState_check_circular_p(VALUE self)
+{
+    GET_STATE(self);
+
+    if (state->check_circular) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * call-seq: seen?(object)
+ *
+ * Returns _true_, if _object_ was already seen during this generating run.
+ * The answer is the entry stored for the object's id in the seen hash.
+ */
+static VALUE cState_seen_p(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    GET_STATE(self);
+
+    object_id = rb_obj_id(object);
+    return rb_hash_aref(state->seen, object_id);
+}
+
+/*
+ * call-seq: remember(object)
+ *
+ * Remember _object_, to find out if it was already encountered (if a cyclic
+ * data structure is rendered). The object's id is stored with the value
+ * true in the seen hash.
+ */
+static VALUE cState_remember(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    GET_STATE(self);
+
+    object_id = rb_obj_id(object);
+    return rb_hash_aset(state->seen, object_id, Qtrue);
+}
+
+/*
+ * call-seq: forget(object)
+ *
+ * Forget _object_ for this generating run, i. e. delete the entry for its
+ * id from the seen hash.
+ */
+static VALUE cState_forget(VALUE self, VALUE object)
+{
+    VALUE object_id;
+    GET_STATE(self);
+
+    object_id = rb_obj_id(object);
+    return rb_hash_delete(state->seen, object_id);
+}
+
+void Init_generator() /* sets up JSON::Ext::Generator, its State class and the to_json mixin modules */
+{
+ mJSON = rb_define_module("JSON");
+ mExt = rb_define_module_under(mJSON, "Ext");
+ mGenerator = rb_define_module_under(mExt, "Generator");
+ eGeneratorError = rb_path2class("JSON::GeneratorError"); /* looked up by path, not defined here */
+ eCircularDatastructure = rb_path2class("JSON::CircularDatastructure"); /* looked up by path, not defined here */
+ cState = rb_define_class_under(mGenerator, "State", rb_cObject);
+ rb_define_alloc_func(cState, cState_s_allocate); /* State instances wrap a JSON_Generator_State struct */
+ rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
+ rb_define_method(cState, "initialize", cState_initialize, -1);
+
+ rb_define_method(cState, "indent", cState_indent, 0); /* accessors for the formatting strings follow */
+ rb_define_method(cState, "indent=", cState_indent_set, 1);
+ rb_define_method(cState, "space", cState_space, 0);
+ rb_define_method(cState, "space=", cState_space_set, 1);
+ rb_define_method(cState, "space_before", cState_space_before, 0);
+ rb_define_method(cState, "space_before=", cState_space_before_set, 1);
+ rb_define_method(cState, "object_nl", cState_object_nl, 0);
+ rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
+ rb_define_method(cState, "array_nl", cState_array_nl, 0);
+ rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
+ rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
+ rb_define_method(cState, "seen?", cState_seen_p, 1); /* seen?/remember/forget operate on the State's seen hash */
+ rb_define_method(cState, "remember", cState_remember, 1);
+ rb_define_method(cState, "forget", cState_forget, 1);
+ mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); /* one module per core class follows, each providing to_json */
+ mObject = rb_define_module_under(mGeneratorMethods, "Object");
+ rb_define_method(mObject, "to_json", mObject_to_json, -1);
+ mHash = rb_define_module_under(mGeneratorMethods, "Hash");
+ rb_define_method(mHash, "to_json", mHash_to_json, -1);
+ mArray = rb_define_module_under(mGeneratorMethods, "Array");
+ rb_define_method(mArray, "to_json", mArray_to_json, -1);
+ mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
+ rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
+ mFloat = rb_define_module_under(mGeneratorMethods, "Float");
+ rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
+ mString = rb_define_module_under(mGeneratorMethods, "String");
+ rb_define_singleton_method(mString, "included", mString_included_s, 1); /* hook: extends the including module with String::Extend */
+ rb_define_method(mString, "to_json", mString_to_json, -1);
+ rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
+ rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
+ mString_Extend = rb_define_module_under(mString, "Extend");
+ rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
+ mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
+ rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
+ mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
+ rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
+ mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
+ rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
+
+ i_to_s = rb_intern("to_s"); /* cache the IDs of method names and option keys used by the functions in this file */
+ i_to_json = rb_intern("to_json");
+ i_new = rb_intern("new");
+ i_indent = rb_intern("indent");
+ i_space = rb_intern("space");
+ i_space_before = rb_intern("space_before");
+ i_object_nl = rb_intern("object_nl");
+ i_array_nl = rb_intern("array_nl");
+ i_check_circular = rb_intern("check_circular");
+ i_pack = rb_intern("pack");
+ i_unpack = rb_intern("unpack");
+ i_create_id = rb_intern("create_id");
+ i_extend = rb_intern("extend");
+}
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
new file mode 100644
index 0000000000..44e1f41f98
--- /dev/null
+++ b/ext/json/ext/generator/unicode.c
@@ -0,0 +1,184 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include "unicode.h"
+
+#define unicode_escape(buffer, character) \
+ snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \
+ rb_str_buf_cat(buffer, buf, 6);
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns 0. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ *
+ * Returns 1 if the sequence is legal and 0 otherwise.
+ *
+ * The bytes are examined from the last one back to the first: the outer
+ * switch is entered at the case matching _length_ and falls through to
+ * "case 1". The 4th and 3rd byte have to lie in 0x80..0xBF. The 2nd byte
+ * must not exceed 0xBF; its remaining bounds depend on the first byte:
+ * at least 0xA0 after 0xE0, at most 0x9F after 0xED, at least 0x90 after
+ * 0xF0, at most 0x8F after 0xF4 and at least 0x80 in all other cases.
+ * Finally the first byte itself must not lie in 0x80..0xC1 and must not
+ * be greater than 0xF4.
+ */
+
+inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
+{
+ UTF8 a; /* the byte most recently examined */
+ const UTF8 *srcptr = source+length; /* one past the last byte; walked backwards */
+ switch (length) {
+ default: return 0;
+ /* Everything else falls through when "1"... */
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+ case 2: if ((a = (*--srcptr)) > 0xBF) return 0; /* lower bound is checked in the inner switch */
+
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ case 0xE0: if (a < 0xA0) return 0; break;
+ case 0xED: if (a > 0x9F) return 0; break;
+ case 0xF0: if (a < 0x90) return 0; break;
+ case 0xF4: if (a > 0x8F) return 0; break;
+ default: if (a < 0x80) return 0;
+ }
+
+ case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
+ }
+ if (*source > 0xF4) return 0;
+ return 1;
+}
+
+/*
+ * Appends the JSON string body (without the surrounding quotes) for the
+ * UTF-8 encoded Ruby String _string_ to the Ruby String _buffer_.
+ *
+ * The characters ", \ and / are escaped with a backslash, all other
+ * characters in 0x20..0x7f are copied as they are, newline, carriage
+ * return, tab, form feed and backspace get their short escapes and
+ * everything else in the BMP is written as \uXXXX. Characters above the
+ * BMP are written as two \uXXXX escapes forming a surrogate pair.
+ *
+ * JSON::GeneratorError is raised for truncated or malformed input. Code
+ * points in the surrogate range or above 0x10FFFF also raise it if _flags_
+ * is strictConversion; otherwise they are replaced by the escape for
+ * UNI_REPLACEMENT_CHAR.
+ */
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
+{
+    char buf[7]; /* scratch buffer written by the unicode_escape macro */
+    const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
+    const UTF8 *sourceEnd = source + RSTRING_LEN(string);
+
+    while (source < sourceEnd) {
+        UTF32 code = 0;
+        unsigned short trailing = trailingBytesForUTF8[*source];
+        unsigned short pending;
+
+        if (source + trailing >= sourceEnd) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "partial character in source, but hit end");
+        }
+        if (!isLegalUTF8(source, trailing + 1)) {
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed");
+        }
+
+        /* Accumulate the lead byte and all trailing bytes, 6 bits apart. */
+        for (pending = trailing; pending > 0; pending--) {
+            code += *source++;
+            code <<= 6;
+        }
+        code += *source++;
+        code -= offsetsFromUTF8[trailing];
+
+        if (code > UNI_MAX_UTF16) {
+            /* beyond the range that UTF-16 can represent */
+            if (flags == strictConversion) {
+                rb_raise(rb_path2class("JSON::GeneratorError"),
+                        "source sequence is illegal/malformed");
+            } else {
+                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+            }
+        } else if (code > UNI_MAX_BMP) {
+            /* above 0xFFFF: emit a high and a low surrogate escape */
+            code -= halfBase;
+            unicode_escape(buffer, (UTF16)((code >> halfShift) + UNI_SUR_HIGH_START));
+            unicode_escape(buffer, (UTF16)((code & halfMask) + UNI_SUR_LOW_START));
+        } else if (code >= UNI_SUR_HIGH_START && code <= UNI_SUR_LOW_END) {
+            /* UTF-16 surrogate values are illegal as code points */
+            if (flags == strictConversion) {
+                rb_raise(rb_path2class("JSON::GeneratorError"),
+                        "source sequence is illegal/malformed");
+            } else {
+                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+            }
+        } else {
+            /* ordinary BMP character */
+            switch (code) {
+                case '"':
+                    rb_str_buf_cat2(buffer, "\\\"");
+                    break;
+                case '\\':
+                    rb_str_buf_cat2(buffer, "\\\\");
+                    break;
+                case '/':
+                    rb_str_buf_cat2(buffer, "\\/");
+                    break;
+                case '\n':
+                    rb_str_buf_cat2(buffer, "\\n");
+                    break;
+                case '\r':
+                    rb_str_buf_cat2(buffer, "\\r");
+                    break;
+                case '\t':
+                    rb_str_buf_cat2(buffer, "\\t");
+                    break;
+                case '\f':
+                    rb_str_buf_cat2(buffer, "\\f");
+                    break;
+                case '\b':
+                    rb_str_buf_cat2(buffer, "\\b");
+                    break;
+                default:
+                    if (code >= 0x20 && code <= 0x7f) {
+                        /* single byte character: copy the byte just read */
+                        rb_str_buf_cat(buffer, (char *) source - 1, 1);
+                    } else {
+                        unicode_escape(buffer, (UTF16) code);
+                    }
+                    break;
+            }
+        }
+    }
+}
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
new file mode 100755
index 0000000000..841474bcea
--- /dev/null
+++ b/ext/json/ext/generator/unicode.h
@@ -0,0 +1,53 @@
+#include "ruby.h"
+
+#ifndef _GENERATOR_UNICODE_H_
+#define _GENERATOR_UNICODE_H_
+
+typedef enum {
+ conversionOK = 0, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted, /* insuff. room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum { /* selects how JSON_convert_UTF8_to_JSON treats surrogate or out-of-range code points */
+ strictConversion = 0, /* such code points raise JSON::GeneratorError */
+ lenientConversion /* such code points are replaced by the escape for UNI_REPLACEMENT_CHAR */
+} ConversionFlags;
+
+typedef unsigned long UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
+
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#endif
diff --git a/ext/json/ext/parser/extconf.h b/ext/json/ext/parser/extconf.h
new file mode 100644
index 0000000000..cda0cc8ea5
--- /dev/null
+++ b/ext/json/ext/parser/extconf.h
@@ -0,0 +1,3 @@
+/* Empty configuration header: only the include guard is defined.
+ * NOTE(review): presumably a placeholder produced by the build setup - confirm. */
+#ifndef EXTCONF_H
+#define EXTCONF_H
+#endif
diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb
new file mode 100644
index 0000000000..085c8d060c
--- /dev/null
+++ b/ext/json/ext/parser/extconf.rb
@@ -0,0 +1,9 @@
+require 'mkmf'
+require 'rbconfig'
+
+if CONFIG['CC'] =~ /gcc/
+ #CONFIG['CC'] += ' -Wall -ggdb'
+ CONFIG['CC'] += ' -Wall'
+end
+
+create_makefile 'json/ext/parser'
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
new file mode 100644
index 0000000000..7448e5fb7a
--- /dev/null
+++ b/ext/json/ext/parser/parser.c
@@ -0,0 +1,1601 @@
+#line 1 "parser.rl"
+/* vim: set cin et sw=4 ts=4: */
+
+#include "ruby.h"
+#include "re.h"
+#include "st.h"
+#include "unicode.h"
+
+/* Placeholder assigned to each machine's state variable (cs) before the
+ * generated initialisation code overwrites it with the real start state. */
+#define EVIL 0x666
+
+/* Modules, classes and exception classes; assigned in Init_parser. */
+static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
+
+/* Interned method/option names; assigned in Init_parser. */
+static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting;
+
+/* Per-instance parser state wrapped by JSON::Ext::Parser objects. */
+typedef struct JSON_ParserStruct {
+ VALUE Vsource; /* the Ruby source string (marked for the GC in JSON_mark) */
+ char *source; /* pointer to the bytes of Vsource */
+ long len; /* length of the source in bytes */
+ char *memo; /* start of the token being scanned (set by the integer, float and string parsers) */
+ VALUE create_id; /* result of JSON.create_id, fetched in cParser_initialize */
+ int max_nesting; /* maximum allowed nesting depth; 0 disables the check */
+ int current_nesting; /* nesting depth of the structure currently being parsed */
+} JSON_Parser;
+
+/*
+ * Forward declarations of the mutually recursive sub-parsers. Each one scans
+ * from p towards pe, stores the parsed Ruby object in *result and returns
+ * the updated scan position, or NULL when nothing could be parsed
+ * (JSON_parse_array raises instead of returning NULL).
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+
+/* Declares a local `json` and loads the JSON_Parser wrapped by `self` into
+ * it. Expands to a declaration followed by a statement, so it has to come
+ * last among a function's declarations. */
+#define GET_STRUCT \
+ JSON_Parser *json; \
+ Data_Get_Struct(self, JSON_Parser, json);
+
+#line 58 "parser.rl"
+
+
+
+#line 41 "parser.c"
+/* Ragel-generated state numbers of the JSON_object machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_object_start = 1;
+static const int JSON_object_first_final = 27;
+static const int JSON_object_error = 0;
+
+static const int JSON_object_en_main = 1;
+
+#line 91 "parser.rl"
+
+
+/*
+ * Parses a JSON object starting at p (which must point at '{') into a new
+ * Ruby Hash stored in *result.
+ *
+ * Raises eNestingError when depth checking is enabled (max_nesting != 0)
+ * and json->current_nesting exceeds json->max_nesting.
+ *
+ * The generated state machine skips whitespace and C and C++ style
+ * comments, parses each name with JSON_parse_string and each value with
+ * JSON_parse_value, and stops at the closing '}'.
+ *
+ * If the finished hash has a non-nil entry under json->create_id, the named
+ * class is looked up and, when it answers true to json_creatable?, *result
+ * is replaced by the return value of its json_create method.
+ *
+ * Returns p + 1 when the machine ended in an accepting state, NULL
+ * otherwise.
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+ VALUE last_name = Qnil;
+
+ /* NOTE(review): the message reads "to deep" (should be "too deep"); it
+ * comes from parser.rl, so fix it there and regenerate. */
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
+ rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+ }
+
+ *result = rb_hash_new();
+
+
+#line 63 "parser.c"
+ {
+ cs = JSON_object_start;
+ }
+#line 105 "parser.rl"
+
+#line 69 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 123 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 13: goto st2;
+ case 32: goto st2;
+ case 34: goto tr2;
+ case 47: goto st23;
+ case 125: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st2;
+ goto st0;
+tr2:
+#line 77 "parser.rl"
+ {
+ /* parse_name action: read the member name into last_name */
+ char *np = JSON_parse_string(json, p, pe, &last_name);
+ if (np == NULL) goto _out3; else {p = (( np))-1;}
+ }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 106 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st3;
+ case 32: goto st3;
+ case 47: goto st4;
+ case 58: goto st8;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st3;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 42: goto st5;
+ case 47: goto st7;
+ }
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 42 )
+ goto st6;
+ goto st5;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 42: goto st6;
+ case 47: goto st3;
+ }
+ goto st5;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 10 )
+ goto st3;
+ goto st7;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 13: goto st8;
+ case 32: goto st8;
+ case 34: goto tr11;
+ case 45: goto tr11;
+ case 47: goto st19;
+ case 91: goto tr11;
+ case 102: goto tr11;
+ case 110: goto tr11;
+ case 116: goto tr11;
+ case 123: goto tr11;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr11;
+ } else if ( (*p) >= 9 )
+ goto st8;
+ goto st0;
+tr11:
+#line 66 "parser.rl"
+ {
+ /* parse_value action: read the member value and store it under last_name */
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, p, pe, &v);
+ if (np == NULL) {
+ goto _out9;
+ } else {
+ rb_hash_aset(*result, last_name, v);
+ {p = (( np))-1;}
+ }
+ }
+ goto st9;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+#line 187 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st9;
+ case 32: goto st9;
+ case 44: goto st10;
+ case 47: goto st15;
+ case 125: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st9;
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ switch( (*p) ) {
+ case 13: goto st10;
+ case 32: goto st10;
+ case 34: goto tr2;
+ case 47: goto st11;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st10;
+ goto st0;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ switch( (*p) ) {
+ case 42: goto st12;
+ case 47: goto st14;
+ }
+ goto st0;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+ if ( (*p) == 42 )
+ goto st13;
+ goto st12;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+ switch( (*p) ) {
+ case 42: goto st13;
+ case 47: goto st10;
+ }
+ goto st12;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+ if ( (*p) == 10 )
+ goto st10;
+ goto st14;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+ switch( (*p) ) {
+ case 42: goto st16;
+ case 47: goto st18;
+ }
+ goto st0;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+ if ( (*p) == 42 )
+ goto st17;
+ goto st16;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+ switch( (*p) ) {
+ case 42: goto st17;
+ case 47: goto st9;
+ }
+ goto st16;
+st18:
+ if ( ++p == pe )
+ goto _out18;
+case 18:
+ if ( (*p) == 10 )
+ goto st9;
+ goto st18;
+tr4:
+#line 82 "parser.rl"
+ { goto _out27; }
+ goto st27;
+st27:
+ if ( ++p == pe )
+ goto _out27;
+case 27:
+#line 283 "parser.c"
+ goto st0;
+st19:
+ if ( ++p == pe )
+ goto _out19;
+case 19:
+ switch( (*p) ) {
+ case 42: goto st20;
+ case 47: goto st22;
+ }
+ goto st0;
+st20:
+ if ( ++p == pe )
+ goto _out20;
+case 20:
+ if ( (*p) == 42 )
+ goto st21;
+ goto st20;
+st21:
+ if ( ++p == pe )
+ goto _out21;
+case 21:
+ switch( (*p) ) {
+ case 42: goto st21;
+ case 47: goto st8;
+ }
+ goto st20;
+st22:
+ if ( ++p == pe )
+ goto _out22;
+case 22:
+ if ( (*p) == 10 )
+ goto st8;
+ goto st22;
+st23:
+ if ( ++p == pe )
+ goto _out23;
+case 23:
+ switch( (*p) ) {
+ case 42: goto st24;
+ case 47: goto st26;
+ }
+ goto st0;
+st24:
+ if ( ++p == pe )
+ goto _out24;
+case 24:
+ if ( (*p) == 42 )
+ goto st25;
+ goto st24;
+st25:
+ if ( ++p == pe )
+ goto _out25;
+case 25:
+ switch( (*p) ) {
+ case 42: goto st25;
+ case 47: goto st2;
+ }
+ goto st24;
+st26:
+ if ( ++p == pe )
+ goto _out26;
+case 26:
+ if ( (*p) == 10 )
+ goto st2;
+ goto st26;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out12: cs = 12; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out15: cs = 15; goto _out;
+ _out16: cs = 16; goto _out;
+ _out17: cs = 17; goto _out;
+ _out18: cs = 18; goto _out;
+ _out27: cs = 27; goto _out;
+ _out19: cs = 19; goto _out;
+ _out20: cs = 20; goto _out;
+ _out21: cs = 21; goto _out;
+ _out22: cs = 22; goto _out;
+ _out23: cs = 23; goto _out;
+ _out24: cs = 24; goto _out;
+ _out25: cs = 25; goto _out;
+ _out26: cs = 26; goto _out;
+
+ _out: {}
+ }
+#line 106 "parser.rl"
+
+ /* Accepting state reached: optionally turn the hash into an object of
+ * the class named under create_id. */
+ if (cs >= JSON_object_first_final) {
+ VALUE klassname = rb_hash_aref(*result, json->create_id);
+ if (!NIL_P(klassname)) {
+ VALUE klass = rb_path2class(StringValueCStr(klassname));
+ if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
+ *result = rb_funcall(klass, i_json_create, 1, *result);
+ }
+ }
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 397 "parser.c"
+/* Ragel-generated state numbers of the JSON_value machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_value_start = 1;
+static const int JSON_value_first_final = 12;
+static const int JSON_value_error = 0;
+
+static const int JSON_value_en_main = 1;
+
+#line 177 "parser.rl"
+
+
+/*
+ * Parses a single JSON value starting at p and stores it in *result.
+ *
+ * The first character selects the sub-parser: '"' -> JSON_parse_string,
+ * '-' or a digit -> JSON_parse_float followed by JSON_parse_integer,
+ * '[' -> JSON_parse_array, '{' -> JSON_parse_object; the literals false,
+ * null and true are matched by the machine itself and yield Qfalse, Qnil
+ * and Qtrue. json->current_nesting is incremented around the array and
+ * object sub-parsers.
+ *
+ * Returns the updated scan position p when the machine ended in an
+ * accepting state, NULL otherwise.
+ */
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 412 "parser.c"
+ {
+ cs = JSON_value_start;
+ }
+#line 184 "parser.rl"
+
+#line 418 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 34: goto tr0;
+ case 45: goto tr2;
+ case 91: goto tr3;
+ case 102: goto st2;
+ case 110: goto st6;
+ case 116: goto st9;
+ case 123: goto tr7;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ goto st0;
+st0:
+ goto _out0;
+tr0:
+#line 136 "parser.rl"
+ {
+ char *np = JSON_parse_string(json, p, pe, result);
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr2:
+#line 141 "parser.rl"
+ {
+ /* Number: try the float parser first, then the integer parser from
+ * the (possibly advanced) position; either may set *result. */
+ char *np;
+ np = JSON_parse_float(json, p, pe, result);
+ if (np != NULL) {p = (( np))-1;}
+ np = JSON_parse_integer(json, p, pe, result);
+ if (np != NULL) {p = (( np))-1;}
+ goto _out12;
+ }
+ goto st12;
+tr3:
+#line 150 "parser.rl"
+ {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_array(json, p, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr7:
+#line 158 "parser.rl"
+ {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_object(json, p, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr11:
+#line 130 "parser.rl"
+ {
+ *result = Qfalse;
+ }
+ goto st12;
+tr14:
+#line 127 "parser.rl"
+ {
+ *result = Qnil;
+ }
+ goto st12;
+tr17:
+#line 133 "parser.rl"
+ {
+ *result = Qtrue;
+ }
+ goto st12;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+#line 166 "parser.rl"
+ { goto _out12; }
+#line 501 "parser.c"
+ goto st0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 97 )
+ goto st3;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 108 )
+ goto st4;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( (*p) == 115 )
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 101 )
+ goto tr11;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) == 117 )
+ goto st7;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 108 )
+ goto st8;
+ goto st0;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ if ( (*p) == 108 )
+ goto tr14;
+ goto st0;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ if ( (*p) == 114 )
+ goto st10;
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ if ( (*p) == 117 )
+ goto st11;
+ goto st0;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ if ( (*p) == 101 )
+ goto tr17;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out12: cs = 12; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+
+ _out: {}
+ }
+#line 185 "parser.rl"
+
+ if (cs >= JSON_value_first_final) {
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 599 "parser.c"
+/* Ragel-generated state numbers of the JSON_integer machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_integer_start = 1;
+static const int JSON_integer_first_final = 5;
+static const int JSON_integer_error = 0;
+
+static const int JSON_integer_en_main = 1;
+
+#line 201 "parser.rl"
+
+
+/*
+ * Parses a JSON integer starting at p: an optional '-', then either a
+ * single '0' or a non-zero digit followed by further digits. A '0' that is
+ * followed by another digit is rejected.
+ *
+ * The start of the number is remembered in json->memo. The machine only
+ * accepts once it sees a terminating (non-digit) character; hitting pe
+ * first leaves it in a non-accepting state.
+ *
+ * On success the matched text is converted with rb_Integer, stored in
+ * *result, and p + 1 is returned. Returns NULL otherwise.
+ */
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 614 "parser.c"
+ {
+ cs = JSON_integer_start;
+ }
+#line 208 "parser.rl"
+ json->memo = p;
+
+#line 621 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 45: goto st2;
+ case 48: goto st3;
+ }
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 48 )
+ goto st3;
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st0;
+ goto tr4;
+tr4:
+#line 198 "parser.rl"
+ { goto _out5; }
+ goto st5;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+#line 661 "parser.c"
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto tr4;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out5: cs = 5; goto _out;
+ _out4: cs = 4; goto _out;
+
+ _out: {}
+ }
+#line 210 "parser.rl"
+
+ if (cs >= JSON_integer_first_final) {
+ /* p is at the terminating character, so [memo, p) is the number text */
+ long len = p - json->memo;
+ *result = rb_Integer(rb_str_new(json->memo, len));
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 691 "parser.c"
+/* Ragel-generated state numbers of the JSON_float machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_float_start = 1;
+static const int JSON_float_first_final = 10;
+static const int JSON_float_error = 0;
+
+static const int JSON_float_en_main = 1;
+
+#line 232 "parser.rl"
+
+
+/*
+ * Parses a JSON floating point number starting at p: an optional '-', the
+ * integer part, then a fraction ('.' and digits) and/or an exponent
+ * ('e' or 'E', optional sign, digits). A number with neither a fraction
+ * nor an exponent is not accepted by this machine.
+ *
+ * The start of the number is remembered in json->memo. The machine only
+ * accepts once it sees a terminating character; hitting pe first leaves it
+ * in a non-accepting state.
+ *
+ * On success the matched text is converted with rb_Float, stored in
+ * *result, and p + 1 is returned. Returns NULL otherwise.
+ */
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 706 "parser.c"
+ {
+ cs = JSON_float_start;
+ }
+#line 239 "parser.rl"
+ json->memo = p;
+
+#line 713 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 45: goto st2;
+ case 48: goto st3;
+ }
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 48 )
+ goto st3;
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ switch( (*p) ) {
+ case 46: goto st4;
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ switch( (*p) ) {
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ if ( (*p) > 46 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ } else if ( (*p) >= 45 )
+ goto st0;
+ goto tr7;
+tr7:
+#line 226 "parser.rl"
+ { goto _out10; }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 777 "parser.c"
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 43: goto st7;
+ case 45: goto st7;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ goto st0;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 69: goto st0;
+ case 101: goto st0;
+ }
+ if ( (*p) > 46 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ } else if ( (*p) >= 45 )
+ goto st0;
+ goto tr7;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ switch( (*p) ) {
+ case 46: goto st4;
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out10: cs = 10; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+
+ _out: {}
+ }
+#line 241 "parser.rl"
+
+ if (cs >= JSON_float_first_final) {
+ /* p is at the terminating character, so [memo, p) is the number text */
+ long len = p - json->memo;
+ *result = rb_Float(rb_str_new(json->memo, len));
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+
+#line 850 "parser.c"
+/* Ragel-generated state numbers of the JSON_array machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_array_start = 1;
+static const int JSON_array_first_final = 17;
+static const int JSON_array_error = 0;
+
+static const int JSON_array_en_main = 1;
+
+#line 277 "parser.rl"
+
+
+/*
+ * Parses a JSON array starting at p (which must point at '[') into a new
+ * Ruby Array stored in *result.
+ *
+ * Raises eNestingError when depth checking is enabled (max_nesting != 0)
+ * and json->current_nesting exceeds json->max_nesting.
+ *
+ * The generated state machine skips whitespace and C and C++ style
+ * comments, parses each element with JSON_parse_value and stops at the
+ * closing ']'.
+ *
+ * Returns p + 1 when the machine ended in an accepting state. Unlike the
+ * other sub-parsers it never returns NULL: on failure it raises
+ * eParserError.
+ */
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+ /* NOTE(review): the message reads "to deep" (should be "too deep"); it
+ * comes from parser.rl, so fix it there and regenerate. */
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
+ rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+ }
+ *result = rb_ary_new();
+
+
+#line 870 "parser.c"
+ {
+ cs = JSON_array_start;
+ }
+#line 289 "parser.rl"
+
+#line 876 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 91 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 13: goto st2;
+ case 32: goto st2;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st13;
+ case 91: goto tr2;
+ case 93: goto tr4;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st2;
+ goto st0;
+tr2:
+#line 258 "parser.rl"
+ {
+ /* parse_value action: read one element and append it to the array */
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, p, pe, &v);
+ if (np == NULL) {
+ goto _out3;
+ } else {
+ rb_ary_push(*result, v);
+ {p = (( np))-1;}
+ }
+ }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 928 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st3;
+ case 32: goto st3;
+ case 44: goto st4;
+ case 47: goto st9;
+ case 93: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st3;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 13: goto st4;
+ case 32: goto st4;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st5;
+ case 91: goto tr2;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st4;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ switch( (*p) ) {
+ case 42: goto st6;
+ case 47: goto st8;
+ }
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) == 42 )
+ goto st7;
+ goto st6;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ switch( (*p) ) {
+ case 42: goto st7;
+ case 47: goto st4;
+ }
+ goto st6;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ if ( (*p) == 10 )
+ goto st4;
+ goto st8;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ switch( (*p) ) {
+ case 42: goto st10;
+ case 47: goto st12;
+ }
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ if ( (*p) == 42 )
+ goto st11;
+ goto st10;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ switch( (*p) ) {
+ case 42: goto st11;
+ case 47: goto st3;
+ }
+ goto st10;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+ if ( (*p) == 10 )
+ goto st3;
+ goto st12;
+tr4:
+#line 269 "parser.rl"
+ { goto _out17; }
+ goto st17;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+#line 1033 "parser.c"
+ goto st0;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+ switch( (*p) ) {
+ case 42: goto st14;
+ case 47: goto st16;
+ }
+ goto st0;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+ if ( (*p) == 42 )
+ goto st15;
+ goto st14;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+ switch( (*p) ) {
+ case 42: goto st15;
+ case 47: goto st2;
+ }
+ goto st14;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+ if ( (*p) == 10 )
+ goto st2;
+ goto st16;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out12: cs = 12; goto _out;
+ _out17: cs = 17; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out15: cs = 15; goto _out;
+ _out16: cs = 16; goto _out;
+
+ _out: {}
+ }
+#line 290 "parser.rl"
+
+ if(cs >= JSON_array_first_final) {
+ return p + 1;
+ } else {
+ /* rb_raise does not return, so no value is returned on this path */
+ rb_raise(eParserError, "unexpected token at '%s'", p);
+ }
+}
+
+/*
+ * Builds a new Ruby string from the raw contents of a JSON string literal,
+ * i.e. the bytes in [p, pe) between the quotes, resolving backslash
+ * escapes on the way.
+ *
+ * Handled escapes: \" and \\ (and any unknown escape) yield the escaped
+ * character itself; \b \f \n \r \t yield the matching control character;
+ * \u sequences are handed to JSON_convert_UTF16_to_UTF8.
+ *
+ * Returns the unescaped string, or Qnil when the input ends in the middle
+ * of an escape sequence (the caller turns that into a parse failure).
+ */
+static VALUE json_string_unescape(char *p, char *pe)
+{
+    VALUE result = rb_str_buf_new(pe - p + 1);
+
+    while (p < pe) {
+        if (*p == '\\') {
+            p++;
+            if (p >= pe) return Qnil; /* raise an exception later, \ at end */
+            switch (*p) {
+                case '"':
+                case '\\':
+                    rb_str_buf_cat(result, p, 1);
+                    p++;
+                    break;
+                case 'b':
+                    rb_str_buf_cat2(result, "\b");
+                    p++;
+                    break;
+                case 'f':
+                    rb_str_buf_cat2(result, "\f");
+                    p++;
+                    break;
+                case 'n':
+                    rb_str_buf_cat2(result, "\n");
+                    p++;
+                    break;
+                case 'r':
+                    rb_str_buf_cat2(result, "\r");
+                    p++;
+                    break;
+                case 't':
+                    rb_str_buf_cat2(result, "\t");
+                    p++;
+                    break;
+                case 'u':
+                    /* NOTE(review): p points at the 'u'; confirm against
+                     * JSON_convert_UTF16_to_UTF8 that this bound leaves room
+                     * for all four hex digits. */
+                    if (p > pe - 4) {
+                        return Qnil;
+                    } else {
+                        p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+                    }
+                    break;
+                default:
+                    /* unknown escape: keep the escaped character as is */
+                    rb_str_buf_cat(result, p, 1);
+                    p++;
+                    break;
+            }
+        } else {
+            /* Copy the whole run up to the next backslash in one go. The
+             * bounds check comes first so that *q is never read at pe. */
+            char *q = p;
+            while (q < pe && *q != '\\') q++;
+            rb_str_buf_cat(result, p, q - p);
+            p = q;
+        }
+    }
+    return result;
+}
+
+
+#line 1154 "parser.c"
+/* Ragel-generated state numbers of the JSON_string machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_string_start = 1;
+static const int JSON_string_first_final = 8;
+static const int JSON_string_error = 0;
+
+static const int JSON_string_en_main = 1;
+
+#line 368 "parser.rl"
+
+
+/*
+ * Parses a JSON string literal starting at p (which must point at the
+ * opening '"') and stores the unescaped Ruby string in *result.
+ *
+ * The position of the opening quote is remembered in json->memo. The
+ * machine rejects characters in the range 0..31 inside the literal and
+ * requires four hex digits after a backslash-u. At the closing quote the
+ * text between the quotes is passed to json_string_unescape; a nil result
+ * from there makes the parse fail.
+ *
+ * Returns p + 1 when the machine ended in an accepting state, NULL
+ * otherwise.
+ */
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+ *result = rb_str_new("", 0);
+
+#line 1170 "parser.c"
+ {
+ cs = JSON_string_start;
+ }
+#line 376 "parser.rl"
+ json->memo = p;
+
+#line 1177 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 34 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 34: goto tr2;
+ case 92: goto st3;
+ }
+ if ( 0 <= (*p) && (*p) <= 31 )
+ goto st0;
+ goto st2;
+tr2:
+#line 360 "parser.rl"
+ {
+ /* closing quote: unescape the text between the two quotes */
+ *result = json_string_unescape(json->memo + 1, p);
+ if (NIL_P(*result)) goto _out8; else {p = (( p + 1))-1;}
+ }
+#line 365 "parser.rl"
+ { goto _out8; }
+ goto st8;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+#line 1213 "parser.c"
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 117 )
+ goto st4;
+ if ( 0 <= (*p) && (*p) <= 31 )
+ goto st0;
+ goto st2;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st5;
+ } else
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st6;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st6;
+ } else
+ goto st6;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st7;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st7;
+ } else
+ goto st7;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st2;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st2;
+ } else
+ goto st2;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out8: cs = 8; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+
+ _out: {}
+ }
+#line 378 "parser.rl"
+
+ if (cs >= JSON_string_first_final) {
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+
+#line 1299 "parser.c"
+/* Ragel-generated state numbers of the top-level JSON machine: start state,
+ * first accepting state, error state and the entry point of `main`. */
+static const int JSON_start = 1;
+static const int JSON_first_final = 10;
+static const int JSON_error = 0;
+
+static const int JSON_en_main = 1;
+
+#line 412 "parser.rl"
+
+
+/*
+ * Document-class: JSON::Ext::Parser
+ *
+ * This is the JSON parser implemented as a C extension. It can be configured
+ * to be used by setting
+ *
+ * JSON.parser = JSON::Ext::Parser
+ *
+ * with the method parser= in JSON.
+ *
+ */
+
+/*
+ * call-seq: new(source, opts => {})
+ *
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
+ *
+ * It will be configured by the _opts_ hash. _opts_ can have the following
+ * keys:
+ *
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
+ *   structures. Disable depth checking with :max_nesting => false. The
+ *   depth defaults to 19.
+ */
+static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+{
+    VALUE source, opts;
+    char *ptr;
+    long len;
+    GET_STRUCT;
+
+    rb_scan_args(argc, argv, "11", &source, &opts);
+    source = StringValue(source);
+    ptr = RSTRING_PTR(source);
+    len = RSTRING_LEN(source);
+    if (len < 2) {
+        rb_raise(eParserError, "A JSON text must at least contain two octets!");
+    }
+
+    /* Default depth limit; an explicit :max_nesting entry overrides it. */
+    json->max_nesting = 19;
+    if (!NIL_P(opts)) {
+        VALUE key;
+
+        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+        if (NIL_P(opts)) {
+            rb_raise(rb_eArgError, "opts needs to be like a hash");
+        }
+        key = ID2SYM(i_max_nesting);
+        /* Only touch the limit when the key is really present in opts. */
+        if (st_lookup(RHASH(opts)->tbl, key, 0)) {
+            VALUE limit = rb_hash_aref(opts, key);
+            if (!RTEST(limit)) {
+                /* nil or false: 0 switches the depth check off */
+                json->max_nesting = 0;
+            } else {
+                Check_Type(limit, T_FIXNUM);
+                json->max_nesting = FIX2INT(limit);
+            }
+        }
+    }
+    json->current_nesting = 0;
+
+    /*
+     * The source bytes are stored as they are; no check for (or conversion
+     * from) other encodings such as UTF-16 or UTF-32 is done here.
+     */
+    json->Vsource = source;
+    json->source = ptr;
+    json->len = len;
+    json->create_id = rb_funcall(mJSON, i_create_id, 0);
+    return self;
+}
+
+/*
+ * call-seq: parse()
+ *
+ * Parses the current JSON text _source_ and returns the complete data
+ * structure as a result.
+ *
+ * The top-level value has to be an array or an object; leading and
+ * trailing whitespace and C and C++ style comments are skipped. Raises
+ * JSON::ParserError unless the whole source was consumed in an accepting
+ * state.
+ */
+static VALUE cParser_parse(VALUE self)
+{
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
+ GET_STRUCT;
+
+
+#line 1400 "parser.c"
+ {
+ cs = JSON_start;
+ }
+#line 505 "parser.rl"
+ p = json->source;
+ pe = p + json->len;
+
+#line 1408 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+ switch( (*p) ) {
+ case 13: goto st1;
+ case 32: goto st1;
+ case 47: goto st2;
+ case 91: goto tr3;
+ case 123: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st1;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 42: goto st3;
+ case 47: goto st5;
+ }
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 42 )
+ goto st4;
+ goto st3;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 42: goto st4;
+ case 47: goto st1;
+ }
+ goto st3;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 10 )
+ goto st1;
+ goto st5;
+tr3:
+#line 401 "parser.rl"
+ {
+ /* top-level array: nesting depth starts at 1 */
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_array(json, p, pe, &result);
+ if (np == NULL) goto _out10; else {p = (( np))-1;}
+ }
+ goto st10;
+tr4:
+#line 394 "parser.rl"
+ {
+ /* top-level object: nesting depth starts at 1 */
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_object(json, p, pe, &result);
+ if (np == NULL) goto _out10; else {p = (( np))-1;}
+ }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 1484 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st10;
+ case 32: goto st10;
+ case 47: goto st6;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st10;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 42: goto st7;
+ case 47: goto st9;
+ }
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 42 )
+ goto st8;
+ goto st7;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 42: goto st8;
+ case 47: goto st10;
+ }
+ goto st7;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ if ( (*p) == 10 )
+ goto st10;
+ goto st9;
+ }
+ _out1: cs = 1; goto _out;
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out10: cs = 10; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+
+ _out: {}
+ }
+#line 508 "parser.rl"
+
+ /* Success needs both an accepting state and a fully consumed source. */
+ if (cs >= JSON_first_final && p == pe) {
+ return result;
+ } else {
+ rb_raise(eParserError, "unexpected token at '%s'", p);
+ }
+}
+
+/*
+ * Allocates a zero-filled JSON_Parser with Ruby's ALLOC and returns it.
+ * Declared with a (void) prototype so that calls with arguments are
+ * rejected at compile time.
+ */
+static JSON_Parser *JSON_allocate(void)
+{
+    JSON_Parser *json = ALLOC(JSON_Parser);
+    MEMZERO(json, JSON_Parser, 1);
+    return json;
+}
+
+/*
+ * GC mark callback: marks the Ruby values held by the parser struct (the
+ * create_id value and the source string).
+ */
+static void JSON_mark(JSON_Parser *json)
+{
+    rb_gc_mark_maybe(json->create_id);
+    rb_gc_mark_maybe(json->Vsource);
+}
+
+/*
+ * GC free callback: releases the parser struct. The struct comes from
+ * Ruby's ALLOC (see JSON_allocate), so it is handed back through Ruby's
+ * xfree rather than the C library's free().
+ */
+static void JSON_free(JSON_Parser *json)
+{
+    xfree(json);
+}
+
+/*
+ * Allocation function of JSON::Ext::Parser: wraps a fresh, zeroed
+ * JSON_Parser in a Ruby object of class klass, with JSON_mark and
+ * JSON_free as its GC callbacks.
+ */
+static VALUE cJSON_parser_s_allocate(VALUE klass)
+{
+    JSON_Parser *parser;
+
+    parser = JSON_allocate();
+    return Data_Wrap_Struct(klass, JSON_mark, JSON_free, parser);
+}
+
+/*
+ * call-seq: source()
+ *
+ * Returns a copy of the current _source_ string, that was used to construct
+ * this Parser.
+ */
+static VALUE cParser_source(VALUE self)
+{
+    JSON_Parser *json;
+
+    /* same as GET_STRUCT, written out */
+    Data_Get_Struct(self, JSON_Parser, json);
+    return rb_str_dup(json->Vsource);
+}
+
+/*
+ * Extension entry point: interns the method and option names used by the
+ * parser, defines JSON::Ext::Parser with its allocator and instance
+ * methods, and looks up the JSON::ParserError and JSON::NestingError
+ * classes, which have to exist already.
+ */
+void Init_parser()
+{
+    /* names used with rb_funcall and in the options hash */
+    i_json_creatable_p = rb_intern("json_creatable?");
+    i_json_create = rb_intern("json_create");
+    i_create_id = rb_intern("create_id");
+    i_chr = rb_intern("chr");
+    i_max_nesting = rb_intern("max_nesting");
+
+    /* module / class hierarchy */
+    mJSON = rb_define_module("JSON");
+    mExt = rb_define_module_under(mJSON, "Ext");
+    cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
+    eParserError = rb_path2class("JSON::ParserError");
+    eNestingError = rb_path2class("JSON::NestingError");
+
+    /* allocator and public instance methods */
+    rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
+    rb_define_method(cParser, "initialize", cParser_initialize, -1);
+    rb_define_method(cParser, "parse", cParser_parse, 0);
+    rb_define_method(cParser, "source", cParser_source, 0);
+}
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
new file mode 100644
index 0000000000..9ce8c6fc24
--- /dev/null
+++ b/ext/json/ext/parser/parser.rl
@@ -0,0 +1,569 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include "ruby.h"
+#include "re.h"
+#include "st.h"
+#include "unicode.h"
+
+#define EVIL 0x666
+
+static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
+
+static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting;
+
+typedef struct JSON_ParserStruct {
+ VALUE Vsource;
+ char *source;
+ long len;
+ char *memo;
+ VALUE create_id;
+ int max_nesting;
+ int current_nesting;
+} JSON_Parser;
+
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+
+#define GET_STRUCT \
+ JSON_Parser *json; \
+ Data_Get_Struct(self, JSON_Parser, json);
+
+%%{
+ machine JSON_common;
+
+ cr = '\n';
+ cr_neg = [^\n];
+ ws = [ \t\r\n];
+ c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
+ cpp_comment = '//' cr_neg* cr;
+ comment = c_comment | cpp_comment;
+ ignore = ws | comment;
+ name_separator = ':';
+ value_separator = ',';
+ Vnull = 'null';
+ Vfalse = 'false';
+ Vtrue = 'true';
+ begin_value = [nft"\-[{] | digit;
+ begin_object = '{';
+ end_object = '}';
+ begin_array = '[';
+ end_array = ']';
+ begin_string = '"';
+ begin_name = begin_string;
+ begin_number = digit | '-';
+}%%
+
+%%{
+ machine JSON_object;
+ include JSON_common;
+
+ write data;
+
+ action parse_value {
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, fpc, pe, &v);
+ if (np == NULL) {
+ fbreak;
+ } else {
+ rb_hash_aset(*result, last_name, v);
+ fexec np;
+ }
+ }
+
+ action parse_name {
+ char *np = JSON_parse_string(json, fpc, pe, &last_name);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action exit { fbreak; }
+
+ a_pair = ignore* begin_name >parse_name
+ ignore* name_separator ignore*
+ begin_value >parse_value;
+
+ main := begin_object
+ (a_pair (ignore* value_separator a_pair)*)?
+ ignore* end_object @exit;
+}%%
+
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Parses one JSON object beginning at p (input ends at pe) into a new Hash
+   * stored in *result.  Returns p + 1 once the JSON_object machine has
+   * stopped in a final state, NULL otherwise; raises JSON::NestingError when
+   * the current nesting depth exceeds max_nesting. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+ VALUE last_name = Qnil; /* key most recently read by the parse_name action */
+
+ if (json->max_nesting && json->current_nesting > json->max_nesting) { /* max_nesting == 0 disables the check */
+ rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+ }
+
+ *result = rb_hash_new();
+
+ %% write init;
+ %% write exec;
+
+ if (cs >= JSON_object_first_final) {
+ VALUE klassname = rb_hash_aref(*result, json->create_id); /* value stored under the create_id key, nil if absent */
+ if (!NIL_P(klassname)) {
+ VALUE klass = rb_path2class(StringValueCStr(klassname)); /* resolve the named class */
+ if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { /* only classes answering true to json_creatable? are used */
+ *result = rb_funcall(klass, i_json_create, 1, *result); /* replace the Hash by klass.json_create(hash) */
+ }
+ }
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+%%{
+ machine JSON_value;
+ include JSON_common;
+
+ write data;
+
+ action parse_null {
+ *result = Qnil;
+ }
+ action parse_false {
+ *result = Qfalse;
+ }
+ action parse_true {
+ *result = Qtrue;
+ }
+ action parse_string {
+ char *np = JSON_parse_string(json, fpc, pe, result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_number {
+ char *np;
+ np = JSON_parse_float(json, fpc, pe, result);
+ if (np != NULL) fexec np;
+ np = JSON_parse_integer(json, fpc, pe, result);
+ if (np != NULL) fexec np;
+ fbreak;
+ }
+
+ action parse_array {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_array(json, fpc, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_object {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_object(json, fpc, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action exit { fbreak; }
+
+main := (
+ Vnull @parse_null |
+ Vfalse @parse_false |
+ Vtrue @parse_true |
+ begin_number >parse_number |
+ begin_string >parse_string |
+ begin_array >parse_array |
+ begin_object >parse_object
+ ) %*exit;
+}%%
+
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Parses a single JSON value beginning at p (input ends at pe) with the
+   * JSON_value machine; the machine's actions store the value in *result.
+   * Returns p as left by the machine on success, NULL otherwise. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+
+ %% write init;
+ %% write exec;
+
+ if (cs >= JSON_value_first_final) {
+ return p; /* unlike the other parse functions, no + 1 here */
+ } else {
+ return NULL;
+ }
+}
+
+%%{
+ machine JSON_integer;
+
+ write data;
+
+ action exit { fbreak; }
+
+ main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
+}%%
+
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Tries to match an integer literal beginning at p with the JSON_integer
+   * machine.  On success stores the converted number in *result and returns
+   * p + 1; returns NULL if the machine did not reach a final state. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+
+ %% write init;
+ json->memo = p; /* remember where the literal starts */
+ %% write exec;
+
+ if (cs >= JSON_integer_first_final) {
+ long len = p - json->memo; /* literal text is [memo, p) */
+ *result = rb_Integer(rb_str_new(json->memo, len)); /* convert via a temporary Ruby String */
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+%%{
+ machine JSON_float;
+ include JSON_common;
+
+ write data;
+
+ action exit { fbreak; }
+
+ main := '-'? (
+ (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
+ | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
+ ) (^[0-9Ee.\-] @exit );
+}%%
+
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Tries to match a float literal (fraction and/or exponent) beginning at
+   * p with the JSON_float machine.  On success stores the converted number
+   * in *result and returns p + 1; returns NULL if the machine did not reach
+   * a final state. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+
+ %% write init;
+ json->memo = p; /* remember where the literal starts */
+ %% write exec;
+
+ if (cs >= JSON_float_first_final) {
+ long len = p - json->memo; /* literal text is [memo, p) */
+ *result = rb_Float(rb_str_new(json->memo, len)); /* convert via a temporary Ruby String */
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+%%{
+ machine JSON_array;
+ include JSON_common;
+
+ write data;
+
+ action parse_value {
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, fpc, pe, &v);
+ if (np == NULL) {
+ fbreak;
+ } else {
+ rb_ary_push(*result, v);
+ fexec np;
+ }
+ }
+
+ action exit { fbreak; }
+
+ next_element = value_separator ignore* begin_value >parse_value;
+
+ main := begin_array ignore*
+ ((begin_value >parse_value ignore*)
+ (ignore* next_element ignore*)*)?
+ end_array @exit;
+}%%
+
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Parses one JSON array beginning at p (input ends at pe) into a new Array
+   * stored in *result and returns p + 1 on success.  Raises
+   * JSON::NestingError when the current nesting depth exceeds max_nesting.
+   * Unlike the other parse functions it does not return NULL on failure but
+   * raises JSON::ParserError itself. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+
+ if (json->max_nesting && json->current_nesting > json->max_nesting) { /* max_nesting == 0 disables the check */
+ rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+ }
+ *result = rb_ary_new();
+
+ %% write init;
+ %% write exec;
+
+ if(cs >= JSON_array_first_final) {
+ return p + 1;
+ } else {
+ rb_raise(eParserError, "unexpected token at '%s'", p); /* message shows the input from p onwards */
+ }
+}
+
+/*
+ * Builds a new Ruby String from the raw contents of a JSON string, i.e. the
+ * bytes in [p, pe) between the quotes, resolving backslash escapes:
+ *
+ *  - \" and \\ yield the escaped character itself,
+ *  - \b \f \n \r \t yield the corresponding control character,
+ *  - \uXXXX sequences are handed to JSON_convert_UTF16_to_UTF8,
+ *  - any other escaped character is copied as is.
+ *
+ * Returns the unescaped String, or Qnil if the input stops in the middle of
+ * an escape sequence.
+ */
+static VALUE json_string_unescape(char *p, char *pe)
+{
+    VALUE result = rb_str_buf_new(pe - p + 1);
+
+    while (p < pe) {
+        if (*p == '\\') {
+            p++;
+            if (p >= pe) return Qnil; /* raise an exception later, \ at end */
+            switch (*p) {
+                case '"':
+                case '\\':
+                    rb_str_buf_cat(result, p, 1);
+                    p++;
+                    break;
+                case 'b':
+                    rb_str_buf_cat2(result, "\b");
+                    p++;
+                    break;
+                case 'f':
+                    rb_str_buf_cat2(result, "\f");
+                    p++;
+                    break;
+                case 'n':
+                    rb_str_buf_cat2(result, "\n");
+                    p++;
+                    break;
+                case 'r':
+                    rb_str_buf_cat2(result, "\r");
+                    p++;
+                    break;
+                case 't':
+                    rb_str_buf_cat2(result, "\t");
+                    p++;
+                    break;
+                case 'u':
+                    if (p > pe - 4) {
+                        return Qnil;
+                    } else {
+                        /* Consumes this and all directly following \u escapes. */
+                        p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+                    }
+                    break;
+                default:
+                    rb_str_buf_cat(result, p, 1);
+                    p++;
+                    break;
+            }
+        } else {
+            /* Copy the whole run of unescaped bytes in one go.  The bound
+             * is tested before the byte is read so that *q is never
+             * evaluated at pe. */
+            char *q = p;
+            while (q < pe && *q != '\\') q++;
+            rb_str_buf_cat(result, p, q - p);
+            p = q;
+        }
+    }
+    return result;
+}
+
+%%{
+ machine JSON_string;
+ include JSON_common;
+
+ write data;
+
+ action parse_string {
+ *result = json_string_unescape(json->memo + 1, p);
+ if (NIL_P(*result)) fbreak; else fexec p + 1;
+ }
+
+ action exit { fbreak; }
+
+ main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
+}%%
+
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{ /* Parses a JSON string beginning at the opening quote at p (input ends at
+   * pe) with the JSON_string machine.  Returns p + 1 on success, NULL if the
+   * machine did not reach a final state. */
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+
+ *result = rb_str_new("", 0); /* placeholder; the parse_string action assigns the unescaped String */
+ %% write init;
+ json->memo = p; /* the parse_string action unescapes from memo + 1, skipping the opening quote */
+ %% write exec;
+
+ if (cs >= JSON_string_first_final) {
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+%%{
+ machine JSON;
+
+ write data;
+
+ include JSON_common;
+
+ action parse_object {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_object(json, fpc, pe, &result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_array {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_array(json, fpc, pe, &result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ main := ignore* (
+ begin_object >parse_object |
+ begin_array >parse_array
+ ) ignore*;
+}%%
+
+/*
+ * Document-class: JSON::Ext::Parser
+ *
+ * This is the JSON parser implemented as a C extension. It can be configured
+ * to be used by setting
+ *
+ * JSON.parser = JSON::Ext::Parser
+ *
+ * with the method parser= in JSON.
+ *
+ */
+
+/*
+ * call-seq: new(source, opts => {})
+ *
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
+ *
+ * It will be configured by the _opts_ hash. _opts_ can have the following
+ * keys:
+ *
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
+ *   structures. Defaults to 19. Disable depth checking with
+ *   :max_nesting => false.
+ */
+static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+{
+    char *ptr;
+    long len;
+    VALUE source, opts;
+    GET_STRUCT;
+    /* "11": one mandatory argument (source), one optional (opts). */
+    rb_scan_args(argc, argv, "11", &source, &opts);
+    source = StringValue(source);
+    ptr = RSTRING_PTR(source);
+    len = RSTRING_LEN(source);
+    if (len < 2) {
+        rb_raise(eParserError, "A JSON text must at least contain two octets!");
+    }
+    json->max_nesting = 19; /* default depth limit unless opts overrides it */
+    if (!NIL_P(opts)) {
+        VALUE s_max_nesting;
+        /*
+         * rb_convert_type either returns a T_HASH or raises a TypeError; it
+         * never returns nil, so no nil check is needed on its result.
+         */
+        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+        s_max_nesting = ID2SYM(i_max_nesting);
+        /* Only touch the default when the key is actually present. */
+        if (st_lookup(RHASH(opts)->tbl, s_max_nesting, 0)) {
+            VALUE max_nesting = rb_hash_aref(opts, s_max_nesting);
+            if (RTEST(max_nesting)) {
+                Check_Type(max_nesting, T_FIXNUM);
+                json->max_nesting = FIX2INT(max_nesting);
+            } else {
+                /* nil/false: 0 switches the depth check off. */
+                json->max_nesting = 0;
+            }
+        }
+    }
+    json->current_nesting = 0;
+    /*
+       Convert these?
+    if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
+        rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
+    } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
+        rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
+    } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
+        rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
+    } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
+        rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
+    }
+    */
+    json->len = len;
+    json->source = ptr;
+    json->Vsource = source;
+    /* create_id is whatever JSON.create_id returns at construction time. */
+    json->create_id = rb_funcall(mJSON, i_create_id, 0);
+    return self;
+}
+
+/*
+ * call-seq: parse()
+ *
+ * Parses the current JSON text _source_ and returns the complete data
+ * structure as a result.
+ *
+ * Raises a JSON::ParserError if the text cannot be parsed up to its end.
+ */
+static VALUE cParser_parse(VALUE self)
+{
+ char *p, *pe;
+ int cs = EVIL; /* dummy state value; presumably replaced by the generated init code */
+ VALUE result = Qnil;
+ GET_STRUCT; /* declares json and points it at the JSON_Parser wrapped by self */
+
+ %% write init;
+ p = json->source; /* start of the text stored by initialize */
+ pe = p + json->len; /* one past its last byte */
+ %% write exec;
+
+ if (cs >= JSON_first_final && p == pe) { /* final state reached AND all input consumed */
+ return result;
+ } else {
+ rb_raise(eParserError, "unexpected token at '%s'", p); /* message shows the input from p onwards */
+ }
+}
+
+/*
+ * Allocates a JSON_Parser with Ruby's ALLOC() and clears all of its fields
+ * to zero before returning it.
+ *
+ * Declared with a (void) parameter list so that the compiler treats it as a
+ * real prototype and rejects calls that pass arguments.
+ */
+static JSON_Parser *JSON_allocate(void)
+{
+    JSON_Parser *json = ALLOC(JSON_Parser);
+    MEMZERO(json, JSON_Parser, 1);
+    return json;
+}
+
+/*
+ * GC mark callback for a wrapped JSON_Parser: reports the two VALUE members
+ * of the struct (the source String and the create_id) to the collector.
+ */
+static void JSON_mark(JSON_Parser *json)
+{
+    VALUE source_string = json->Vsource;
+    VALUE create_id = json->create_id;
+
+    rb_gc_mark_maybe(source_string);
+    rb_gc_mark_maybe(create_id);
+}
+
+/*
+ * Free callback for a wrapped JSON_Parser.
+ *
+ * The struct comes from ALLOC() in JSON_allocate(), i.e. from Ruby's
+ * xmalloc family, so it is released with the matching xfree() rather than
+ * with the C library's free().
+ */
+static void JSON_free(JSON_Parser *json)
+{
+    xfree(json);
+}
+
+/*
+ * Allocation function of the Parser class: wraps a zeroed JSON_Parser in a
+ * new object of class klass, using JSON_mark and JSON_free as its mark and
+ * free callbacks.
+ */
+static VALUE cJSON_parser_s_allocate(VALUE klass)
+{
+    return Data_Wrap_Struct(klass, JSON_mark, JSON_free, JSON_allocate());
+}
+
+/*
+ * call-seq: source()
+ *
+ * Returns a copy of the current _source_ string that was used to construct
+ * this Parser.
+ */
+static VALUE cParser_source(VALUE self)
+{
+ GET_STRUCT; /* declares json and points it at the JSON_Parser wrapped by self */
+ return rb_str_dup(json->Vsource); /* hand out a duplicate, not the String held by the parser */
+}
+
+/*
+ * Entry point of the extension.  Defines JSON::Ext::Parser with its
+ * allocation function and its initialize, parse and source methods, looks
+ * up the JSON::ParserError and JSON::NestingError classes, and interns the
+ * method and option names used by the parser.
+ *
+ * Declared with a (void) parameter list so that it is a real prototype.
+ */
+void Init_parser(void)
+{
+    mJSON = rb_define_module("JSON");
+    mExt = rb_define_module_under(mJSON, "Ext");
+    cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
+    /* The error classes are resolved by path here, not defined. */
+    eParserError = rb_path2class("JSON::ParserError");
+    eNestingError = rb_path2class("JSON::NestingError");
+    rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
+    /* -1: initialize receives its arguments as (argc, argv, self). */
+    rb_define_method(cParser, "initialize", cParser_initialize, -1);
+    rb_define_method(cParser, "parse", cParser_parse, 0);
+    rb_define_method(cParser, "source", cParser_source, 0);
+
+    i_json_creatable_p = rb_intern("json_creatable?");
+    i_json_create = rb_intern("json_create");
+    i_create_id = rb_intern("create_id");
+    i_chr = rb_intern("chr");
+    i_max_nesting = rb_intern("max_nesting");
+}
diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c
new file mode 100644
index 0000000000..609a0e83e2
--- /dev/null
+++ b/ext/json/ext/parser/unicode.c
@@ -0,0 +1,156 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include "unicode.h"
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/*
+ * Converts a run of directly adjacent \uXXXX escapes to UTF-8 and appends
+ * the bytes to the Ruby String buffer.
+ *
+ * source points at the 'u' of the first escape (the byte before it is the
+ * backslash); sourceEnd bounds the scan for further escapes.  All escapes
+ * of the run are decoded together so that a high/low surrogate pair can be
+ * combined into a single code point.
+ *
+ * With flags == strictConversion an unpaired high or low surrogate raises
+ * JSON::ParserError.  A high surrogate that is the last escape of the run
+ * raises JSON::ParserError regardless of flags.
+ *
+ * Returns a pointer just past the last hex digit of the last escape of the
+ * run.
+ *
+ * The scratch array is obtained with ALLOC_N (Ruby's xmalloc family) and is
+ * therefore released with xfree on every exit path, including right before
+ * each rb_raise, which does not return.
+ */
+char *JSON_convert_UTF16_to_UTF8 (
+        VALUE buffer,
+        char *source,
+        char *sourceEnd,
+        ConversionFlags flags)
+{
+    UTF16 *tmp, *tmpPtr, *tmpEnd;
+    char buf[5];
+    long n = 0, i;
+    char *p = source - 1;
+
+    /* Count the escapes in the run; each one is 6 bytes long. */
+    while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
+        p += 6;
+        n++;
+    }
+    p = source + 1;
+    buf[4] = 0;
+    tmpPtr = tmp = ALLOC_N(UTF16, n);
+    tmpEnd = tmp + n;
+    /* Decode the four hex digits of every escape into a UTF-16 unit. */
+    for (i = 0; i < n; i++) {
+        buf[0] = *p++;
+        buf[1] = *p++;
+        buf[2] = *p++;
+        buf[3] = *p++;
+        tmpPtr[i] = strtol(buf, NULL, 16);
+        p += 2; /* skip the "\u" of the next escape */
+    }
+
+    while (tmpPtr < tmpEnd) {
+        UTF32 ch;
+        unsigned short bytesToWrite = 0;
+        const UTF32 byteMask = 0xBF;
+        const UTF32 byteMark = 0x80;
+        ch = *tmpPtr++;
+        /* If we have a surrogate pair, convert to UTF32 first. */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+            /* If the 16 bits following the high surrogate are in the source
+             * buffer... */
+            if (tmpPtr < tmpEnd) {
+                UTF32 ch2 = *tmpPtr;
+                /* If it's a low surrogate, convert to UTF32. */
+                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                    ++tmpPtr;
+                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+                    xfree(tmp);
+                    rb_raise(rb_path2class("JSON::ParserError"),
+                            "source sequence is illegal/malformed near %s", source);
+                }
+            } else { /* We don't have the 16 bits following the high surrogate. */
+                xfree(tmp);
+                rb_raise(rb_path2class("JSON::ParserError"),
+                        "partial character in source, but hit end near %s", source);
+            }
+        } else if (flags == strictConversion) {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+                xfree(tmp);
+                rb_raise(rb_path2class("JSON::ParserError"),
+                        "source sequence is illegal/malformed near %s", source);
+            }
+        }
+        /* Figure out how many bytes the result will require */
+        if (ch < (UTF32) 0x80) {
+            bytesToWrite = 1;
+        } else if (ch < (UTF32) 0x800) {
+            bytesToWrite = 2;
+        } else if (ch < (UTF32) 0x10000) {
+            bytesToWrite = 3;
+        } else if (ch < (UTF32) 0x110000) {
+            bytesToWrite = 4;
+        } else {
+            bytesToWrite = 3;
+            ch = UNI_REPLACEMENT_CHAR;
+        }
+
+        buf[0] = 0;
+        buf[1] = 0;
+        buf[2] = 0;
+        buf[3] = 0;
+        /* Fill buf from the last byte backwards. */
+        p = buf + bytesToWrite;
+        switch (bytesToWrite) { /* note: everything falls through. */
+            case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
+            case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
+            case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6;
+            case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]);
+        }
+        rb_str_buf_cat(buffer, p, bytesToWrite);
+    }
+    xfree(tmp);
+    /* 5 = 'u' plus four hex digits of the first escape; 6 per further one. */
+    source += 5 + (n - 1) * 6;
+    return source;
+}
diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h
new file mode 100755
index 0000000000..155da0ceee
--- /dev/null
+++ b/ext/json/ext/parser/unicode.h
@@ -0,0 +1,58 @@
+
+#ifndef _PARSER_UNICODE_H_
+#define _PARSER_UNICODE_H_
+
+#include "ruby.h"
+
+typedef unsigned long UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+typedef enum {
+ conversionOK = 0, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted, /* insuff. room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum {
+ strictConversion = 0,
+ lenientConversion
+} ConversionFlags;
+
+char *JSON_convert_UTF16_to_UTF8 (
+ VALUE buffer,
+ char *source,
+ char *sourceEnd,
+ ConversionFlags flags);
+
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#endif
diff --git a/ext/nkf/nkf-utf8/nkf.c b/ext/nkf/nkf-utf8/nkf.c
index 3cd1b160da..30bb6c47b8 100644
--- a/ext/nkf/nkf-utf8/nkf.c
+++ b/ext/nkf/nkf-utf8/nkf.c
@@ -41,7 +41,7 @@
***********************************************************************/
/* $Id$ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-01-28"
+#define NKF_RELEASE_DATE "2007-05-28"
#include "config.h"
#include "utf8tbl.h"
@@ -351,10 +351,12 @@ static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
* 0: Shift_JIS, eucJP-ascii
* 1: eucJP-ms
* 2: CP932, CP51932
+ * 3: CP10001
*/
-#define UCS_MAP_ASCII 0
-#define UCS_MAP_MS 1
-#define UCS_MAP_CP932 2
+#define UCS_MAP_ASCII 0
+#define UCS_MAP_MS 1
+#define UCS_MAP_CP932 2
+#define UCS_MAP_CP10001 3
static int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
@@ -1233,6 +1235,14 @@ void options(unsigned char *cp)
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ input_f = SJIS_INPUT;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
input_f = EUC_INPUT;
@@ -1371,6 +1381,11 @@ void options(unsigned char *cp)
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ output_conv = s_oconv;
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
output_conv = e_oconv;
@@ -2676,6 +2691,12 @@ nkf_char kanji_convert(FILE *f)
} else { /* bogus code, skip SSO and one byte */
NEXT;
}
+ } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
+ (c1 == 0xFD || c1 == 0xFE)) {
+ /* CP10001 */
+ c2 = X0201;
+ c1 &= 0x7f;
+ SEND;
} else {
/* already established */
c2 = c1;
@@ -2885,35 +2906,41 @@ nkf_char kanji_convert(FILE *f)
(*oconv)(0, ESC);
SEND;
}
- } else if ((c1 == NL || c1 == CR) && broken_f&4) {
- input_mode = ASCII; set_iconv(FALSE, 0);
- SEND;
- } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
- if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else {
- i_ungetc(c1,f);
- }
- c1 = NL;
- SEND;
- } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
- if ((c1=(*i_getc)(f))!=EOF) {
- if (c1==SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
- i_ungetc(SPACE,f);
- continue;
- } else {
- i_ungetc(c1,f);
+ } else if (c1 == NL || c1 == CR) {
+ if (broken_f&4) {
+ input_mode = ASCII; set_iconv(FALSE, 0);
+ SEND;
+ } else if (mime_decode_f && !mime_decode_mode){
+ if (c1 == NL) {
+ if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = NL;
+ SEND;
+ } else { /* if (c1 == CR)*/
+ if ((c1=(*i_getc)(f))!=EOF) {
+ if (c1==SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
+ i_ungetc(SPACE,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ i_ungetc(NL,f);
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = CR;
+ SEND;
}
- i_ungetc(NL,f);
- } else {
- i_ungetc(c1,f);
}
- c1 = CR;
- SEND;
+ if (crmode_f == CR && c1 == NL) crmode_f = CRLF;
+ else crmode_f = c1;
} else if (c1 == DEL && input_mode == X0208 ) {
/* CP5022x */
c2 = c1;
@@ -3125,9 +3152,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
if (!cp932inv_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_cp932[3][189];
-#endif
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
c2 = val >> 8;
@@ -3136,9 +3160,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
}
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@@ -3148,9 +3169,6 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (!x0213_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_x0212[3][189];
-#endif
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val > 0x7FFF){
@@ -3481,14 +3499,6 @@ nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
-#if 0
- extern const unsigned short *const utf8_to_euc_2bytes[];
- extern const unsigned short *const utf8_to_euc_2bytes_ms[];
- extern const unsigned short *const utf8_to_euc_2bytes_932[];
- extern const unsigned short *const *const utf8_to_euc_3bytes[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
-#endif
const unsigned short *const *pp;
const unsigned short *const *const *ppp;
static const int no_best_fit_chars_table_C2[] =
@@ -3538,11 +3548,27 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
}
}else if(ms_ucs_map_f == UCS_MAP_MS){
if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xC2:
+ switch(c1){
+ case 0xA2:
+ case 0xA3:
+ case 0xA5:
+ case 0xA6:
+ case 0xAC:
+ case 0xAF:
+ case 0xB8:
+ return 1;
+ }
+ break;
+ }
}
}
pp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
utf8_to_euc_2bytes;
ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
}else if(c0 < 0xF0){
@@ -3565,6 +3591,19 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
if(c1 == 0x80 || c0 == 0x9C) return 1;
break;
}
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xE3:
+ switch(c1){
+ case 0x82:
+ if(c0 == 0x94) return 1;
+ break;
+ case 0x83:
+ if(c0 == 0xBB) return 1;
+ break;
+ }
+ break;
+ }
}else{
switch(c2){
case 0xE2:
@@ -3596,8 +3635,10 @@ nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *
ppp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
utf8_to_euc_3bytes;
ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
+// fprintf(stderr, "wret: %X %X %X -> %X %X\n",c2,c1,c0,*p2,*p1,ret);
}else return -1;
#ifdef SHIFTJIS_CP932
if (!ret && !cp932inv_f && is_eucg3(*p2)) {
@@ -3739,15 +3780,17 @@ void encode_fallback_subchar(nkf_char c)
#ifdef UTF8_OUTPUT_ENABLE
nkf_char e2w_conv(nkf_char c2, nkf_char c1)
{
-#if 0
- extern const unsigned short euc_to_utf8_1byte[];
- extern const unsigned short *const euc_to_utf8_2bytes[];
- extern const unsigned short *const euc_to_utf8_2bytes_ms[];
- extern const unsigned short *const x0212_to_utf8_2bytes[];
-#endif
const unsigned short *p;
if (c2 == X0201) {
+ if (ms_ucs_map_f == UCS_MAP_CP10001) {
+ switch (c1) {
+ case 0x20:
+ return 0xA0;
+ case 0x7D:
+ return 0xA9;
+ }
+ }
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (is_eucg3(c2)){
@@ -3764,7 +3807,10 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1)
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
+ p =
+ ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
+ euc_to_utf8_2bytes_ms[c2];
else
return 0;
}
@@ -4069,9 +4115,6 @@ nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
else if(nkf_isgraph(ndx)){
nkf_char val = 0;
const unsigned short *ptr;
-#if 0
- extern const unsigned short *const x0212_shiftjis[];
-#endif
ptr = x0212_shiftjis[ndx - 0x21];
if (ptr){
val = ptr[(c1 & 0x7f) - 0x21];
@@ -4147,9 +4190,6 @@ void s_oconv(nkf_char c2, nkf_char c1)
#ifdef SHIFTJIS_CP932
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
@@ -4539,6 +4579,10 @@ void z_conv(nkf_char c2, nkf_char c1)
/* if (c2) c1 &= 0x7f; assertion */
+ if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
+ (*o_zconv)(c2,c1);
+ return;
+ }
if (x0201_f && z_prev2==X0201) { /* X0201 */
if (c1==(0xde&0x7f)) { /* 濁点 */
z_prev2=0;
@@ -4942,15 +4986,20 @@ void set_input_codename(char *codename)
void print_guessed_code(char *filename)
{
char *codename = "BINARY";
+ char *str_crmode = NULL;
if (!is_inputcode_mixed) {
if (strcmp(input_codename, "") == 0) {
codename = "ASCII";
} else {
codename = input_codename;
}
+ if (crmode_f == CR) str_crmode = "CR";
+ else if (crmode_f == NL) str_crmode = "LF";
+ else if (crmode_f == CRLF) str_crmode = "CRLF";
}
if (filename != NULL) printf("%s:", filename);
- printf("%s\n", codename);
+ if (str_crmode != NULL) printf("%s (%s)\n", codename, str_crmode);
+ else printf("%s\n", codename);
}
#endif /*WIN32DLL*/
@@ -5068,9 +5117,6 @@ nkf_char nfc_getc(FILE *f)
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
const nkf_nfchar *array;
-#if 0
- extern const struct normalization_pair normalization_table[];
-#endif
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
@@ -5437,7 +5483,7 @@ void open_mime(nkf_char mode)
int i;
int j;
p = mime_pattern[0];
- for(i=0;mime_encode[i];i++) {
+ for(i=0;mime_pattern[i];i++) {
if (mode == mime_encode[i]) {
p = mime_pattern[i];
break;
@@ -5643,10 +5689,21 @@ void mime_putc(nkf_char c)
if (mimeout_mode=='Q') {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
- if (c <= SPACE) {
+ if (c == CR || c == NL) {
+ close_mime();
+ (*o_mputc)(c);
+ base64_count = 0;
+ return;
+ } else if (c <= SPACE) {
close_mime();
- (*o_mputc)(SPACE);
- base64_count++;
+ if (base64_count > 70) {
+ (*o_mputc)(NL);
+ base64_count = 0;
+ }
+ if (!nkf_isblank(c)) {
+ (*o_mputc)(SPACE);
+ base64_count++;
+ }
}
(*o_mputc)(c);
base64_count++;
@@ -5678,7 +5735,8 @@ void mime_putc(nkf_char c)
mimeout_buf_count = 1;
}else{
if (base64_count > 1
- && base64_count + mimeout_buf_count > 76){
+ && base64_count + mimeout_buf_count > 76
+ && mimeout_buf[0] != CR && mimeout_buf[0] != NL){
(*o_mputc)(NL);
base64_count = 0;
if (!nkf_isspace(mimeout_buf[0])){
diff --git a/ext/nkf/nkf-utf8/utf8tbl.c b/ext/nkf/nkf-utf8/utf8tbl.c
index e43ad553d6..fb6c3b7362 100644
--- a/ext/nkf/nkf-utf8/utf8tbl.c
+++ b/ext/nkf/nkf-utf8/utf8tbl.c
@@ -201,6 +201,20 @@ const unsigned short euc_to_utf8_AC[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short euc_to_utf8_AC_mac[] = {
+ 0x2664, 0x2667, 0x2661, 0x2662, 0x2660, 0x2663, 0x2665,
+ 0x2666, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x3020, 0x260E, 0x3004,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x261E, 0x261C, 0x261D, 0x261F, 0x21C6, 0x21C4, 0x21C5,
+ 0, 0x21E8, 0x21E6, 0x21E7, 0x21E9, 0x2192, 0x2190, 0x2191,
+ 0x2193, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short euc_to_utf8_AD[] = {
0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466,
0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E,
@@ -215,6 +229,20 @@ const unsigned short euc_to_utf8_AD[] = {
0x2252, 0x2261, 0x222B, 0x222E, 0x2211, 0x221A, 0x22A5, 0x2220,
0x221F, 0x22BF, 0x2235, 0x2229, 0x222A, 0, 0x3299,
};
+const unsigned short euc_to_utf8_AD_mac[] = {
+ 0x65E5, 0x6708, 0x706B, 0x6C34, 0x6728, 0x91D1, 0x571F,
+ 0x796D, 0x795D, 0x81EA, 0x81F3, 0x3239, 0x547C, 0x3231, 0x8CC7,
+ 0x540D, 0x3232, 0x5B66, 0x8CA1, 0x793E, 0x7279, 0x76E3, 0x4F01,
+ 0x5354, 0x52B4, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0,
+ 0x3349, 0x3314, 0x3322, 0x334D, 0x3318, 0x3327, 0x3303, 0x3336,
+ 0x3351, 0x3357, 0x330D, 0x3326, 0x3323, 0x332B, 0x334A, 0x333B,
+ 0x339C, 0x339D, 0x339E, 0x338E, 0x338F, 0x33C4, 0x33A1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0x337B,
+ 0x301D, 0x301F, 0x2116, 0x33CD, 0x2121, 0x32A4, 0x32A5, 0x32A6,
+ 0x32A7, 0x32A8, 0x3231, 0x3232, 0x3239, 0x337E, 0x337D, 0x337C,
+ 0x2252, 0x5927, 0x5C0F, 0x32A4, 0x32A5, 0x32A6, 0x32A7, 0x32A8,
+ 0x533B, 0x8CA1, 0x512A, 0x52B4, 0x5370, 0x63A7, 0x79D8,
+};
const unsigned short euc_to_utf8_AE[] = {
0x3349, 0x3322, 0x334D, 0x3314, 0x3316, 0x3305, 0x3333,
0x334E, 0x3303, 0x3336, 0x3318, 0x3315, 0x3327, 0x3351, 0x334A,
@@ -2346,6 +2374,33 @@ const unsigned short *const euc_to_utf8_2bytes_ms[] = {
0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
euc_to_utf8_FC_ms, 0, 0,
};
+/* CP10001 */
+const unsigned short *const euc_to_utf8_2bytes_mac[] = {
+ euc_to_utf8_A1_ms, euc_to_utf8_A2_ms, euc_to_utf8_A3,
+ euc_to_utf8_A4, euc_to_utf8_A5, euc_to_utf8_A6, euc_to_utf8_A7,
+ euc_to_utf8_A8, euc_to_utf8_A9, euc_to_utf8_AA, euc_to_utf8_AB,
+ euc_to_utf8_AC_mac, euc_to_utf8_AD_mac, euc_to_utf8_AE, euc_to_utf8_AF,
+ euc_to_utf8_B0, euc_to_utf8_B1, euc_to_utf8_B2, euc_to_utf8_B3,
+ euc_to_utf8_B4, euc_to_utf8_B5, euc_to_utf8_B6, euc_to_utf8_B7,
+ euc_to_utf8_B8, euc_to_utf8_B9, euc_to_utf8_BA, euc_to_utf8_BB,
+ euc_to_utf8_BC, euc_to_utf8_BD, euc_to_utf8_BE, euc_to_utf8_BF,
+ euc_to_utf8_C0, euc_to_utf8_C1, euc_to_utf8_C2, euc_to_utf8_C3,
+ euc_to_utf8_C4, euc_to_utf8_C5, euc_to_utf8_C6, euc_to_utf8_C7,
+ euc_to_utf8_C8, euc_to_utf8_C9, euc_to_utf8_CA, euc_to_utf8_CB,
+ euc_to_utf8_CC, euc_to_utf8_CD, euc_to_utf8_CE, euc_to_utf8_CF,
+ euc_to_utf8_D0, euc_to_utf8_D1, euc_to_utf8_D2, euc_to_utf8_D3,
+ euc_to_utf8_D4, euc_to_utf8_D5, euc_to_utf8_D6, euc_to_utf8_D7,
+ euc_to_utf8_D8, euc_to_utf8_D9, euc_to_utf8_DA, euc_to_utf8_DB,
+ euc_to_utf8_DC, euc_to_utf8_DD, euc_to_utf8_DE, euc_to_utf8_DF,
+ euc_to_utf8_E0, euc_to_utf8_E1, euc_to_utf8_E2, euc_to_utf8_E3,
+ euc_to_utf8_E4, euc_to_utf8_E5, euc_to_utf8_E6, euc_to_utf8_E7,
+ euc_to_utf8_E8, euc_to_utf8_E9, euc_to_utf8_EA, euc_to_utf8_EB,
+ euc_to_utf8_EC, euc_to_utf8_ED, euc_to_utf8_EE, euc_to_utf8_EF,
+ euc_to_utf8_F0, euc_to_utf8_F1, euc_to_utf8_F2, euc_to_utf8_F3,
+ euc_to_utf8_F4, euc_to_utf8_F5, 0, 0,
+ 0, euc_to_utf8_F9, euc_to_utf8_FA, euc_to_utf8_FB,
+ euc_to_utf8_FC_ms, 0, 0,
+};
#ifdef X0212_ENABLE
const unsigned short *const x0212_to_utf8_2bytes[] = {
@@ -2397,6 +2452,16 @@ const unsigned short utf8_to_euc_C2_ms[] = {
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
+const unsigned short utf8_to_euc_C2_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
+ 0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
+ 0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
+ 0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
+};
const unsigned short utf8_to_euc_C2_932[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2547,6 +2612,16 @@ const unsigned short utf8_to_euc_E284[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E284_mac[] = {
+ 0, 0, 0, 0x216E, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2B7B, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2272, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E285[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2557,6 +2632,16 @@ const unsigned short utf8_to_euc_E285[] = {
0xF373, 0xF374, 0xF375, 0xF376, 0xF377, 0xF378, 0xF379, 0xF37A,
0xF37B, 0xF37C, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E285_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2A21, 0x2A22, 0x2A23, 0x2A24, 0x2A25, 0x2A26, 0x2A27, 0x2A28,
+ 0x2A29, 0x2A2A, 0, 0, 0, 0, 0, 0,
+ 0x2A35, 0x2A36, 0x2A37, 0x2A38, 0x2A39, 0x2A3A, 0x2A3B, 0x2A3C,
+ 0x2A3D, 0x2A3E, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E286[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2597,6 +2682,16 @@ const unsigned short utf8_to_euc_E288_932[] = {
0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
0, 0, 0, 0, 0, 0x2266, 0, 0,
};
+const unsigned short utf8_to_euc_E288_mac[] = {
+ 0x224F, 0, 0x225F, 0x2250, 0, 0, 0, 0x2260,
+ 0x223A, 0, 0, 0x223B, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0x2265, 0, 0, 0x2267, 0x2167, 0x2F22,
+ 0x225C, 0, 0, 0, 0, 0x2142, 0, 0x224A,
+ 0x224B, 0x2241, 0x2240, 0x2269, 0x226A, 0, 0x2F21, 0,
+ 0, 0, 0, 0, 0x2168, 0x2268, 0, 0,
+ 0, 0, 0, 0, 0, 0x2266, 0, 0,
+};
const unsigned short utf8_to_euc_E289[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2617,6 +2712,16 @@ const unsigned short utf8_to_euc_E28A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0x2D79,
};
+const unsigned short utf8_to_euc_E28A_mac[] = {
+ 0, 0, 0x223E, 0x223F, 0, 0, 0x223C, 0x223D,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x225D, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0x2F23,
+};
const unsigned short utf8_to_euc_E28C[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2637,6 +2742,16 @@ const unsigned short utf8_to_euc_E291[] = {
0x2D31, 0x2D32, 0x2D33, 0x2D34, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E291_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x2921, 0x2922, 0x2923, 0x2924, 0x2925, 0x2926, 0x2927, 0x2928,
+ 0x2929, 0x292A, 0x292B, 0x292C, 0x292D, 0x292E, 0x292F, 0x2930,
+ 0x2931, 0x2932, 0x2933, 0x2934, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E294[] = {
0x2821, 0x282C, 0x2822, 0x282D, 0, 0, 0, 0,
0, 0, 0, 0, 0x2823, 0, 0, 0x282E,
@@ -2767,6 +2882,16 @@ const unsigned short utf8_to_euc_E388[] = {
0, 0x2D6A, 0x2D6B, 0, 0, 0, 0, 0,
0, 0x2D6C, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E388_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2D2E, 0x2D31, 0, 0, 0, 0, 0,
+ 0, 0x2D2C, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -2777,6 +2902,16 @@ const unsigned short utf8_to_euc_E38A[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38A_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0x2D73, 0x2D74, 0x2D75, 0x2D76,
+ 0x2D77, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0x2D46, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D4A, 0, 0,
@@ -2787,6 +2922,16 @@ const unsigned short utf8_to_euc_E38C[] = {
0, 0, 0, 0, 0, 0, 0x2D47, 0,
0, 0, 0, 0x2D4F, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38C_mac[] = {
+ 0, 0, 0, 0x2E29, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2E32, 0, 0,
+ 0, 0, 0, 0, 0x2E24, 0, 0, 0,
+ 0x2E2B, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0x2E22, 0x2E34, 0, 0, 0x2E35, 0x2E2D,
+ 0, 0, 0, 0x2E37, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2E2A, 0,
+ 0, 0, 0, 0x2E36, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0x2D40, 0x2D4E, 0, 0, 0x2D43, 0, 0,
@@ -2797,6 +2942,16 @@ const unsigned short utf8_to_euc_E38D[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x2D5F, 0x2D6F, 0x2D6E, 0x2D6D, 0,
};
+const unsigned short utf8_to_euc_E38D_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0x2E21, 0x2E2F, 0, 0, 0x2E23, 0, 0,
+ 0, 0x2E2E, 0, 0, 0, 0, 0, 0x2E31,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0x2E6A, 0x2E69, 0x2E68, 0x2E67, 0,
+};
const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2D53, 0x2D54,
@@ -2807,6 +2962,16 @@ const unsigned short utf8_to_euc_E38E[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38E_mac[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0x2B2B, 0x2B2D,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0x2B21, 0x2B23, 0x2B29, 0,
+ 0, 0x2B27, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0x2D55, 0, 0, 0,
0, 0, 0, 0, 0, 0x2D63, 0, 0,
@@ -2817,6 +2982,16 @@ const unsigned short utf8_to_euc_E38F[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
+const unsigned short utf8_to_euc_E38F_mac[] = {
+ 0, 0, 0, 0, 0x2B2E, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0x2B7C, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
const unsigned short utf8_to_euc_E4B8[] = {
0x306C, 0x437A, 0xB021, 0x3C37, 0xB022, 0xB023, 0, 0x4B7C,
0x3E66, 0x3B30, 0x3E65, 0x323C, 0xB024, 0x4954, 0x4D3F, 0,
@@ -6171,6 +6346,24 @@ const unsigned short *const utf8_to_euc_E2_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_E2_mac[] = {
+ utf8_to_euc_E280_932, 0, 0, 0,
+ utf8_to_euc_E284_mac, utf8_to_euc_E285_mac, utf8_to_euc_E286, utf8_to_euc_E287,
+ utf8_to_euc_E288_mac, utf8_to_euc_E289, utf8_to_euc_E28A_mac, 0,
+ utf8_to_euc_E28C, 0, 0, 0,
+ 0, utf8_to_euc_E291_mac, 0, 0,
+ utf8_to_euc_E294, utf8_to_euc_E295, utf8_to_euc_E296, utf8_to_euc_E297,
+ utf8_to_euc_E298, utf8_to_euc_E299, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const utf8_to_euc_E3[] = {
utf8_to_euc_E380, utf8_to_euc_E381, utf8_to_euc_E382, utf8_to_euc_E383,
0, 0, 0, 0,
@@ -6207,6 +6400,24 @@ const unsigned short *const utf8_to_euc_E3_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_E3_mac[] = {
+ utf8_to_euc_E380_932, utf8_to_euc_E381, utf8_to_euc_E382_932, utf8_to_euc_E383,
+ 0, 0, 0, 0,
+ utf8_to_euc_E388_mac, 0, utf8_to_euc_E38A_mac, 0,
+ utf8_to_euc_E38C_mac, utf8_to_euc_E38D_mac, utf8_to_euc_E38E_mac, utf8_to_euc_E38F_mac,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const utf8_to_euc_E4[] = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -6441,6 +6652,36 @@ const unsigned short *const utf8_to_euc_2bytes_932[] = {
0, 0, 0, 0,
0, 0, 0, 0,
};
+const unsigned short *const utf8_to_euc_2bytes_mac[] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, utf8_to_euc_C2_mac, utf8_to_euc_C3,
+ utf8_to_euc_C4, utf8_to_euc_C5, 0, utf8_to_euc_C7,
+ 0, 0, 0, utf8_to_euc_CB,
+ 0, 0, utf8_to_euc_CE, utf8_to_euc_CF,
+ utf8_to_euc_D0, utf8_to_euc_D1, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+};
const unsigned short *const *const utf8_to_euc_3bytes[] = {
0, 0, utf8_to_euc_E2, utf8_to_euc_E3,
utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
@@ -6459,6 +6700,12 @@ const unsigned short *const *const utf8_to_euc_3bytes_932[] = {
utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
0, 0, 0, utf8_to_euc_EF_ms,
};
+const unsigned short *const *const utf8_to_euc_3bytes_mac[] = {
+ 0, 0, utf8_to_euc_E2_mac, utf8_to_euc_E3_mac,
+ utf8_to_euc_E4, utf8_to_euc_E5, utf8_to_euc_E6, utf8_to_euc_E7,
+ utf8_to_euc_E8, utf8_to_euc_E9, 0, 0,
+ 0, 0, 0, utf8_to_euc_EF_ms,
+};
#ifdef UNICODE_NORMALIZATION
diff --git a/ext/nkf/nkf-utf8/utf8tbl.h b/ext/nkf/nkf-utf8/utf8tbl.h
index 1f40f0b363..29413d4fac 100644
--- a/ext/nkf/nkf-utf8/utf8tbl.h
+++ b/ext/nkf/nkf-utf8/utf8tbl.h
@@ -5,6 +5,7 @@
extern const unsigned short euc_to_utf8_1byte[];
extern const unsigned short *const euc_to_utf8_2bytes[];
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
+extern const unsigned short *const euc_to_utf8_2bytes_mac[];
extern const unsigned short *const x0212_to_utf8_2bytes[];
#endif /* UTF8_OUTPUT_ENABLE */
@@ -12,9 +13,11 @@ extern const unsigned short *const x0212_to_utf8_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
extern const unsigned short *const utf8_to_euc_2bytes_932[];
+extern const unsigned short *const utf8_to_euc_2bytes_mac[];
extern const unsigned short *const *const utf8_to_euc_3bytes[];
extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
+extern const unsigned short *const *const utf8_to_euc_3bytes_mac[];
#endif /* UTF8_INPUT_ENABLE */
#ifdef UNICODE_NORMALIZATION