/**********************************************************************

  encoding.c -

  $Author: matz $
  $Date: 2007-05-24 17:22:33 +0900 (Thu, 24 May 2007) $
  created at: Thu May 24 17:23:27 JST 2007

  Copyright (C) 2007 Yukihiro Matsumoto

**********************************************************************/

#include "ruby/ruby.h"
#include "ruby/encoding.h"
#include "regenc.h"

static ID id_encoding;

/* One slot of the encoding table: a name and the encoding it denotes. */
struct rb_encoding_entry {
    const char *name;
    rb_encoding *enc;
};

/* Growable array of registered encodings; an entry's position is its index. */
static struct rb_encoding_entry *enc_table;
static int enc_table_size;
/* Alias name -> target name, created on first use by rb_enc_alias(). */
static st_table *enc_table_alias;

/*
 * Appends (name, encoding) to the encoding table.
 *
 * The name pointer is stored as-is, not copied, so it must stay valid
 * for as long as the table is used.
 *
 * Returns the index of the new entry, or -1 if the table could not be
 * grown (the existing table is left untouched in that case).
 */
int
rb_enc_register(const char *name, rb_encoding *encoding)
{
    struct rb_encoding_entry *ent;
    int newsize = enc_table_size + 1;

    /* realloc(NULL, n) behaves like malloc(n), so the very first
     * registration needs no separate branch. */
    ent = realloc(enc_table, sizeof(*enc_table) * newsize);
    if (!ent) return -1;

    enc_table = ent;
    enc_table_size = newsize;

    ent = &enc_table[newsize - 1];
    ent->name = name;
    ent->enc = encoding;
    return newsize - 1;
}

/*
 * Makes `alias` another name for the encoding that `orig` resolves to.
 *
 * `orig` may be a registered name or an existing alias.  The alias
 * pointer is stored as-is in the alias table, not copied.
 *
 * Returns the index of the encoding, or -1 if either name is NULL or
 * `orig` cannot be resolved.
 */
int
rb_enc_alias(const char *alias, const char *orig)
{
    st_data_t data;
    int idx;

    /* rb_enc_find_index() rejects a NULL name; do the same here before
     * the names are used as string keys. */
    if (!alias || !orig) return -1;

    if (!enc_table_alias) {
        enc_table_alias = st_init_strcasetable();
    }
    while ((idx = rb_enc_find_index(orig)) < 0) {
        if (!st_lookup(enc_table_alias, (st_data_t)orig, &data))
            return -1;
        orig = (const char *)data;
    }

    /* Record the registered name rather than the caller's string.
     * Storing `orig` itself lets a call such as
     * rb_enc_alias("sjis", "sjis") (with "sjis" already an alias)
     * insert an entry that points at itself, which would make the
     * resolution loop above spin forever on a later call. */
    if (idx < enc_table_size) {
        orig = enc_table[idx].name;
    }
    st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
    return idx;
}

/*
 * Registers the built-in encodings (ASCII, EUC-JP, Shift_JIS, UTF-8)
 * under their own names and then installs the default aliases.
 */
void
rb_enc_init(void)
{
#define ENC_REGISTER(enc) rb_enc_register(rb_enc_name(enc), enc)
    ENC_REGISTER(ONIG_ENCODING_ASCII);
    ENC_REGISTER(ONIG_ENCODING_EUC_JP);
    ENC_REGISTER(ONIG_ENCODING_SJIS);
    ENC_REGISTER(ONIG_ENCODING_UTF8);
#undef ENC_REGISTER

    /* If every registration failed the table is still NULL.  The alias
     * calls below reach rb_enc_find_index(), which calls rb_enc_init()
     * again whenever the table is NULL, so bail out instead of
     * recursing without bound. */
    if (!enc_table) return;

    rb_enc_alias("ascii", "us-ascii");
    rb_enc_alias("binary", "us-ascii");
    rb_enc_alias("iso-8859-1", "us-ascii");
    rb_enc_alias("sjis", "shift_jis");
}

/*
 * Returns the encoding registered at `index`, or 0 when `index` is
 * outside the table.  Initializes the table first if it does not
 * exist yet.
 */
rb_encoding *
rb_enc_from_index(int index)
{
    if (!enc_table) {
        rb_enc_init();
    }
    if (index < 0 || enc_table_size <= index) {
        return 0;
    }
    return enc_table[index].enc;
}

int
rb_enc_find_index(const char *name)
{
    int i;
    st_data_t alias = 0;

    if
(!name) return -1; if (!enc_table) { rb_enc_init(); } find: for (i=0; i str * * Retruns the encoding name. */
/*
 * Looks up the encoding of obj with rb_enc_get() and returns its name
 * (rb_enc_name) as a newly created Ruby string (rb_str_new2).
 */
VALUE rb_obj_encoding(VALUE obj) { return rb_str_new2(rb_enc_name(rb_enc_get(obj))); } char* rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc) { int c; if (rb_enc_mbmaxlen(enc) == 1) { p += nth; } else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { p += nth * rb_enc_mbmaxlen(enc); } else { for (c=0; p