• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

transcode.c

Go to the documentation of this file.
00001 /**********************************************************************
00002 
00003   transcode.c -
00004 
00005   $Author: usa $
00006   created at: Tue Oct 30 16:10:22 JST 2007
00007 
00008   Copyright (C) 2007 Martin Duerst
00009 
00010 **********************************************************************/
00011 
00012 #include "ruby/ruby.h"
00013 #include "ruby/encoding.h"
00014 #include "transcode_data.h"
00015 #include <ctype.h>
00016 
00017 /* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */
/* Global VALUE handles.  The rb_e / rb_c prefixes suggest exception classes
 * and the converter class -- NOTE(review): they are assigned outside the
 * visible part of this file; confirm there. */
00018 VALUE rb_eUndefinedConversionError;
00019 VALUE rb_eInvalidByteSequenceError;
00020 VALUE rb_eConverterNotFoundError;
00021 
00022 VALUE rb_cEncodingConverter;
00023 
/* File-local VALUEs.  The sym_ prefix suggests cached Symbols for option
 * names and values -- presumably set up at init time (not shown here). */
00024 static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
00025 static VALUE sym_xml, sym_text, sym_attr;
00026 static VALUE sym_universal_newline;
00027 static VALUE sym_crlf_newline;
00028 static VALUE sym_cr_newline;
00029 static VALUE sym_partial_input;
00030 
/* These names mirror the econv_* result codes returned by
 * transcode_restartable0 -- presumably their Symbol forms; TODO confirm. */
00031 static VALUE sym_invalid_byte_sequence;
00032 static VALUE sym_undefined_conversion;
00033 static VALUE sym_destination_buffer_full;
00034 static VALUE sym_source_buffer_empty;
00035 static VALUE sym_finished;
00036 static VALUE sym_after_output;
00037 static VALUE sym_incomplete_input;
00038 
/* Forward declaration; the definition is later in the file (not visible in
 * this chunk), so only the signature is known here. */
00039 static unsigned char *
00040 allocate_converted_string(const char *sname, const char *dname,
00041         const unsigned char *str, size_t len,
00042         unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
00043         size_t *dst_len_ptr);
00044 
00045 /* dynamic structure, one per conversion (similar to iconv_t) */
00046 /* may carry conversion state (e.g. for iso-2022-jp) */
/* Created by rb_transcoding_open_by_transcoder and released by
 * rb_transcoding_close.  The three unions switch between an embedded array
 * and a heap pointer depending on sizes declared by the transcoder; always
 * access them through the TRANSCODING_* macros below. */
00047 typedef struct rb_transcoding {
00048     const rb_transcoder *transcoder;
00049 
00050     int flags;
00051 
          /* Resume point for transcode_restartable0: 0 means "start from the
           * top", any other value selects the matching resume_label there. */
00052     int resume_position;
          /* Lookup state kept across suspensions; transcode_restartable0
           * aliases these three fields with macros of the same name. */
00053     unsigned int next_table;
00054     VALUE next_info;
00055     unsigned char next_byte;
          /* Progress counter of the STR1 output loop in transcode_restartable0. */
00056     unsigned int output_index;
00057 
00058     ssize_t recognized_len; /* already interpreted */
00059     ssize_t readagain_len; /* not yet interpreted */
00060     union {
00061         unsigned char ary[8]; /* max_input <= sizeof(ary) */
00062         unsigned char *ptr; /* length: max_input */
00063     } readbuf; /* recognized_len + readagain_len used */
00064 
          /* Staging area for output that did not fit into the caller's buffer:
           * bytes [writebuf_off, writebuf_len) are still to be copied out. */
00065     ssize_t writebuf_off;
00066     ssize_t writebuf_len;
00067     union {
00068         unsigned char ary[8]; /* max_output <= sizeof(ary) */
00069         unsigned char *ptr; /* length: max_output */
00070     } writebuf;
00071 
00072     union rb_transcoding_state_t { /* opaque data for stateful encoding */
00073         void *ptr;
00074         char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
00075         double dummy_for_alignment;
00076     } state;
00077 } rb_transcoding;
/* Address of the read buffer: the embedded array when the transcoder's
 * max_input fits in it, otherwise the heap pointer. */
00078 #define TRANSCODING_READBUF(tc) \
00079     ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
00080      (tc)->readbuf.ary : \
00081      (tc)->readbuf.ptr)
/* Address of the write buffer, chosen the same way from max_output. */
00082 #define TRANSCODING_WRITEBUF(tc) \
00083     ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
00084      (tc)->writebuf.ary : \
00085      (tc)->writebuf.ptr)
/* Capacity of the write buffer in bytes: sizeof(ary) when embedded,
 * max_output when heap-allocated. */
00086 #define TRANSCODING_WRITEBUF_SIZE(tc) \
00087     ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
00088      sizeof((tc)->writebuf.ary) : \
00089      (size_t)(tc)->transcoder->max_output)
/* Largest state_size that is stored inside the struct instead of on the heap. */
00090 #define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union rb_transcoding_state_t))
/* Address of the transcoder's private state: embedded array when state_size
 * fits, otherwise the pointer allocated at open time. */
00091 #define TRANSCODING_STATE(tc) \
00092     ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
00093      (tc)->state.ary : \
00094      (tc)->state.ptr)
00095 
/* One stage of a converter: a transcoding plus its output buffer and the
 * last result recorded for it.  rb_econv_t holds an array of these (elems).
 * NOTE(review): judging by the names, out_buf_start..out_buf_end bound the
 * buffer and out_data_start..out_data_end bound the pending bytes inside it;
 * the code that maintains them is outside this chunk -- confirm there. */
00096 typedef struct {
00097     struct rb_transcoding *tc;
00098     unsigned char *out_buf_start;
00099     unsigned char *out_data_start;
00100     unsigned char *out_data_end;
00101     unsigned char *out_buf_end;
00102     rb_econv_result_t last_result;
00103 } rb_econv_elem_t;
00104 
/* A converter: an array of transcoding stages (elems) plus shared settings
 * and the most recent error.  rb_econv_alloc creates it with every pointer
 * NULL, every counter 0 and last_error.result = econv_source_buffer_empty. */
00105 struct rb_econv_t {
00106     int flags;
00107     const char *source_encoding_name;
00108     const char *destination_encoding_name;
00109 
00110     int started;
00111 
          /* Replacement bytes and the name of their encoding.
           * NOTE(review): replacement_allocated presumably records whether
           * replacement_str is owned by this struct -- confirm at its users. */
00112     const unsigned char *replacement_str;
00113     size_t replacement_len;
00114     const char *replacement_enc;
00115     int replacement_allocated;
00116 
00117     unsigned char *in_buf_start;
00118     unsigned char *in_data_start;
00119     unsigned char *in_data_end;
00120     unsigned char *in_buf_end;
          /* elems has room for num_allocated entries, of which num_trans are
           * in use; rb_econv_add_transcoder_at doubles the array when full. */
00121     rb_econv_elem_t *elems;
00122     int num_allocated;
00123     int num_trans;
00124     int num_finished;
00125     struct rb_transcoding *last_tc;
00126 
00127     /* last error */
00128     struct {
00129         rb_econv_result_t result;
00130         struct rb_transcoding *error_tc;
00131         const char *source_encoding;
00132         const char *destination_encoding;
00133         const unsigned char *error_bytes_start;
00134         size_t error_bytes_len;
00135         size_t readagain_len;
00136     } last_error;
00137 
00138     /* The following fields are used only by Encoding::Converter.
00139      * rb_econv_open sets them to NULL. */
00140     rb_encoding *source_encoding;
00141     rb_encoding *destination_encoding;
00142 };
00143 
00144 /*
00145  *  Dispatch data and logic
00146  */
00147 
/* True when sname is the empty string; dname is accepted but not examined. */
00148 #define DECORATOR_P(sname, dname) (*(sname) == '\0')
00149 
/* One known conversion from sname to dname.  transcoder is filled in by
 * rb_register_transcoder; lib is filled in by declare_transcoder and cleared
 * again by load_transcoder_entry once a load has been attempted. */
00150 typedef struct {
00151     const char *sname;
00152     const char *dname;
00153     const char *lib; /* NULL means there is no library to load */
00154     const rb_transcoder *transcoder;
00155 } transcoder_entry_t;
00156 
/* Two-level map: source encoding name -> (st_table keyed by destination
 * encoding name -> transcoder_entry_t *).  See make_transcoder_entry.
 * The table itself is created outside this chunk. */
00157 static st_table *transcoder_table;
00158 
00159 static transcoder_entry_t *
00160 make_transcoder_entry(const char *sname, const char *dname)
00161 {
00162     st_data_t val;
00163     st_table *table2;
00164 
00165     if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
00166         val = (st_data_t)st_init_strcasetable();
00167         st_add_direct(transcoder_table, (st_data_t)sname, val);
00168     }
00169     table2 = (st_table *)val;
00170     if (!st_lookup(table2, (st_data_t)dname, &val)) {
00171         transcoder_entry_t *entry = ALLOC(transcoder_entry_t);
00172         entry->sname = sname;
00173         entry->dname = dname;
00174         entry->lib = NULL;
00175         entry->transcoder = NULL;
00176         val = (st_data_t)entry;
00177         st_add_direct(table2, (st_data_t)dname, val);
00178     }
00179     return (transcoder_entry_t *)val;
00180 }
00181 
00182 static transcoder_entry_t *
00183 get_transcoder_entry(const char *sname, const char *dname)
00184 {
00185     st_data_t val;
00186     st_table *table2;
00187 
00188     if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
00189         return NULL;
00190     }
00191     table2 = (st_table *)val;
00192     if (!st_lookup(table2, (st_data_t)dname, &val)) {
00193         return NULL;
00194     }
00195     return (transcoder_entry_t *)val;
00196 }
00197 
00198 void
00199 rb_register_transcoder(const rb_transcoder *tr)
00200 {
00201     const char *const sname = tr->src_encoding;
00202     const char *const dname = tr->dst_encoding;
00203 
00204     transcoder_entry_t *entry;
00205 
00206     entry = make_transcoder_entry(sname, dname);
00207     if (entry->transcoder) {
00208         rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered",
00209                  sname, dname);
00210     }
00211 
00212     entry->transcoder = tr;
00213 }
00214 
00215 static void
00216 declare_transcoder(const char *sname, const char *dname, const char *lib)
00217 {
00218     transcoder_entry_t *entry;
00219 
00220     entry = make_transcoder_entry(sname, dname);
00221     entry->lib = lib;
00222 }
00223 
00224 #define MAX_TRANSCODER_LIBNAME_LEN 64
00225 static const char transcoder_lib_prefix[] = "enc/trans/";
00226 
00227 void
00228 rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
00229 {
00230     if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) {
00231         rb_raise(rb_eArgError, "invalid library name - %s",
00232                  lib ? lib : "(null)");
00233     }
00234     declare_transcoder(enc1, enc2, lib);
00235 }
00236 
/* Case-insensitive equality of two encoding names (STRCASECMP == 0). */
00237 #define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
00238 
/* Singly linked FIFO node holding one encoding name still to be expanded
 * by transcode_search_path. */
00239 typedef struct search_path_queue_tag {
00240     struct search_path_queue_tag *next;
00241     const char *enc;
00242 } search_path_queue_t;
00243 
/* Working state of the breadth-first search in transcode_search_path:
 *   visited        - maps an encoding name to the name it was reached from
 *                    (NULL for the starting encoding);
 *   queue          - head of the pending FIFO;
 *   queue_last_ptr - address of the link where the next node is appended;
 *   base_enc       - encoding currently being expanded, read by
 *                    transcode_search_path_i. */
00244 typedef struct {
00245     st_table *visited;
00246     search_path_queue_t *queue;
00247     search_path_queue_t **queue_last_ptr;
00248     const char *base_enc;
00249 } search_path_bfs_t;
00250 
00251 static int
00252 transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
00253 {
00254     const char *dname = (const char *)key;
00255     search_path_bfs_t *bfs = (search_path_bfs_t *)arg;
00256     search_path_queue_t *q;
00257 
00258     if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
00259         return ST_CONTINUE;
00260     }
00261 
00262     q = ALLOC(search_path_queue_t);
00263     q->enc = dname;
00264     q->next = NULL;
00265     *bfs->queue_last_ptr = q;
00266     bfs->queue_last_ptr = &q->next;
00267 
00268     st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
00269     return ST_CONTINUE;
00270 }
00271 
00272 static int
00273 transcode_search_path(const char *sname, const char *dname,
00274     void (*callback)(const char *sname, const char *dname, int depth, void *arg),
00275     void *arg)
00276 {
00277     search_path_bfs_t bfs;
00278     search_path_queue_t *q;
00279     st_data_t val;
00280     st_table *table2;
00281     int found;
00282     int pathlen = -1;
00283 
00284     if (encoding_equal(sname, dname))
00285         return -1;
00286 
00287     q = ALLOC(search_path_queue_t);
00288     q->enc = sname;
00289     q->next = NULL;
00290     bfs.queue_last_ptr = &q->next;
00291     bfs.queue = q;
00292 
00293     bfs.visited = st_init_strcasetable();
00294     st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
00295 
00296     while (bfs.queue) {
00297         q = bfs.queue;
00298         bfs.queue = q->next;
00299         if (!bfs.queue)
00300             bfs.queue_last_ptr = &bfs.queue;
00301 
00302         if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
00303             xfree(q);
00304             continue;
00305         }
00306         table2 = (st_table *)val;
00307 
00308         if (st_lookup(table2, (st_data_t)dname, &val)) {
00309             st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
00310             xfree(q);
00311             found = 1;
00312             goto cleanup;
00313         }
00314 
00315         bfs.base_enc = q->enc;
00316         st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
00317         bfs.base_enc = NULL;
00318 
00319         xfree(q);
00320     }
00321     found = 0;
00322 
00323   cleanup:
00324     while (bfs.queue) {
00325         q = bfs.queue;
00326         bfs.queue = q->next;
00327         xfree(q);
00328     }
00329 
00330     if (found) {
00331         const char *enc = dname;
00332         int depth;
00333         pathlen = 0;
00334         while (1) {
00335             st_lookup(bfs.visited, (st_data_t)enc, &val);
00336             if (!val)
00337                 break;
00338             pathlen++;
00339             enc = (const char *)val;
00340         }
00341         depth = pathlen;
00342         enc = dname;
00343         while (1) {
00344             st_lookup(bfs.visited, (st_data_t)enc, &val);
00345             if (!val)
00346                 break;
00347             callback((const char *)val, enc, --depth, arg);
00348             enc = (const char *)val;
00349         }
00350     }
00351 
00352     st_free_table(bfs.visited);
00353 
00354     return pathlen; /* is -1 if not found */
00355 }
00356 
00357 static const rb_transcoder *
00358 load_transcoder_entry(transcoder_entry_t *entry)
00359 {
00360     if (entry->transcoder)
00361         return entry->transcoder;
00362 
00363     if (entry->lib) {
00364         const char *lib = entry->lib;
00365         size_t len = strlen(lib);
00366         char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
00367 
00368         entry->lib = NULL;
00369 
00370         if (len > MAX_TRANSCODER_LIBNAME_LEN)
00371             return NULL;
00372         memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
00373         memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
00374         if (!rb_require(path))
00375             return NULL;
00376     }
00377 
00378     if (entry->transcoder)
00379         return entry->transcoder;
00380 
00381     return NULL;
00382 }
00383 
/*
 * Pick the default replacement string for the encoding named encname.
 * For "UTF-8" (compared case-insensitively) this is the 3-byte sequence
 * EF BF BD, labelled "UTF-8"; for every other name it is "?", labelled
 * "US-ASCII".  The byte length goes to *len_ret and the label to
 * *repl_encname_ptr; the returned pointer refers to a string literal.
 */
static const char*
get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
{
    const int is_utf8 = encoding_equal(encname, "UTF-8");

    *len_ret = is_utf8 ? 3 : 1;
    *repl_encname_ptr = is_utf8 ? "UTF-8" : "US-ASCII";
    return is_utf8 ? "\xEF\xBF\xBD" : "?";
}
00398 
00399 /*
00400  *  Transcoding engine logic
00401  */
00402 
00403 static const unsigned char *
00404 transcode_char_start(rb_transcoding *tc,
00405                          const unsigned char *in_start,
00406                          const unsigned char *inchar_start,
00407                          const unsigned char *in_p,
00408                          size_t *char_len_ptr)
00409 {
00410     const unsigned char *ptr;
00411     if (inchar_start - in_start < tc->recognized_len) {
00412         MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len,
00413                inchar_start, unsigned char, in_p - inchar_start);
00414         ptr = TRANSCODING_READBUF(tc);
00415     }
00416     else {
00417         ptr = inchar_start - tc->recognized_len;
00418     }
00419     *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
00420     return ptr;
00421 }
00422 
/*
 * Core conversion loop for one rb_transcoding, written as a resumable state
 * machine.
 *
 * Input is read from *in_pos up to in_stop and output is written from
 * *out_pos up to out_stop.  Whenever the function has to give control back
 * it goes through SUSPEND, which stores the resume point in
 * tc->resume_position, saves the bytes of the character in progress into the
 * read buffer, publishes the new positions through *in_pos / *out_pos and
 * returns.  The next call jumps straight back to the matching resume_label
 * via the switch below, so all state that must survive lives in *tc.
 *
 * opt bits tested here:
 *   ECONV_PARTIAL_INPUT - running out of input suspends with
 *                         econv_source_buffer_empty instead of ending the
 *                         conversion / reporting incomplete input;
 *   ECONV_AFTER_OUTPUT  - suspend with econv_after_output at the marked
 *                         points once this call has produced any output.
 *
 * Possible results: econv_destination_buffer_full, econv_after_output,
 * econv_source_buffer_empty, econv_invalid_byte_sequence,
 * econv_incomplete_input, econv_undefined_conversion, econv_finished.
 *
 * NOTE(review): at the end only SUSPEND and the five field aliases are
 * #undef'd; SUSPEND_OBUF, SUSPEND_AFTER_OUTPUT, BYTE_ADDR, WORD_ADDR and the
 * BL_* macros stay defined for the rest of the translation unit.
 */
00423 static rb_econv_result_t
00424 transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
00425                       const unsigned char *in_stop, unsigned char *out_stop,
00426                       rb_transcoding *tc,
00427                       const int opt)
00428 {
00429     const rb_transcoder *tr = tc->transcoder;
00430     int unitlen = tr->input_unit_length;
00431     ssize_t readagain_len = 0;
00432 
00433     const unsigned char *inchar_start;
00434     const unsigned char *in_p;
00435 
00436     unsigned char *out_p;
00437 
00438     in_p = inchar_start = *in_pos;
00439 
00440     out_p = *out_pos;
00441 
/* SUSPEND(ret, num): save state, return ret, and define resume_label<num>
 * as the place where the next call continues.  When readagain_len is set
 * (INVALID case), that many trailing bytes are moved from the "recognized"
 * count to tc->readagain_len so they are processed again later. */
00442 #define SUSPEND(ret, num) \
00443     do { \
00444         tc->resume_position = (num); \
00445         if (0 < in_p - inchar_start) \
00446             MEMMOVE(TRANSCODING_READBUF(tc)+tc->recognized_len, \
00447                    inchar_start, unsigned char, in_p - inchar_start); \
00448         *in_pos = in_p; \
00449         *out_pos = out_p; \
00450         tc->recognized_len += in_p - inchar_start; \
00451         if (readagain_len) { \
00452             tc->recognized_len -= readagain_len; \
00453             tc->readagain_len = readagain_len; \
00454         } \
00455         return ret; \
00456         resume_label ## num:; \
00457     } while (0)
/* SUSPEND_OBUF(num): keep suspending with econv_destination_buffer_full
 * until at least one byte of output space is available. */
00458 #define SUSPEND_OBUF(num) \
00459     do { \
00460         while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
00461     } while (0)
00462 
/* SUSPEND_AFTER_OUTPUT(num): when ECONV_AFTER_OUTPUT is requested and this
 * call has already written something, suspend with econv_after_output. */
00463 #define SUSPEND_AFTER_OUTPUT(num) \
00464     if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
00465         SUSPEND(econv_after_output, num); \
00466     }
00467 
/* The following names alias fields of *tc so their values survive a
 * suspend/resume cycle. */
00468 #define next_table (tc->next_table)
00469 #define next_info (tc->next_info)
00470 #define next_byte (tc->next_byte)
00471 #define writebuf_len (tc->writebuf_len)
00472 #define writebuf_off (tc->writebuf_off)
00473 
    /* Re-enter at the point where the previous call suspended (0 = start). */
00474     switch (tc->resume_position) {
00475       case 0: break;
00476       case 1: goto resume_label1;
00477       case 2: goto resume_label2;
00478       case 3: goto resume_label3;
00479       case 4: goto resume_label4;
00480       case 5: goto resume_label5;
00481       case 6: goto resume_label6;
00482       case 7: goto resume_label7;
00483       case 8: goto resume_label8;
00484       case 9: goto resume_label9;
00485       case 10: goto resume_label10;
00486       case 11: goto resume_label11;
00487       case 12: goto resume_label12;
00488       case 13: goto resume_label13;
00489       case 14: goto resume_label14;
00490       case 15: goto resume_label15;
00491       case 16: goto resume_label16;
00492       case 17: goto resume_label17;
00493       case 18: goto resume_label18;
00494       case 19: goto resume_label19;
00495       case 20: goto resume_label20;
00496       case 21: goto resume_label21;
00497       case 22: goto resume_label22;
00498       case 23: goto resume_label23;
00499       case 24: goto resume_label24;
00500       case 25: goto resume_label25;
00501       case 26: goto resume_label26;
00502       case 27: goto resume_label27;
00503       case 28: goto resume_label28;
00504       case 29: goto resume_label29;
00505       case 30: goto resume_label30;
00506       case 31: goto resume_label31;
00507       case 32: goto resume_label32;
00508       case 33: goto resume_label33;
00509       case 34: goto resume_label34;
00510     }
00511 
    /* Each pass of this loop starts a new input character at the root of
     * the transcoder's lookup tree. */
00512     while (1) {
00513         inchar_start = in_p;
00514         tc->recognized_len = 0;
00515         next_table = tr->conv_tree_start;
00516 
00517         SUSPEND_AFTER_OUTPUT(24);
00518 
        /* Input exhausted between characters: leave the loop (and finish)
         * unless the caller said more input may follow. */
00519         if (in_stop <= in_p) {
00520             if (!(opt & ECONV_PARTIAL_INPUT))
00521                 break;
00522             SUSPEND(econv_source_buffer_empty, 7);
00523             continue;
00524         }
00525 
/* Accessors for the current lookup node (next_table): the byte array holds
 * min byte, max byte and per-byte offsets; the offsets index the node's
 * action words. */
00526 #define BYTE_ADDR(index) (tr->byte_array + (index))
00527 #define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
00528 #define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
00529 #define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
00530 #define BL_MIN_BYTE     (BL_BASE[0])
00531 #define BL_MAX_BYTE     (BL_BASE[1])
00532 #define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
00533 #define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
00534 
00535         next_byte = (unsigned char)*in_p++;
00536       follow_byte:
        /* A byte outside the node's [min, max] range is invalid. */
00537         if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte)
00538             next_info = INVALID;
00539         else {
00540             next_info = (VALUE)BL_ACTION(next_byte);
00541         }
00542       follow_info:
        /* The low five bits of next_info select the action. */
00543         switch (next_info & 0x1F) {
          /* NOMAP: copy the character's input bytes to the output unchanged,
           * staged through the write buffer so the copy can be suspended. */
00544           case NOMAP:
00545             {
00546                 const unsigned char *p = inchar_start;
00547                 writebuf_off = 0;
00548                 while (p < in_p) {
00549                     TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
00550                 }
00551                 writebuf_len = writebuf_off;
00552                 writebuf_off = 0;
00553                 while (writebuf_off < writebuf_len) {
00554                     SUSPEND_OBUF(3);
00555                     *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00556                 }
00557             }
00558             continue;
          /* Action values with the two low bits clear: next_info becomes the
           * next lookup table; fetch another input byte and descend. */
00559           case 0x00: case 0x04: case 0x08: case 0x0C:
00560           case 0x10: case 0x14: case 0x18: case 0x1C:
00561             SUSPEND_AFTER_OUTPUT(25);
00562             while (in_p >= in_stop) {
00563                 if (!(opt & ECONV_PARTIAL_INPUT))
00564                     goto incomplete;
00565                 SUSPEND(econv_source_buffer_empty, 5);
00566             }
00567             next_byte = (unsigned char)*in_p++;
00568             next_table = (unsigned int)next_info;
00569             goto follow_byte;
00570           case ZERObt: /* drop input */
00571             continue;
          /* ONEbt..GB4bt: the output bytes are packed into next_info; emit
           * them one at a time, suspending whenever the output is full. */
00572           case ONEbt:
00573             SUSPEND_OBUF(9); *out_p++ = getBT1(next_info);
00574             continue;
00575           case TWObt:
00576             SUSPEND_OBUF(10); *out_p++ = getBT1(next_info);
00577             SUSPEND_OBUF(21); *out_p++ = getBT2(next_info);
00578             continue;
00579           case THREEbt:
00580             SUSPEND_OBUF(11); *out_p++ = getBT1(next_info);
00581             SUSPEND_OBUF(15); *out_p++ = getBT2(next_info);
00582             SUSPEND_OBUF(16); *out_p++ = getBT3(next_info);
00583             continue;
00584           case FOURbt:
00585             SUSPEND_OBUF(12); *out_p++ = getBT0(next_info);
00586             SUSPEND_OBUF(17); *out_p++ = getBT1(next_info);
00587             SUSPEND_OBUF(18); *out_p++ = getBT2(next_info);
00588             SUSPEND_OBUF(19); *out_p++ = getBT3(next_info);
00589             continue;
00590           case GB4bt:
00591             SUSPEND_OBUF(29); *out_p++ = getGB4bt0(next_info);
00592             SUSPEND_OBUF(30); *out_p++ = getGB4bt1(next_info);
00593             SUSPEND_OBUF(31); *out_p++ = getGB4bt2(next_info);
00594             SUSPEND_OBUF(32); *out_p++ = getGB4bt3(next_info);
00595             continue;
          /* STR1: output a string stored in the byte array; tc->output_index
           * tracks progress so the copy can resume mid-string. */
00596           case STR1:
00597             tc->output_index = 0;
00598             while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(next_info)))) {
00599                 SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(next_info))[1+tc->output_index];
00600                 tc->output_index++;
00601             }
00602             continue;
          /* FUNii / FUNsi: a transcoder callback computes a new next_info
           * (from the info word or from the character bytes), which is then
           * dispatched again. */
00603           case FUNii:
00604             next_info = (VALUE)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
00605             goto follow_info;
00606           case FUNsi:
00607             {
00608                 const unsigned char *char_start;
00609                 size_t char_len;
00610                 char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00611                 next_info = (VALUE)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
00612                 goto follow_info;
00613             }
          /* FUNio / FUNso / FUNsio: a transcoder callback produces the output
           * bytes.  If at least max_output bytes are free it writes straight
           * into the output buffer; otherwise it writes into the write buffer
           * and the bytes are drained from there.  The `break` leaves the
           * switch and reaches the `continue` that follows it. */
00614           case FUNio:
00615             SUSPEND_OBUF(13);
00616             if (tr->max_output <= out_stop - out_p)
00617                 out_p += tr->func_io(TRANSCODING_STATE(tc),
00618                     next_info, out_p, out_stop - out_p);
00619             else {
00620                 writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
00621                     next_info,
00622                     TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00623                 writebuf_off = 0;
00624                 while (writebuf_off < writebuf_len) {
00625                     SUSPEND_OBUF(20);
00626                     *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00627                 }
00628             }
00629             break;
00630           case FUNso:
00631             {
00632                 const unsigned char *char_start;
00633                 size_t char_len;
00634                 SUSPEND_OBUF(14);
00635                 if (tr->max_output <= out_stop - out_p) {
00636                     char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00637                     out_p += tr->func_so(TRANSCODING_STATE(tc),
00638                         char_start, (size_t)char_len,
00639                         out_p, out_stop - out_p);
00640                 }
00641                 else {
00642                     char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00643                     writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
00644                         char_start, (size_t)char_len,
00645                         TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00646                     writebuf_off = 0;
00647                     while (writebuf_off < writebuf_len) {
00648                         SUSPEND_OBUF(22);
00649                         *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00650                     }
00651                 }
00652                 break;
00653             }
00654       case FUNsio:
00655             {
00656                 const unsigned char *char_start;
00657                 size_t char_len;
00658                 SUSPEND_OBUF(33);
00659                 if (tr->max_output <= out_stop - out_p) {
00660                     char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00661                     out_p += tr->func_sio(TRANSCODING_STATE(tc),
00662                         char_start, (size_t)char_len, next_info,
00663                         out_p, out_stop - out_p);
00664                 }
00665                 else {
00666                     char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
00667                     writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
00668                         char_start, (size_t)char_len, next_info,
00669                         TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00670                     writebuf_off = 0;
00671                     while (writebuf_off < writebuf_len) {
00672                         SUSPEND_OBUF(34);
00673                         *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00674                     }
00675                 }
00676                 break;
00677             }
          /* INVALID: decide how many bytes belong to the error.  If at most
           * one input unit has been seen, extend the error to a whole unit
           * (or to the end of the available input).  Otherwise cut at the
           * last unit boundary before the offending byte and mark the rest
           * to be read again (readagain_len). */
00678           case INVALID:
00679             if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
00680                 if (tc->recognized_len + (in_p - inchar_start) < unitlen)
00681                     SUSPEND_AFTER_OUTPUT(26);
00682                 while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
00683                     in_p = in_stop;
00684                     SUSPEND(econv_source_buffer_empty, 8);
00685                 }
00686                 if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
00687                     in_p = in_stop;
00688                 }
00689                 else {
00690                     in_p = inchar_start + (unitlen - tc->recognized_len);
00691                 }
00692             }
00693             else {
00694                 ssize_t invalid_len; /* including the last byte which causes invalid */
00695                 ssize_t discard_len;
00696                 invalid_len = tc->recognized_len + (in_p - inchar_start);
00697                 discard_len = ((invalid_len - 1) / unitlen) * unitlen;
00698                 readagain_len = invalid_len - discard_len;
00699             }
00700             goto invalid;
00701           case UNDEF:
00702             goto undef;
00703           default:
00704             rb_raise(rb_eRuntimeError, "unknown transcoding instruction");
00705         }
00706         continue;
00707 
        /* Error exits: report the condition; when the caller resumes, the
         * loop simply moves on to the next character. */
00708       invalid:
00709         SUSPEND(econv_invalid_byte_sequence, 1);
00710         continue;
00711 
00712       incomplete:
00713         SUSPEND(econv_incomplete_input, 27);
00714         continue;
00715 
00716       undef:
00717         SUSPEND(econv_undefined_conversion, 2);
00718         continue;
00719     }
00720 
00721     /* cleanup */
    /* Give the transcoder a chance to emit trailing bytes via finish_func,
     * using the same direct-or-staged output scheme as the FUN*o cases. */
00722     if (tr->finish_func) {
00723         SUSPEND_OBUF(4);
00724         if (tr->max_output <= out_stop - out_p) {
00725             out_p += tr->finish_func(TRANSCODING_STATE(tc),
00726                 out_p, out_stop - out_p);
00727         }
00728         else {
00729             writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
00730                 TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
00731             writebuf_off = 0;
00732             while (writebuf_off < writebuf_len) {
00733                 SUSPEND_OBUF(23);
00734                 *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
00735             }
00736         }
00737     }
    /* Once finished, every further call resumes here and reports
     * econv_finished again. */
00738     while (1)
00739         SUSPEND(econv_finished, 6);
00740 #undef SUSPEND
00741 #undef next_table
00742 #undef next_info
00743 #undef next_byte
00744 #undef writebuf_len
00745 #undef writebuf_off
00746 }
00747 
00748 static rb_econv_result_t
00749 transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
00750                       const unsigned char *in_stop, unsigned char *out_stop,
00751                       rb_transcoding *tc,
00752                       const int opt)
00753 {
00754     if (tc->readagain_len) {
00755         unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
00756         const unsigned char *readagain_pos = readagain_buf;
00757         const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
00758         rb_econv_result_t res;
00759 
00760         MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
00761                unsigned char, tc->readagain_len);
00762         tc->readagain_len = 0;
00763         res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
00764         if (res != econv_source_buffer_empty) {
00765             MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
00766                    readagain_pos, unsigned char, readagain_stop - readagain_pos);
00767             tc->readagain_len += readagain_stop - readagain_pos;
00768             return res;
00769         }
00770     }
00771     return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, tc, opt);
00772 }
00773 
00774 static rb_transcoding *
00775 rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
00776 {
00777     rb_transcoding *tc;
00778 
00779     tc = ALLOC(rb_transcoding);
00780     tc->transcoder = tr;
00781     tc->flags = flags;
00782     if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
00783         tc->state.ptr = xmalloc(tr->state_size);
00784     if (tr->state_init_func) {
00785         (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
00786     }
00787     tc->resume_position = 0;
00788     tc->recognized_len = 0;
00789     tc->readagain_len = 0;
00790     tc->writebuf_len = 0;
00791     tc->writebuf_off = 0;
00792     if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
00793         tc->readbuf.ptr = xmalloc(tr->max_input);
00794     }
00795     if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
00796         tc->writebuf.ptr = xmalloc(tr->max_output);
00797     }
00798     return tc;
00799 }
00800 
00801 static rb_econv_result_t
00802 rb_transcoding_convert(rb_transcoding *tc,
00803   const unsigned char **input_ptr, const unsigned char *input_stop,
00804   unsigned char **output_ptr, unsigned char *output_stop,
00805   int flags)
00806 {
00807     return transcode_restartable(
00808                 input_ptr, output_ptr,
00809                 input_stop, output_stop,
00810                 tc, flags);
00811 }
00812 
00813 static void
00814 rb_transcoding_close(rb_transcoding *tc)
00815 {
00816     const rb_transcoder *tr = tc->transcoder;
00817     if (tr->state_fini_func) {
00818         (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
00819     }
00820     if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
00821         xfree(tc->state.ptr);
00822     if ((int)sizeof(tc->readbuf.ary) < tr->max_input)
00823         xfree(tc->readbuf.ptr);
00824     if ((int)sizeof(tc->writebuf.ary) < tr->max_output)
00825         xfree(tc->writebuf.ptr);
00826     xfree(tc);
00827 }
00828 
00829 static size_t
00830 rb_transcoding_memsize(rb_transcoding *tc)
00831 {
00832     size_t size = sizeof(rb_transcoding);
00833     const rb_transcoder *tr = tc->transcoder;
00834 
00835     if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
00836         size += tr->state_size;
00837     }
00838     if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
00839         size += tr->max_input;
00840     }
00841     if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
00842         size += tr->max_output;
00843     }
00844     return size;
00845 }
00846 
00847 static rb_econv_t *
00848 rb_econv_alloc(int n_hint)
00849 {
00850     rb_econv_t *ec;
00851 
00852     if (n_hint <= 0)
00853         n_hint = 1;
00854 
00855     ec = ALLOC(rb_econv_t);
00856     ec->flags = 0;
00857     ec->source_encoding_name = NULL;
00858     ec->destination_encoding_name = NULL;
00859     ec->started = 0;
00860     ec->replacement_str = NULL;
00861     ec->replacement_len = 0;
00862     ec->replacement_enc = NULL;
00863     ec->replacement_allocated = 0;
00864     ec->in_buf_start = NULL;
00865     ec->in_data_start = NULL;
00866     ec->in_data_end = NULL;
00867     ec->in_buf_end = NULL;
00868     ec->num_allocated = n_hint;
00869     ec->num_trans = 0;
00870     ec->elems = ALLOC_N(rb_econv_elem_t, ec->num_allocated);
00871     ec->num_finished = 0;
00872     ec->last_tc = NULL;
00873     ec->last_error.result = econv_source_buffer_empty;
00874     ec->last_error.error_tc = NULL;
00875     ec->last_error.source_encoding = NULL;
00876     ec->last_error.destination_encoding = NULL;
00877     ec->last_error.error_bytes_start = NULL;
00878     ec->last_error.error_bytes_len = 0;
00879     ec->last_error.readagain_len = 0;
00880     ec->source_encoding = NULL;
00881     ec->destination_encoding = NULL;
00882     return ec;
00883 }
00884 
00885 static int
00886 rb_econv_add_transcoder_at(rb_econv_t *ec, const rb_transcoder *tr, int i)
00887 {
00888     int n, j;
00889     int bufsize = 4096;
00890     unsigned char *p;
00891 
00892     if (ec->num_trans == ec->num_allocated) {
00893         n = ec->num_allocated * 2;
00894         REALLOC_N(ec->elems, rb_econv_elem_t, n);
00895         ec->num_allocated = n;
00896     }
00897 
00898     p = xmalloc(bufsize);
00899 
00900     MEMMOVE(ec->elems+i+1, ec->elems+i, rb_econv_elem_t, ec->num_trans-i);
00901 
00902     ec->elems[i].tc = rb_transcoding_open_by_transcoder(tr, 0);
00903     ec->elems[i].out_buf_start = p;
00904     ec->elems[i].out_buf_end = p + bufsize;
00905     ec->elems[i].out_data_start = p;
00906     ec->elems[i].out_data_end = p;
00907     ec->elems[i].last_result = econv_source_buffer_empty;
00908 
00909     ec->num_trans++;
00910 
00911     if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
00912         for (j = ec->num_trans-1; i <= j; j--) {
00913             rb_transcoding *tc = ec->elems[j].tc;
00914             const rb_transcoder *tr2 = tc->transcoder;
00915             if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
00916                 ec->last_tc = tc;
00917                 break;
00918             }
00919         }
00920 
00921     return 0;
00922 }
00923 
00924 static rb_econv_t *
00925 rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
00926 {
00927     rb_econv_t *ec;
00928     int i, ret;
00929 
00930     for (i = 0; i < n; i++) {
00931         const rb_transcoder *tr;
00932         tr = load_transcoder_entry(entries[i]);
00933         if (!tr)
00934             return NULL;
00935     }
00936 
00937     ec = rb_econv_alloc(n);
00938 
00939     for (i = 0; i < n; i++) {
00940         const rb_transcoder *tr = load_transcoder_entry(entries[i]);
00941         ret = rb_econv_add_transcoder_at(ec, tr, ec->num_trans);
00942         if (ret == -1) {
00943             rb_econv_close(ec);
00944             return NULL;
00945         }
00946     }
00947 
00948     return ec;
00949 }
00950 
00951 struct trans_open_t {
00952     transcoder_entry_t **entries;
00953     int num_additional;
00954 };
00955 
00956 static void
00957 trans_open_i(const char *sname, const char *dname, int depth, void *arg)
00958 {
00959     struct trans_open_t *toarg = arg;
00960 
00961     if (!toarg->entries) {
00962         toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
00963     }
00964     toarg->entries[depth] = get_transcoder_entry(sname, dname);
00965 }
00966 
00967 static rb_econv_t *
00968 rb_econv_open0(const char *sname, const char *dname, int ecflags)
00969 {
00970     transcoder_entry_t **entries = NULL;
00971     int num_trans;
00972     rb_econv_t *ec;
00973 
00974     rb_encoding *senc, *denc;
00975     int sidx, didx;
00976 
00977     senc = NULL;
00978     if (*sname) {
00979         sidx = rb_enc_find_index(sname);
00980         if (0 <= sidx) {
00981             senc = rb_enc_from_index(sidx);
00982         }
00983     }
00984 
00985     denc = NULL;
00986     if (*dname) {
00987         didx = rb_enc_find_index(dname);
00988         if (0 <= didx) {
00989             denc = rb_enc_from_index(didx);
00990         }
00991     }
00992 
00993     if (*sname == '\0' && *dname == '\0') {
00994         num_trans = 0;
00995         entries = NULL;
00996     }
00997     else {
00998         struct trans_open_t toarg;
00999         toarg.entries = NULL;
01000         toarg.num_additional = 0;
01001         num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg);
01002         entries = toarg.entries;
01003         if (num_trans < 0) {
01004             xfree(entries);
01005             return NULL;
01006         }
01007     }
01008 
01009     ec = rb_econv_open_by_transcoder_entries(num_trans, entries);
01010     xfree(entries);
01011     if (!ec)
01012         return NULL;
01013 
01014     ec->flags = ecflags;
01015     ec->source_encoding_name = sname;
01016     ec->destination_encoding_name = dname;
01017 
01018     return ec;
01019 }
01020 
01021 #define MAX_ECFLAGS_DECORATORS 32
01022 
01023 static int
01024 decorator_names(int ecflags, const char **decorators_ret)
01025 {
01026     int num_decorators;
01027 
01028     if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
01029         (ecflags & ECONV_CR_NEWLINE_DECORATOR))
01030         return -1;
01031 
01032     if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
01033         (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
01034         return -1;
01035 
01036     if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
01037         (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
01038         return -1;
01039 
01040     num_decorators = 0;
01041 
01042     if (ecflags & ECONV_XML_TEXT_DECORATOR)
01043         decorators_ret[num_decorators++] = "xml_text_escape";
01044     if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
01045         decorators_ret[num_decorators++] = "xml_attr_content_escape";
01046     if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
01047         decorators_ret[num_decorators++] = "xml_attr_quote";
01048 
01049     if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
01050         decorators_ret[num_decorators++] = "crlf_newline";
01051     if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
01052         decorators_ret[num_decorators++] = "cr_newline";
01053     if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
01054         decorators_ret[num_decorators++] = "universal_newline";
01055 
01056     return num_decorators;
01057 }
01058 
01059 rb_econv_t *
01060 rb_econv_open(const char *sname, const char *dname, int ecflags)
01061 {
01062     rb_econv_t *ec;
01063     int num_decorators;
01064     const char *decorators[MAX_ECFLAGS_DECORATORS];
01065     int i;
01066 
01067     num_decorators = decorator_names(ecflags, decorators);
01068     if (num_decorators == -1)
01069         return NULL;
01070 
01071     ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
01072     if (!ec)
01073         return NULL;
01074 
01075     for (i = 0; i < num_decorators; i++)
01076         if (rb_econv_decorate_at_last(ec, decorators[i]) == -1) {
01077             rb_econv_close(ec);
01078             return NULL;
01079         }
01080 
01081     ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
01082 
01083     return ec;
01084 }
01085 
/*
 * Push data through the transcoder chain, starting at element `start`.
 *
 * Every pass runs each element from `start` to the last one; passes are
 * repeated for as long as at least one element moved its input or
 * output cursor.  Element 0 reads from the caller's input, every other
 * element reads the pending output of its predecessor.  The last
 * element writes to the caller's output, every other element writes
 * into its own buffer.
 *
 * Returns the index of the element that reported
 * econv_invalid_byte_sequence, econv_incomplete_input,
 * econv_undefined_conversion or econv_after_output, or -1 when a whole
 * pass made no progress.
 */
01086 static int
01087 trans_sweep(rb_econv_t *ec,
01088     const unsigned char **input_ptr, const unsigned char *input_stop,
01089     unsigned char **output_ptr, unsigned char *output_stop,
01090     int flags,
01091     int start)
01092 {
01093     int try;
01094     int i, f;
01095 
01096     const unsigned char **ipp, *is, *iold;
01097     unsigned char **opp, *os, *oold;
01098     rb_econv_result_t res;
01099 
01100     try = 1;
01101     while (try) {
01102         try = 0;
01103         for (i = start; i < ec->num_trans; i++) {
01104             rb_econv_elem_t *te = &ec->elems[i];
01105 
          /* pick the input source for this element */
01106             if (i == 0) {
01107                 ipp = input_ptr;
01108                 is = input_stop;
01109             }
01110             else {
01111                 rb_econv_elem_t *prev_te = &ec->elems[i-1];
01112                 ipp = (const unsigned char **)&prev_te->out_data_start;
01113                 is = prev_te->out_data_end;
01114             }
01115 
          /* pick the output sink for this element */
01116             if (i == ec->num_trans-1) {
01117                 opp = output_ptr;
01118                 os = output_stop;
01119             }
01120             else {
              /* slide pending data to the front of the buffer so the free space is contiguous */
01121                 if (te->out_buf_start != te->out_data_start) {
01122                     ssize_t len = te->out_data_end - te->out_data_start;
01123                     ssize_t off = te->out_data_start - te->out_buf_start;
01124                     MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
01125                     te->out_data_start = te->out_buf_start;
01126                     te->out_data_end -= off;
01127                 }
01128                 opp = &te->out_data_end;
01129                 os = te->out_buf_end;
01130             }
01131 
          /* every element other than index num_finished is told that more input may follow */
01132             f = flags;
01133             if (ec->num_finished != i)
01134                 f |= ECONV_PARTIAL_INPUT;
          /* ECONV_AFTER_OUTPUT reaches element 0 once; later passes skip element 0 and drop the flag */
01135             if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
01136                 start = 1;
01137                 flags &= ~ECONV_AFTER_OUTPUT;
01138             }
01139             if (i != 0)
01140                 f &= ~ECONV_AFTER_OUTPUT;
01141             iold = *ipp;
01142             oold = *opp;
01143             te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
          /* any cursor movement means another pass may make further progress */
01144             if (iold != *ipp || oold != *opp)
01145                 try = 1;
01146 
01147             switch (res) {
01148               case econv_invalid_byte_sequence:
01149               case econv_incomplete_input:
01150               case econv_undefined_conversion:
01151               case econv_after_output:
01152                 return i;
01153 
01154               case econv_destination_buffer_full:
01155               case econv_source_buffer_empty:
01156                 break;
01157 
01158               case econv_finished:
01159                 ec->num_finished = i+1;
01160                 break;
01161             }
01162         }
01163     }
01164     return -1;
01165 }
01166 
/*
 * Run the whole transcoder chain once and report a single result.
 *
 * A NULL input_ptr or output_ptr is replaced by an empty buffer.
 * The function first looks, from the last element backwards, for an
 * element whose previous result still has to be passed on; sweeping
 * then starts right after that element, otherwise at element 0.
 * Sweeps are repeated until trans_sweep() returns -1 or the index of
 * the last element.
 *
 * The returned value is the last_result of the highest-indexed element
 * whose last_result is not econv_source_buffer_empty; its index is
 * stored in *result_position_ptr when that pointer is non-NULL.  If
 * every element reports econv_source_buffer_empty, that value is
 * returned and the position is set to -1.
 */
01167 static rb_econv_result_t
01168 rb_trans_conv(rb_econv_t *ec,
01169     const unsigned char **input_ptr, const unsigned char *input_stop,
01170     unsigned char **output_ptr, unsigned char *output_stop,
01171     int flags,
01172     int *result_position_ptr)
01173 {
01174     int i;
01175     int needreport_index;
01176     int sweep_start;
01177 
01178     unsigned char empty_buf;
01179     unsigned char *empty_ptr = &empty_buf;
01180 
01181     if (!input_ptr) {
01182         input_ptr = (const unsigned char **)&empty_ptr;
01183         input_stop = empty_ptr;
01184     }
01185 
01186     if (!output_ptr) {
01187         output_ptr = &empty_ptr;
01188         output_stop = empty_ptr;
01189     }
01190 
  /* a stale after_output on the first element is cleared before scanning */
01191     if (ec->elems[0].last_result == econv_after_output)
01192         ec->elems[0].last_result = econv_source_buffer_empty;
01193 
  /* find the last element with a result other than buffer full / buffer empty */
01194     needreport_index = -1;
01195     for (i = ec->num_trans-1; 0 <= i; i--) {
01196         switch (ec->elems[i].last_result) {
01197           case econv_invalid_byte_sequence:
01198           case econv_incomplete_input:
01199           case econv_undefined_conversion:
01200           case econv_after_output:
01201           case econv_finished:
01202             sweep_start = i+1;
01203             needreport_index = i;
01204             goto found_needreport;
01205 
01206           case econv_destination_buffer_full:
01207           case econv_source_buffer_empty:
01208             break;
01209 
01210           default:
01211             rb_bug("unexpected transcode last result");
01212         }
01213     }
01214 
01215     /* /^[sd]+$/ is confirmed.  but actually /^s*d*$/. */
01216 
  /* last element is full and the caller asked for AFTER_OUTPUT: run once with no new input,
     and report econv_after_output if that run ends with econv_source_buffer_empty */
01217     if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full &&
01218         (flags & ECONV_AFTER_OUTPUT)) {
01219         rb_econv_result_t res;
01220 
01221         res = rb_trans_conv(ec, NULL, NULL, output_ptr, output_stop,
01222                 (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT,
01223                 result_position_ptr);
01224 
01225         if (res == econv_source_buffer_empty)
01226             return econv_after_output;
01227         return res;
01228     }
01229 
01230     sweep_start = 0;
01231 
01232   found_needreport:
01233 
  /* keep sweeping from just past the reporting element until nothing reports,
     or the reporting element is the last one */
01234     do {
01235         needreport_index = trans_sweep(ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
01236         sweep_start = needreport_index + 1;
01237     } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
01238 
01239     for (i = ec->num_trans-1; 0 <= i; i--) {
01240         if (ec->elems[i].last_result != econv_source_buffer_empty) {
01241             rb_econv_result_t res = ec->elems[i].last_result;
          /* error-like results and after_output are consumed once reported;
             destination_buffer_full and finished stay recorded */
01242             if (res == econv_invalid_byte_sequence ||
01243                 res == econv_incomplete_input ||
01244                 res == econv_undefined_conversion ||
01245                 res == econv_after_output) {
01246                 ec->elems[i].last_result = econv_source_buffer_empty;
01247             }
01248             if (result_position_ptr)
01249                 *result_position_ptr = i;
01250             return res;
01251         }
01252     }
01253     if (result_position_ptr)
01254         *result_position_ptr = -1;
01255     return econv_source_buffer_empty;
01256 }
01257 
/*
 * Core conversion step without error-replacement handling.
 *
 * ec->last_error is cleared on entry and filled in again before
 * returning.
 *
 * With no transcoders the function is a plain byte copy: bytes pending
 * in ec's input buffer are written first, then as much caller input as
 * fits in the output.  With ECONV_AFTER_OUTPUT only a single input byte
 * is copied per call.
 *
 * With transcoders, output still pending in the last element's buffer
 * is delivered first, then bytes pending in ec's input buffer are
 * converted, and only then the caller's input.
 *
 * Returns the conversion result; for econv_invalid_byte_sequence,
 * econv_incomplete_input and econv_undefined_conversion the failing
 * transcoding and its offending bytes are recorded in ec->last_error.
 */
01258 static rb_econv_result_t
01259 rb_econv_convert0(rb_econv_t *ec,
01260     const unsigned char **input_ptr, const unsigned char *input_stop,
01261     unsigned char **output_ptr, unsigned char *output_stop,
01262     int flags)
01263 {
01264     rb_econv_result_t res;
01265     int result_position;
01266     int has_output = 0;
01267 
01268     memset(&ec->last_error, 0, sizeof(ec->last_error));
01269 
  /* no transcoders: pass bytes through unchanged */
01270     if (ec->num_trans == 0) {
01271         size_t len;
      /* first drain bytes waiting in ec's own input buffer */
01272         if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
01273             if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
01274                 len = output_stop - *output_ptr;
01275                 memcpy(*output_ptr, ec->in_data_start, len);
01276                 *output_ptr = output_stop;
01277                 ec->in_data_start += len;
01278                 res = econv_destination_buffer_full;
01279                 goto gotresult;
01280             }
01281             len = ec->in_data_end - ec->in_data_start;
01282             memcpy(*output_ptr, ec->in_data_start, len);
01283             *output_ptr += len;
01284             ec->in_data_start = ec->in_data_end = ec->in_buf_start;
01285             if (flags & ECONV_AFTER_OUTPUT) {
01286                 res = econv_after_output;
01287                 goto gotresult;
01288             }
01289         }
      /* len = the smaller of free output space and remaining input */
01290         if (output_stop - *output_ptr < input_stop - *input_ptr) {
01291             len = output_stop - *output_ptr;
01292         }
01293         else {
01294             len = input_stop - *input_ptr;
01295         }
      /* AFTER_OUTPUT: hand over exactly one byte, then stop */
01296         if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
01297             *(*output_ptr)++ = *(*input_ptr)++;
01298             res = econv_after_output;
01299             goto gotresult;
01300         }
01301         memcpy(*output_ptr, *input_ptr, len);
01302         *output_ptr += len;
01303         *input_ptr += len;
01304         if (*input_ptr != input_stop)
01305             res = econv_destination_buffer_full;
01306         else if (flags & ECONV_PARTIAL_INPUT)
01307             res = econv_source_buffer_empty;
01308         else
01309             res = econv_finished;
01310         goto gotresult;
01311     }
01312 
  /* deliver output still buffered in the last element before converting anything new */
01313     if (ec->elems[ec->num_trans-1].out_data_start) {
01314         unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
01315         unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
01316         if (data_start != data_end) {
01317             size_t len;
01318             if (output_stop - *output_ptr < data_end - data_start) {
01319                 len = output_stop - *output_ptr;
01320                 memcpy(*output_ptr, data_start, len);
01321                 *output_ptr = output_stop;
01322                 ec->elems[ec->num_trans-1].out_data_start += len;
01323                 res = econv_destination_buffer_full;
01324                 goto gotresult;
01325             }
01326             len = data_end - data_start;
01327             memcpy(*output_ptr, data_start, len);
01328             *output_ptr += len;
01329             ec->elems[ec->num_trans-1].out_data_start =
01330                 ec->elems[ec->num_trans-1].out_data_end =
01331                 ec->elems[ec->num_trans-1].out_buf_start;
01332             has_output = 1;
01333         }
01334     }
01335 
  /* convert bytes waiting in ec's input buffer, always as partial input */
01336     if (ec->in_buf_start &&
01337         ec->in_data_start != ec->in_data_end) {
01338         res = rb_trans_conv(ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
01339                 (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
01340         if (res != econv_source_buffer_empty)
01341             goto gotresult;
01342     }
01343 
  /* output was already produced above and AFTER_OUTPUT is requested:
     run with an empty input range so no caller input is consumed */
01344     if (has_output &&
01345         (flags & ECONV_AFTER_OUTPUT) &&
01346         *input_ptr != input_stop) {
01347         input_stop = *input_ptr;
01348         res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01349         if (res == econv_source_buffer_empty)
01350             res = econv_after_output;
01351     }
01352     else if ((flags & ECONV_AFTER_OUTPUT) ||
01353         ec->num_trans == 1) {
01354         res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01355     }
01356     else {
      /* several transcoders and no AFTER_OUTPUT from the caller: force AFTER_OUTPUT
         and loop, hiding the intermediate econv_after_output results */
01357         flags |= ECONV_AFTER_OUTPUT;
01358         do {
01359             res = rb_trans_conv(ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
01360         } while (res == econv_after_output);
01361     }
01362 
01363   gotresult:
01364     ec->last_error.result = res;
  /* result_position is only read for results produced by rb_trans_conv() */
01365     if (res == econv_invalid_byte_sequence ||
01366         res == econv_incomplete_input ||
01367         res == econv_undefined_conversion) {
01368         rb_transcoding *error_tc = ec->elems[result_position].tc;
01369         ec->last_error.error_tc = error_tc;
01370         ec->last_error.source_encoding = error_tc->transcoder->src_encoding;
01371         ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding;
01372         ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc);
01373         ec->last_error.error_bytes_len = error_tc->recognized_len;
01374         ec->last_error.readagain_len = error_tc->readagain_len;
01375     }
01376 
01377     return res;
01378 }
01379 
01380 static int output_replacement_character(rb_econv_t *ec);
01381 
01382 static int
01383 output_hex_charref(rb_econv_t *ec)
01384 {
01385     int ret;
01386     unsigned char utfbuf[1024];
01387     const unsigned char *utf;
01388     size_t utf_len;
01389     int utf_allocated = 0;
01390     char charef_buf[16];
01391     const unsigned char *p;
01392 
01393     if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
01394         utf = ec->last_error.error_bytes_start;
01395         utf_len = ec->last_error.error_bytes_len;
01396     }
01397     else {
01398         utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE",
01399                 ec->last_error.error_bytes_start, ec->last_error.error_bytes_len,
01400                 utfbuf, sizeof(utfbuf),
01401                 &utf_len);
01402         if (!utf)
01403             return -1;
01404         if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
01405             utf_allocated = 1;
01406     }
01407 
01408     if (utf_len % 4 != 0)
01409         goto fail;
01410 
01411     p = utf;
01412     while (4 <= utf_len) {
01413         unsigned int u = 0;
01414         u += p[0] << 24;
01415         u += p[1] << 16;
01416         u += p[2] << 8;
01417         u += p[3];
01418         snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
01419 
01420         ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII");
01421         if (ret == -1)
01422             goto fail;
01423 
01424         p += 4;
01425         utf_len -= 4;
01426     }
01427 
01428     if (utf_allocated)
01429         xfree((void *)utf);
01430     return 0;
01431 
01432   fail:
01433     if (utf_allocated)
01434         xfree((void *)utf);
01435     return -1;
01436 }
01437 
01438 rb_econv_result_t
01439 rb_econv_convert(rb_econv_t *ec,
01440     const unsigned char **input_ptr, const unsigned char *input_stop,
01441     unsigned char **output_ptr, unsigned char *output_stop,
01442     int flags)
01443 {
01444     rb_econv_result_t ret;
01445 
01446     unsigned char empty_buf;
01447     unsigned char *empty_ptr = &empty_buf;
01448 
01449     ec->started = 1;
01450 
01451     if (!input_ptr) {
01452         input_ptr = (const unsigned char **)&empty_ptr;
01453         input_stop = empty_ptr;
01454     }
01455 
01456     if (!output_ptr) {
01457         output_ptr = &empty_ptr;
01458         output_stop = empty_ptr;
01459     }
01460 
01461   resume:
01462     ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags);
01463 
01464     if (ret == econv_invalid_byte_sequence ||
01465         ret == econv_incomplete_input) {
01466         /* deal with invalid byte sequence */
01467         /* todo: add more alternative behaviors */
01468         switch (ec->flags & ECONV_INVALID_MASK) {
01469           case ECONV_INVALID_REPLACE:
01470             if (output_replacement_character(ec) == 0)
01471                 goto resume;
01472         }
01473     }
01474 
01475     if (ret == econv_undefined_conversion) {
01476         /* valid character in source encoding
01477          * but no related character(s) in destination encoding */
01478         /* todo: add more alternative behaviors */
01479         switch (ec->flags & ECONV_UNDEF_MASK) {
01480           case ECONV_UNDEF_REPLACE:
01481             if (output_replacement_character(ec) == 0)
01482                 goto resume;
01483             break;
01484 
01485           case ECONV_UNDEF_HEX_CHARREF:
01486             if (output_hex_charref(ec) == 0)
01487                 goto resume;
01488             break;
01489         }
01490     }
01491 
01492     return ret;
01493 }
01494 
01495 const char *
01496 rb_econv_encoding_to_insert_output(rb_econv_t *ec)
01497 {
01498     rb_transcoding *tc = ec->last_tc;
01499     const rb_transcoder *tr;
01500 
01501     if (tc == NULL)
01502         return "";
01503 
01504     tr = tc->transcoder;
01505 
01506     if (tr->asciicompat_type == asciicompat_encoder)
01507         return tr->src_encoding;
01508     return tr->dst_encoding;
01509 }
01510 
01511 static unsigned char *
01512 allocate_converted_string(const char *sname, const char *dname,
01513         const unsigned char *str, size_t len,
01514         unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
01515         size_t *dst_len_ptr)
01516 {
01517     unsigned char *dst_str;
01518     size_t dst_len;
01519     size_t dst_bufsize;
01520 
01521     rb_econv_t *ec;
01522     rb_econv_result_t res;
01523 
01524     const unsigned char *sp;
01525     unsigned char *dp;
01526 
01527     if (caller_dst_buf)
01528         dst_bufsize = caller_dst_bufsize;
01529     else if (len == 0)
01530         dst_bufsize = 1;
01531     else
01532         dst_bufsize = len;
01533 
01534     ec = rb_econv_open(sname, dname, 0);
01535     if (ec == NULL)
01536         return NULL;
01537     if (caller_dst_buf)
01538         dst_str = caller_dst_buf;
01539     else
01540         dst_str = xmalloc(dst_bufsize);
01541     dst_len = 0;
01542     sp = str;
01543     dp = dst_str+dst_len;
01544     res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
01545     dst_len = dp - dst_str;
01546     while (res == econv_destination_buffer_full) {
01547         if (SIZE_MAX/2 < dst_bufsize) {
01548             goto fail;
01549         }
01550         dst_bufsize *= 2;
01551         if (dst_str == caller_dst_buf) {
01552             unsigned char *tmp;
01553             tmp = xmalloc(dst_bufsize);
01554             memcpy(tmp, dst_str, dst_bufsize/2);
01555             dst_str = tmp;
01556         }
01557         else {
01558             dst_str = xrealloc(dst_str, dst_bufsize);
01559         }
01560         dp = dst_str+dst_len;
01561         res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
01562         dst_len = dp - dst_str;
01563     }
01564     if (res != econv_finished) {
01565         goto fail;
01566     }
01567     rb_econv_close(ec);
01568     *dst_len_ptr = dst_len;
01569     return dst_str;
01570 
01571   fail:
01572     if (dst_str != caller_dst_buf)
01573         xfree(dst_str);
01574     rb_econv_close(ec);
01575     return NULL;
01576 }
01577 
01578 /* result: 0:success -1:failure */
01579 int
01580 rb_econv_insert_output(rb_econv_t *ec,
01581     const unsigned char *str, size_t len, const char *str_encoding)
01582 {
01583     const char *insert_encoding = rb_econv_encoding_to_insert_output(ec);
01584     unsigned char insert_buf[4096];
01585     const unsigned char *insert_str = NULL;
01586     size_t insert_len;
01587 
01588     int last_trans_index;
01589     rb_transcoding *tc;
01590 
01591     unsigned char **buf_start_p;
01592     unsigned char **data_start_p;
01593     unsigned char **data_end_p;
01594     unsigned char **buf_end_p;
01595 
01596     size_t need;
01597 
01598     ec->started = 1;
01599 
01600     if (len == 0)
01601         return 0;
01602 
01603     if (encoding_equal(insert_encoding, str_encoding)) {
01604         insert_str = str;
01605         insert_len = len;
01606     }
01607     else {
01608         insert_str = allocate_converted_string(str_encoding, insert_encoding,
01609                 str, len, insert_buf, sizeof(insert_buf), &insert_len);
01610         if (insert_str == NULL)
01611             return -1;
01612     }
01613 
01614     need = insert_len;
01615 
01616     last_trans_index = ec->num_trans-1;
01617     if (ec->num_trans == 0) {
01618         tc = NULL;
01619         buf_start_p = &ec->in_buf_start;
01620         data_start_p = &ec->in_data_start;
01621         data_end_p = &ec->in_data_end;
01622         buf_end_p = &ec->in_buf_end;
01623     }
01624     else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
01625         tc = ec->elems[last_trans_index].tc;
01626         need += tc->readagain_len;
01627         if (need < insert_len)
01628             goto fail;
01629         if (last_trans_index == 0) {
01630             buf_start_p = &ec->in_buf_start;
01631             data_start_p = &ec->in_data_start;
01632             data_end_p = &ec->in_data_end;
01633             buf_end_p = &ec->in_buf_end;
01634         }
01635         else {
01636             rb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
01637             buf_start_p = &ee->out_buf_start;
01638             data_start_p = &ee->out_data_start;
01639             data_end_p = &ee->out_data_end;
01640             buf_end_p = &ee->out_buf_end;
01641         }
01642     }
01643     else {
01644         rb_econv_elem_t *ee = &ec->elems[last_trans_index];
01645         buf_start_p = &ee->out_buf_start;
01646         data_start_p = &ee->out_data_start;
01647         data_end_p = &ee->out_data_end;
01648         buf_end_p = &ee->out_buf_end;
01649         tc = ec->elems[last_trans_index].tc;
01650     }
01651 
01652     if (*buf_start_p == NULL) {
01653         unsigned char *buf = xmalloc(need);
01654         *buf_start_p = buf;
01655         *data_start_p = buf;
01656         *data_end_p = buf;
01657         *buf_end_p = buf+need;
01658     }
01659     else if ((size_t)(*buf_end_p - *data_end_p) < need) {
01660         MEMMOVE(*buf_start_p, *data_start_p, unsigned char, *data_end_p - *data_start_p);
01661         *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
01662         *data_start_p = *buf_start_p;
01663         if ((size_t)(*buf_end_p - *data_end_p) < need) {
01664             unsigned char *buf;
01665             size_t s = (*data_end_p - *buf_start_p) + need;
01666             if (s < need)
01667                 goto fail;
01668             buf = xrealloc(*buf_start_p, s);
01669             *data_start_p = buf;
01670             *data_end_p = buf + (*data_end_p - *buf_start_p);
01671             *buf_start_p = buf;
01672             *buf_end_p = buf + s;
01673         }
01674     }
01675 
01676     memcpy(*data_end_p, insert_str, insert_len);
01677     *data_end_p += insert_len;
01678     if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
01679         memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
01680         *data_end_p += tc->readagain_len;
01681         tc->readagain_len = 0;
01682     }
01683 
01684     if (insert_str != str && insert_str != insert_buf)
01685         xfree((void*)insert_str);
01686     return 0;
01687 
01688   fail:
01689     if (insert_str != str && insert_str != insert_buf)
01690         xfree((void*)insert_str);
01691     return -1;
01692 }
01693 
01694 void
01695 rb_econv_close(rb_econv_t *ec)
01696 {
01697     int i;
01698 
01699     if (ec->replacement_allocated) {
01700         xfree((void *)ec->replacement_str);
01701     }
01702     for (i = 0; i < ec->num_trans; i++) {
01703         rb_transcoding_close(ec->elems[i].tc);
01704         if (ec->elems[i].out_buf_start)
01705             xfree(ec->elems[i].out_buf_start);
01706     }
01707     xfree(ec->in_buf_start);
01708     xfree(ec->elems);
01709     xfree(ec);
01710 }
01711 
01712 size_t
01713 rb_econv_memsize(rb_econv_t *ec)
01714 {
01715     size_t size = sizeof(rb_econv_t);
01716     int i;
01717 
01718     if (ec->replacement_allocated) {
01719         size += ec->replacement_len;
01720     }
01721     for (i = 0; i < ec->num_trans; i++) {
01722         size += rb_transcoding_memsize(ec->elems[i].tc);
01723 
01724         if (ec->elems[i].out_buf_start) {
01725             size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start;
01726         }
01727     }
01728     size += ec->in_buf_end - ec->in_buf_start;
01729     size += sizeof(rb_econv_elem_t) * ec->num_allocated;
01730 
01731     return size;
01732 }
01733 
01734 int
01735 rb_econv_putbackable(rb_econv_t *ec)
01736 {
01737     if (ec->num_trans == 0)
01738         return 0;
01739 #if SIZEOF_SIZE_T > SIZEOF_INT
01740     if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
01741 #endif
01742     return (int)ec->elems[0].tc->readagain_len;
01743 }
01744 
01745 void
01746 rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n)
01747 {
01748     rb_transcoding *tc;
01749     if (ec->num_trans == 0 || n == 0)
01750         return;
01751     tc = ec->elems[0].tc;
01752     memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n);
01753     tc->readagain_len -= n;
01754 }
01755 
/* Search state passed through st_foreach() by rb_econv_asciicompat_encoding(). */
01756 struct asciicompat_encoding_t {
01757     const char *ascii_compat_name;    /* result: set by asciicompat_encoding_i(), NULL when nothing matched */
01758     const char *ascii_incompat_name;  /* the encoding name the search was started for */
01759 };
01760 
01761 static int
01762 asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg)
01763 {
01764     struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg;
01765     transcoder_entry_t *entry = (transcoder_entry_t *)val;
01766     const rb_transcoder *tr;
01767 
01768     if (DECORATOR_P(entry->sname, entry->dname))
01769         return ST_CONTINUE;
01770     tr = load_transcoder_entry(entry);
01771     if (tr && tr->asciicompat_type == asciicompat_decoder) {
01772         data->ascii_compat_name = tr->dst_encoding;
01773         return ST_STOP;
01774     }
01775     return ST_CONTINUE;
01776 }
01777 
01778 const char *
01779 rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
01780 {
01781     st_data_t v;
01782     st_table *table2;
01783     struct asciicompat_encoding_t data;
01784 
01785     if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
01786         return NULL;
01787     table2 = (st_table *)v;
01788 
01789     /*
01790      * Assumption:
01791      * There is at most one transcoder for
01792      * converting from ASCII incompatible encoding.
01793      *
01794      * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
01795      */
01796     if (table2->num_entries != 1)
01797         return NULL;
01798 
01799     data.ascii_incompat_name = ascii_incompat_name;
01800     data.ascii_compat_name = NULL;
01801     st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
01802     return data.ascii_compat_name;
01803 }
01804 
01805 VALUE
01806 rb_econv_substr_append(rb_econv_t *ec, VALUE src, long off, long len, VALUE dst, int flags)
01807 {
01808     unsigned const char *ss, *sp, *se;
01809     unsigned char *ds, *dp, *de;
01810     rb_econv_result_t res;
01811     int max_output;
01812 
01813     if (NIL_P(dst)) {
01814         dst = rb_str_buf_new(len);
01815         if (ec->destination_encoding)
01816             rb_enc_associate(dst, ec->destination_encoding);
01817     }
01818 
01819     if (ec->last_tc)
01820         max_output = ec->last_tc->transcoder->max_output;
01821     else
01822         max_output = 1;
01823 
01824     res = econv_destination_buffer_full;
01825     while (res == econv_destination_buffer_full) {
01826         long dlen = RSTRING_LEN(dst);
01827         if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
01828             unsigned long new_capa = (unsigned long)dlen + len + max_output;
01829             if (LONG_MAX < new_capa)
01830                 rb_raise(rb_eArgError, "too long string");
01831             rb_str_resize(dst, new_capa);
01832             rb_str_set_len(dst, dlen);
01833         }
01834         ss = sp = (const unsigned char *)RSTRING_PTR(src) + off;
01835         se = ss + len;
01836         ds = (unsigned char *)RSTRING_PTR(dst);
01837         de = ds + rb_str_capacity(dst);
01838         dp = ds += dlen;
01839         res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
01840         off += sp - ss;
01841         len -= sp - ss;
01842         rb_str_set_len(dst, dlen + (dp - ds));
01843         rb_econv_check_error(ec);
01844     }
01845 
01846     return dst;
01847 }
01848 
01849 VALUE
01850 rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags)
01851 {
01852     return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), dst, flags);
01853 }
01854 
01855 VALUE
01856 rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags)
01857 {
01858     return rb_econv_substr_append(ec, src, byteoff, bytesize, Qnil, flags);
01859 }
01860 
01861 VALUE
01862 rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags)
01863 {
01864     return rb_econv_substr_append(ec, src, 0, RSTRING_LEN(src), Qnil, flags);
01865 }
01866 
01867 static int
01868 rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int n)
01869 {
01870     transcoder_entry_t *entry;
01871     const rb_transcoder *tr;
01872 
01873     if (ec->started != 0)
01874         return -1;
01875 
01876     entry = get_transcoder_entry(sname, dname);
01877     if (!entry)
01878         return -1;
01879 
01880     tr = load_transcoder_entry(entry);
01881 
01882     return rb_econv_add_transcoder_at(ec, tr, n);
01883 }
01884 
01885 static int
01886 rb_econv_decorate_at(rb_econv_t *ec, const char *decorator_name, int n)
01887 {
01888     return rb_econv_add_converter(ec, "", decorator_name, n);
01889 }
01890 
01891 int
01892 rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name)
01893 {
01894     const rb_transcoder *tr;
01895 
01896     if (ec->num_trans == 0)
01897         return rb_econv_decorate_at(ec, decorator_name, 0);
01898 
01899     tr = ec->elems[0].tc->transcoder;
01900 
01901     if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
01902         tr->asciicompat_type == asciicompat_decoder)
01903         return rb_econv_decorate_at(ec, decorator_name, 1);
01904 
01905     return rb_econv_decorate_at(ec, decorator_name, 0);
01906 }
01907 
01908 int
01909 rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name)
01910 {
01911     const rb_transcoder *tr;
01912 
01913     if (ec->num_trans == 0)
01914         return rb_econv_decorate_at(ec, decorator_name, 0);
01915 
01916     tr = ec->elems[ec->num_trans-1].tc->transcoder;
01917 
01918     if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
01919         tr->asciicompat_type == asciicompat_encoder)
01920         return rb_econv_decorate_at(ec, decorator_name, ec->num_trans-1);
01921 
01922     return rb_econv_decorate_at(ec, decorator_name, ec->num_trans);
01923 }
01924 
01925 void
01926 rb_econv_binmode(rb_econv_t *ec)
01927 {
01928     const rb_transcoder *trs[3];
01929     int n, i, j;
01930     transcoder_entry_t *entry;
01931     int num_trans;
01932 
01933     n = 0;
01934     if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
01935         entry = get_transcoder_entry("", "universal_newline");
01936         if (entry->transcoder)
01937             trs[n++] = entry->transcoder;
01938     }
01939     if (ec->flags & ECONV_CRLF_NEWLINE_DECORATOR) {
01940         entry = get_transcoder_entry("", "crlf_newline");
01941         if (entry->transcoder)
01942             trs[n++] = entry->transcoder;
01943     }
01944     if (ec->flags & ECONV_CR_NEWLINE_DECORATOR) {
01945         entry = get_transcoder_entry("", "cr_newline");
01946         if (entry->transcoder)
01947             trs[n++] = entry->transcoder;
01948     }
01949 
01950     num_trans = ec->num_trans;
01951     j = 0;
01952     for (i = 0; i < num_trans; i++) {
01953         int k;
01954         for (k = 0; k < n; k++)
01955             if (trs[k] == ec->elems[i].tc->transcoder)
01956                 break;
01957         if (k == n) {
01958             ec->elems[j] = ec->elems[i];
01959             j++;
01960         }
01961         else {
01962             rb_transcoding_close(ec->elems[i].tc);
01963             xfree(ec->elems[i].out_buf_start);
01964             ec->num_trans--;
01965         }
01966     }
01967 
01968     ec->flags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR);
01969 
01970 }
01971 
01972 static VALUE
01973 econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
01974 {
01975     int has_description = 0;
01976 
01977     if (NIL_P(mesg))
01978         mesg = rb_str_new(NULL, 0);
01979 
01980     if (*sname != '\0' || *dname != '\0') {
01981         if (*sname == '\0')
01982             rb_str_cat2(mesg, dname);
01983         else if (*dname == '\0')
01984             rb_str_cat2(mesg, sname);
01985         else
01986             rb_str_catf(mesg, "%s to %s", sname, dname);
01987         has_description = 1;
01988     }
01989 
01990     if (ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR|
01991                    ECONV_CRLF_NEWLINE_DECORATOR|
01992                    ECONV_CR_NEWLINE_DECORATOR|
01993                    ECONV_XML_TEXT_DECORATOR|
01994                    ECONV_XML_ATTR_CONTENT_DECORATOR|
01995                    ECONV_XML_ATTR_QUOTE_DECORATOR)) {
01996         const char *pre = "";
01997         if (has_description)
01998             rb_str_cat2(mesg, " with ");
01999         if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)  {
02000             rb_str_cat2(mesg, pre); pre = ",";
02001             rb_str_cat2(mesg, "universal_newline");
02002         }
02003         if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) {
02004             rb_str_cat2(mesg, pre); pre = ",";
02005             rb_str_cat2(mesg, "crlf_newline");
02006         }
02007         if (ecflags & ECONV_CR_NEWLINE_DECORATOR) {
02008             rb_str_cat2(mesg, pre); pre = ",";
02009             rb_str_cat2(mesg, "cr_newline");
02010         }
02011         if (ecflags & ECONV_XML_TEXT_DECORATOR) {
02012             rb_str_cat2(mesg, pre); pre = ",";
02013             rb_str_cat2(mesg, "xml_text");
02014         }
02015         if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) {
02016             rb_str_cat2(mesg, pre); pre = ",";
02017             rb_str_cat2(mesg, "xml_attr_content");
02018         }
02019         if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) {
02020             rb_str_cat2(mesg, pre); pre = ",";
02021             rb_str_cat2(mesg, "xml_attr_quote");
02022         }
02023         has_description = 1;
02024     }
02025     if (!has_description) {
02026         rb_str_cat2(mesg, "no-conversion");
02027     }
02028 
02029     return mesg;
02030 }
02031 
02032 VALUE
02033 rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
02034 {
02035     VALUE mesg, exc;
02036     mesg = rb_str_new_cstr("code converter not found (");
02037     econv_description(sname, dname, ecflags, mesg);
02038     rb_str_cat2(mesg, ")");
02039     exc = rb_exc_new3(rb_eConverterNotFoundError, mesg);
02040     return exc;
02041 }
02042 
02043 static VALUE
02044 make_econv_exception(rb_econv_t *ec)
02045 {
02046     VALUE mesg, exc;
02047     if (ec->last_error.result == econv_invalid_byte_sequence ||
02048         ec->last_error.result == econv_incomplete_input) {
02049         const char *err = (const char *)ec->last_error.error_bytes_start;
02050         size_t error_len = ec->last_error.error_bytes_len;
02051         VALUE bytes = rb_str_new(err, error_len);
02052         VALUE dumped = rb_str_dump(bytes);
02053         size_t readagain_len = ec->last_error.readagain_len;
02054         VALUE bytes2 = Qnil;
02055         VALUE dumped2;
02056         int idx;
02057         if (ec->last_error.result == econv_incomplete_input) {
02058             mesg = rb_sprintf("incomplete %s on %s",
02059                     StringValueCStr(dumped),
02060                     ec->last_error.source_encoding);
02061         }
02062         else if (readagain_len) {
02063             bytes2 = rb_str_new(err+error_len, readagain_len);
02064             dumped2 = rb_str_dump(bytes2);
02065             mesg = rb_sprintf("%s followed by %s on %s",
02066                     StringValueCStr(dumped),
02067                     StringValueCStr(dumped2),
02068                     ec->last_error.source_encoding);
02069         }
02070         else {
02071             mesg = rb_sprintf("%s on %s",
02072                     StringValueCStr(dumped),
02073                     ec->last_error.source_encoding);
02074         }
02075 
02076         exc = rb_exc_new3(rb_eInvalidByteSequenceError, mesg);
02077         rb_ivar_set(exc, rb_intern("error_bytes"), bytes);
02078         rb_ivar_set(exc, rb_intern("readagain_bytes"), bytes2);
02079         rb_ivar_set(exc, rb_intern("incomplete_input"), ec->last_error.result == econv_incomplete_input ? Qtrue : Qfalse);
02080 
02081       set_encs:
02082         rb_ivar_set(exc, rb_intern("source_encoding_name"), rb_str_new2(ec->last_error.source_encoding));
02083         rb_ivar_set(exc, rb_intern("destination_encoding_name"), rb_str_new2(ec->last_error.destination_encoding));
02084         idx = rb_enc_find_index(ec->last_error.source_encoding);
02085         if (0 <= idx)
02086             rb_ivar_set(exc, rb_intern("source_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
02087         idx = rb_enc_find_index(ec->last_error.destination_encoding);
02088         if (0 <= idx)
02089             rb_ivar_set(exc, rb_intern("destination_encoding"), rb_enc_from_encoding(rb_enc_from_index(idx)));
02090         return exc;
02091     }
02092     if (ec->last_error.result == econv_undefined_conversion) {
02093         VALUE bytes = rb_str_new((const char *)ec->last_error.error_bytes_start,
02094                                  ec->last_error.error_bytes_len);
02095         VALUE dumped = Qnil;
02096         int idx;
02097         if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
02098             rb_encoding *utf8 = rb_utf8_encoding();
02099             const char *start, *end;
02100             int n;
02101             start = (const char *)ec->last_error.error_bytes_start;
02102             end = start + ec->last_error.error_bytes_len;
02103             n = rb_enc_precise_mbclen(start, end, utf8);
02104             if (MBCLEN_CHARFOUND_P(n) &&
02105                 (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
02106                 unsigned int cc = rb_enc_mbc_to_codepoint(start, end, utf8);
02107                 dumped = rb_sprintf("U+%04X", cc);
02108             }
02109         }
02110         if (dumped == Qnil)
02111             dumped = rb_str_dump(bytes);
02112         if (strcmp(ec->last_error.source_encoding,
02113                    ec->source_encoding_name) == 0 &&
02114             strcmp(ec->last_error.destination_encoding,
02115                    ec->destination_encoding_name) == 0) {
02116             mesg = rb_sprintf("%s from %s to %s",
02117                     StringValueCStr(dumped),
02118                     ec->last_error.source_encoding,
02119                     ec->last_error.destination_encoding);
02120         }
02121         else {
02122             int i;
02123             mesg = rb_sprintf("%s to %s in conversion from %s",
02124                     StringValueCStr(dumped),
02125                     ec->last_error.destination_encoding,
02126                     ec->source_encoding_name);
02127             for (i = 0; i < ec->num_trans; i++) {
02128                 const rb_transcoder *tr = ec->elems[i].tc->transcoder;
02129                 if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
02130                     rb_str_catf(mesg, " to %s",
02131                                 ec->elems[i].tc->transcoder->dst_encoding);
02132             }
02133         }
02134         exc = rb_exc_new3(rb_eUndefinedConversionError, mesg);
02135         idx = rb_enc_find_index(ec->last_error.source_encoding);
02136         if (0 <= idx)
02137             rb_enc_associate_index(bytes, idx);
02138         rb_ivar_set(exc, rb_intern("error_char"), bytes);
02139         goto set_encs;
02140     }
02141     return Qnil;
02142 }
02143 
02144 static void
02145 more_output_buffer(
02146         VALUE destination,
02147         unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02148         int max_output,
02149         unsigned char **out_start_ptr,
02150         unsigned char **out_pos,
02151         unsigned char **out_stop_ptr)
02152 {
02153     size_t len = (*out_pos - *out_start_ptr);
02154     size_t new_len = (len + max_output) * 2;
02155     *out_start_ptr = resize_destination(destination, len, new_len);
02156     *out_pos = *out_start_ptr + len;
02157     *out_stop_ptr = *out_start_ptr + new_len;
02158 }
02159 
02160 static int
02161 make_replacement(rb_econv_t *ec)
02162 {
02163     rb_transcoding *tc;
02164     const rb_transcoder *tr;
02165     rb_encoding *enc;
02166     const unsigned char *replacement;
02167     const char *repl_enc;
02168     const char *ins_enc;
02169     size_t len;
02170 
02171     if (ec->replacement_str)
02172         return 0;
02173 
02174     ins_enc = rb_econv_encoding_to_insert_output(ec);
02175 
02176     tc = ec->last_tc;
02177     if (*ins_enc) {
02178         tr = tc->transcoder;
02179         enc = rb_enc_find(tr->dst_encoding);
02180         replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
02181     }
02182     else {
02183         replacement = (unsigned char *)"?";
02184         len = 1;
02185         repl_enc = "";
02186     }
02187 
02188     ec->replacement_str = replacement;
02189     ec->replacement_len = len;
02190     ec->replacement_enc = repl_enc;
02191     ec->replacement_allocated = 0;
02192     return 0;
02193 }
02194 
02195 int
02196 rb_econv_set_replacement(rb_econv_t *ec,
02197     const unsigned char *str, size_t len, const char *encname)
02198 {
02199     unsigned char *str2;
02200     size_t len2;
02201     const char *encname2;
02202 
02203     encname2 = rb_econv_encoding_to_insert_output(ec);
02204 
02205     if (encoding_equal(encname, encname2)) {
02206         str2 = xmalloc(len);
02207         MEMCPY(str2, str, unsigned char, len); /* xxx: str may be invalid */
02208         len2 = len;
02209         encname2 = encname;
02210     }
02211     else {
02212         str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2);
02213         if (!str2)
02214             return -1;
02215     }
02216 
02217     if (ec->replacement_allocated) {
02218         xfree((void *)ec->replacement_str);
02219     }
02220     ec->replacement_allocated = 1;
02221     ec->replacement_str = str2;
02222     ec->replacement_len = len2;
02223     ec->replacement_enc = encname2;
02224     return 0;
02225 }
02226 
02227 static int
02228 output_replacement_character(rb_econv_t *ec)
02229 {
02230     int ret;
02231 
02232     if (make_replacement(ec) == -1)
02233         return -1;
02234 
02235     ret = rb_econv_insert_output(ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc);
02236     if (ret == -1)
02237         return -1;
02238 
02239     return 0;
02240 }
02241 
02242 #if 1
02243 static void
02244 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
02245                const unsigned char *in_stop, unsigned char *out_stop,
02246                VALUE destination,
02247                unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02248                const char *src_encoding,
02249                const char *dst_encoding,
02250                int ecflags,
02251                VALUE ecopts)
02252 {
02253     rb_econv_t *ec;
02254     rb_transcoding *last_tc;
02255     rb_econv_result_t ret;
02256     unsigned char *out_start = *out_pos;
02257     int max_output;
02258     VALUE exc;
02259     VALUE fallback = Qnil;
02260 
02261     ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
02262     if (!ec)
02263         rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
02264 
02265     if (!NIL_P(ecopts) && TYPE(ecopts) == T_HASH)
02266         fallback = rb_hash_aref(ecopts, sym_fallback);
02267     last_tc = ec->last_tc;
02268     max_output = last_tc ? last_tc->transcoder->max_output : 1;
02269 
02270   resume:
02271     ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
02272 
02273     if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
02274         VALUE rep = rb_enc_str_new(
02275                 (const char *)ec->last_error.error_bytes_start,
02276                 ec->last_error.error_bytes_len,
02277                 rb_enc_find(ec->last_error.source_encoding));
02278         rep = rb_hash_lookup2(fallback, rep, Qundef);
02279         if (rep != Qundef) {
02280             StringValue(rep);
02281             ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
02282                     RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
02283             if ((int)ret == -1) {
02284                 rb_raise(rb_eArgError, "too big fallback string");
02285             }
02286             goto resume;
02287         }
02288     }
02289 
02290     if (ret == econv_invalid_byte_sequence ||
02291         ret == econv_incomplete_input ||
02292         ret == econv_undefined_conversion) {
02293         exc = make_econv_exception(ec);
02294         rb_econv_close(ec);
02295         rb_exc_raise(exc);
02296     }
02297 
02298     if (ret == econv_destination_buffer_full) {
02299         more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
02300         goto resume;
02301     }
02302 
02303     rb_econv_close(ec);
02304     return;
02305 }
02306 #else
02307 /* sample transcode_loop implementation in byte-by-byte stream style */
02308 static void
02309 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
02310                const unsigned char *in_stop, unsigned char *out_stop,
02311                VALUE destination,
02312                unsigned char *(*resize_destination)(VALUE, size_t, size_t),
02313                const char *src_encoding,
02314                const char *dst_encoding,
02315                int ecflags,
02316                VALUE ecopts)
02317 {
02318     rb_econv_t *ec;
02319     rb_transcoding *last_tc;
02320     rb_econv_result_t ret;
02321     unsigned char *out_start = *out_pos;
02322     const unsigned char *ptr;
02323     int max_output;
02324     VALUE exc;
02325 
02326     ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
02327     if (!ec)
02328         rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
02329 
02330     last_tc = ec->last_tc;
02331     max_output = last_tc ? last_tc->transcoder->max_output : 1;
02332 
02333     ret = econv_source_buffer_empty;
02334     ptr = *in_pos;
02335     while (ret != econv_finished) {
02336         unsigned char input_byte;
02337         const unsigned char *p = &input_byte;
02338 
02339         if (ret == econv_source_buffer_empty) {
02340             if (ptr < in_stop) {
02341                 input_byte = *ptr;
02342                 ret = rb_econv_convert(ec, &p, p+1, out_pos, out_stop, ECONV_PARTIAL_INPUT);
02343             }
02344             else {
02345                 ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, 0);
02346             }
02347         }
02348         else {
02349             ret = rb_econv_convert(ec, NULL, NULL, out_pos, out_stop, ECONV_PARTIAL_INPUT);
02350         }
02351         if (&input_byte != p)
02352             ptr += p - &input_byte;
02353         switch (ret) {
02354           case econv_invalid_byte_sequence:
02355           case econv_incomplete_input:
02356           case econv_undefined_conversion:
02357             exc = make_econv_exception(ec);
02358             rb_econv_close(ec);
02359             rb_exc_raise(exc);
02360             break;
02361 
02362           case econv_destination_buffer_full:
02363             more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
02364             break;
02365 
02366           case econv_source_buffer_empty:
02367             break;
02368 
02369           case econv_finished:
02370             break;
02371         }
02372     }
02373     rb_econv_close(ec);
02374     *in_pos = in_stop;
02375     return;
02376 }
02377 #endif
02378 
02379 
02380 /*
02381  *  String-specific code
02382  */
02383 
02384 static unsigned char *
02385 str_transcoding_resize(VALUE destination, size_t len, size_t new_len)
02386 {
02387     rb_str_resize(destination, new_len);
02388     return (unsigned char *)RSTRING_PTR(destination);
02389 }
02390 
02391 static int
02392 econv_opts(VALUE opt)
02393 {
02394     VALUE v;
02395     int ecflags = 0;
02396 
02397     v = rb_hash_aref(opt, sym_invalid);
02398     if (NIL_P(v)) {
02399     }
02400     else if (v==sym_replace) {
02401         ecflags |= ECONV_INVALID_REPLACE;
02402     }
02403     else {
02404         rb_raise(rb_eArgError, "unknown value for invalid character option");
02405     }
02406 
02407     v = rb_hash_aref(opt, sym_undef);
02408     if (NIL_P(v)) {
02409     }
02410     else if (v==sym_replace) {
02411         ecflags |= ECONV_UNDEF_REPLACE;
02412     }
02413     else {
02414         rb_raise(rb_eArgError, "unknown value for undefined character option");
02415     }
02416 
02417     v = rb_hash_aref(opt, sym_replace);
02418     if (!NIL_P(v) && !(ecflags & ECONV_INVALID_REPLACE)) {
02419         ecflags |= ECONV_UNDEF_REPLACE;
02420     }
02421 
02422     v = rb_hash_aref(opt, sym_xml);
02423     if (!NIL_P(v)) {
02424         if (v==sym_text) {
02425             ecflags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
02426         }
02427         else if (v==sym_attr) {
02428             ecflags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
02429         }
02430         else if (TYPE(v) == T_SYMBOL) {
02431             rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
02432         }
02433         else {
02434             rb_raise(rb_eArgError, "unexpected value for xml option");
02435         }
02436     }
02437 
02438     v = rb_hash_aref(opt, sym_universal_newline);
02439     if (RTEST(v))
02440         ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
02441 
02442     v = rb_hash_aref(opt, sym_crlf_newline);
02443     if (RTEST(v))
02444         ecflags |= ECONV_CRLF_NEWLINE_DECORATOR;
02445 
02446     v = rb_hash_aref(opt, sym_cr_newline);
02447     if (RTEST(v))
02448         ecflags |= ECONV_CR_NEWLINE_DECORATOR;
02449 
02450     return ecflags;
02451 }
02452 
02453 int
02454 rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
02455 {
02456     int ecflags;
02457     VALUE newhash = Qnil;
02458     VALUE v;
02459 
02460     if (NIL_P(opthash)) {
02461         *opts = Qnil;
02462         return 0;
02463     }
02464     ecflags = econv_opts(opthash);
02465 
02466     v = rb_hash_aref(opthash, sym_replace);
02467     if (!NIL_P(v)) {
02468         StringValue(v);
02469         if (rb_enc_str_coderange(v) == ENC_CODERANGE_BROKEN) {
02470             VALUE dumped = rb_str_dump(v);
02471             rb_raise(rb_eArgError, "replacement string is broken: %s as %s",
02472                      StringValueCStr(dumped),
02473                      rb_enc_name(rb_enc_get(v)));
02474         }
02475         v = rb_str_new_frozen(v);
02476         newhash = rb_hash_new();
02477         rb_hash_aset(newhash, sym_replace, v);
02478     }
02479 
02480     v = rb_hash_aref(opthash, sym_fallback);
02481     if (!NIL_P(v)) {
02482         v = rb_convert_type(v, T_HASH, "Hash", "to_hash");
02483         if (!NIL_P(v)) {
02484             if (NIL_P(newhash))
02485                 newhash = rb_hash_new();
02486             rb_hash_aset(newhash, sym_fallback, v);
02487         }
02488     }
02489 
02490     if (!NIL_P(newhash))
02491         rb_hash_freeze(newhash);
02492     *opts = newhash;
02493 
02494     return ecflags;
02495 }
02496 
02497 rb_econv_t *
02498 rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash)
02499 {
02500     rb_econv_t *ec;
02501     VALUE replacement;
02502 
02503     if (NIL_P(opthash)) {
02504         replacement = Qnil;
02505     }
02506     else {
02507         if (TYPE(opthash) != T_HASH || !OBJ_FROZEN(opthash))
02508             rb_bug("rb_econv_open_opts called with invalid opthash");
02509         replacement = rb_hash_aref(opthash, sym_replace);
02510     }
02511 
02512     ec = rb_econv_open(source_encoding, destination_encoding, ecflags);
02513     if (!ec)
02514         return ec;
02515 
02516     if (!NIL_P(replacement)) {
02517         int ret;
02518         rb_encoding *enc = rb_enc_get(replacement);
02519 
02520         ret = rb_econv_set_replacement(ec,
02521                 (const unsigned char *)RSTRING_PTR(replacement),
02522                 RSTRING_LEN(replacement),
02523                 rb_enc_name(enc));
02524         if (ret == -1) {
02525             rb_econv_close(ec);
02526             return NULL;
02527         }
02528     }
02529     return ec;
02530 }
02531 
02532 static int
02533 enc_arg(volatile VALUE *arg, const char **name_p, rb_encoding **enc_p)
02534 {
02535     rb_encoding *enc;
02536     const char *n;
02537     int encidx;
02538     VALUE encval;
02539 
02540     if (((encidx = rb_to_encoding_index(encval = *arg)) < 0) ||
02541         !(enc = rb_enc_from_index(encidx))) {
02542         enc = NULL;
02543         encidx = 0;
02544         n = StringValueCStr(*arg);
02545     }
02546     else {
02547         n = rb_enc_name(enc);
02548     }
02549 
02550     *name_p = n;
02551     *enc_p = enc;
02552 
02553     return encidx;
02554 }
02555 
02556 static int
02557 str_transcode_enc_args(VALUE str, volatile VALUE *arg1, volatile VALUE *arg2,
02558         const char **sname_p, rb_encoding **senc_p,
02559         const char **dname_p, rb_encoding **denc_p)
02560 {
02561     rb_encoding *senc, *denc;
02562     const char *sname, *dname;
02563     int sencidx, dencidx;
02564 
02565     dencidx = enc_arg(arg1, &dname, &denc);
02566 
02567     if (NIL_P(*arg2)) {
02568         sencidx = rb_enc_get_index(str);
02569         senc = rb_enc_from_index(sencidx);
02570         sname = rb_enc_name(senc);
02571     }
02572     else {
02573         sencidx = enc_arg(arg2, &sname, &senc);
02574     }
02575 
02576     *sname_p = sname;
02577     *senc_p = senc;
02578     *dname_p = dname;
02579     *denc_p = denc;
02580     return dencidx;
02581 }
02582 
02583 static int
02584 str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
02585 {
02586     VALUE dest;
02587     VALUE str = *self;
02588     volatile VALUE arg1, arg2;
02589     long blen, slen;
02590     unsigned char *buf, *bp, *sp;
02591     const unsigned char *fromp;
02592     rb_encoding *senc, *denc;
02593     const char *sname, *dname;
02594     int dencidx;
02595 
02596     if (argc <0 || argc > 2) {
02597         rb_raise(rb_eArgError, "wrong number of arguments (%d for 0..2)", argc);
02598     }
02599 
02600     if (argc == 0) {
02601         arg1 = rb_enc_default_internal();
02602         if (NIL_P(arg1)) {
02603             if (!ecflags) return -1;
02604             arg1 = rb_obj_encoding(str);
02605         }
02606         ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
02607     }
02608     else {
02609         arg1 = argv[0];
02610     }
02611     arg2 = argc<=1 ? Qnil : argv[1];
02612     dencidx = str_transcode_enc_args(str, &arg1, &arg2, &sname, &senc, &dname, &denc);
02613 
02614     if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR|
02615                     ECONV_CRLF_NEWLINE_DECORATOR|
02616                     ECONV_CR_NEWLINE_DECORATOR|
02617                     ECONV_XML_TEXT_DECORATOR|
02618                     ECONV_XML_ATTR_CONTENT_DECORATOR|
02619                     ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
02620         if (senc && senc == denc) {
02621             return NIL_P(arg2) ? -1 : dencidx;
02622         }
02623         if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) {
02624             if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
02625                 return dencidx;
02626             }
02627         }
02628         if (encoding_equal(sname, dname)) {
02629             return NIL_P(arg2) ? -1 : dencidx;
02630         }
02631     }
02632     else {
02633         if (encoding_equal(sname, dname)) {
02634             sname = "";
02635             dname = "";
02636         }
02637     }
02638 
02639     fromp = sp = (unsigned char *)RSTRING_PTR(str);
02640     slen = RSTRING_LEN(str);
02641     blen = slen + 30; /* len + margin */
02642     dest = rb_str_tmp_new(blen);
02643     bp = (unsigned char *)RSTRING_PTR(dest);
02644 
02645     transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
02646     if (fromp != sp+slen) {
02647         rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
02648     }
02649     buf = (unsigned char *)RSTRING_PTR(dest);
02650     *bp = '\0';
02651     rb_str_set_len(dest, bp - buf);
02652 
02653     /* set encoding */
02654     if (!denc) {
02655         dencidx = rb_define_dummy_encoding(dname);
02656     }
02657     *self = dest;
02658 
02659     return dencidx;
02660 }
02661 
02662 static int
02663 str_transcode(int argc, VALUE *argv, VALUE *self)
02664 {
02665     VALUE opt;
02666     int ecflags = 0;
02667     VALUE ecopts = Qnil;
02668 
02669     if (0 < argc) {
02670         opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash");
02671         if (!NIL_P(opt)) {
02672             argc--;
02673             ecflags = rb_econv_prepare_opts(opt, &ecopts);
02674         }
02675     }
02676     return str_transcode0(argc, argv, self, ecflags, ecopts);
02677 }
02678 
02679 static inline VALUE
02680 str_encode_associate(VALUE str, int encidx)
02681 {
02682     int cr = 0;
02683 
02684     rb_enc_associate_index(str, encidx);
02685 
02686     /* transcoded string never be broken. */
02687     if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {
02688         rb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &cr);
02689     }
02690     else {
02691         cr = ENC_CODERANGE_VALID;
02692     }
02693     ENC_CODERANGE_SET(str, cr);
02694     return str;
02695 }
02696 
02697 /*
02698  *  call-seq:
02699  *     str.encode!(encoding [, options] )   -> str
02700  *     str.encode!(dst_encoding, src_encoding [, options] )   -> str
02701  *
02702  *  The first form transcodes the contents of <i>str</i> from
02703  *  str.encoding to +encoding+.
02704  *  The second form transcodes the contents of <i>str</i> from
02705  *  src_encoding to dst_encoding.
02706  *  The options Hash gives details for conversion. See String#encode
02707  *  for details.
02708  *  Returns the string even if no changes were made.
02709  */
02710 
02711 static VALUE
02712 str_encode_bang(int argc, VALUE *argv, VALUE str)
02713 {
02714     VALUE newstr;
02715     int encidx;
02716 
02717     if (OBJ_FROZEN(str)) { /* in future, may use str_frozen_check from string.c, but that's currently static */
02718         rb_raise(rb_eRuntimeError, "string frozen");
02719     }
02720 
02721     newstr = str;
02722     encidx = str_transcode(argc, argv, &newstr);
02723 
02724     if (encidx < 0) return str;
02725     rb_str_shared_replace(str, newstr);
02726     return str_encode_associate(str, encidx);
02727 }
02728 
02729 /*
02730  *  call-seq:
02731  *     str.encode(encoding [, options] )   -> str
02732  *     str.encode(dst_encoding, src_encoding [, options] )   -> str
02733  *     str.encode([options])   -> str
02734  *
02735  *  The first form returns a copy of <i>str</i> transcoded
02736  *  to encoding +encoding+.
02737  *  The second form returns a copy of <i>str</i> transcoded
02738  *  from src_encoding to dst_encoding.
02739  *  The last form returns a copy of <i>str</i> transcoded to
02740  *  <code>Encoding.default_internal</code>.
02741  *  By default, the first and second form raise
02742  *  Encoding::UndefinedConversionError for characters that are
02743  *  undefined in the destination encoding, and
02744  *  Encoding::InvalidByteSequenceError for invalid byte sequences
02745  *  in the source encoding. The last form by default does not raise
02746  *  exceptions but uses replacement strings.
02747  *  The <code>options</code> Hash gives details for conversion.
02748  *
02749  *  === options
02750  *  The hash <code>options</code> can have the following keys:
02751  *  :invalid ::
02752  *    If the value is <code>:replace</code>, <code>#encode</code> replaces
02753  *    invalid byte sequences in <code>str</code> with the replacement character.
02754  *    The default is to raise the exception
02755  *  :undef ::
02756  *    If the value is <code>:replace</code>, <code>#encode</code> replaces
02757  *    characters which are undefined in the destination encoding with
02758  *    the replacement character.
02759  *  :replace ::
02760  *    Sets the replacement string to the value. The default replacement
02761  *    string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
02762  *  :fallback ::
02763  *    Sets the replacement string by the given hash for an undefined character.
02764  *    Its key is the undefined character encoded in the source encoding
02765  *    of the current transcoder. Its value can be in any encoding as long as it
02766  *    can be converted into the destination encoding of the transcoder.
02767  *  :xml ::
02768  *    The value must be <code>:text</code> or <code>:attr</code>.
02769  *    If the value is <code>:text</code> <code>#encode</code> replaces
02770  *    undefined characters with their (upper-case hexadecimal) numeric
02771  *    character references. '&', '<', and '>' are converted to "&amp;",
02772  *    "&lt;", and "&gt;", respectively.
02773  *    If the value is <code>:attr</code>, <code>#encode</code> also quotes
02774  *    the replacement result (using '"'), and replaces '"' with "&quot;".
02775  *  :cr_newline ::
02776  *    Replaces LF ("\n") with CR ("\r") if value is true.
02777  *  :crlf_newline ::
02778  *    Replaces LF ("\n") with CRLF ("\r\n") if value is true.
02779  *  :universal_newline ::
02780  *    Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
02781  */
02782 
02783 static VALUE
02784 str_encode(int argc, VALUE *argv, VALUE str)
02785 {
02786     VALUE newstr = str;
02787     int encidx = str_transcode(argc, argv, &newstr);
02788 
02789     if (encidx < 0) return rb_str_dup(str);
02790     if (newstr == str) {
02791         newstr = rb_str_dup(str);
02792     }
02793     else {
02794         RBASIC(newstr)->klass = rb_obj_class(str);
02795     }
02796     return str_encode_associate(newstr, encidx);
02797 }
02798 
02799 VALUE
02800 rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
02801 {
02802     int argc = 1;
02803     VALUE *argv = &to;
02804     VALUE newstr = str;
02805     int encidx = str_transcode0(argc, argv, &newstr, ecflags, ecopts);
02806 
02807     if (encidx < 0) return rb_str_dup(str);
02808     if (newstr == str) {
02809         newstr = rb_str_dup(str);
02810     }
02811     else {
02812         RBASIC(newstr)->klass = rb_obj_class(str);
02813     }
02814     return str_encode_associate(newstr, encidx);
02815 }
02816 
02817 static void
02818 econv_free(void *ptr)
02819 {
02820     rb_econv_t *ec = ptr;
02821     rb_econv_close(ec);
02822 }
02823 
02824 static size_t
02825 econv_memsize(const void *ptr)
02826 {
02827     return ptr ? sizeof(rb_econv_t) : 0;
02828 }
02829 
02830 static const rb_data_type_t econv_data_type = {
02831     "econv",
02832     NULL, econv_free, econv_memsize,
02833 };
02834 
02835 static VALUE
02836 econv_s_allocate(VALUE klass)
02837 {
02838     return TypedData_Wrap_Struct(klass, &econv_data_type, NULL);
02839 }
02840 
02841 static rb_encoding *
02842 make_dummy_encoding(const char *name)
02843 {
02844     rb_encoding *enc;
02845     int idx;
02846     idx = rb_define_dummy_encoding(name);
02847     enc = rb_enc_from_index(idx);
02848     return enc;
02849 }
02850 
02851 static rb_encoding *
02852 make_encoding(const char *name)
02853 {
02854     rb_encoding *enc;
02855     enc = rb_enc_find(name);
02856     if (!enc)
02857         enc = make_dummy_encoding(name);
02858     return enc;
02859 }
02860 
02861 static VALUE
02862 make_encobj(const char *name)
02863 {
02864     return rb_enc_from_encoding(make_encoding(name));
02865 }
02866 
02867 /*
02868  * call-seq:
02869  *   Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
02870  *   Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
02871  *
02872  * Returns the corresponding ASCII compatible encoding.
02873  *
02874  * Returns nil if the argument is an ASCII compatible encoding.
02875  *
02876  * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
02877  * can represent exactly the same characters as the given ASCII incompatible encoding.
02878  * So, no conversion undefined error occurs when converting between the two encodings.
02879  *
02880  *   Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
02881  *   Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
02882  *   Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
02883  *
02884  */
02885 static VALUE
02886 econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
02887 {
02888     const char *arg_name, *result_name;
02889     rb_encoding *arg_enc, *result_enc;
02890 
02891     enc_arg(&arg, &arg_name, &arg_enc);
02892 
02893     result_name = rb_econv_asciicompat_encoding(arg_name);
02894 
02895     if (result_name == NULL)
02896         return Qnil;
02897 
02898     result_enc = make_encoding(result_name);
02899 
02900     return rb_enc_from_encoding(result_enc);
02901 }
02902 
02903 static void
02904 econv_args(int argc, VALUE *argv,
02905     volatile VALUE *snamev_p, volatile VALUE *dnamev_p,
02906     const char **sname_p, const char **dname_p,
02907     rb_encoding **senc_p, rb_encoding **denc_p,
02908     int *ecflags_p,
02909     VALUE *ecopts_p)
02910 {
02911     VALUE opt, opthash, flags_v, ecopts;
02912     int sidx, didx;
02913     const char *sname, *dname;
02914     rb_encoding *senc, *denc;
02915     int ecflags;
02916 
02917     rb_scan_args(argc, argv, "21", snamev_p, dnamev_p, &opt);
02918 
02919     if (NIL_P(opt)) {
02920         ecflags = 0;
02921         ecopts = Qnil;
02922     }
02923     else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
02924         ecflags = NUM2INT(flags_v);
02925         ecopts = Qnil;
02926     }
02927     else {
02928         opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
02929         ecflags = rb_econv_prepare_opts(opthash, &ecopts);
02930     }
02931 
02932     senc = NULL;
02933     sidx = rb_to_encoding_index(*snamev_p);
02934     if (0 <= sidx) {
02935         senc = rb_enc_from_index(sidx);
02936     }
02937     else {
02938         StringValue(*snamev_p);
02939     }
02940 
02941     denc = NULL;
02942     didx = rb_to_encoding_index(*dnamev_p);
02943     if (0 <= didx) {
02944         denc = rb_enc_from_index(didx);
02945     }
02946     else {
02947         StringValue(*dnamev_p);
02948     }
02949 
02950     sname = senc ? rb_enc_name(senc) : StringValueCStr(*snamev_p);
02951     dname = denc ? rb_enc_name(denc) : StringValueCStr(*dnamev_p);
02952 
02953     *sname_p = sname;
02954     *dname_p = dname;
02955     *senc_p = senc;
02956     *denc_p = denc;
02957     *ecflags_p = ecflags;
02958     *ecopts_p = ecopts;
02959 }
02960 
02961 static int
02962 decorate_convpath(VALUE convpath, int ecflags)
02963 {
02964     int num_decorators;
02965     const char *decorators[MAX_ECFLAGS_DECORATORS];
02966     int i;
02967     int n, len;
02968 
02969     num_decorators = decorator_names(ecflags, decorators);
02970     if (num_decorators == -1)
02971         return -1;
02972 
02973     len = n = RARRAY_LENINT(convpath);
02974     if (n != 0) {
02975         VALUE pair = RARRAY_PTR(convpath)[n-1];
02976         if (TYPE(pair) == T_ARRAY) {
02977             const char *sname = rb_enc_name(rb_to_encoding(RARRAY_PTR(pair)[0]));
02978             const char *dname = rb_enc_name(rb_to_encoding(RARRAY_PTR(pair)[1]));
02979             transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
02980             const rb_transcoder *tr = load_transcoder_entry(entry);
02981             if (!tr)
02982                 return -1;
02983             if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
02984                     tr->asciicompat_type == asciicompat_encoder) {
02985                 n--;
02986                 rb_ary_store(convpath, len + num_decorators - 1, pair);
02987             }
02988         }
02989         else {
02990             rb_ary_store(convpath, len + num_decorators - 1, pair);
02991         }
02992     }
02993 
02994     for (i = 0; i < num_decorators; i++)
02995         rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
02996 
02997     return 0;
02998 }
02999 
03000 static void
03001 search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
03002 {
03003     VALUE *ary_p = arg;
03004     VALUE v;
03005 
03006     if (*ary_p == Qnil) {
03007         *ary_p = rb_ary_new();
03008     }
03009 
03010     if (DECORATOR_P(sname, dname)) {
03011         v = rb_str_new_cstr(dname);
03012     }
03013     else {
03014         v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
03015     }
03016     rb_ary_store(*ary_p, depth, v);
03017 }
03018 
03019 /*
03020  * call-seq:
03021  *   Encoding::Converter.search_convpath(source_encoding, destination_encoding)         -> ary
03022  *   Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt)    -> ary
03023  *
03024  *  Returns a conversion path.
03025  *
03026  *   p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
03027  *   #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
03028  *   #    [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
03029  *
03030  *   p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
03031  *   #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
03032  *   #    [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
03033  *   #    "universal_newline"]
03034  *
03035  *   p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
03036  *   #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
03037  *   #    "universal_newline",
03038  *   #    [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
03039  */
03040 static VALUE
03041 econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
03042 {
03043     volatile VALUE snamev, dnamev;
03044     const char *sname, *dname;
03045     rb_encoding *senc, *denc;
03046     int ecflags;
03047     VALUE ecopts;
03048     VALUE convpath;
03049 
03050     econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
03051 
03052     convpath = Qnil;
03053     transcode_search_path(sname, dname, search_convpath_i, &convpath);
03054 
03055     if (NIL_P(convpath))
03056         rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
03057 
03058     if (decorate_convpath(convpath, ecflags) == -1)
03059         rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
03060 
03061     return convpath;
03062 }
03063 
03064 /*
03065  * Check the existence of a conversion path.
03066  * Returns the number of converters in the conversion path.
03067  * result: >=0:success -1:failure
03068  */
03069 int
03070 rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding)
03071 {
03072     VALUE convpath = Qnil;
03073     transcode_search_path(from_encoding, to_encoding, search_convpath_i,
03074                           &convpath);
03075     return RTEST(convpath);
03076 }
03077 
03078 struct rb_econv_init_by_convpath_t {
03079     rb_econv_t *ec;
03080     int index;
03081     int ret;
03082 };
03083 
03084 static void
03085 rb_econv_init_by_convpath_i(const char *sname, const char *dname, int depth, void *arg)
03086 {
03087     struct rb_econv_init_by_convpath_t *a = (struct rb_econv_init_by_convpath_t *)arg;
03088     int ret;
03089 
03090     if (a->ret == -1)
03091         return;
03092 
03093     ret = rb_econv_add_converter(a->ec, sname, dname, a->index);
03094 
03095     a->ret = ret;
03096     return;
03097 }
03098 
03099 static rb_econv_t *
03100 rb_econv_init_by_convpath(VALUE self, VALUE convpath,
03101     const char **sname_p, const char **dname_p,
03102     rb_encoding **senc_p, rb_encoding**denc_p)
03103 {
03104     rb_econv_t *ec;
03105     long i;
03106     int ret, first=1;
03107     VALUE elt;
03108     rb_encoding *senc = 0, *denc = 0;
03109     const char *sname, *dname;
03110 
03111     ec = rb_econv_alloc(RARRAY_LENINT(convpath));
03112     DATA_PTR(self) = ec;
03113 
03114     for (i = 0; i < RARRAY_LEN(convpath); i++) {
03115         volatile VALUE snamev, dnamev;
03116         VALUE pair;
03117         elt = rb_ary_entry(convpath, i);
03118         if (!NIL_P(pair = rb_check_array_type(elt))) {
03119             if (RARRAY_LEN(pair) != 2)
03120                 rb_raise(rb_eArgError, "not a 2-element array in convpath");
03121             snamev = rb_ary_entry(pair, 0);
03122             enc_arg(&snamev, &sname, &senc);
03123             dnamev = rb_ary_entry(pair, 1);
03124             enc_arg(&dnamev, &dname, &denc);
03125         }
03126         else {
03127             sname = "";
03128             dname = StringValueCStr(elt);
03129         }
03130         if (DECORATOR_P(sname, dname)) {
03131             ret = rb_econv_add_converter(ec, sname, dname, ec->num_trans);
03132             if (ret == -1)
03133                 rb_raise(rb_eArgError, "decoration failed: %s", dname);
03134         }
03135         else {
03136             int j = ec->num_trans;
03137             struct rb_econv_init_by_convpath_t arg;
03138             arg.ec = ec;
03139             arg.index = ec->num_trans;
03140             arg.ret = 0;
03141             ret = transcode_search_path(sname, dname, rb_econv_init_by_convpath_i, &arg);
03142             if (ret == -1 || arg.ret == -1)
03143                 rb_raise(rb_eArgError, "adding conversion failed: %s to %s", sname, dname);
03144             if (first) {
03145                 first = 0;
03146                 *senc_p = senc;
03147                 *sname_p = ec->elems[j].tc->transcoder->src_encoding;
03148             }
03149             *denc_p = denc;
03150             *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
03151         }
03152     }
03153 
03154     if (first) {
03155       *senc_p = NULL;
03156       *denc_p = NULL;
03157       *sname_p = "";
03158       *dname_p = "";
03159     }
03160 
03161     ec->source_encoding_name = *sname_p;
03162     ec->destination_encoding_name = *dname_p;
03163 
03164     return ec;
03165 }
03166 
03167 /*
03168  * call-seq:
03169  *   Encoding::Converter.new(source_encoding, destination_encoding)
03170  *   Encoding::Converter.new(source_encoding, destination_encoding, opt)
03171  *   Encoding::Converter.new(convpath)
03172  *
03173  * possible options elements:
03174  *   hash form:
03175  *     :invalid => nil            # raise error on invalid byte sequence (default)
03176  *     :invalid => :replace       # replace invalid byte sequence
03177  *     :undef => nil              # raise error on undefined conversion (default)
03178  *     :undef => :replace         # replace undefined conversion
03179  *     :replace => string         # replacement string ("?" or "\uFFFD" if not specified)
03180  *     :universal_newline => true # decorator for converting CRLF and CR to LF
03181  *     :crlf_newline => true      # decorator for converting LF to CRLF
03182  *     :cr_newline => true        # decorator for converting LF to CR
03183  *     :xml => :text              # escape as XML CharData.
03184  *     :xml => :attr              # escape as XML AttValue
03185  *   integer form:
03186  *     Encoding::Converter::INVALID_REPLACE
03187  *     Encoding::Converter::UNDEF_REPLACE
03188  *     Encoding::Converter::UNDEF_HEX_CHARREF
03189  *     Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
03190  *     Encoding::Converter::CRLF_NEWLINE_DECORATOR
03191  *     Encoding::Converter::CR_NEWLINE_DECORATOR
03192  *     Encoding::Converter::XML_TEXT_DECORATOR
03193  *     Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
03194  *     Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
03195  *
03196  * Encoding::Converter.new creates an instance of Encoding::Converter.
03197  *
03198  * Source_encoding and destination_encoding should be a string or
03199  * Encoding object.
03200  *
03201  * opt should be nil, a hash or an integer.
03202  *
03203  * convpath should be an array.
03204  * convpath may contain
03205  * - two-element arrays which contain encodings or encoding names, or
03206  * - strings representing decorator names.
03207  *
03208  * Encoding::Converter.new optionally takes an option.
03209  * The option should be a hash or an integer.
03210  * The option hash can contain :invalid => nil, etc.
03211  * The option integer should be logical-or of constants such as
03212  * Encoding::Converter::INVALID_REPLACE, etc.
03213  *
03214  * [:invalid => nil]
03215  *   Raise error on invalid byte sequence.  This is a default behavior.
03216  * [:invalid => :replace]
03217  *   Replace invalid byte sequence by replacement string.
03218  * [:undef => nil]
03219  *   Raise an error if a character in source_encoding is not defined in destination_encoding.
03220  *   This is a default behavior.
03221  * [:undef => :replace]
03222  *   Replace undefined character in destination_encoding with replacement string.
03223  * [:replace => string]
03224  *   Specify the replacement string.
03225  *   If not specified, "\uFFFD" is used for Unicode encodings and "?" for others.
03226  * [:universal_newline => true]
03227  *   Convert CRLF and CR to LF.
03228  * [:crlf_newline => true]
03229  *   Convert LF to CRLF.
03230  * [:cr_newline => true]
03231  *   Convert LF to CR.
03232  * [:xml => :text]
03233  *   Escape as XML CharData.
03234  *   This form can be used as a HTML 4.0 #PCDATA.
03235  *   - '&' -> '&amp;'
03236  *   - '<' -> '&lt;'
03237  *   - '>' -> '&gt;'
03238  *   - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
03239  * [:xml => :attr]
03240  *   Escape as XML AttValue.
03241  *   The converted result is quoted as "...".
03242  *   This form can be used as a HTML 4.0 attribute value.
03243  *   - '&' -> '&amp;'
03244  *   - '<' -> '&lt;'
03245  *   - '>' -> '&gt;'
03246  *   - '"' -> '&quot;'
03247  *   - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
03248  *
03249  * Examples:
03250  *   # UTF-16BE to UTF-8
03251  *   ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
03252  *
03253  *   # Usually, decorators such as newline conversion are inserted last.
03254  *   ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
03255  *   p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
03256  *                 #    "universal_newline"]
03257  *
03258  *   # But, if the last encoding is ASCII incompatible,
03259  *   # decorators are inserted before the last conversion.
03260  *   ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
03261  *   p ec.convpath #=> ["crlf_newline",
03262  *                 #    [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
03263  *
03264  *   # Conversion path can be specified directly.
03265  *   ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
03266  *   p ec.convpath #=> ["universal_newline",
03267  *                 #    [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
03268  *                 #    [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
03269  */
03270 static VALUE
03271 econv_init(int argc, VALUE *argv, VALUE self)
03272 {
03273     VALUE ecopts;
03274     volatile VALUE snamev, dnamev;
03275     const char *sname, *dname;
03276     rb_encoding *senc, *denc;
03277     rb_econv_t *ec;
03278     int ecflags;
03279     VALUE convpath;
03280 
03281     if (rb_check_typeddata(self, &econv_data_type)) {
03282         rb_raise(rb_eTypeError, "already initialized");
03283     }
03284 
03285     if (argc == 1 && !NIL_P(convpath = rb_check_array_type(argv[0]))) {
03286         ec = rb_econv_init_by_convpath(self, convpath, &sname, &dname, &senc, &denc);
03287         ecflags = 0;
03288         ecopts = Qnil;
03289     }
03290     else {
03291         econv_args(argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
03292         ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
03293     }
03294 
03295     if (!ec) {
03296         rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
03297     }
03298 
03299     if (!DECORATOR_P(sname, dname)) {
03300         if (!senc)
03301             senc = make_dummy_encoding(sname);
03302         if (!denc)
03303             denc = make_dummy_encoding(dname);
03304     }
03305 
03306     ec->source_encoding = senc;
03307     ec->destination_encoding = denc;
03308 
03309     DATA_PTR(self) = ec;
03310 
03311     return self;
03312 }
03313 
03314 /*
03315  * call-seq:
03316  *   ec.inspect         -> string
03317  *
03318  * Returns a printable version of <i>ec</i>
03319  *
03320  *   ec = Encoding::Converter.new("iso-8859-1", "utf-8")
03321  *   puts ec.inspect    #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
03322  *
03323  */
03324 static VALUE
03325 econv_inspect(VALUE self)
03326 {
03327     const char *cname = rb_obj_classname(self);
03328     rb_econv_t *ec;
03329 
03330     TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
03331     if (!ec)
03332         return rb_sprintf("#<%s: uninitialized>", cname);
03333     else {
03334         const char *sname = ec->source_encoding_name;
03335         const char *dname = ec->destination_encoding_name;
03336         VALUE str;
03337         str = rb_sprintf("#<%s: ", cname);
03338         econv_description(sname, dname, ec->flags, str);
03339         rb_str_cat2(str, ">");
03340         return str;
03341     }
03342 }
03343 
03344 static rb_econv_t *
03345 check_econv(VALUE self)
03346 {
03347     rb_econv_t *ec;
03348 
03349     TypedData_Get_Struct(self, rb_econv_t, &econv_data_type, ec);
03350     if (!ec) {
03351         rb_raise(rb_eTypeError, "uninitialized encoding converter");
03352     }
03353     return ec;
03354 }
03355 
03356 /*
03357  * call-seq:
03358  *   ec.source_encoding -> encoding
03359  *
03360  * Returns the source encoding as an Encoding object.
03361  */
03362 static VALUE
03363 econv_source_encoding(VALUE self)
03364 {
03365     rb_econv_t *ec = check_econv(self);
03366     if (!ec->source_encoding)
03367         return Qnil;
03368     return rb_enc_from_encoding(ec->source_encoding);
03369 }
03370 
03371 /*
03372  * call-seq:
03373  *   ec.destination_encoding -> encoding
03374  *
03375  * Returns the destination encoding as an Encoding object.
03376  */
03377 static VALUE
03378 econv_destination_encoding(VALUE self)
03379 {
03380     rb_econv_t *ec = check_econv(self);
03381     if (!ec->destination_encoding)
03382         return Qnil;
03383     return rb_enc_from_encoding(ec->destination_encoding);
03384 }
03385 
03386 /*
03387  * call-seq:
03388  *   ec.convpath        -> ary
03389  *
03390  * Returns the conversion path of ec.
03391  *
03392  * The result is an array of conversions.
03393  *
03394  *   ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
03395  *   p ec.convpath
03396  *   #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
03397  *   #    [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
03398  *   #    "crlf_newline"]
03399  *
03400  * Each element of the array is a pair of encodings or a string.
03401  * A pair means an encoding conversion.
03402  * A string means a decorator.
03403  *
03404  * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means
03405  * a converter from ISO-8859-1 to UTF-8.
03406  * "crlf_newline" means newline converter from LF to CRLF.
03407  */
03408 static VALUE
03409 econv_convpath(VALUE self)
03410 {
03411     rb_econv_t *ec = check_econv(self);
03412     VALUE result;
03413     int i;
03414 
03415     result = rb_ary_new();
03416     for (i = 0; i < ec->num_trans; i++) {
03417         const rb_transcoder *tr = ec->elems[i].tc->transcoder;
03418         VALUE v;
03419         if (DECORATOR_P(tr->src_encoding, tr->dst_encoding))
03420             v = rb_str_new_cstr(tr->dst_encoding);
03421         else
03422             v = rb_assoc_new(make_encobj(tr->src_encoding), make_encobj(tr->dst_encoding));
03423         rb_ary_push(result, v);
03424     }
03425     return result;
03426 }
03427 
03428 static VALUE
03429 econv_result_to_symbol(rb_econv_result_t res)
03430 {
03431     switch (res) {
03432       case econv_invalid_byte_sequence: return sym_invalid_byte_sequence;
03433       case econv_incomplete_input: return sym_incomplete_input;
03434       case econv_undefined_conversion: return sym_undefined_conversion;
03435       case econv_destination_buffer_full: return sym_destination_buffer_full;
03436       case econv_source_buffer_empty: return sym_source_buffer_empty;
03437       case econv_finished: return sym_finished;
03438       case econv_after_output: return sym_after_output;
03439       default: return INT2NUM(res); /* should not be reached */
03440     }
03441 }
03442 
03443 /*
03444  * call-seq:
03445  *   ec.primitive_convert(source_buffer, destination_buffer) -> symbol
03446  *   ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
03447  *   ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
03448  *   ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
03449  *
03450  * possible opt elements:
03451  *   hash form:
03452  *     :partial_input => true           # source buffer may be part of larger source
03453  *     :after_output => true            # stop conversion after output before input
03454  *   integer form:
03455  *     Encoding::Converter::PARTIAL_INPUT
03456  *     Encoding::Converter::AFTER_OUTPUT
03457  *
03458  * possible results:
03459  *    :invalid_byte_sequence
03460  *    :incomplete_input
03461  *    :undefined_conversion
03462  *    :after_output
03463  *    :destination_buffer_full
03464  *    :source_buffer_empty
03465  *    :finished
03466  *
03467  * primitive_convert converts source_buffer into destination_buffer.
03468  *
03469  * source_buffer should be a string or nil.
03470  * nil means an empty string.
03471  *
03472  * destination_buffer should be a string.
03473  *
03474  * destination_byteoffset should be an integer or nil.
03475  * nil means the end of destination_buffer.
03476  * If it is omitted, nil is assumed.
03477  *
03478  * destination_bytesize should be an integer or nil.
03479  * nil means unlimited.
03480  * If it is omitted, nil is assumed.
03481  *
03482  * opt should be nil, a hash or an integer.
03483  * nil means no flags.
03484  * If it is omitted, nil is assumed.
03485  *
03486  * primitive_convert converts the content of source_buffer from the beginning
03487  * and stores the result into destination_buffer.
03488  *
03489  * destination_byteoffset and destination_bytesize specify the region which
03490  * the converted result is stored.
03491  * destination_byteoffset specifies the start position in destination_buffer in bytes.
03492  * If destination_byteoffset is nil,
03493  * destination_buffer.bytesize is used for appending the result.
03494  * destination_bytesize specifies maximum number of bytes.
03495  * If destination_bytesize is nil,
03496  * destination size is unlimited.
03497  * After conversion, destination_buffer is resized to
03498  * destination_byteoffset + actually produced number of bytes.
03499  * Also destination_buffer's encoding is set to destination_encoding.
03500  *
03501  * primitive_convert drops the converted part of source_buffer.
03502  * the dropped part is converted in destination_buffer or
03503  * buffered in Encoding::Converter object.
03504  *
03505  * primitive_convert stops conversion when one of the following conditions is met.
03506  * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
03507  * - unexpected end of source buffer (:incomplete_input)
03508  *   this occurs only when :partial_input is not specified.
03509  * - character not representable in output encoding (:undefined_conversion)
03510  * - after some output is generated, before input is done (:after_output)
03511  *   this occurs only when :after_output is specified.
03512  * - destination buffer is full (:destination_buffer_full)
03513  *   this occurs only when destination_bytesize is non-nil.
03514  * - source buffer is empty (:source_buffer_empty)
03515  *   this occurs only when :partial_input is specified.
03516  * - conversion is finished (:finished)
03517  *
03518  * example:
03519  *   ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
03520  *   ret = ec.primitive_convert(src="pi", dst="", nil, 100)
03521  *   p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
03522  *
03523  *   ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
03524  *   ret = ec.primitive_convert(src="pi", dst="", nil, 1)
03525  *   p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
03526  *   ret = ec.primitive_convert(src, dst="", nil, 1)
03527  *   p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
03528  *   ret = ec.primitive_convert(src, dst="", nil, 1)
03529  *   p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
03530  *   ret = ec.primitive_convert(src, dst="", nil, 1)
03531  *   p [ret, src, dst] #=> [:finished, "", "i"]
03532  *
03533  */
static VALUE
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
{
    VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
    rb_econv_t *ec = check_econv(self);
    rb_econv_result_t res;
    const unsigned char *ip, *is;
    unsigned char *op, *os;
    long output_byteoffset, output_bytesize;
    unsigned long output_byteend;
    int flags;

    /* "23": two mandatory arguments (input, output) and up to three optional ones */
    rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt);

    /* a nil offset is resolved after the retry label; 0 is only a placeholder */
    if (NIL_P(output_byteoffset_v))
        output_byteoffset = 0; /* dummy */
    else
        output_byteoffset = NUM2LONG(output_byteoffset_v);

    /* a nil size is replaced by an initial guess further below; 0 is only a placeholder */
    if (NIL_P(output_bytesize_v))
        output_bytesize = 0; /* dummy */
    else
        output_bytesize = NUM2LONG(output_bytesize_v);

    /*
     * opt is nil (no flags), something convertible with to_int (used as the
     * flag bits directly), or a hash in which only :partial_input and
     * :after_output are examined.
     */
    if (NIL_P(opt)) {
        flags = 0;
    }
    else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
        flags = NUM2INT(flags_v);
    }
    else {
        VALUE v;
        opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
        flags = 0;
        v = rb_hash_aref(opt, sym_partial_input);
        if (RTEST(v))
            flags |= ECONV_PARTIAL_INPUT;
        v = rb_hash_aref(opt, sym_after_output);
        if (RTEST(v))
            flags |= ECONV_AFTER_OUTPUT;
    }

    StringValue(output);
    if (!NIL_P(input))
        StringValue(input);
    /* output is resized and written through its raw pointer below */
    rb_str_modify(output);

    /*
     * No size limit given: start from RSTRING_EMBED_LEN_MAX, or the input
     * length if that is larger.  The buffer is doubled on each retry.
     */
    if (NIL_P(output_bytesize_v)) {
        output_bytesize = RSTRING_EMBED_LEN_MAX;
        if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
            output_bytesize = RSTRING_LEN(input);
    }

  retry:

    /* nil offset means "append": write starting at the current end of output */
    if (NIL_P(output_byteoffset_v))
        output_byteoffset = RSTRING_LEN(output);

    if (output_byteoffset < 0)
        rb_raise(rb_eArgError, "negative output_byteoffset");

    if (RSTRING_LEN(output) < output_byteoffset)
        rb_raise(rb_eArgError, "output_byteoffset too big");

    if (output_bytesize < 0)
        rb_raise(rb_eArgError, "negative output_bytesize");

    /* the sum is computed in unsigned arithmetic so that overflow can be detected */
    output_byteend = (unsigned long)output_byteoffset +
                     (unsigned long)output_bytesize;

    if (output_byteend < (unsigned long)output_byteoffset ||
        LONG_MAX < output_byteend)
        rb_raise(rb_eArgError, "output_byteoffset+output_bytesize too big");

    /* make sure the destination region [offset, offset+size) fits in output */
    if (rb_str_capacity(output) < output_byteend)
        rb_str_resize(output, output_byteend);

    /* ip/is delimit the source bytes; both are NULL when input is nil */
    if (NIL_P(input)) {
        ip = is = NULL;
    }
    else {
        ip = (const unsigned char *)RSTRING_PTR(input);
        is = ip + RSTRING_LEN(input);
    }

    /* op/os delimit the destination region */
    op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
    os = op + output_bytesize;

    res = rb_econv_convert(ec, &ip, is, &op, os, flags);
    /* op now points past the last byte written: truncate output there */
    rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
    /* ip now points past the last byte read: remove those bytes from input */
    if (!NIL_P(input))
        rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));

    /*
     * Only when the caller gave no size limit: on a full buffer, double the
     * size and convert again.  The offset is forced to nil so the next pass
     * appends after what has already been produced.
     */
    if (NIL_P(output_bytesize_v) && res == econv_destination_buffer_full) {
        if (LONG_MAX / 2 < output_bytesize)
            rb_raise(rb_eArgError, "too long conversion result");
        output_bytesize *= 2;
        output_byteoffset_v = Qnil;
        goto retry;
    }

    /* tag output with the converter's destination encoding when one is set */
    if (ec->destination_encoding) {
        rb_enc_associate(output, ec->destination_encoding);
    }

    return econv_result_to_symbol(res);
}
03641 
03642 /*
03643  * call-seq:
03644  *   ec.convert(source_string) -> destination_string
03645  *
03646  * Convert source_string and return destination_string.
03647  *
03648  * source_string is assumed as a part of source.
03649  * i.e.  :partial_input=>true is specified internally.
03650  * finish method should be used last.
03651  *
03652  *   ec = Encoding::Converter.new("utf-8", "euc-jp")
03653  *   puts ec.convert("\u3042").dump     #=> "\xA4\xA2"
03654  *   puts ec.finish.dump                #=> ""
03655  *
03656  *   ec = Encoding::Converter.new("euc-jp", "utf-8")
03657  *   puts ec.convert("\xA4").dump       #=> ""
03658  *   puts ec.convert("\xA2").dump       #=> "\xE3\x81\x82"
03659  *   puts ec.finish.dump                #=> ""
03660  *
03661  *   ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
03662  *   puts ec.convert("\xE3").dump       #=> "".force_encoding("ISO-2022-JP")
03663  *   puts ec.convert("\x81").dump       #=> "".force_encoding("ISO-2022-JP")
03664  *   puts ec.convert("\x82").dump       #=> "\e$B$\"".force_encoding("ISO-2022-JP")
03665  *   puts ec.finish.dump                #=> "\e(B".force_encoding("ISO-2022-JP")
03666  *
03667  * If a conversion error occurs,
03668  * Encoding::UndefinedConversionError or
03669  * Encoding::InvalidByteSequenceError is raised.
03670  * Encoding::Converter#convert doesn't supply methods to recover or restart
03671  * from these exceptions.
03672  * When you want to handle these conversion errors,
03673  * use Encoding::Converter#primitive_convert.
03674  *
03675  */
03676 static VALUE
03677 econv_convert(VALUE self, VALUE source_string)
03678 {
03679     VALUE ret, dst;
03680     VALUE av[5];
03681     int ac;
03682     rb_econv_t *ec = check_econv(self);
03683 
03684     StringValue(source_string);
03685 
03686     dst = rb_str_new(NULL, 0);
03687 
03688     av[0] = rb_str_dup(source_string);
03689     av[1] = dst;
03690     av[2] = Qnil;
03691     av[3] = Qnil;
03692     av[4] = INT2NUM(ECONV_PARTIAL_INPUT);
03693     ac = 5;
03694 
03695     ret = econv_primitive_convert(ac, av, self);
03696 
03697     if (ret == sym_invalid_byte_sequence ||
03698         ret == sym_undefined_conversion ||
03699         ret == sym_incomplete_input) {
03700         VALUE exc = make_econv_exception(ec);
03701         rb_exc_raise(exc);
03702     }
03703 
03704     if (ret == sym_finished) {
03705         rb_raise(rb_eArgError, "converter already finished");
03706     }
03707 
03708     if (ret != sym_source_buffer_empty) {
03709         rb_bug("unexpected result of econv_primitive_convert");
03710     }
03711 
03712     return dst;
03713 }
03714 
03715 /*
03716  * call-seq:
03717  *   ec.finish -> string
03718  *
03719  * Finishes the converter.
03720  * It returns the last part of the converted string.
03721  *
03722  *   ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
03723  *   p ec.convert("\u3042")     #=> "\e$B$\""
03724  *   p ec.finish                #=> "\e(B"
03725  */
03726 static VALUE
03727 econv_finish(VALUE self)
03728 {
03729     VALUE ret, dst;
03730     VALUE av[5];
03731     int ac;
03732     rb_econv_t *ec = check_econv(self);
03733 
03734     dst = rb_str_new(NULL, 0);
03735 
03736     av[0] = Qnil;
03737     av[1] = dst;
03738     av[2] = Qnil;
03739     av[3] = Qnil;
03740     av[4] = INT2NUM(0);
03741     ac = 5;
03742 
03743     ret = econv_primitive_convert(ac, av, self);
03744 
03745     if (ret == sym_invalid_byte_sequence ||
03746         ret == sym_undefined_conversion ||
03747         ret == sym_incomplete_input) {
03748         VALUE exc = make_econv_exception(ec);
03749         rb_exc_raise(exc);
03750     }
03751 
03752     if (ret != sym_finished) {
03753         rb_bug("unexpected result of econv_primitive_convert");
03754     }
03755 
03756     return dst;
03757 }
03758 
03759 /*
03760  * call-seq:
03761  *   ec.primitive_errinfo -> array
03762  *
03763  * primitive_errinfo returns important information regarding the last error
03764  * as a 5-element array:
03765  *
03766  *   [result, enc1, enc2, error_bytes, readagain_bytes]
03767  *
03768  * result is the last result of primitive_convert.
03769  *
03770  * Other elements are only meaningful when result is
03771  * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
03772  *
03773  * enc1 and enc2 indicate a conversion step as a pair of strings.
03774  * For example, a converter from EUC-JP to ISO-8859-1 converts
03775  * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1.
03776  * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
03777  *
03778  * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
03779  * error_bytes is discarded portion.
03780  * readagain_bytes is buffered portion which is read again on next conversion.
03781  *
03782  * Example:
03783  *
03784  *   # \xff is invalid as EUC-JP.
03785  *   ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
03786  *   ec.primitive_convert(src="\xff", dst="", nil, 10)
03787  *   p ec.primitive_errinfo
03788  *   #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""]
03789  *
03790  *   # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
03791  *   # Since this error occurs in UTF-8 to ISO-8859-1 conversion,
03792  *   # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
03793  *   ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
03794  *   ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
03795  *   p ec.primitive_errinfo
03796  *   #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
03797  *
03798  *   # partial character is invalid
03799  *   ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
03800  *   ec.primitive_convert(src="\xa4", dst="", nil, 10)
03801  *   p ec.primitive_errinfo
03802  *   #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
03803  *
03804  *   # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
03805  *   # partial characters.
03806  *   ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
03807  *   ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
03808  *   p ec.primitive_errinfo
03809  *   #=> [:source_buffer_empty, nil, nil, nil, nil]
03810  *
03811  *   # \xd8\x00\x00@ is invalid as UTF-16BE because
03812  *   # no low surrogate after high surrogate (\xd8\x00).
03813  *   # It is detected by 3rd byte (\00) which is part of next character.
03814  *   # So the high surrogate (\xd8\x00) is discarded and
03815  *   # the 3rd byte is read again later.
03816  *   # Since the byte is buffered in ec, it is dropped from src.
03817  *   ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
03818  *   ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
03819  *   p ec.primitive_errinfo
03820  *   #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
03821  *   p src
03822  *   #=> "@"
03823  *
03824  *   # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
03825  *   # The problem is detected by 4th byte.
03826  *   ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
03827  *   ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
03828  *   p ec.primitive_errinfo
03829  *   #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
03830  *   p src
03831  *   #=> ""
03832  *
03833  */
03834 static VALUE
03835 econv_primitive_errinfo(VALUE self)
03836 {
03837     rb_econv_t *ec = check_econv(self);
03838 
03839     VALUE ary;
03840 
03841     ary = rb_ary_new2(5);
03842 
03843     rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result));
03844     rb_ary_store(ary, 4, Qnil);
03845 
03846     if (ec->last_error.source_encoding)
03847         rb_ary_store(ary, 1, rb_str_new2(ec->last_error.source_encoding));
03848 
03849     if (ec->last_error.destination_encoding)
03850         rb_ary_store(ary, 2, rb_str_new2(ec->last_error.destination_encoding));
03851 
03852     if (ec->last_error.error_bytes_start) {
03853         rb_ary_store(ary, 3, rb_str_new((const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len));
03854         rb_ary_store(ary, 4, rb_str_new((const char *)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len));
03855     }
03856 
03857     return ary;
03858 }
03859 
03860 /*
03861  * call-seq:
03862  *   ec.insert_output(string) -> nil
03863  *
03864  * Inserts string into the encoding converter.
03865  * The string will be converted to the destination encoding and
03866  * output on later conversions.
03867  *
03868  * If the destination encoding is stateful,
03869  * string is converted according to the state and the state is updated.
03870  *
03871  * This method should be used only when a conversion error occurs.
03872  *
03873  *  ec = Encoding::Converter.new("utf-8", "iso-8859-1")
03874  *  src = "HIRAGANA LETTER A is \u{3042}."
03875  *  dst = ""
03876  *  p ec.primitive_convert(src, dst)    #=> :undefined_conversion
03877  *  puts "[#{dst.dump}, #{src.dump}]"   #=> ["HIRAGANA LETTER A is ", "."]
03878  *  ec.insert_output("<err>")
03879  *  p ec.primitive_convert(src, dst)    #=> :finished
03880  *  puts "[#{dst.dump}, #{src.dump}]"   #=> ["HIRAGANA LETTER A is <err>.", ""]
03881  *
03882  *  ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
03883  *  src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
03884  *  dst = ""
03885  *  p ec.primitive_convert(src, dst)    #=> :undefined_conversion
03886  *  puts "[#{dst.dump}, #{src.dump}]"   #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
03887  *  ec.insert_output "?"                # state change required to output "?".
03888  *  p ec.primitive_convert(src, dst)    #=> :finished
03889  *  puts "[#{dst.dump}, #{src.dump}]"   #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
03890  *
03891  */
03892 static VALUE
03893 econv_insert_output(VALUE self, VALUE string)
03894 {
03895     const char *insert_enc;
03896 
03897     int ret;
03898 
03899     rb_econv_t *ec = check_econv(self);
03900 
03901     StringValue(string);
03902     insert_enc = rb_econv_encoding_to_insert_output(ec);
03903     string = rb_str_encode(string, rb_enc_from_encoding(rb_enc_find(insert_enc)), 0, Qnil);
03904 
03905     ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
03906     if (ret == -1) {
03907         rb_raise(rb_eArgError, "too big string");
03908     }
03909 
03910     return Qnil;
03911 }
03912 
03913 /*
03914  * call-seq:
03915  *   ec.putback                    -> string
03916  *   ec.putback(max_numbytes)      -> string
03917  *
03918  * Put back the bytes which will be converted.
03919  *
03920  * The bytes are caused by invalid_byte_sequence error.
03921  * When invalid_byte_sequence error, some bytes are discarded and
03922  * some bytes are buffered to be converted later.
03923  * The latter bytes can be put back.
03924  * It can be observed by
03925  * Encoding::InvalidByteSequenceError#readagain_bytes and
03926  * Encoding::Converter#primitive_errinfo.
03927  *
03928  *   ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
03929  *   src = "\x00\xd8\x61\x00"
03930  *   dst = ""
03931  *   p ec.primitive_convert(src, dst)   #=> :invalid_byte_sequence
03932  *   p ec.primitive_errinfo     #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
03933  *   p ec.putback               #=> "a\x00"
03934  *   p ec.putback               #=> ""          # no more bytes to put back
03935  *
03936  */
03937 static VALUE
03938 econv_putback(int argc, VALUE *argv, VALUE self)
03939 {
03940     rb_econv_t *ec = check_econv(self);
03941     int n;
03942     int putbackable;
03943     VALUE str, max;
03944 
03945     rb_scan_args(argc, argv, "01", &max);
03946 
03947     if (NIL_P(max))
03948         n = rb_econv_putbackable(ec);
03949     else {
03950         n = NUM2INT(max);
03951         putbackable = rb_econv_putbackable(ec);
03952         if (putbackable < n)
03953             n = putbackable;
03954     }
03955 
03956     str = rb_str_new(NULL, n);
03957     rb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
03958 
03959     if (ec->source_encoding) {
03960         rb_enc_associate(str, ec->source_encoding);
03961     }
03962 
03963     return str;
03964 }
03965 
03966 /*
03967  * call-seq:
03968  *   ec.last_error -> exception or nil
03969  *
03970  * Returns an exception object for the last conversion.
03971  * Returns nil if the last conversion did not produce an error.
03972  *
03973  * "error" means that
03974  * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
03975  * Encoding::Converter#convert and
03976  * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
03977  * Encoding::Converter#primitive_convert.
03978  *
03979  *  ec = Encoding::Converter.new("utf-8", "iso-8859-1")
03980  *  p ec.primitive_convert(src="\xf1abcd", dst="")       #=> :invalid_byte_sequence
03981  *  p ec.last_error      #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
03982  *  p ec.primitive_convert(src, dst, nil, 1)             #=> :destination_buffer_full
03983  *  p ec.last_error      #=> nil
03984  *
03985  */
03986 static VALUE
03987 econv_last_error(VALUE self)
03988 {
03989     rb_econv_t *ec = check_econv(self);
03990     VALUE exc;
03991 
03992     exc = make_econv_exception(ec);
03993     if (NIL_P(exc))
03994         return Qnil;
03995     return exc;
03996 }
03997 
03998 /*
03999  * call-seq:
04000  *   ec.replacement -> string
04001  *
04002  * Returns the replacement string.
04003  *
04004  *  ec = Encoding::Converter.new("euc-jp", "us-ascii")
04005  *  p ec.replacement    #=> "?"
04006  *
04007  *  ec = Encoding::Converter.new("euc-jp", "utf-8")
04008  *  p ec.replacement    #=> "\uFFFD"
04009  */
04010 static VALUE
04011 econv_get_replacement(VALUE self)
04012 {
04013     rb_econv_t *ec = check_econv(self);
04014     int ret;
04015     rb_encoding *enc;
04016 
04017     ret = make_replacement(ec);
04018     if (ret == -1) {
04019         rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
04020     }
04021 
04022     enc = rb_enc_find(ec->replacement_enc);
04023     return rb_enc_str_new((const char *)ec->replacement_str, (long)ec->replacement_len, enc);
04024 }
04025 
04026 /*
04027  * call-seq:
04028  *   ec.replacement = string
04029  *
04030  * Sets the replacement string.
04031  *
04032  *  ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
04033  *  ec.replacement = "<undef>"
04034  *  p ec.convert("a \u3042 b")      #=> "a <undef> b"
04035  */
04036 static VALUE
04037 econv_set_replacement(VALUE self, VALUE arg)
04038 {
04039     rb_econv_t *ec = check_econv(self);
04040     VALUE string = arg;
04041     int ret;
04042     rb_encoding *enc;
04043 
04044     StringValue(string);
04045     enc = rb_enc_get(string);
04046 
04047     ret = rb_econv_set_replacement(ec,
04048             (const unsigned char *)RSTRING_PTR(string),
04049             RSTRING_LEN(string),
04050             rb_enc_name(enc));
04051 
04052     if (ret == -1) {
04053         /* xxx: rb_eInvalidByteSequenceError? */
04054         rb_raise(rb_eUndefinedConversionError, "replacement character setup failed");
04055     }
04056 
04057     return arg;
04058 }
04059 
04060 VALUE
04061 rb_econv_make_exception(rb_econv_t *ec)
04062 {
04063     return make_econv_exception(ec);
04064 }
04065 
04066 void
04067 rb_econv_check_error(rb_econv_t *ec)
04068 {
04069     VALUE exc;
04070 
04071     exc = make_econv_exception(ec);
04072     if (NIL_P(exc))
04073         return;
04074     rb_exc_raise(exc);
04075 }
04076 
04077 /*
04078  * call-seq:
04079  *   ecerr.source_encoding_name         -> string
04080  *
04081  * Returns the source encoding name as a string.
04082  */
04083 static VALUE
04084 ecerr_source_encoding_name(VALUE self)
04085 {
04086     return rb_attr_get(self, rb_intern("source_encoding_name"));
04087 }
04088 
04089 /*
04090  * call-seq:
04091  *   ecerr.source_encoding              -> encoding
04092  *
04093  * Returns the source encoding as an encoding object.
04094  *
04095  * Note that the result may not be equal to the source encoding of
04096  * the encoding converter if the conversion has multiple steps.
04097  *
04098  *  ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
04099  *  begin
04100  *    ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
04101  *  rescue Encoding::UndefinedConversionError
04102  *    p $!.source_encoding              #=> #<Encoding:UTF-8>
04103  *    p $!.destination_encoding         #=> #<Encoding:EUC-JP>
04104  *    p $!.source_encoding_name         #=> "UTF-8"
04105  *    p $!.destination_encoding_name    #=> "EUC-JP"
04106  *  end
04107  *
04108  */
04109 static VALUE
04110 ecerr_source_encoding(VALUE self)
04111 {
04112     return rb_attr_get(self, rb_intern("source_encoding"));
04113 }
04114 
04115 /*
04116  * call-seq:
04117  *   ecerr.destination_encoding_name         -> string
04118  *
04119  * Returns the destination encoding name as a string.
04120  */
04121 static VALUE
04122 ecerr_destination_encoding_name(VALUE self)
04123 {
04124     return rb_attr_get(self, rb_intern("destination_encoding_name"));
04125 }
04126 
04127 /*
04128  * call-seq:
04129  *   ecerr.destination_encoding         -> encoding
04130  *
04131  * Returns the destination encoding as an encoding object.
04132  */
04133 static VALUE
04134 ecerr_destination_encoding(VALUE self)
04135 {
04136     return rb_attr_get(self, rb_intern("destination_encoding"));
04137 }
04138 
04139 /*
04140  * call-seq:
04141  *   ecerr.error_char         -> string
04142  *
04143  * Returns the one-character string which caused Encoding::UndefinedConversionError.
04144  *
04145  *  ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
04146  *  begin
04147  *    ec.convert("\xa0")
04148  *  rescue Encoding::UndefinedConversionError
04149  *    puts $!.error_char.dump   #=> "\xC2\xA0"
04150  *    p $!.error_char.encoding  #=> #<Encoding:UTF-8>
04151  *  end
04152  *
04153  */
04154 static VALUE
04155 ecerr_error_char(VALUE self)
04156 {
04157     return rb_attr_get(self, rb_intern("error_char"));
04158 }
04159 
04160 /*
04161  * call-seq:
04162  *   ecerr.error_bytes         -> string
04163  *
04164  * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
04165  *
04166  *  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
04167  *  begin
04168  *    ec.convert("abc\xA1\xFFdef")
04169  *  rescue Encoding::InvalidByteSequenceError
04170  *    p $!      #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
04171  *    puts $!.error_bytes.dump          #=> "\xA1"
04172  *    puts $!.readagain_bytes.dump      #=> "\xFF"
04173  *  end
04174  */
04175 static VALUE
04176 ecerr_error_bytes(VALUE self)
04177 {
04178     return rb_attr_get(self, rb_intern("error_bytes"));
04179 }
04180 
04181 /*
04182  * call-seq:
04183  *   ecerr.readagain_bytes         -> string
04184  *
04185  * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
04186  */
04187 static VALUE
04188 ecerr_readagain_bytes(VALUE self)
04189 {
04190     return rb_attr_get(self, rb_intern("readagain_bytes"));
04191 }
04192 
04193 /*
04194  * call-seq:
04195  *   ecerr.incomplete_input?         -> true or false
04196  *
04197  * Returns true if the invalid byte sequence error is caused by
04198  * premature end of string.
04199  *
04200  *  ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
04201  *
04202  *  begin
04203  *    ec.convert("abc\xA1z")
04204  *  rescue Encoding::InvalidByteSequenceError
04205  *    p $!      #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
04206  *    p $!.incomplete_input?    #=> false
04207  *  end
04208  *
04209  *  begin
04210  *    ec.convert("abc\xA1")
04211  *    ec.finish
04212  *  rescue Encoding::InvalidByteSequenceError
04213  *    p $!      #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
04214  *    p $!.incomplete_input?    #=> true
04215  *  end
04216  */
04217 static VALUE
04218 ecerr_incomplete_input(VALUE self)
04219 {
04220     return rb_attr_get(self, rb_intern("incomplete_input"));
04221 }
04222 
04223 extern void Init_newline(void);
04224 
04225 /*
04226  *  Document-class: Encoding::UndefinedConversionError
04227  *
04228  *  Raised by Encoding and String methods when a transcoding operation
04229  *  fails.
04230  */
04231 
04232 /*
04233  *  Document-class: Encoding::InvalidByteSequenceError
04234  *
04235  *  Raised by Encoding and String methods when the string being
04236  *  transcoded contains a byte invalid for either the source or
04237  *  target encoding.
04238  */
04239 
04240 /*
04241  *  Document-class: Encoding::ConverterNotFoundError
04242  *
04243  *  Raised by transcoding methods when a named encoding does not
04244  *  correspond with a known converter.
04245  */
04246 
void
Init_transcode(void)
{
    /* error classes, all defined under Encoding with EncodingError as superclass */
    rb_eUndefinedConversionError = rb_define_class_under(rb_cEncoding, "UndefinedConversionError", rb_eEncodingError);
    rb_eInvalidByteSequenceError = rb_define_class_under(rb_cEncoding, "InvalidByteSequenceError", rb_eEncodingError);
    rb_eConverterNotFoundError = rb_define_class_under(rb_cEncoding, "ConverterNotFoundError", rb_eEncodingError);

    /* NOTE(review): presumably a case-insensitive string-keyed table, going by the st_ function name -- confirm */
    transcoder_table = st_init_strcasetable();

    /* symbols for option keys and values */
    sym_invalid = ID2SYM(rb_intern("invalid"));
    sym_undef = ID2SYM(rb_intern("undef"));
    sym_replace = ID2SYM(rb_intern("replace"));
    sym_fallback = ID2SYM(rb_intern("fallback"));
    sym_xml = ID2SYM(rb_intern("xml"));
    sym_text = ID2SYM(rb_intern("text"));
    sym_attr = ID2SYM(rb_intern("attr"));

    /* result symbols, plus the newline and partial-input option symbols */
    sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence"));
    sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion"));
    sym_destination_buffer_full = ID2SYM(rb_intern("destination_buffer_full"));
    sym_source_buffer_empty = ID2SYM(rb_intern("source_buffer_empty"));
    sym_finished = ID2SYM(rb_intern("finished"));
    sym_after_output = ID2SYM(rb_intern("after_output"));
    sym_incomplete_input = ID2SYM(rb_intern("incomplete_input"));
    sym_universal_newline = ID2SYM(rb_intern("universal_newline"));
    sym_crlf_newline = ID2SYM(rb_intern("crlf_newline"));
    sym_cr_newline = ID2SYM(rb_intern("cr_newline"));
    sym_partial_input = ID2SYM(rb_intern("partial_input"));

    /* String#encode and String#encode! (arity -1: variable arguments) */
    rb_define_method(rb_cString, "encode", str_encode, -1);
    rb_define_method(rb_cString, "encode!", str_encode_bang, -1);

    /* Encoding::Converter: class, allocator, singleton methods, instance methods */
    rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
    rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
    rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
    rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
    rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
    rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
    rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);
    rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0);
    rb_define_method(rb_cEncodingConverter, "destination_encoding", econv_destination_encoding, 0);
    rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, -1);
    rb_define_method(rb_cEncodingConverter, "convert", econv_convert, 1);
    rb_define_method(rb_cEncodingConverter, "finish", econv_finish, 0);
    rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0);
    rb_define_method(rb_cEncodingConverter, "insert_output", econv_insert_output, 1);
    rb_define_method(rb_cEncodingConverter, "putback", econv_putback, -1);
    rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0);
    rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0);
    rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1);

    /* Encoding::Converter constants exposing the ECONV_* flag values */
    rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK));
    rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE));
    rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK));
    rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE));
    rb_define_const(rb_cEncodingConverter, "UNDEF_HEX_CHARREF", INT2FIX(ECONV_UNDEF_HEX_CHARREF));
    rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT));
    rb_define_const(rb_cEncodingConverter, "AFTER_OUTPUT", INT2FIX(ECONV_AFTER_OUTPUT));
    rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
    rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_DECORATOR", INT2FIX(ECONV_CRLF_NEWLINE_DECORATOR));
    rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_DECORATOR", INT2FIX(ECONV_CR_NEWLINE_DECORATOR));
    rb_define_const(rb_cEncodingConverter, "XML_TEXT_DECORATOR", INT2FIX(ECONV_XML_TEXT_DECORATOR));
    rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_DECORATOR", INT2FIX(ECONV_XML_ATTR_CONTENT_DECORATOR));
    rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_DECORATOR", INT2FIX(ECONV_XML_ATTR_QUOTE_DECORATOR));

    /* accessors on Encoding::UndefinedConversionError */
    rb_define_method(rb_eUndefinedConversionError, "source_encoding_name", ecerr_source_encoding_name, 0);
    rb_define_method(rb_eUndefinedConversionError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
    rb_define_method(rb_eUndefinedConversionError, "source_encoding", ecerr_source_encoding, 0);
    rb_define_method(rb_eUndefinedConversionError, "destination_encoding", ecerr_destination_encoding, 0);
    rb_define_method(rb_eUndefinedConversionError, "error_char", ecerr_error_char, 0);

    /* accessors on Encoding::InvalidByteSequenceError (the encoding accessors share the functions used above) */
    rb_define_method(rb_eInvalidByteSequenceError, "source_encoding_name", ecerr_source_encoding_name, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding_name", ecerr_destination_encoding_name, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "source_encoding", ecerr_source_encoding, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "destination_encoding", ecerr_destination_encoding, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "error_bytes", ecerr_error_bytes, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "readagain_bytes", ecerr_readagain_bytes, 0);
    rb_define_method(rb_eInvalidByteSequenceError, "incomplete_input?", ecerr_incomplete_input, 0);

    /* defined in another translation unit; called last */
    Init_newline();
}
04328 

Generated on Wed Sep 8 2010 21:55:21 for Ruby by  doxygen 1.7.1