00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
/* Release identification. NKF_VERSION and NKF_RELEASE_DATE are pasted into the
 * banners printed by version() and show_configuration(); COPY_RIGHT is appended
 * to the version() banner. */
00023 #define NKF_VERSION "2.1.1"
00024 #define NKF_RELEASE_DATE "2010-04-28"
00025 #define COPY_RIGHT \
00026 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
00027 "Copyright (C) 1996-2010, The nkf Project."
00028
00029 #include "config.h"
00030 #include "nkf.h"
00031 #include "utf8tbl.h"
00032 #ifdef __WIN32__
00033 #include <windows.h>
00034 #include <locale.h>
00035 #endif
00036 #if defined(__OS2__)
00037 # define INCL_DOS
00038 # define INCL_DOSERRORS
00039 # include <os2.h>
00040 #endif
00041 #include <assert.h>
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
/* NOTE(review): presumably additional values for the mime_f option flag beyond
 * TRUE/FALSE -- the code that uses them is outside this chunk; confirm. */
00057 #define FIXED_MIME 7
00058 #define STRICT_MIME 8
00059
00060
/* Byte-order selectors. input_endian and output_endian (declared further down)
 * both start out as ENDIAN_BIG.
 * NOTE(review): ENDIAN_2143 / ENDIAN_3412 look like the two mixed 4-byte
 * orders -- confirm against the UTF-32 reader, which is outside this chunk. */
00061 enum byte_order {
00062 ENDIAN_BIG = 1,
00063 ENDIAN_LITTLE = 2,
00064 ENDIAN_2143 = 3,
00065 ENDIAN_3412 = 4
00066 };
00067
00068
00069
/* Numeric values of control characters and single-shift bytes. SP, TAB, CR,
 * LF, DEL and SS3 are referenced by the character-class macros further down.
 * CRLF is CR and LF packed into one 16-bit value; show_configuration()
 * compares DEFAULT_NEWLINE against CR and CRLF. */
00070 #define BS 0x08
00071 #define TAB 0x09
00072 #define LF 0x0a
00073 #define CR 0x0d
00074 #define ESC 0x1b
00075 #define SP 0x20
00076 #define DEL 0x7f
00077 #define SI 0x0f
00078 #define SO 0x0e
00079 #define SS2 0x8e
00080 #define SS3 0x8f
00081 #define CRLF 0x0D0A
00082
00083
00084
00085
/*
 * Encoding identifiers.
 *
 * ASCII through BINARY are implicitly numbered from 0, in the same order as
 * the rows of nkf_encoding_table, so they can be used directly as indices
 * into that table (see nkf_enc_from_index() and nkf_utf8_encoding()).
 * NKF_ENCODING_TABLE_SIZE, which follows BINARY, is therefore the number of
 * such rows and is used as the upper bound by nkf_enc_from_index().
 *
 * The JIS_X_* members carry explicit values outside that range and have no
 * row in the table.
 * NOTE(review): they presumably identify character sets rather than
 * encodings -- confirm against their users, which are outside this chunk.
 */
00086 enum nkf_encodings {
00087 ASCII,
00088 ISO_8859_1,
00089 ISO_2022_JP,
00090 CP50220,
00091 CP50221,
00092 CP50222,
00093 ISO_2022_JP_1,
00094 ISO_2022_JP_3,
00095 ISO_2022_JP_2004,
00096 SHIFT_JIS,
00097 WINDOWS_31J,
00098 CP10001,
00099 EUC_JP,
00100 EUCJP_NKF,
00101 CP51932,
00102 EUCJP_MS,
00103 EUCJP_ASCII,
00104 SHIFT_JISX0213,
00105 SHIFT_JIS_2004,
00106 EUC_JISX0213,
00107 EUC_JIS_2004,
00108 UTF_8,
00109 UTF_8N,
00110 UTF_8_BOM,
00111 UTF8_MAC,
00112 UTF_16,
00113 UTF_16BE,
00114 UTF_16BE_BOM,
00115 UTF_16LE,
00116 UTF_16LE_BOM,
00117 UTF_32,
00118 UTF_32BE,
00119 UTF_32BE_BOM,
00120 UTF_32LE,
00121 UTF_32LE_BOM,
00122 BINARY,
00123 NKF_ENCODING_TABLE_SIZE,
00124 JIS_X_0201_1976_K = 0x1013,
00125
00126
00127
00128 JIS_X_0208 = 0x1168,
00129 JIS_X_0212 = 0x1159,
00130
00131 JIS_X_0213_2 = 0x1229,
00132 JIS_X_0213_1 = 0x1233
00133 };
00134
/* Forward declarations of the converters referenced by the encoding
 * descriptors below. The *_iconv functions take (c2, c1, c0) and return an
 * nkf_char; the *_oconv functions take (c2, c1) and return nothing. Their
 * definitions are outside this chunk. */
00135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
00139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
00140 static void j_oconv(nkf_char c2, nkf_char c1);
00141 static void s_oconv(nkf_char c2, nkf_char c1);
00142 static void e_oconv(nkf_char c2, nkf_char c1);
00143 static void w_oconv(nkf_char c2, nkf_char c1);
00144 static void w_oconv16(nkf_char c2, nkf_char c1);
00145 static void w_oconv32(nkf_char c2, nkf_char c1);
00146
/* Descriptor of a base encoding family: a display name plus the function
 * pointers used to read it (iconv) and to write it (oconv). */
00147 typedef struct {
00148 const char *name;
00149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
00150 void (*oconv)(nkf_char c2, nkf_char c1);
00151 } nkf_native_encoding;
00152
/* The seven base families. ASCII and EUC-JP share e_iconv/e_oconv;
 * ISO-2022-JP is read with e_iconv but written with j_oconv. The addresses
 * of these objects are compared by the nkf_enc_asciicompat() and
 * nkf_enc_unicode_p() macros, so each family must stay a distinct object. */
00153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
00154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
00155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
00156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
00157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
00158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
00159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
00160
/* One named encoding: its enum nkf_encodings id, its canonical name and the
 * base family whose converters handle it. */
00161 typedef struct {
00162 const int id;
00163 const char *name;
00164 const nkf_native_encoding *base_encoding;
00165 } nkf_encoding;
00166
/* Table of all named encodings. Rows appear in enum nkf_encodings order, so
 * row i has id == i; nkf_enc_from_index() and nkf_utf8_encoding() rely on
 * that by indexing the table with an enum value. The final {-1, NULL, NULL}
 * row is a terminator. */
00167 nkf_encoding nkf_encoding_table[] = {
00168 {ASCII, "US-ASCII", &NkfEncodingASCII},
00169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
00170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
00171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
00172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
00173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
00174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
00175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
00176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
00177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
00178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
00179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
00180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
00181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
00182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
00183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
00184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
00185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
00186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
00187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
00188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
00189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
00190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
00191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
00192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
00193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
00194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
00195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
00196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
00197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
00198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
00199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
00200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
00201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
00202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
00203 {BINARY, "BINARY", &NkfEncodingASCII},
00204 {-1, NULL, NULL}
00205 };
00206
/* Alias table mapping an encoding name to its enum nkf_encodings id; several
 * names may share one id. nkf_enc_find_index() scans it from the top,
 * compares names case-insensitively and stops at the first negative id, so
 * the {NULL, -1} row must stay last.
 * NOTE(review): there is no row for "ISO-8859-1" although nkf_encoding_table
 * has one -- confirm whether that omission is intentional. */
00207 struct {
00208 const char *name;
00209 const int id;
00210 } encoding_name_to_id_table[] = {
00211 {"US-ASCII", ASCII},
00212 {"ASCII", ASCII},
00213 {"646", ASCII},
00214 {"ROMAN8", ASCII},
00215 {"ISO-2022-JP", ISO_2022_JP},
00216 {"ISO2022JP-CP932", CP50220},
00217 {"CP50220", CP50220},
00218 {"CP50221", CP50221},
00219 {"CSISO2022JP", CP50221},
00220 {"CP50222", CP50222},
00221 {"ISO-2022-JP-1", ISO_2022_JP_1},
00222 {"ISO-2022-JP-3", ISO_2022_JP_3},
00223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
00224 {"SHIFT_JIS", SHIFT_JIS},
00225 {"SJIS", SHIFT_JIS},
00226 {"MS_Kanji", SHIFT_JIS},
00227 {"PCK", SHIFT_JIS},
00228 {"WINDOWS-31J", WINDOWS_31J},
00229 {"CSWINDOWS31J", WINDOWS_31J},
00230 {"CP932", WINDOWS_31J},
00231 {"MS932", WINDOWS_31J},
00232 {"CP10001", CP10001},
00233 {"EUCJP", EUC_JP},
00234 {"EUC-JP", EUC_JP},
00235 {"EUCJP-NKF", EUCJP_NKF},
00236 {"CP51932", CP51932},
00237 {"EUC-JP-MS", EUCJP_MS},
00238 {"EUCJP-MS", EUCJP_MS},
00239 {"EUCJPMS", EUCJP_MS},
00240 {"EUC-JP-ASCII", EUCJP_ASCII},
00241 {"EUCJP-ASCII", EUCJP_ASCII},
00242 {"SHIFT_JISX0213", SHIFT_JISX0213},
00243 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
00244 {"EUC-JISX0213", EUC_JISX0213},
00245 {"EUC-JIS-2004", EUC_JIS_2004},
00246 {"UTF-8", UTF_8},
00247 {"UTF-8N", UTF_8N},
00248 {"UTF-8-BOM", UTF_8_BOM},
00249 {"UTF8-MAC", UTF8_MAC},
00250 {"UTF-8-MAC", UTF8_MAC},
00251 {"UTF-16", UTF_16},
00252 {"UTF-16BE", UTF_16BE},
00253 {"UTF-16BE-BOM", UTF_16BE_BOM},
00254 {"UTF-16LE", UTF_16LE},
00255 {"UTF-16LE-BOM", UTF_16LE_BOM},
00256 {"UTF-32", UTF_32},
00257 {"UTF-32BE", UTF_32BE},
00258 {"UTF-32BE-BOM", UTF_32BE_BOM},
00259 {"UTF-32LE", UTF_32LE},
00260 {"UTF-32LE-BOM", UTF_32LE_BOM},
00261 {"BINARY", BINARY},
00262 {NULL, -1}
00263 };
00264
/* Select the compile-time default encoding id from whichever DEFAULT_CODE_*
 * macro is defined; the first match in this chain wins. If none is defined,
 * DEFAULT_ENCIDX stays undefined and nkf_default_encoding() ends up falling
 * back to UTF-8 (unless DEFAULT_CODE_LOCALE supplies an encoding). */
00265 #if defined(DEFAULT_CODE_JIS)
00266 #define DEFAULT_ENCIDX ISO_2022_JP
00267 #elif defined(DEFAULT_CODE_SJIS)
00268 #define DEFAULT_ENCIDX SHIFT_JIS
00269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
00270 #define DEFAULT_ENCIDX WINDOWS_31J
00271 #elif defined(DEFAULT_CODE_EUC)
00272 #define DEFAULT_ENCIDX EUC_JP
00273 #elif defined(DEFAULT_CODE_UTF8)
00274 #define DEFAULT_ENCIDX UTF_8
00275 #endif
00276
00277
/*
 * Character-class and small conversion helpers.
 *
 * All of them are plain ASCII range tests on the numeric value of the
 * argument. Every parameter is parenthesised so that an argument containing
 * a low-precedence operator (?:, |, &, a comparison ...) is grouped the way
 * the caller wrote it. Arguments are still evaluated more than once, so do
 * not pass expressions with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* Upper-case an ASCII lower-case letter; any other value is returned as is. */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for anything that is not one. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF and for the characters strictly between SP and DEL other
 * than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
00303
/* NOTE(review): HOLD_SIZE is not used inside this chunk; presumably the size
 * of a look-ahead buffer -- confirm against its users. */
00304 #define HOLD_SIZE 1024
/* Size in bytes of the stdibuf/stdobuf arrays declared further down; a
 * smaller value is chosen when INT_IS_SHORT is defined. */
00305 #if defined(INT_IS_SHORT)
00306 #define IOBUF_SIZE 2048
00307 #else
00308 #define IOBUF_SIZE 16384
00309 #endif
00310
/* Initial values of kanji_intro and ascii_intro respectively. */
00311 #define DEFAULT_J 'B'
00312 #define DEFAULT_R 'B'
00313
00314
/* NOTE(review): presumably the two bytes of a replacement ("geta") character;
 * the users of GETA1/GETA2 are outside this chunk -- confirm. */
00315 #define GETA1 0x22
00316 #define GETA2 0x2e
00317
00318
00319
00320
/* Only in EASYWIN builds: _BufferSize is defined elsewhere and merely
 * declared here. NOTE(review): its use is outside this chunk. */
00321 #ifdef EASYWIN
00322 extern POINT _BufferSize;
00323 #endif
00324
/* One candidate input encoding; input_code_list[] holds one instance per
 * candidate. Besides the name it carries three nkf_char state words (stat,
 * score, index), a three-element nkf_char buffer, a status callback that
 * receives the record itself plus one nkf_char, and the iconv function for
 * that encoding.
 * NOTE(review): how stat/score/index/buf/_file_stat are updated is decided by
 * the *_status functions, which are outside this chunk. */
00325 struct input_code{
00326 const char *name;
00327 nkf_char stat;
00328 nkf_char score;
00329 nkf_char index;
00330 nkf_char buf[3];
00331 void (*status_func)(struct input_code *, nkf_char);
00332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
00333 int _file_stat;
00334 };
00335
/* Currently selected input/output encodings; all three start out unset. */
00336 static const char *input_codename = NULL;
00337 static nkf_encoding *input_encoding = NULL;
00338 static nkf_encoding *output_encoding = NULL;
00339
00340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
00341
00342
00343
00344
00345
00346
/* Possible values of ms_ucs_map_f; UCS_MAP_ASCII is the initial value.
 * NOTE(review): presumably selects which Unicode mapping variant is used --
 * the consumers are outside this chunk. */
00347 #define UCS_MAP_ASCII 0
00348 #define UCS_MAP_MS 1
00349 #define UCS_MAP_CP932 2
00350 #define UCS_MAP_CP10001 3
00351 static int ms_ucs_map_f = UCS_MAP_ASCII;
00352 #endif
00353 #ifdef UTF8_INPUT_ENABLE
00354
00355 static int no_cp932ext_f = FALSE;
00356
00357 static int no_best_fit_chars_f = FALSE;
00358 static int input_endian = ENDIAN_BIG;
/* Replacement character written by encode_fallback_subchar(); '?' at start. */
00359 static nkf_char unicode_subchar = '?';
/* Fallback handler hook; NULL until one of the encode_fallback_* functions
 * is installed (the installing code is outside this chunk). */
00360 static void (*encode_fallback)(nkf_char c) = NULL;
00361 static void w_status(struct input_code *, nkf_char);
00362 #endif
00363 #ifdef UTF8_OUTPUT_ENABLE
00364 static int output_bom_f = FALSE;
00365 static int output_endian = ENDIAN_BIG;
00366 #endif
00367
/* Forward declarations of the byte I/O primitives. They are used below as the
 * initial values of the i_* / o_* function-pointer hooks; their definitions
 * are outside this chunk. */
00368 static void std_putc(nkf_char c);
00369 static nkf_char std_getc(FILE *f);
00370 static nkf_char std_ungetc(nkf_char c,FILE *f);
00371
00372 static nkf_char broken_getc(FILE *f);
00373 static nkf_char broken_ungetc(nkf_char c,FILE *f);
00374
00375 static nkf_char mime_getc(FILE *f);
00376
00377 static void mime_putc(nkf_char c);
00378
00379
00380
/* Two IOBUF_SIZE-byte arrays, omitted from PERL_XS and WIN32DLL builds.
 * NOTE(review): presumably handed to setvbuf() for stdin/stdout -- confirm. */
00381 #if !defined(PERL_XS) && !defined(WIN32DLL)
00382 static unsigned char stdibuf[IOBUF_SIZE];
00383 static unsigned char stdobuf[IOBUF_SIZE];
00384 #endif
00385
/* Third state for an int flag besides TRUE and FALSE; x0201_f starts in it. */
00386 #define NKF_UNSPECIFIED (-TRUE)
00387
00388
/* File-scope option flags with their start-up values. Only binmode_f starts
 * TRUE; mime_f takes the configured MIME_DECODE_DEFAULT and x0201_f starts as
 * NKF_UNSPECIFIED. NOTE(review): the option parser that sets these flags is
 * outside this chunk, so their exact meaning is not documented here. */
00389 static int unbuf_f = FALSE;
00390 static int estab_f = FALSE;
00391 static int nop_f = FALSE;
00392 static int binmode_f = TRUE;
00393 static int rot_f = FALSE;
00394 static int hira_f = FALSE;
00395 static int alpha_f = FALSE;
00396 static int mime_f = MIME_DECODE_DEFAULT;
00397 static int mime_decode_f = FALSE;
00398 static int mimebuf_f = FALSE;
00399 static int broken_f = FALSE;
00400 static int iso8859_f = FALSE;
00401 static int mimeout_f = FALSE;
00402 static int x0201_f = NKF_UNSPECIFIED;
00403 static int iso2022jp_f = FALSE;
00404
/* Each optional input filter below has a flag plus a getc/ungetc hook pair
 * that initially points at std_getc/std_ungetc. */
00405 #ifdef UNICODE_NORMALIZATION
00406 static int nfc_f = FALSE;
00407 static nkf_char (*i_nfc_getc)(FILE *) = std_getc;
00408 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
00409 #endif
00410
00411 #ifdef INPUT_OPTION
00412 static int cap_f = FALSE;
00413 static nkf_char (*i_cgetc)(FILE *) = std_getc;
00414 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
00415
00416 static int url_f = FALSE;
00417 static nkf_char (*i_ugetc)(FILE *) = std_getc;
00418 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
00419 #endif
00420
/*
 * Bit layout used to tag an nkf_char.
 *
 * The top byte (CLASS_MASK) is a class tag and the low 24 bits (VALUE_MASK)
 * are the value; CLASS_UNICODE marks the value as a Unicode code point.
 * PREFIX_EUCG3 is OR-ed into a character by nkf_char_euc3_new().
 *
 * Macro parameters are parenthesised so that an argument containing an
 * operator of lower precedence than & or | is grouped as the caller wrote it.
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* True when the class tag of c is CLASS_UNICODE. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit value of c does not exceed U+FFFF. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the 24-bit value of c does not exceed U+10FFFF. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
00432
/* Flags and hooks for optional features; each group exists only when its
 * feature macro is defined. NOTE(review): the code that sets and reads these
 * is outside this chunk. */
00433 #ifdef NUMCHAR_OPTION
00434 static int numchar_f = FALSE;
00435 static nkf_char (*i_ngetc)(FILE *) = std_getc;
00436 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
00437 #endif
00438
00439 #ifdef CHECK_OPTION
00440 static int noout_f = FALSE;
00441 static void no_putc(nkf_char c);
00442 static int debug_f = FALSE;
00443 static void debug(const char *str);
00444 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
00445 #endif
00446
00447 static int guess_f = 0;
00448 static void set_input_codename(const char *codename);
00449
00450 #ifdef EXEC_IO
00451 static int exec_f = 0;
00452 #endif
00453
00454 #ifdef SHIFTJIS_CP932
00455
00456 static int cp51932_f = FALSE;
00457
00458
/* Unlike the other flags in this group, cp932inv_f starts out TRUE. */
00459 static int cp932inv_f = TRUE;
00460
00461
00462 #endif
00463
00464 static int x0212_f = FALSE;
00465 static int x0213_f = FALSE;
00466
/* One byte per possible byte value, zero-initialised as a static object.
 * NOTE(review): filled in and consulted outside this chunk. */
00467 static unsigned char prefix_table[256];
00468
00469 static void e_status(struct input_code *, nkf_char);
00470 static void s_status(struct input_code *, nkf_char);
00471
/* Candidate input encodings, each with all state fields zeroed. The UTF-8,
 * UTF-16 and UTF-32 rows exist only with UTF8_INPUT_ENABLE; the UTF-16 and
 * UTF-32 rows have no status callback (NULL). The list ends with a row whose
 * name is NULL. */
00472 struct input_code input_code_list[] = {
00473 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
00474 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
00475 #ifdef UTF8_INPUT_ENABLE
00476 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
00477 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
00478 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
00479 #endif
00480 {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
00481 };
00482
/* NOTE(review): state of the MIME encoder; its users are outside this chunk. */
00483 static int mimeout_mode = 0;
00484 static int base64_count = 0;
00485
00486
00487
00488
/* Line-folding state and options, all initially zero / FALSE.
 * NOTE(review): the folding code itself is outside this chunk. */
00489 static int f_line = 0;
00490 static int f_prev = 0;
00491 static int fold_preserve_f = FALSE;
00492 static int fold_f = FALSE;
00493 static int fold_len = 0;
00494
00495
/* Both start as 'B' (DEFAULT_J / DEFAULT_R).
 * NOTE(review): presumably the final bytes of the escape sequences that
 * switch to kanji and back to ASCII in ISO-2022-JP output -- confirm. */
00496 static unsigned char kanji_intro = DEFAULT_J;
00497 static unsigned char ascii_intro = DEFAULT_R;
00498
00499
00500
00501 #define FOLD_MARGIN 10
00502 #define DEFAULT_FOLD 60
00503
00504 static int fold_margin = FOLD_MARGIN;
00505
00506
00507
00508 static nkf_char
00509 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
00510 {
00511 fprintf(stderr,"nkf internal module connection failure.\n");
00512 exit(EXIT_FAILURE);
00513 return 0;
00514 }
00515
00516 static void
00517 no_connection(nkf_char c2, nkf_char c1)
00518 {
00519 no_connection2(c2,c1,0);
00520 }
00521
/* Conversion hooks. Every converter pointer starts at no_connection2 /
 * no_connection, which print a diagnostic and exit, so a stage that is
 * invoked before a real function has been installed terminates the program. */
00522 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
00523 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
00524
00525 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
00526 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
00527 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
00528 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
00529 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
00530 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
00531 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
00532
00533
00534
/* Byte I/O hooks; all default to the std_getc / std_ungetc / std_putc
 * primitives declared earlier. */
00535 static void (*o_putc)(nkf_char c) = std_putc;
00536
00537 static nkf_char (*i_getc)(FILE *f) = std_getc;
00538 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
00539
00540 static nkf_char (*i_bgetc)(FILE *) = std_getc;
00541 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
00542
00543 static void (*o_mputc)(nkf_char c) = std_putc ;
00544
00545 static nkf_char (*i_mgetc)(FILE *) = std_getc;
00546 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
00547
00548
00549 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc;
00550 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
00551
00552
/* Current output/input mode, both starting as ASCII, and the MIME decoder
 * state, starting as FALSE. */
00553 static int output_mode = ASCII;
00554 static int input_mode = ASCII;
00555 static int mime_decode_mode = FALSE;
00556
00557
00558
00559
00560
/* 130-byte lookup table: 64 two-byte entries followed by a 0x00,0x00 pair.
 * NOTE(review): judging by the 0x21/0x25 first bytes this presumably maps
 * JIS X 0201 katakana to two-byte JIS X 0208 codes; the code that indexes
 * it is outside this chunk -- confirm. */
00561 static const unsigned char cv[]= {
00562 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
00563 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
00564 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
00565 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
00566 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
00567 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
00568 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
00569 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
00570 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
00571 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
00572 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
00573 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
00574 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
00575 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
00576 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
00577 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
00578 0x00,0x00};
00579
00580
00581
00582
/* Same size and two-byte layout as cv; most entries are 0x00,0x00.
 * NOTE(review): presumably the voiced-mark (dakuten) variants of the cv
 * entries, with 0x00,0x00 meaning "no such form" -- confirm against the
 * consumer, which is outside this chunk. */
00583 static const unsigned char dv[]= {
00584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00588 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
00589 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
00590 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
00591 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
00592 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
00593 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
00594 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
00595 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
00596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00600 0x00,0x00};
00601
00602
00603
/* Same size and two-byte layout as cv; only five entries are non-zero.
 * NOTE(review): presumably the semi-voiced-mark (handakuten) variants of the
 * cv entries -- confirm against the consumer, which is outside this chunk. */
00604 static const unsigned char ev[]= {
00605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00615 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
00616 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
00617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00621 0x00,0x00};
00622
00623
00624
00625
/* 96 single-byte entries; non-zero entries are printable ASCII codes.
 * NOTE(review): presumably maps a run of JIS X 0208 symbols to their ASCII
 * equivalents, with 0x00 meaning "no equivalent" -- the indexing code is
 * outside this chunk; confirm. */
00626 static const unsigned char fv[] = {
00627
00628 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
00629 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
00630 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
00631 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
00632 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
00633 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
00634 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
00635 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
00636 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
00637 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00638 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
00639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
00640 } ;
00641
00642
00643
/* Remaining file-scope state, all starting at zero / FALSE / "".
 * NOTE(review): set by the option parser and main loop, which are outside
 * this chunk. */
00644 static int option_mode = 0;
00645 static int file_out_f = FALSE;
00646 #ifdef OVERWRITE
00647 static int overwrite_f = FALSE;
00648 static int preserve_time_f = FALSE;
00649 static int backup_f = FALSE;
00650 static char *backup_suffix = "";
00651 #endif
00652
00653 static int eolmode_f = 0;
00654 static int input_eol = 0;
00655 static nkf_char prev_cr = 0;
00656 #ifdef EASYWIN
00657 static int end_check;
00658 #endif
00659
/*
 * malloc() wrapper that never returns NULL.
 *
 * A request for 0 bytes is turned into a request for 1 byte. If the
 * allocation fails, the reason is reported with perror() and the process
 * exits with EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size != 0 ? size : 1);

    if (!mem) {
        perror("can't malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}
00675
/*
 * realloc() wrapper that never returns NULL.
 *
 * A new size of 0 is turned into 1. If the reallocation fails, the reason is
 * reported with perror() and the process exits with EXIT_FAILURE; otherwise
 * the (possibly moved) block is returned.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *grown = realloc(ptr, size != 0 ? size : 1);

    if (!grown) {
        perror("can't realloc");
        exit(EXIT_FAILURE);
    }
    return grown;
}
00689
/* Release memory obtained from nkf_xmalloc()/nkf_xrealloc(); plain free(). */
00690 #define nkf_xfree(ptr) free(ptr)
00691
00692 static int
00693 nkf_str_caseeql(const char *src, const char *target)
00694 {
00695 int i;
00696 for (i = 0; src[i] && target[i]; i++) {
00697 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
00698 }
00699 if (src[i] || target[i]) return FALSE;
00700 else return TRUE;
00701 }
00702
00703 static nkf_encoding*
00704 nkf_enc_from_index(int idx)
00705 {
00706 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
00707 return 0;
00708 }
00709 return &nkf_encoding_table[idx];
00710 }
00711
00712 static int
00713 nkf_enc_find_index(const char *name)
00714 {
00715 int i;
00716 if (name[0] == 'X' && *(name+1) == '-') name += 2;
00717 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
00718 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
00719 return encoding_name_to_id_table[i].id;
00720 }
00721 }
00722 return -1;
00723 }
00724
00725 static nkf_encoding*
00726 nkf_enc_find(const char *name)
00727 {
00728 int idx = -1;
00729 idx = nkf_enc_find_index(name);
00730 if (idx < 0) return 0;
00731 return nkf_enc_from_index(idx);
00732 }
00733
/* Accessors for an nkf_encoding pointer. enc is evaluated more than once by
 * the predicate macros, so it must not have side effects. */
00734 #define nkf_enc_name(enc) (enc)->name
00735 #define nkf_enc_to_index(enc) (enc)->id
00736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
00737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
00738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base family is ASCII or ISO-2022-JP (compared by address). */
00739 #define nkf_enc_asciicompat(enc) (\
00740 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
00741 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base family is UTF-8, UTF-16 or UTF-32. */
00742 #define nkf_enc_unicode_p(enc) (\
00743 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
00744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
00745 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
00746 #define nkf_enc_cp5022x_p(enc) (\
00747 nkf_enc_to_index(enc) == CP50220 ||\
00748 nkf_enc_to_index(enc) == CP50221 ||\
00749 nkf_enc_to_index(enc) == CP50222)
00750
00751 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character encoding of the current environment, or
 * NULL when the platform offers no way to ask.
 *
 *  - with <langinfo.h>: whatever nl_langinfo(CODESET) reports;
 *  - on Windows: "CP<n>" built from the ANSI code page;
 *  - on OS/2 (unless INT_IS_SHORT): "Shift_JIS" for code pages 932 and 943,
 *    otherwise "CP<n>".
 *
 * On Windows and OS/2 the result lives in a static buffer that is
 * overwritten by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned value, so it is formatted with %u. */
    sprintf(buf, "CP%u", GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)

    return NULL;
# else

    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#else
    return NULL;
#endif
}
00779
00780 static nkf_encoding*
00781 nkf_locale_encoding()
00782 {
00783 nkf_encoding *enc = 0;
00784 const char *encname = nkf_locale_charmap();
00785 if (encname)
00786 enc = nkf_enc_find(encname);
00787 return enc;
00788 }
00789 #endif
00790
00791 static nkf_encoding*
00792 nkf_utf8_encoding()
00793 {
00794 return &nkf_encoding_table[UTF_8];
00795 }
00796
00797 static nkf_encoding*
00798 nkf_default_encoding()
00799 {
00800 nkf_encoding *enc = 0;
00801 #ifdef DEFAULT_CODE_LOCALE
00802 enc = nkf_locale_encoding();
00803 #elif defined(DEFAULT_ENCIDX)
00804 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
00805 #endif
00806 if (!enc) enc = nkf_utf8_encoding();
00807 return enc;
00808 }
00809
/* Fixed-capacity stack of nkf_char values: capa is the number of slots
 * allocated at ptr (set once by nkf_buf_new()), len the number currently in
 * use. nkf_buf_push() appends at ptr[len]; nkf_buf_pop() removes from the
 * same end. */
00810 typedef struct {
00811 long capa;
00812 long len;
00813 nkf_char *ptr;
00814 } nkf_buf_t;
00815
00816 static nkf_buf_t *
00817 nkf_buf_new(int length)
00818 {
00819 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
00820 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
00821 buf->capa = length;
00822 buf->len = 0;
00823 return buf;
00824 }
00825
00826 #if 0
00827 static void
00828 nkf_buf_dispose(nkf_buf_t *buf)
00829 {
00830 nkf_xfree(buf->ptr);
00831 nkf_xfree(buf);
00832 }
00833 #endif
00834
/* Number of values currently stored in buf, and whether that number is 0. */
00835 #define nkf_buf_length(buf) ((buf)->len)
00836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
00837
00838 static nkf_char
00839 nkf_buf_at(nkf_buf_t *buf, int index)
00840 {
00841 assert(index <= buf->len);
00842 return buf->ptr[index];
00843 }
00844
00845 static void
00846 nkf_buf_clear(nkf_buf_t *buf)
00847 {
00848 buf->len = 0;
00849 }
00850
00851 static void
00852 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
00853 {
00854 if (buf->capa <= buf->len) {
00855 exit(EXIT_FAILURE);
00856 }
00857 buf->ptr[buf->len++] = c;
00858 }
00859
00860 static nkf_char
00861 nkf_buf_pop(nkf_buf_t *buf)
00862 {
00863 assert(!nkf_buf_empty_p(buf));
00864 return buf->ptr[--buf->len];
00865 }
00866
00867
00868 #ifndef PERL_XS
00869 #ifdef WIN32DLL
00870 #define fprintf dllprintf
00871 #endif
00872
00873 static void
00874 version(void)
00875 {
00876 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
00877 }
00878
00879 static void
00880 usage(void)
00881 {
00882 fprintf(HELP_OUTPUT,
00883 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
00884 #ifdef UTF8_OUTPUT_ENABLE
00885 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00886 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
00887 #else
00888 #endif
00889 #ifdef UTF8_INPUT_ENABLE
00890 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00891 " UTF option is -W[8,[16,32][B,L]]\n"
00892 #else
00893 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00894 #endif
00895 );
00896 fprintf(HELP_OUTPUT,
00897 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
00898 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
00899 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
00900 );
00901 fprintf(HELP_OUTPUT,
00902 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
00903 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
00904 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
00905 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
00906 );
00907 fprintf(HELP_OUTPUT,
00908 " O Output to File (DEFAULT 'nkf.out')\n"
00909 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
00910 );
00911 fprintf(HELP_OUTPUT,
00912 " --ic=<encoding> Specify the input encoding\n"
00913 " --oc=<encoding> Specify the output encoding\n"
00914 " --hiragana --katakana Hiragana/Katakana Conversion\n"
00915 " --katakana-hiragana Converts each other\n"
00916 );
00917 fprintf(HELP_OUTPUT,
00918 #ifdef INPUT_OPTION
00919 " --{cap, url}-input Convert hex after ':' or '%%'\n"
00920 #endif
00921 #ifdef NUMCHAR_OPTION
00922 " --numchar-input Convert Unicode Character Reference\n"
00923 #endif
00924 #ifdef UTF8_INPUT_ENABLE
00925 " --fb-{skip, html, xml, perl, java, subchar}\n"
00926 " Specify unassigned character's replacement\n"
00927 #endif
00928 );
00929 fprintf(HELP_OUTPUT,
00930 #ifdef OVERWRITE
00931 " --in-place[=SUF] Overwrite original files\n"
00932 " --overwrite[=SUF] Preserve timestamp of original files\n"
00933 #endif
00934 " -g --guess Guess the input code\n"
00935 " -v --version Print the version\n"
00936 " --help/-V Print this help / configuration\n"
00937 );
00938 version();
00939 }
00940
00941 static void
00942 show_configuration(void)
00943 {
00944 fprintf(HELP_OUTPUT,
00945 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
00946 " Compile-time options:\n"
00947 " Compiled at: " __DATE__ " " __TIME__ "\n"
00948 );
00949 fprintf(HELP_OUTPUT,
00950 " Default output encoding: "
00951 #ifdef DEFAULT_CODE_LOCALE
00952 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
00953 #elif defined(DEFAULT_ENCIDX)
00954 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
00955 #else
00956 "NONE\n"
00957 #endif
00958 );
00959 fprintf(HELP_OUTPUT,
00960 " Default output end of line: "
00961 #if DEFAULT_NEWLINE == CR
00962 "CR"
00963 #elif DEFAULT_NEWLINE == CRLF
00964 "CRLF"
00965 #else
00966 "LF"
00967 #endif
00968 "\n"
00969 " Decode MIME encoded string: "
00970 #if MIME_DECODE_DEFAULT
00971 "ON"
00972 #else
00973 "OFF"
00974 #endif
00975 "\n"
00976 " Convert JIS X 0201 Katakana: "
00977 #if X0201_DEFAULT
00978 "ON"
00979 #else
00980 "OFF"
00981 #endif
00982 "\n"
00983 " --help, --version output: "
00984 #if HELP_OUTPUT_HELP_OUTPUT
00985 "HELP_OUTPUT"
00986 #else
00987 "STDOUT"
00988 #endif
00989 "\n");
00990 }
00991 #endif
00992
00993 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * If `suffix` contains no '*', the result is filename followed by suffix.
 * Otherwise the result is suffix with every '*' replaced by filename.
 *
 * The result is allocated with nkf_xmalloc() (which exits on failure) and
 * must be released by the caller with nkf_xfree().
 *
 * All lengths are kept in size_t so that neither the strlen() results nor
 * the size computation pass through a signed int, and the filename is copied
 * with memcpy() at a known offset rather than appended with strncat(), which
 * would rescan the output from its beginning for every '*'.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
        memcpy(backup_filename, filename, filename_length);
        /* +1 copies the suffix's terminating NUL as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* Each '*' (1 byte) is replaced by filename_length bytes. */
    backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
                                  + asterisk_count * filename_length + 1);
    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
01028 #endif
01029
01030 #ifdef UTF8_INPUT_ENABLE
01031 static void
01032 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
01033 {
01034 int shift = 20;
01035 c &= VALUE_MASK;
01036 while(shift >= 0){
01037 if(c >= NKF_INT32_C(1)<<shift){
01038 while(shift >= 0){
01039 (*f)(0, bin2hex(c>>shift));
01040 shift -= 4;
01041 }
01042 }else{
01043 shift -= 4;
01044 }
01045 }
01046 return;
01047 }
01048
01049 static void
01050 encode_fallback_html(nkf_char c)
01051 {
01052 (*oconv)(0, '&');
01053 (*oconv)(0, '#');
01054 c &= VALUE_MASK;
01055 if(c >= NKF_INT32_C(1000000))
01056 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
01057 if(c >= NKF_INT32_C(100000))
01058 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
01059 if(c >= 10000)
01060 (*oconv)(0, 0x30+(c/10000 )%10);
01061 if(c >= 1000)
01062 (*oconv)(0, 0x30+(c/1000 )%10);
01063 if(c >= 100)
01064 (*oconv)(0, 0x30+(c/100 )%10);
01065 if(c >= 10)
01066 (*oconv)(0, 0x30+(c/10 )%10);
01067 if(c >= 0)
01068 (*oconv)(0, 0x30+ c %10);
01069 (*oconv)(0, ';');
01070 return;
01071 }
01072
01073 static void
01074 encode_fallback_xml(nkf_char c)
01075 {
01076 (*oconv)(0, '&');
01077 (*oconv)(0, '#');
01078 (*oconv)(0, 'x');
01079 nkf_each_char_to_hex(oconv, c);
01080 (*oconv)(0, ';');
01081 return;
01082 }
01083
01084 static void
01085 encode_fallback_java(nkf_char c)
01086 {
01087 (*oconv)(0, '\\');
01088 c &= VALUE_MASK;
01089 if(!nkf_char_unicode_bmp_p(c)){
01090 (*oconv)(0, 'U');
01091 (*oconv)(0, '0');
01092 (*oconv)(0, '0');
01093 (*oconv)(0, bin2hex(c>>20));
01094 (*oconv)(0, bin2hex(c>>16));
01095 }else{
01096 (*oconv)(0, 'u');
01097 }
01098 (*oconv)(0, bin2hex(c>>12));
01099 (*oconv)(0, bin2hex(c>> 8));
01100 (*oconv)(0, bin2hex(c>> 4));
01101 (*oconv)(0, bin2hex(c ));
01102 return;
01103 }
01104
01105 static void
01106 encode_fallback_perl(nkf_char c)
01107 {
01108 (*oconv)(0, '\\');
01109 (*oconv)(0, 'x');
01110 (*oconv)(0, '{');
01111 nkf_each_char_to_hex(oconv, c);
01112 (*oconv)(0, '}');
01113 return;
01114 }
01115
01116 static void
01117 encode_fallback_subchar(nkf_char c)
01118 {
01119 c = unicode_subchar;
01120 (*oconv)((c>>8)&0xFF, c&0xFF);
01121 return;
01122 }
01123 #endif
01124
/*
 * Long command-line option table.  Each entry pairs a long option `name`
 * with an `alias` string.
 * NOTE(review): the aliases look like the equivalent short-option letters,
 * an empty alias presumably means the option is handled by its name, and a
 * trailing '=' presumably marks an option that takes a value -- confirm
 * against the option parser, which is outside this view.
 * Several groups of entries exist only when the matching feature macro is
 * defined.
 */
01125 static const struct {
01126 const char *name; /* long option name */
01127 const char *alias; /* alias string (may be empty) */
01128 } long_option[] = {
01129 {"ic=", ""},
01130 {"oc=", ""},
01131 {"base64","jMB"},
01132 {"euc","e"},
01133 {"euc-input","E"},
01134 {"fj","jm"},
01135 {"help",""},
01136 {"jis","j"},
01137 {"jis-input","J"},
01138 {"mac","sLm"},
01139 {"mime","jM"},
01140 {"mime-input","m"},
01141 {"msdos","sLw"},
01142 {"sjis","s"},
01143 {"sjis-input","S"},
01144 {"unix","eLu"},
01145 {"version","v"},
01146 {"windows","sLw"},
01147 {"hiragana","h1"},
01148 {"katakana","h2"},
01149 {"katakana-hiragana","h3"},
01150 {"guess=", ""},
01151 {"guess", "g2"},
01152 {"cp932", ""},
01153 {"no-cp932", ""},
01154 #ifdef X0212_ENABLE
01155 {"x0212", ""},
01156 #endif
01157 #ifdef UTF8_OUTPUT_ENABLE
01158 {"utf8", "w"},
01159 {"utf16", "w16"},
01160 {"ms-ucs-map", ""},
01161 {"fb-skip", ""},
01162 {"fb-html", ""},
01163 {"fb-xml", ""},
01164 {"fb-perl", ""},
01165 {"fb-java", ""},
01166 {"fb-subchar", ""},
01167 {"fb-subchar=", ""},
01168 #endif
01169 #ifdef UTF8_INPUT_ENABLE
01170 {"utf8-input", "W"},
01171 {"utf16-input", "W16"},
01172 {"no-cp932ext", ""},
01173 {"no-best-fit-chars",""},
01174 #endif
01175 #ifdef UNICODE_NORMALIZATION
01176 {"utf8mac-input", ""},
01177 #endif
01178 #ifdef OVERWRITE
01179 {"overwrite", ""},
01180 {"overwrite=", ""},
01181 {"in-place", ""},
01182 {"in-place=", ""},
01183 #endif
01184 #ifdef INPUT_OPTION
01185 {"cap-input", ""},
01186 {"url-input", ""},
01187 #endif
01188 #ifdef NUMCHAR_OPTION
01189 {"numchar-input", ""},
01190 #endif
01191 #ifdef CHECK_OPTION
01192 {"no-output", ""},
01193 {"debug", ""},
01194 #endif
01195 #ifdef SHIFTJIS_CP932
01196 {"cp932inv", ""},
01197 #endif
01198 #ifdef EXEC_IO
01199 {"exec-in", ""},
01200 {"exec-out", ""},
01201 #endif
01202 {"prefix=", ""},
01203 };
01204
01205 static void
01206 set_input_encoding(nkf_encoding *enc)
01207 {
01208 switch (nkf_enc_to_index(enc)) {
01209 case ISO_8859_1:
01210 iso8859_f = TRUE;
01211 break;
01212 case CP50221:
01213 case CP50222:
01214 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01215 case CP50220:
01216 #ifdef SHIFTJIS_CP932
01217 cp51932_f = TRUE;
01218 #endif
01219 #ifdef UTF8_OUTPUT_ENABLE
01220 ms_ucs_map_f = UCS_MAP_CP932;
01221 #endif
01222 break;
01223 case ISO_2022_JP_1:
01224 x0212_f = TRUE;
01225 break;
01226 case ISO_2022_JP_3:
01227 x0212_f = TRUE;
01228 x0213_f = TRUE;
01229 break;
01230 case ISO_2022_JP_2004:
01231 x0212_f = TRUE;
01232 x0213_f = TRUE;
01233 break;
01234 case SHIFT_JIS:
01235 break;
01236 case WINDOWS_31J:
01237 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01238 #ifdef SHIFTJIS_CP932
01239 cp51932_f = TRUE;
01240 #endif
01241 #ifdef UTF8_OUTPUT_ENABLE
01242 ms_ucs_map_f = UCS_MAP_CP932;
01243 #endif
01244 break;
01245 break;
01246 case CP10001:
01247 #ifdef SHIFTJIS_CP932
01248 cp51932_f = TRUE;
01249 #endif
01250 #ifdef UTF8_OUTPUT_ENABLE
01251 ms_ucs_map_f = UCS_MAP_CP10001;
01252 #endif
01253 break;
01254 case EUC_JP:
01255 break;
01256 case EUCJP_NKF:
01257 break;
01258 case CP51932:
01259 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01260 #ifdef SHIFTJIS_CP932
01261 cp51932_f = TRUE;
01262 #endif
01263 #ifdef UTF8_OUTPUT_ENABLE
01264 ms_ucs_map_f = UCS_MAP_CP932;
01265 #endif
01266 break;
01267 case EUCJP_MS:
01268 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01269 #ifdef SHIFTJIS_CP932
01270 cp51932_f = FALSE;
01271 #endif
01272 #ifdef UTF8_OUTPUT_ENABLE
01273 ms_ucs_map_f = UCS_MAP_MS;
01274 #endif
01275 break;
01276 case EUCJP_ASCII:
01277 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01278 #ifdef SHIFTJIS_CP932
01279 cp51932_f = FALSE;
01280 #endif
01281 #ifdef UTF8_OUTPUT_ENABLE
01282 ms_ucs_map_f = UCS_MAP_ASCII;
01283 #endif
01284 break;
01285 case SHIFT_JISX0213:
01286 case SHIFT_JIS_2004:
01287 x0213_f = TRUE;
01288 #ifdef SHIFTJIS_CP932
01289 cp51932_f = FALSE;
01290 #endif
01291 break;
01292 case EUC_JISX0213:
01293 case EUC_JIS_2004:
01294 x0213_f = TRUE;
01295 #ifdef SHIFTJIS_CP932
01296 cp51932_f = FALSE;
01297 #endif
01298 break;
01299 #ifdef UTF8_INPUT_ENABLE
01300 #ifdef UNICODE_NORMALIZATION
01301 case UTF8_MAC:
01302 nfc_f = TRUE;
01303 break;
01304 #endif
01305 case UTF_16:
01306 case UTF_16BE:
01307 case UTF_16BE_BOM:
01308 input_endian = ENDIAN_BIG;
01309 break;
01310 case UTF_16LE:
01311 case UTF_16LE_BOM:
01312 input_endian = ENDIAN_LITTLE;
01313 break;
01314 case UTF_32:
01315 case UTF_32BE:
01316 case UTF_32BE_BOM:
01317 input_endian = ENDIAN_BIG;
01318 break;
01319 case UTF_32LE:
01320 case UTF_32LE_BOM:
01321 input_endian = ENDIAN_LITTLE;
01322 break;
01323 #endif
01324 }
01325 }
01326
01327 static void
01328 set_output_encoding(nkf_encoding *enc)
01329 {
01330 switch (nkf_enc_to_index(enc)) {
01331 case CP50220:
01332 #ifdef SHIFTJIS_CP932
01333 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01334 #endif
01335 #ifdef UTF8_OUTPUT_ENABLE
01336 ms_ucs_map_f = UCS_MAP_CP932;
01337 #endif
01338 break;
01339 case CP50221:
01340 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01341 #ifdef SHIFTJIS_CP932
01342 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01343 #endif
01344 #ifdef UTF8_OUTPUT_ENABLE
01345 ms_ucs_map_f = UCS_MAP_CP932;
01346 #endif
01347 break;
01348 case ISO_2022_JP:
01349 #ifdef SHIFTJIS_CP932
01350 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01351 #endif
01352 break;
01353 case ISO_2022_JP_1:
01354 x0212_f = TRUE;
01355 #ifdef SHIFTJIS_CP932
01356 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01357 #endif
01358 break;
01359 case ISO_2022_JP_3:
01360 x0212_f = TRUE;
01361 x0213_f = TRUE;
01362 #ifdef SHIFTJIS_CP932
01363 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01364 #endif
01365 break;
01366 case SHIFT_JIS:
01367 break;
01368 case WINDOWS_31J:
01369 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01370 #ifdef UTF8_OUTPUT_ENABLE
01371 ms_ucs_map_f = UCS_MAP_CP932;
01372 #endif
01373 break;
01374 case CP10001:
01375 #ifdef UTF8_OUTPUT_ENABLE
01376 ms_ucs_map_f = UCS_MAP_CP10001;
01377 #endif
01378 break;
01379 case EUC_JP:
01380 x0212_f = TRUE;
01381 #ifdef SHIFTJIS_CP932
01382 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01383 #endif
01384 #ifdef UTF8_OUTPUT_ENABLE
01385 ms_ucs_map_f = UCS_MAP_ASCII;
01386 #endif
01387 break;
01388 case EUCJP_NKF:
01389 x0212_f = FALSE;
01390 #ifdef SHIFTJIS_CP932
01391 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01392 #endif
01393 #ifdef UTF8_OUTPUT_ENABLE
01394 ms_ucs_map_f = UCS_MAP_ASCII;
01395 #endif
01396 break;
01397 case CP51932:
01398 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01399 #ifdef SHIFTJIS_CP932
01400 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01401 #endif
01402 #ifdef UTF8_OUTPUT_ENABLE
01403 ms_ucs_map_f = UCS_MAP_CP932;
01404 #endif
01405 break;
01406 case EUCJP_MS:
01407 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01408 x0212_f = TRUE;
01409 #ifdef UTF8_OUTPUT_ENABLE
01410 ms_ucs_map_f = UCS_MAP_MS;
01411 #endif
01412 break;
01413 case EUCJP_ASCII:
01414 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;
01415 x0212_f = TRUE;
01416 #ifdef UTF8_OUTPUT_ENABLE
01417 ms_ucs_map_f = UCS_MAP_ASCII;
01418 #endif
01419 break;
01420 case SHIFT_JISX0213:
01421 case SHIFT_JIS_2004:
01422 x0213_f = TRUE;
01423 #ifdef SHIFTJIS_CP932
01424 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01425 #endif
01426 break;
01427 case EUC_JISX0213:
01428 case EUC_JIS_2004:
01429 x0212_f = TRUE;
01430 x0213_f = TRUE;
01431 #ifdef SHIFTJIS_CP932
01432 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01433 #endif
01434 break;
01435 #ifdef UTF8_OUTPUT_ENABLE
01436 case UTF_8_BOM:
01437 output_bom_f = TRUE;
01438 break;
01439 case UTF_16:
01440 case UTF_16BE_BOM:
01441 output_bom_f = TRUE;
01442 break;
01443 case UTF_16LE:
01444 output_endian = ENDIAN_LITTLE;
01445 output_bom_f = FALSE;
01446 break;
01447 case UTF_16LE_BOM:
01448 output_endian = ENDIAN_LITTLE;
01449 output_bom_f = TRUE;
01450 break;
01451 case UTF_32:
01452 case UTF_32BE_BOM:
01453 output_bom_f = TRUE;
01454 break;
01455 case UTF_32LE:
01456 output_endian = ENDIAN_LITTLE;
01457 output_bom_f = FALSE;
01458 break;
01459 case UTF_32LE_BOM:
01460 output_endian = ENDIAN_LITTLE;
01461 output_bom_f = TRUE;
01462 break;
01463 #endif
01464 }
01465 }
01466
01467 static struct input_code*
01468 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01469 {
01470 if (iconv_func){
01471 struct input_code *p = input_code_list;
01472 while (p->name){
01473 if (iconv_func == p->iconv_func){
01474 return p;
01475 }
01476 p++;
01477 }
01478 }
01479 return 0;
01480 }
01481
01482 static void
01483 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01484 {
01485 #ifdef INPUT_CODE_FIX
01486 if (f || !input_encoding)
01487 #endif
01488 if (estab_f != f){
01489 estab_f = f;
01490 }
01491
01492 if (iconv_func
01493 #ifdef INPUT_CODE_FIX
01494 && (f == -TRUE || !input_encoding)
01495 #endif
01496 ){
01497 iconv = iconv_func;
01498 }
01499 #ifdef CHECK_OPTION
01500 if (estab_f && iconv_for_check != iconv){
01501 struct input_code *p = find_inputcode_byfunc(iconv);
01502 if (p){
01503 set_input_codename(p->name);
01504 debug(p->name);
01505 }
01506 iconv_for_check = iconv;
01507 }
01508 #endif
01509 }
01510
01511 #ifdef X0212_ENABLE
01512 static nkf_char
01513 x0212_shift(nkf_char c)
01514 {
01515 nkf_char ret = c;
01516 c &= 0x7f;
01517 if (is_eucg3(ret)){
01518 if (0x75 <= c && c <= 0x7f){
01519 ret = c + (0x109 - 0x75);
01520 }
01521 }else{
01522 if (0x75 <= c && c <= 0x7f){
01523 ret = c + (0x113 - 0x75);
01524 }
01525 }
01526 return ret;
01527 }
01528
01529
01530 static nkf_char
01531 x0212_unshift(nkf_char c)
01532 {
01533 nkf_char ret = c;
01534 if (0x7f <= c && c <= 0x88){
01535 ret = c + (0x75 - 0x7f);
01536 }else if (0x89 <= c && c <= 0x92){
01537 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
01538 }
01539 return ret;
01540 }
01541 #endif
01542
/*
 * Convert the two-byte code (c2, c1) and store the resulting byte pair
 * through p2 / p1 (either pointer may be null, in which case that byte is
 * simply not stored).
 * NOTE(review): judging by the name and the arithmetic this is the
 * EUC/JIS row-cell to Shift_JIS direction -- confirm.
 *
 * Returns 0 when a result was produced and 1 when the input cannot be
 * converted.
 */
01543 static nkf_char
01544 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01545 {
01546 nkf_char ndx;
/* Codes flagged by is_eucg3() are handled first, keyed on the low 7 bits. */
01547 if (is_eucg3(c2)){
01548 ndx = c2 & 0x7f;
01549 if (x0213_f){
/* With x0213_f set only rows 0x21..0x2F and 0x6E..0x7E are convertible. */
01550 if((0x21 <= ndx && ndx <= 0x2F)){
01551 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
01552 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01553 return 0;
01554 }else if(0x6E <= ndx && ndx <= 0x7E){
01555 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
01556 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01557 return 0;
01558 }
01559 return 1;
01560 }
01561 #ifdef X0212_ENABLE
01562 else if(nkf_isgraph(ndx)){
01563 nkf_char val = 0;
01564 const unsigned short *ptr;
/* Table lookup: row table selected by ndx, cell selected by c1. */
01565 ptr = x0212_shiftjis[ndx - 0x21];
01566 if (ptr){
01567 val = ptr[(c1 & 0x7f) - 0x21];
01568 }
01569 if (val){
/* A non-zero table entry holds the result as (high byte << 8) | low byte. */
01570 c2 = val >> 8;
01571 c1 = val & 0xff;
01572 if (p2) *p2 = c2;
01573 if (p1) *p1 = c1;
01574 return 0;
01575 }
/* No table entry: relocate the row and let the generic path below decide. */
01576 c2 = x0212_shift(c2);
01577 }
01578 #endif
01579 }
/* Generic path: anything whose first byte is still above 0x7F is rejected. */
01580 if(0x7F < c2) return 1;
01581 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
01582 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01583 return 0;
01584 }
01585
/*
 * Convert the two-byte code (c2, c1) and store the resulting pair through
 * p2 / p1 (either pointer may be null).
 * NOTE(review): judging by the name and the arithmetic this is the
 * Shift_JIS to EUC/JIS row-cell direction -- confirm.
 *
 * Returns 1 when c1 is above 0xFC, otherwise 0 after storing the result.
 */
01586 static nkf_char
01587 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01588 {
01589 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
01590 nkf_char val;
01591 #endif
/* Indexed by [c2 - 0xF0][c1 > 0x9E]; used for first bytes 0xF0..0xF4 when x0213_f is set. */
01592 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
01593 if (0xFC < c1) return 1;
01594 #ifdef SHIFTJIS_CP932
/* Optional remapping through shiftjis_cp932 before the generic conversion. */
01595 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
01596 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
01597 if (val){
01598 c2 = val >> 8;
01599 c1 = val & 0xff;
01600 }
01601 }
/* With cp932inv_f set, codes in the cp932inv table range are remapped instead. */
01602 if (cp932inv_f
01603 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
01604 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
01605 if (val){
01606 c2 = val >> 8;
01607 c1 = val & 0xff;
01608 }
01609 }
01610 #endif
01611 #ifdef X0212_ENABLE
/* Without x0213_f, a hit in shiftjis_x0212 yields the final result directly. */
01612 if (!x0213_f && is_ibmext_in_sjis(c2)){
01613 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
01614 if (val){
/* Table values above 0x7FFF are returned with PREFIX_EUCG3 on the first byte. */
01615 if (val > 0x7FFF){
01616 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
01617 c1 = val & 0xff;
01618 }else{
01619 c2 = val >> 8;
01620 c1 = val & 0xff;
01621 }
01622 if (p2) *p2 = c2;
01623 if (p1) *p1 = c1;
01624 return 0;
01625 }
01626 }
01627 #endif
/* Generic arithmetic conversion, applied only when the first byte is 0x80 or above. */
01628 if(c2 >= 0x80){
01629 if(x0213_f && c2 >= 0xF0){
01630 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){
01631 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
01632 }else{
01633 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
01634 if (0x9E < c1) c2++;
01635 }
01636 }else{
/* Offsets subtracted from 2*c2: SJ0162 for c2 <= 0x9F, SJ6394 otherwise. */
01637 #define SJ0162 0x00e1
01638 #define SJ6394 0x0161
01639 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
01640 if (0x9E < c1) c2++;
01641 }
/* Second byte: values below 0x9F and values from 0x9F up use different offsets. */
01642 if (c1 < 0x9F)
01643 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
01644 else {
01645 c1 = c1 - 0x7E;
01646 }
01647 }
01648
01649 #ifdef X0212_ENABLE
/* Map relocated rows (0x7f..0x92) back; see x0212_unshift(). */
01650 c2 = x0212_unshift(c2);
01651 #endif
01652 if (p2) *p2 = c2;
01653 if (p1) *p1 = c1;
01654 return 0;
01655 }
01656
01657 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
01658 static void
01659 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
01660 {
01661 val &= VALUE_MASK;
01662 if (val < 0x80){
01663 *p1 = val;
01664 *p2 = 0;
01665 *p3 = 0;
01666 *p4 = 0;
01667 }else if (val < 0x800){
01668 *p1 = 0xc0 | (val >> 6);
01669 *p2 = 0x80 | (val & 0x3f);
01670 *p3 = 0;
01671 *p4 = 0;
01672 } else if (nkf_char_unicode_bmp_p(val)) {
01673 *p1 = 0xe0 | (val >> 12);
01674 *p2 = 0x80 | ((val >> 6) & 0x3f);
01675 *p3 = 0x80 | ( val & 0x3f);
01676 *p4 = 0;
01677 } else if (nkf_char_unicode_value_p(val)) {
01678 *p1 = 0xf0 | (val >> 18);
01679 *p2 = 0x80 | ((val >> 12) & 0x3f);
01680 *p3 = 0x80 | ((val >> 6) & 0x3f);
01681 *p4 = 0x80 | ( val & 0x3f);
01682 } else {
01683 *p1 = 0;
01684 *p2 = 0;
01685 *p3 = 0;
01686 *p4 = 0;
01687 }
01688 }
01689
01690 static nkf_char
01691 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
01692 {
01693 nkf_char wc;
01694 if (c1 <= 0x7F) {
01695
01696 wc = c1;
01697 }
01698 else if (c1 <= 0xC3) {
01699
01700 return -1;
01701 }
01702 else if (c1 <= 0xDF) {
01703
01704 wc = (c1 & 0x1F) << 6;
01705 wc |= (c2 & 0x3F);
01706 }
01707 else if (c1 <= 0xEF) {
01708
01709 wc = (c1 & 0x0F) << 12;
01710 wc |= (c2 & 0x3F) << 6;
01711 wc |= (c3 & 0x3F);
01712 }
01713 else if (c2 <= 0xF4) {
01714
01715 wc = (c1 & 0x0F) << 18;
01716 wc |= (c2 & 0x3F) << 12;
01717 wc |= (c3 & 0x3F) << 6;
01718 wc |= (c4 & 0x3F);
01719 }
01720 else {
01721 return -1;
01722 }
01723 return wc;
01724 }
01725 #endif
01726
01727 #ifdef UTF8_INPUT_ENABLE
01728 static int
01729 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
01730 const unsigned short *const *pp, nkf_char psize,
01731 nkf_char *p2, nkf_char *p1)
01732 {
01733 nkf_char c2;
01734 const unsigned short *p;
01735 unsigned short val;
01736
01737 if (pp == 0) return 1;
01738
01739 c1 -= 0x80;
01740 if (c1 < 0 || psize <= c1) return 1;
01741 p = pp[c1];
01742 if (p == 0) return 1;
01743
01744 c0 -= 0x80;
01745 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
01746 val = p[c0];
01747 if (val == 0) return 1;
01748 if (no_cp932ext_f && (
01749 (val>>8) == 0x2D ||
01750 val > NKF_INT32_C(0xF300)
01751 )) return 1;
01752
01753 c2 = val >> 8;
01754 if (val > 0x7FFF){
01755 c2 &= 0x7f;
01756 c2 |= PREFIX_EUCG3;
01757 }
01758 if (c2 == SO) c2 = JIS_X_0201_1976_K;
01759 c1 = val & 0xFF;
01760 if (p2) *p2 = c2;
01761 if (p1) *p1 = c1;
01762 return 0;
01763 }
01764
01765 static int
01766 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01767 {
01768 const unsigned short *const *pp;
01769 const unsigned short *const *const *ppp;
01770 static const char no_best_fit_chars_table_C2[] =
01771 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01772 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01773 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
01774 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
01775 static const char no_best_fit_chars_table_C2_ms[] =
01776 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01777 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01778 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
01779 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
01780 static const char no_best_fit_chars_table_932_C2[] =
01781 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01782 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01783 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
01784 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
01785 static const char no_best_fit_chars_table_932_C3[] =
01786 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01787 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
01788 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01789 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
01790 nkf_char ret = 0;
01791
01792 if(c2 < 0x80){
01793 *p2 = 0;
01794 *p1 = c2;
01795 }else if(c2 < 0xe0){
01796 if(no_best_fit_chars_f){
01797 if(ms_ucs_map_f == UCS_MAP_CP932){
01798 switch(c2){
01799 case 0xC2:
01800 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
01801 break;
01802 case 0xC3:
01803 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01804 break;
01805 }
01806 }else if(!cp932inv_f){
01807 switch(c2){
01808 case 0xC2:
01809 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
01810 break;
01811 case 0xC3:
01812 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01813 break;
01814 }
01815 }else if(ms_ucs_map_f == UCS_MAP_MS){
01816 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
01817 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01818 switch(c2){
01819 case 0xC2:
01820 switch(c1){
01821 case 0xA2:
01822 case 0xA3:
01823 case 0xA5:
01824 case 0xA6:
01825 case 0xAC:
01826 case 0xAF:
01827 case 0xB8:
01828 return 1;
01829 }
01830 break;
01831 }
01832 }
01833 }
01834 pp =
01835 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
01836 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
01837 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
01838 utf8_to_euc_2bytes;
01839 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
01840 }else if(c0 < 0xF0){
01841 if(no_best_fit_chars_f){
01842 if(ms_ucs_map_f == UCS_MAP_CP932){
01843 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
01844 }else if(ms_ucs_map_f == UCS_MAP_MS){
01845 switch(c2){
01846 case 0xE2:
01847 switch(c1){
01848 case 0x80:
01849 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
01850 break;
01851 case 0x88:
01852 if(c0 == 0x92) return 1;
01853 break;
01854 }
01855 break;
01856 case 0xE3:
01857 if(c1 == 0x80 || c0 == 0x9C) return 1;
01858 break;
01859 }
01860 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01861 switch(c2){
01862 case 0xE3:
01863 switch(c1){
01864 case 0x82:
01865 if(c0 == 0x94) return 1;
01866 break;
01867 case 0x83:
01868 if(c0 == 0xBB) return 1;
01869 break;
01870 }
01871 break;
01872 }
01873 }else{
01874 switch(c2){
01875 case 0xE2:
01876 switch(c1){
01877 case 0x80:
01878 if(c0 == 0x95) return 1;
01879 break;
01880 case 0x88:
01881 if(c0 == 0xA5) return 1;
01882 break;
01883 }
01884 break;
01885 case 0xEF:
01886 switch(c1){
01887 case 0xBC:
01888 if(c0 == 0x8D) return 1;
01889 break;
01890 case 0xBD:
01891 if(c0 == 0x9E && !cp932inv_f) return 1;
01892 break;
01893 case 0xBF:
01894 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
01895 break;
01896 }
01897 break;
01898 }
01899 }
01900 }
01901 ppp =
01902 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
01903 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
01904 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
01905 utf8_to_euc_3bytes;
01906 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
01907 }else return -1;
01908 #ifdef SHIFTJIS_CP932
01909 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
01910 nkf_char s2, s1;
01911 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
01912 s2e_conv(s2, s1, p2, p1);
01913 }else{
01914 ret = 1;
01915 }
01916 }
01917 #endif
01918 return ret;
01919 }
01920
01921 #ifdef UTF8_OUTPUT_ENABLE
01922 static nkf_char
01923 e2w_conv(nkf_char c2, nkf_char c1)
01924 {
01925 const unsigned short *p;
01926
01927 if (c2 == JIS_X_0201_1976_K) {
01928 if (ms_ucs_map_f == UCS_MAP_CP10001) {
01929 switch (c1) {
01930 case 0x20:
01931 return 0xA0;
01932 case 0x7D:
01933 return 0xA9;
01934 }
01935 }
01936 p = euc_to_utf8_1byte;
01937 #ifdef X0212_ENABLE
01938 } else if (is_eucg3(c2)){
01939 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
01940 return 0xA6;
01941 }
01942 c2 = (c2&0x7f) - 0x21;
01943 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
01944 p = x0212_to_utf8_2bytes[c2];
01945 else
01946 return 0;
01947 #endif
01948 } else {
01949 c2 &= 0x7f;
01950 c2 = (c2&0x7f) - 0x21;
01951 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
01952 p =
01953 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
01954 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
01955 euc_to_utf8_2bytes_ms[c2];
01956 else
01957 return 0;
01958 }
01959 if (!p) return 0;
01960 c1 = (c1 & 0x7f) - 0x21;
01961 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
01962 return p[c1];
01963 return 0;
01964 }
01965 #endif
01966
01967 static nkf_char
01968 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01969 {
01970 nkf_char ret = 0;
01971
01972 if (!c1){
01973 *p2 = 0;
01974 *p1 = c2;
01975 }else if (0xc0 <= c2 && c2 <= 0xef) {
01976 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
01977 #ifdef NUMCHAR_OPTION
01978 if (ret > 0){
01979 if (p2) *p2 = 0;
01980 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
01981 ret = 0;
01982 }
01983 #endif
01984 }
01985 return ret;
01986 }
01987
01988 #ifdef UTF8_INPUT_ENABLE
01989 static nkf_char
01990 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
01991 {
01992 nkf_char c1, c2, c3, c4;
01993 nkf_char ret = 0;
01994 val &= VALUE_MASK;
01995 if (val < 0x80) {
01996 *p2 = 0;
01997 *p1 = val;
01998 }
01999 else if (nkf_char_unicode_bmp_p(val)){
02000 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02001 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
02002 if (ret > 0){
02003 *p2 = 0;
02004 *p1 = nkf_char_unicode_new(val);
02005 ret = 0;
02006 }
02007 }
02008 else {
02009 *p2 = 0;
02010 *p1 = nkf_char_unicode_new(val);
02011 }
02012 return ret;
02013 }
02014 #endif
02015
/*
 * Input converter: normalise one incoming character (c2 = first byte,
 * c1 = second, c0 = third) and hand it to (*oconv)(c2, c1).
 * NOTE(review): by name this handles EUC-JP input -- confirm.
 *
 * Returns -1 when the first byte is 0x8f and c0 is 0 (presumably a request
 * for the third byte -- confirm against the caller); otherwise 0 after the
 * character has been passed to oconv.
 */
02016 static nkf_char
02017 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
02018 {
/* First byte SS2 or already tagged JIS_X_0201_1976_K. */
02019 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
02020 if (iso2022jp_f && !x0201_f) {
/* Replaced by the GETA1/GETA2 pair when iso2022jp_f is set and x0201_f is not. */
02021 c2 = GETA1; c1 = GETA2;
02022 } else {
02023 c2 = JIS_X_0201_1976_K;
02024 c1 &= 0x7f;
02025 }
02026 #ifdef X0212_ENABLE
/* First byte 0x8f: three-byte form, which needs c0. */
02027 }else if (c2 == 0x8f){
02028 if (c0 == 0){
02029 return -1;
02030 }
02031 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
02032 /* rows 0xF5..0xFE: emitted as Unicode-tagged values starting at 0xE3AC, 94 per row */
02033 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
02034 c2 = 0;
02035 } else {
/* Pack the 0x8f prefix and the 7-bit row into c2; c1 becomes the 7-bit cell. */
02036 c2 = (c2 << 8) | (c1 & 0x7f);
02037 c1 = c0 & 0x7f;
02038 #ifdef SHIFTJIS_CP932
02039 if (cp51932_f){
02040 nkf_char s2, s1;
/* Round-trip through e2s_conv/s2e_conv; 7-bit masking only if the result fits in one byte. */
02041 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02042 s2e_conv(s2, s1, &c2, &c1);
02043 if (c2 < 0x100){
02044 c1 &= 0x7f;
02045 c2 &= 0x7f;
02046 }
02047 }
02048 }
02049 #endif
02050 }
02051 #endif
02052 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
02053 /* passed to oconv unchanged */
02054 } else {
02055 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
02056 /* rows 0xF5..0xFE: emitted as Unicode-tagged values starting at 0xE000, 94 per row */
02057 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
02058 c2 = 0;
02059 } else {
/* Ordinary two-byte character: strip the high bit of both bytes. */
02060 c1 &= 0x7f;
02061 c2 &= 0x7f;
02062 #ifdef SHIFTJIS_CP932
02063 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
02064 nkf_char s2, s1;
/* Rows 0x79..0x7c take the same e2s_conv/s2e_conv round-trip. */
02065 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02066 s2e_conv(s2, s1, &c2, &c1);
02067 if (c2 < 0x100){
02068 c1 &= 0x7f;
02069 c2 &= 0x7f;
02070 }
02071 }
02072 }
02073 #endif
02074 }
02075 }
02076 (*oconv)(c2, c1);
02077 return 0;
02078 }
02079
02080 static nkf_char
02081 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
02082 {
02083 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
02084 if (iso2022jp_f && !x0201_f) {
02085 c2 = GETA1; c1 = GETA2;
02086 } else {
02087 c1 &= 0x7f;
02088 }
02089 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
02090
02091 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
02092
02093 if(c1 == 0x7F) return 0;
02094 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
02095 c2 = 0;
02096 } else {
02097 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
02098 if (ret) return ret;
02099 }
02100 (*oconv)(c2, c1);
02101 return 0;
02102 }
02103
02104 static nkf_char
02105 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
02106 {
02107 nkf_char ret = 0, c4 = 0;
02108 static const char w_iconv_utf8_1st_byte[] =
02109 {
02110 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02111 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02112 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
02113 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
02114
02115 if (c3 > 0xFF) {
02116 c4 = c3 & 0xFF;
02117 c3 >>= 8;
02118 }
02119
02120 if (c1 < 0 || 0xff < c1) {
02121 }else if (c1 == 0) {
02122 c3 = 0;
02123 } else if ((c1 & 0xC0) == 0x80) {
02124 return 0;
02125 } else{
02126 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
02127 case 21:
02128 if (c2 < 0x80 || 0xBF < c2) return 0;
02129 break;
02130 case 30:
02131 if (c3 == 0) return -1;
02132 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
02133 return 0;
02134 break;
02135 case 31:
02136 case 33:
02137 if (c3 == 0) return -1;
02138 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
02139 return 0;
02140 break;
02141 case 32:
02142 if (c3 == 0) return -1;
02143 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
02144 return 0;
02145 break;
02146 case 40:
02147 if (c3 == 0) return -2;
02148 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02149 return 0;
02150 break;
02151 case 41:
02152 if (c3 == 0) return -2;
02153 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02154 return 0;
02155 break;
02156 case 42:
02157 if (c3 == 0) return -2;
02158 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02159 return 0;
02160 break;
02161 default:
02162 return 0;
02163 break;
02164 }
02165 }
02166 if (c1 == 0 || c1 == EOF){
02167 } else if ((c1 & 0xf8) == 0xf0) {
02168 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
02169 c1 = 0;
02170 } else {
02171 ret = w2e_conv(c1, c2, c3, &c1, &c2);
02172 }
02173 if (ret == 0){
02174 (*oconv)(c1, c2);
02175 }
02176 return ret;
02177 }
02178
02179 #define NKF_ICONV_INVALID_CODE_RANGE -13
02180 static size_t
02181 unicode_iconv(nkf_char wc)
02182 {
02183 nkf_char c1, c2;
02184 int ret = 0;
02185
02186 if (wc < 0x80) {
02187 c2 = 0;
02188 c1 = wc;
02189 }else if ((wc>>11) == 27) {
02190
02191 return NKF_ICONV_INVALID_CODE_RANGE;
02192 }else if (wc < 0xFFFF) {
02193 ret = w16e_conv(wc, &c2, &c1);
02194 if (ret) return ret;
02195 }else if (wc < 0x10FFFF) {
02196 c2 = 0;
02197 c1 = nkf_char_unicode_new(wc);
02198 } else {
02199 return NKF_ICONV_INVALID_CODE_RANGE;
02200 }
02201 (*oconv)(c2, c1);
02202 return 0;
02203 }
02204
02205 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
02206 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
02207 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
02208 static size_t
02209 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02210 {
02211 nkf_char wc;
02212
02213 if (c1 == EOF) {
02214 (*oconv)(EOF, 0);
02215 return 0;
02216 }
02217
02218 if (input_endian == ENDIAN_BIG) {
02219 if (0xD8 <= c1 && c1 <= 0xDB) {
02220 if (0xDC <= c3 && c3 <= 0xDF) {
02221 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
02222 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02223 } else {
02224 wc = c1 << 8 | c2;
02225 }
02226 } else {
02227 if (0xD8 <= c2 && c2 <= 0xDB) {
02228 if (0xDC <= c4 && c4 <= 0xDF) {
02229 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
02230 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02231 } else {
02232 wc = c2 << 8 | c1;
02233 }
02234 }
02235
02236 return (*unicode_iconv)(wc);
02237 }
02238
02239 static nkf_char
02240 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
02241 {
02242 (*oconv)(c2, c1);
02243 return 16;
02244 }
02245
02246 static nkf_char
02247 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
02248 {
02249 (*oconv)(c2, c1);
02250 return 32;
02251 }
02252
02253 static size_t
02254 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02255 {
02256 nkf_char wc;
02257
02258 if (c1 == EOF) {
02259 (*oconv)(EOF, 0);
02260 return 0;
02261 }
02262
02263 switch(input_endian){
02264 case ENDIAN_BIG:
02265 wc = c2 << 16 | c3 << 8 | c4;
02266 break;
02267 case ENDIAN_LITTLE:
02268 wc = c3 << 16 | c2 << 8 | c1;
02269 break;
02270 case ENDIAN_2143:
02271 wc = c1 << 16 | c4 << 8 | c3;
02272 break;
02273 case ENDIAN_3412:
02274 wc = c4 << 16 | c1 << 8 | c2;
02275 break;
02276 default:
02277 return NKF_ICONV_INVALID_CODE_RANGE;
02278 }
02279
02280 return (*unicode_iconv)(wc);
02281 }
02282 #endif
02283
/*
 * Emit the three-byte sequence ESC '(' ascii_intro through o_putc and set
 * output_mode to `mode`, but only when output_mode is currently neither
 * ASCII nor ISO_8859_1.  Wrapped in do { } while (0) so it can be used as a
 * single statement.
 */
02284 #define output_ascii_escape_sequence(mode) do { \
02285 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
02286 (*o_putc)(ESC); \
02287 (*o_putc)('('); \
02288 (*o_putc)(ascii_intro); \
02289 output_mode = mode; \
02290 } \
02291 } while (0)
02292
02293 static void
02294 output_escape_sequence(int mode)
02295 {
02296 if (output_mode == mode)
02297 return;
02298 switch(mode) {
02299 case ISO_8859_1:
02300 (*o_putc)(ESC);
02301 (*o_putc)('.');
02302 (*o_putc)('A');
02303 break;
02304 case JIS_X_0201_1976_K:
02305 (*o_putc)(ESC);
02306 (*o_putc)('(');
02307 (*o_putc)('I');
02308 break;
02309 case JIS_X_0208:
02310 (*o_putc)(ESC);
02311 (*o_putc)('$');
02312 (*o_putc)(kanji_intro);
02313 break;
02314 case JIS_X_0212:
02315 (*o_putc)(ESC);
02316 (*o_putc)('$');
02317 (*o_putc)('(');
02318 (*o_putc)('D');
02319 break;
02320 case JIS_X_0213_1:
02321 (*o_putc)(ESC);
02322 (*o_putc)('$');
02323 (*o_putc)('(');
02324 (*o_putc)('Q');
02325 break;
02326 case JIS_X_0213_2:
02327 (*o_putc)(ESC);
02328 (*o_putc)('$');
02329 (*o_putc)('(');
02330 (*o_putc)('P');
02331 break;
02332 }
02333 output_mode = mode;
02334 }
02335
/*
 * Output converter: write the character (c2, c1) through o_putc, preceded by
 * whatever escape sequence is needed to switch output_mode.
 * NOTE(review): by name and by the ESC sequences used this is the
 * ISO-2022-JP (JIS) output side -- confirm.
 *
 * c2 selects the kind of character: 0 = single byte, EOF = end of input,
 * ISO_8859_1, JIS_X_0201_1976_K, a code flagged by is_eucg3() (only with
 * X0212_ENABLE), or otherwise the first byte of a two-byte character.
 */
02336 static void
02337 j_oconv(nkf_char c2, nkf_char c1)
02338 {
02339 #ifdef NUMCHAR_OPTION
/* Unicode-tagged input: try to convert it first. */
02340 if (c2 == 0 && nkf_char_unicode_p(c1)){
02341 w16e_conv(c1, &c2, &c1);
/* Still Unicode-tagged after w16e_conv: no table mapping was found. */
02342 if (c2 == 0 && nkf_char_unicode_p(c1)){
02343 c2 = c1 & VALUE_MASK;
02344 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
02345 /* 0xE000..0xE757: mapped onto rows starting at 0x7F, 94 cells per row */
02346 c1 &= 0xFFF;
02347 c2 = 0x7F + c1 / 94;
02348 c1 = 0x21 + c1 % 94;
02349 } else {
/* Otherwise hand the character to the fallback encoder, if one is set, and stop. */
02350 if (encode_fallback) (*encode_fallback)(c1);
02351 return;
02352 }
02353 }
02354 }
02355 #endif
02356 if (c2 == 0) {
02357 output_ascii_escape_sequence(ASCII);
02358 (*o_putc)(c1);
02359 }
02360 else if (c2 == EOF) {
02361 output_ascii_escape_sequence(ASCII);
02362 (*o_putc)(EOF);
02363 }
02364 else if (c2 == ISO_8859_1) {
02365 output_ascii_escape_sequence(ISO_8859_1);
/* The byte is written with its high bit set. */
02366 (*o_putc)(c1|0x80);
02367 }
02368 else if (c2 == JIS_X_0201_1976_K) {
02369 output_escape_sequence(JIS_X_0201_1976_K);
02370 (*o_putc)(c1);
02371 #ifdef X0212_ENABLE
02372 } else if (is_eucg3(c2)){
/* Codes flagged by is_eucg3(): JIS_X_0213_2 when x0213_f is set, JIS_X_0212 otherwise. */
02373 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
02374 (*o_putc)(c2 & 0x7f);
02375 (*o_putc)(c1);
02376 #endif
02377 } else {
/* Two-byte character: silently dropped when either byte is out of range
   (the upper bound for c2 is 0x92 when ms_ucs_map_f is set, else 0x7e). */
02378 if(ms_ucs_map_f
02379 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
02380 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
02381 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
02382 (*o_putc)(c2);
02383 (*o_putc)(c1);
02384 }
02385 }
02386
02387 static void
02388 e_oconv(nkf_char c2, nkf_char c1)
02389 {
02390 if (c2 == 0 && nkf_char_unicode_p(c1)){
02391 w16e_conv(c1, &c2, &c1);
02392 if (c2 == 0 && nkf_char_unicode_p(c1)){
02393 c2 = c1 & VALUE_MASK;
02394 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
02395
02396 c1 &= 0xFFF;
02397 c2 = c1 / 94;
02398 c2 += c2 < 10 ? 0x75 : 0x8FEB;
02399 c1 = 0x21 + c1 % 94;
02400 if (is_eucg3(c2)){
02401 (*o_putc)(0x8f);
02402 (*o_putc)((c2 & 0x7f) | 0x080);
02403 (*o_putc)(c1 | 0x080);
02404 }else{
02405 (*o_putc)((c2 & 0x7f) | 0x080);
02406 (*o_putc)(c1 | 0x080);
02407 }
02408 return;
02409 } else {
02410 if (encode_fallback) (*encode_fallback)(c1);
02411 return;
02412 }
02413 }
02414 }
02415
02416 if (c2 == EOF) {
02417 (*o_putc)(EOF);
02418 } else if (c2 == 0) {
02419 output_mode = ASCII;
02420 (*o_putc)(c1);
02421 } else if (c2 == JIS_X_0201_1976_K) {
02422 output_mode = EUC_JP;
02423 (*o_putc)(SS2); (*o_putc)(c1|0x80);
02424 } else if (c2 == ISO_8859_1) {
02425 output_mode = ISO_8859_1;
02426 (*o_putc)(c1 | 0x080);
02427 #ifdef X0212_ENABLE
02428 } else if (is_eucg3(c2)){
02429 output_mode = EUC_JP;
02430 #ifdef SHIFTJIS_CP932
02431 if (!cp932inv_f){
02432 nkf_char s2, s1;
02433 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02434 s2e_conv(s2, s1, &c2, &c1);
02435 }
02436 }
02437 #endif
02438 if (c2 == 0) {
02439 output_mode = ASCII;
02440 (*o_putc)(c1);
02441 }else if (is_eucg3(c2)){
02442 if (x0212_f){
02443 (*o_putc)(0x8f);
02444 (*o_putc)((c2 & 0x7f) | 0x080);
02445 (*o_putc)(c1 | 0x080);
02446 }
02447 }else{
02448 (*o_putc)((c2 & 0x7f) | 0x080);
02449 (*o_putc)(c1 | 0x080);
02450 }
02451 #endif
02452 } else {
02453 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
02454 set_iconv(FALSE, 0);
02455 return;
02456 }
02457 output_mode = EUC_JP;
02458 (*o_putc)(c2 | 0x080);
02459 (*o_putc)(c1 | 0x080);
02460 }
02461 }
02462
02463 static void
02464 s_oconv(nkf_char c2, nkf_char c1)
02465 {
02466 #ifdef NUMCHAR_OPTION
02467 if (c2 == 0 && nkf_char_unicode_p(c1)){
02468 w16e_conv(c1, &c2, &c1);
02469 if (c2 == 0 && nkf_char_unicode_p(c1)){
02470 c2 = c1 & VALUE_MASK;
02471 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
02472
02473 c1 &= 0xFFF;
02474 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
02475 c1 = c1 % 188;
02476 c1 += 0x40 + (c1 > 0x3e);
02477 (*o_putc)(c2);
02478 (*o_putc)(c1);
02479 return;
02480 } else {
02481 if(encode_fallback)(*encode_fallback)(c1);
02482 return;
02483 }
02484 }
02485 }
02486 #endif
02487 if (c2 == EOF) {
02488 (*o_putc)(EOF);
02489 return;
02490 } else if (c2 == 0) {
02491 output_mode = ASCII;
02492 (*o_putc)(c1);
02493 } else if (c2 == JIS_X_0201_1976_K) {
02494 output_mode = SHIFT_JIS;
02495 (*o_putc)(c1|0x80);
02496 } else if (c2 == ISO_8859_1) {
02497 output_mode = ISO_8859_1;
02498 (*o_putc)(c1 | 0x080);
02499 #ifdef X0212_ENABLE
02500 } else if (is_eucg3(c2)){
02501 output_mode = SHIFT_JIS;
02502 if (e2s_conv(c2, c1, &c2, &c1) == 0){
02503 (*o_putc)(c2);
02504 (*o_putc)(c1);
02505 }
02506 #endif
02507 } else {
02508 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
02509 set_iconv(FALSE, 0);
02510 return;
02511 }
02512 output_mode = SHIFT_JIS;
02513 e2s_conv(c2, c1, &c2, &c1);
02514
02515 #ifdef SHIFTJIS_CP932
02516 if (cp932inv_f
02517 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
02518 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
02519 if (c){
02520 c2 = c >> 8;
02521 c1 = c & 0xff;
02522 }
02523 }
02524 #endif
02525
02526 (*o_putc)(c2);
02527 if (prefix_table[(unsigned char)c1]){
02528 (*o_putc)(prefix_table[(unsigned char)c1]);
02529 }
02530 (*o_putc)(c1);
02531 }
02532 }
02533
02534 #ifdef UTF8_OUTPUT_ENABLE
02535 static void
02536 w_oconv(nkf_char c2, nkf_char c1)
02537 {
02538 nkf_char c3, c4;
02539 nkf_char val;
02540
02541 if (output_bom_f) {
02542 output_bom_f = FALSE;
02543 (*o_putc)('\357');
02544 (*o_putc)('\273');
02545 (*o_putc)('\277');
02546 }
02547
02548 if (c2 == EOF) {
02549 (*o_putc)(EOF);
02550 return;
02551 }
02552
02553 if (c2 == 0 && nkf_char_unicode_p(c1)){
02554 val = c1 & VALUE_MASK;
02555 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02556 (*o_putc)(c1);
02557 if (c2) (*o_putc)(c2);
02558 if (c3) (*o_putc)(c3);
02559 if (c4) (*o_putc)(c4);
02560 return;
02561 }
02562
02563 if (c2 == 0) {
02564 (*o_putc)(c1);
02565 } else {
02566 val = e2w_conv(c2, c1);
02567 if (val){
02568 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02569 (*o_putc)(c1);
02570 if (c2) (*o_putc)(c2);
02571 if (c3) (*o_putc)(c3);
02572 if (c4) (*o_putc)(c4);
02573 }
02574 }
02575 }
02576
02577 static void
02578 w_oconv16(nkf_char c2, nkf_char c1)
02579 {
02580 if (output_bom_f) {
02581 output_bom_f = FALSE;
02582 if (output_endian == ENDIAN_LITTLE){
02583 (*o_putc)(0xFF);
02584 (*o_putc)(0xFE);
02585 }else{
02586 (*o_putc)(0xFE);
02587 (*o_putc)(0xFF);
02588 }
02589 }
02590
02591 if (c2 == EOF) {
02592 (*o_putc)(EOF);
02593 return;
02594 }
02595
02596 if (c2 == 0 && nkf_char_unicode_p(c1)) {
02597 if (nkf_char_unicode_bmp_p(c1)) {
02598 c2 = (c1 >> 8) & 0xff;
02599 c1 &= 0xff;
02600 } else {
02601 c1 &= VALUE_MASK;
02602 if (c1 <= UNICODE_MAX) {
02603 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);
02604 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00);
02605 if (output_endian == ENDIAN_LITTLE){
02606 (*o_putc)(c2 & 0xff);
02607 (*o_putc)((c2 >> 8) & 0xff);
02608 (*o_putc)(c1 & 0xff);
02609 (*o_putc)((c1 >> 8) & 0xff);
02610 }else{
02611 (*o_putc)((c2 >> 8) & 0xff);
02612 (*o_putc)(c2 & 0xff);
02613 (*o_putc)((c1 >> 8) & 0xff);
02614 (*o_putc)(c1 & 0xff);
02615 }
02616 }
02617 return;
02618 }
02619 } else if (c2) {
02620 nkf_char val = e2w_conv(c2, c1);
02621 c2 = (val >> 8) & 0xff;
02622 c1 = val & 0xff;
02623 if (!val) return;
02624 }
02625
02626 if (output_endian == ENDIAN_LITTLE){
02627 (*o_putc)(c1);
02628 (*o_putc)(c2);
02629 }else{
02630 (*o_putc)(c2);
02631 (*o_putc)(c1);
02632 }
02633 }
02634
02635 static void
02636 w_oconv32(nkf_char c2, nkf_char c1)
02637 {
02638 if (output_bom_f) {
02639 output_bom_f = FALSE;
02640 if (output_endian == ENDIAN_LITTLE){
02641 (*o_putc)(0xFF);
02642 (*o_putc)(0xFE);
02643 (*o_putc)(0);
02644 (*o_putc)(0);
02645 }else{
02646 (*o_putc)(0);
02647 (*o_putc)(0);
02648 (*o_putc)(0xFE);
02649 (*o_putc)(0xFF);
02650 }
02651 }
02652
02653 if (c2 == EOF) {
02654 (*o_putc)(EOF);
02655 return;
02656 }
02657
02658 if (c2 == ISO_8859_1) {
02659 c1 |= 0x80;
02660 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
02661 c1 &= VALUE_MASK;
02662 } else if (c2) {
02663 c1 = e2w_conv(c2, c1);
02664 if (!c1) return;
02665 }
02666 if (output_endian == ENDIAN_LITTLE){
02667 (*o_putc)( c1 & 0xFF);
02668 (*o_putc)((c1 >> 8) & 0xFF);
02669 (*o_putc)((c1 >> 16) & 0xFF);
02670 (*o_putc)(0);
02671 }else{
02672 (*o_putc)(0);
02673 (*o_putc)((c1 >> 16) & 0xFF);
02674 (*o_putc)((c1 >> 8) & 0xFF);
02675 (*o_putc)( c1 & 0xFF);
02676 }
02677 }
02678 #endif
02679
/*
 * Penalty bits accumulated in input_code.score while guessing the input
 * encoding. Each flag is one bit above the previous one, so a more
 * severe finding always outweighs any combination of the milder ones.
 * h_conv() selects the candidate with the numerically lowest score.
 */
#define SCORE_L2 (1)
#define SCORE_KANA (SCORE_L2 << 1)
#define SCORE_DEPEND (SCORE_KANA << 1)
#define SCORE_CP932 (SCORE_DEPEND << 1)
#define SCORE_X0212 (SCORE_CP932 << 1)
#define SCORE_NO_EXIST (SCORE_X0212 << 1)
#define SCORE_iMIME (SCORE_NO_EXIST << 1)
#define SCORE_ERROR (SCORE_iMIME << 1)

/* Score every candidate starts with (see status_reset()). */
#define SCORE_INIT (SCORE_iMIME)

/* Penalty per lead byte, indexed by (c2 & 0x0f); used by code_score()
 * when (c2 & 0x70) == 0x20. */
static const nkf_char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/* Penalty per lead byte, indexed by (c2 & 0x0f); used by code_score()
 * when (c2 & 0x70) == 0x70. */
static const nkf_char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
02704
02705 static void
02706 set_code_score(struct input_code *ptr, nkf_char score)
02707 {
02708 if (ptr){
02709 ptr->score |= score;
02710 }
02711 }
02712
02713 static void
02714 clr_code_score(struct input_code *ptr, nkf_char score)
02715 {
02716 if (ptr){
02717 ptr->score &= ~score;
02718 }
02719 }
02720
02721 static void
02722 code_score(struct input_code *ptr)
02723 {
02724 nkf_char c2 = ptr->buf[0];
02725 #ifdef UTF8_OUTPUT_ENABLE
02726 nkf_char c1 = ptr->buf[1];
02727 #endif
02728 if (c2 < 0){
02729 set_code_score(ptr, SCORE_ERROR);
02730 }else if (c2 == SS2){
02731 set_code_score(ptr, SCORE_KANA);
02732 }else if (c2 == 0x8f){
02733 set_code_score(ptr, SCORE_X0212);
02734 #ifdef UTF8_OUTPUT_ENABLE
02735 }else if (!e2w_conv(c2, c1)){
02736 set_code_score(ptr, SCORE_NO_EXIST);
02737 #endif
02738 }else if ((c2 & 0x70) == 0x20){
02739 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
02740 }else if ((c2 & 0x70) == 0x70){
02741 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
02742 }else if ((c2 & 0x70) >= 0x50){
02743 set_code_score(ptr, SCORE_L2);
02744 }
02745 }
02746
02747 static void
02748 status_disable(struct input_code *ptr)
02749 {
02750 ptr->stat = -1;
02751 ptr->buf[0] = -1;
02752 code_score(ptr);
02753 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
02754 }
02755
02756 static void
02757 status_push_ch(struct input_code *ptr, nkf_char c)
02758 {
02759 ptr->buf[ptr->index++] = c;
02760 }
02761
02762 static void
02763 status_clear(struct input_code *ptr)
02764 {
02765 ptr->stat = 0;
02766 ptr->index = 0;
02767 }
02768
02769 static void
02770 status_reset(struct input_code *ptr)
02771 {
02772 status_clear(ptr);
02773 ptr->score = SCORE_INIT;
02774 }
02775
02776 static void
02777 status_reinit(struct input_code *ptr)
02778 {
02779 status_reset(ptr);
02780 ptr->_file_stat = 0;
02781 }
02782
02783 static void
02784 status_check(struct input_code *ptr, nkf_char c)
02785 {
02786 if (c <= DEL && estab_f){
02787 status_reset(ptr);
02788 }
02789 }
02790
02791 static void
02792 s_status(struct input_code *ptr, nkf_char c)
02793 {
02794 switch(ptr->stat){
02795 case -1:
02796 status_check(ptr, c);
02797 break;
02798 case 0:
02799 if (c <= DEL){
02800 break;
02801 }else if (nkf_char_unicode_p(c)){
02802 break;
02803 }else if (0xa1 <= c && c <= 0xdf){
02804 status_push_ch(ptr, SS2);
02805 status_push_ch(ptr, c);
02806 code_score(ptr);
02807 status_clear(ptr);
02808 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
02809 ptr->stat = 1;
02810 status_push_ch(ptr, c);
02811 }else if (0xed <= c && c <= 0xee){
02812 ptr->stat = 3;
02813 status_push_ch(ptr, c);
02814 #ifdef SHIFTJIS_CP932
02815 }else if (is_ibmext_in_sjis(c)){
02816 ptr->stat = 2;
02817 status_push_ch(ptr, c);
02818 #endif
02819 #ifdef X0212_ENABLE
02820 }else if (0xf0 <= c && c <= 0xfc){
02821 ptr->stat = 1;
02822 status_push_ch(ptr, c);
02823 #endif
02824 }else{
02825 status_disable(ptr);
02826 }
02827 break;
02828 case 1:
02829 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
02830 status_push_ch(ptr, c);
02831 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
02832 code_score(ptr);
02833 status_clear(ptr);
02834 }else{
02835 status_disable(ptr);
02836 }
02837 break;
02838 case 2:
02839 #ifdef SHIFTJIS_CP932
02840 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
02841 status_push_ch(ptr, c);
02842 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
02843 set_code_score(ptr, SCORE_CP932);
02844 status_clear(ptr);
02845 break;
02846 }
02847 }
02848 #endif
02849 status_disable(ptr);
02850 break;
02851 case 3:
02852 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
02853 status_push_ch(ptr, c);
02854 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
02855 set_code_score(ptr, SCORE_CP932);
02856 status_clear(ptr);
02857 }else{
02858 status_disable(ptr);
02859 }
02860 break;
02861 }
02862 }
02863
02864 static void
02865 e_status(struct input_code *ptr, nkf_char c)
02866 {
02867 switch (ptr->stat){
02868 case -1:
02869 status_check(ptr, c);
02870 break;
02871 case 0:
02872 if (c <= DEL){
02873 break;
02874 }else if (nkf_char_unicode_p(c)){
02875 break;
02876 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
02877 ptr->stat = 1;
02878 status_push_ch(ptr, c);
02879 #ifdef X0212_ENABLE
02880 }else if (0x8f == c){
02881 ptr->stat = 2;
02882 status_push_ch(ptr, c);
02883 #endif
02884 }else{
02885 status_disable(ptr);
02886 }
02887 break;
02888 case 1:
02889 if (0xa1 <= c && c <= 0xfe){
02890 status_push_ch(ptr, c);
02891 code_score(ptr);
02892 status_clear(ptr);
02893 }else{
02894 status_disable(ptr);
02895 }
02896 break;
02897 #ifdef X0212_ENABLE
02898 case 2:
02899 if (0xa1 <= c && c <= 0xfe){
02900 ptr->stat = 1;
02901 status_push_ch(ptr, c);
02902 }else{
02903 status_disable(ptr);
02904 }
02905 #endif
02906 }
02907 }
02908
02909 #ifdef UTF8_INPUT_ENABLE
02910 static void
02911 w_status(struct input_code *ptr, nkf_char c)
02912 {
02913 switch (ptr->stat){
02914 case -1:
02915 status_check(ptr, c);
02916 break;
02917 case 0:
02918 if (c <= DEL){
02919 break;
02920 }else if (nkf_char_unicode_p(c)){
02921 break;
02922 }else if (0xc0 <= c && c <= 0xdf){
02923 ptr->stat = 1;
02924 status_push_ch(ptr, c);
02925 }else if (0xe0 <= c && c <= 0xef){
02926 ptr->stat = 2;
02927 status_push_ch(ptr, c);
02928 }else if (0xf0 <= c && c <= 0xf4){
02929 ptr->stat = 3;
02930 status_push_ch(ptr, c);
02931 }else{
02932 status_disable(ptr);
02933 }
02934 break;
02935 case 1:
02936 case 2:
02937 if (0x80 <= c && c <= 0xbf){
02938 status_push_ch(ptr, c);
02939 if (ptr->index > ptr->stat){
02940 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
02941 && ptr->buf[2] == 0xbf);
02942 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
02943 &ptr->buf[0], &ptr->buf[1]);
02944 if (!bom){
02945 code_score(ptr);
02946 }
02947 status_clear(ptr);
02948 }
02949 }else{
02950 status_disable(ptr);
02951 }
02952 break;
02953 case 3:
02954 if (0x80 <= c && c <= 0xbf){
02955 if (ptr->index < ptr->stat){
02956 status_push_ch(ptr, c);
02957 } else {
02958 status_clear(ptr);
02959 }
02960 }else{
02961 status_disable(ptr);
02962 }
02963 break;
02964 }
02965 }
02966 #endif
02967
02968 static void
02969 code_status(nkf_char c)
02970 {
02971 int action_flag = 1;
02972 struct input_code *result = 0;
02973 struct input_code *p = input_code_list;
02974 while (p->name){
02975 if (!p->status_func) {
02976 ++p;
02977 continue;
02978 }
02979 if (!p->status_func)
02980 continue;
02981 (p->status_func)(p, c);
02982 if (p->stat > 0){
02983 action_flag = 0;
02984 }else if(p->stat == 0){
02985 if (result){
02986 action_flag = 0;
02987 }else{
02988 result = p;
02989 }
02990 }
02991 ++p;
02992 }
02993
02994 if (action_flag){
02995 if (result && !estab_f){
02996 set_iconv(TRUE, result->iconv_func);
02997 }else if (c <= DEL){
02998 struct input_code *ptr = input_code_list;
02999 while (ptr->name){
03000 status_reset(ptr);
03001 ++ptr;
03002 }
03003 }
03004 }
03005 }
03006
/*
 * Process-wide converter state, allocated once by nkf_state_init().
 */
typedef struct {
    nkf_buf_t *std_gc_buf;      /* push-back buffer used by std_getc()/std_ungetc() */
    nkf_char broken_state;      /* last character returned by broken_getc(), or 0 */
    nkf_buf_t *broken_buf;      /* pending characters for broken_getc() */
    nkf_char mimeout_state;     /* reset to 0 by nkf_state_init(); used elsewhere */
    nkf_buf_t *nfc_buf;         /* cleared by nkf_state_init(); used elsewhere */
} nkf_state_t;

static nkf_state_t *nkf_state = NULL;

/* Capacity passed to nkf_buf_new() for std_gc_buf. */
#define STD_GC_BUFSIZE (256)
03018
03019 static void
03020 nkf_state_init(void)
03021 {
03022 if (nkf_state) {
03023 nkf_buf_clear(nkf_state->std_gc_buf);
03024 nkf_buf_clear(nkf_state->broken_buf);
03025 nkf_buf_clear(nkf_state->nfc_buf);
03026 }
03027 else {
03028 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
03029 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
03030 nkf_state->broken_buf = nkf_buf_new(3);
03031 nkf_state->nfc_buf = nkf_buf_new(9);
03032 }
03033 nkf_state->broken_state = 0;
03034 nkf_state->mimeout_state = 0;
03035 }
03036
03037 #ifndef WIN32DLL
03038 static nkf_char
03039 std_getc(FILE *f)
03040 {
03041 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
03042 return nkf_buf_pop(nkf_state->std_gc_buf);
03043 }
03044 return getc(f);
03045 }
03046 #endif
03047
03048 static nkf_char
03049 std_ungetc(nkf_char c, FILE *f)
03050 {
03051 nkf_buf_push(nkf_state->std_gc_buf, c);
03052 return c;
03053 }
03054
03055 #ifndef WIN32DLL
03056 static void
03057 std_putc(nkf_char c)
03058 {
03059 if(c!=EOF)
03060 putchar(c);
03061 }
03062 #endif
03063
03064 static nkf_char hold_buf[HOLD_SIZE*2];
03065 static int hold_count = 0;
03066 static nkf_char
03067 push_hold_buf(nkf_char c2)
03068 {
03069 if (hold_count >= HOLD_SIZE*2)
03070 return (EOF);
03071 hold_buf[hold_count++] = c2;
03072 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
03073 }
03074
/*
 * h_conv - hold input while the encoding is still ambiguous, decide on a
 * converter, then replay the held characters through it.
 *
 * f      : input stream, read through i_getc / i_ungetc
 * c1, c2 : the two characters already read by the caller; they become
 *          the first two entries of hold_buf
 *
 * Returns the last character obtained by the read-ahead loop (EOF if the
 * input ended there), or EOF if the input ended while fetching the third
 * or fourth byte of a character during replay.
 */
static int
h_conv(FILE *f, nkf_char c1, nkf_char c2)
{
    int ret;
    int hold_index;
    nkf_char c3, c4;

    /* Phase 1: read ahead, feeding every byte to the encoding detectors. */
    hold_count = 0;
    push_hold_buf(c1);
    push_hold_buf(c2);

    while ((c2 = (*i_getc)(f)) != EOF) {
        if (c2 == ESC){
            /* Leave the escape sequence for the caller to interpret. */
            (*i_ungetc)(c2,f);
            break;
        }
        code_status(c2);
        /* Stop when the hold buffer is full or an encoding was established. */
        if (push_hold_buf(c2) == EOF || estab_f) {
            break;
        }
    }

    /* Phase 2: still undecided -> pick the candidate with the lowest score
     * (the first list entry is the initial choice). */
    if (!estab_f) {
        struct input_code *p = input_code_list;
        struct input_code *result = p;
        if (c2 == EOF) {
            code_status(c2);
        }
        while (p->name) {
            if (p->status_func && p->score < result->score) {
                result = p;
            }
            p++;
        }
        set_iconv(TRUE, result->iconv_func);
    }


    /* Phase 3: replay the held characters through the chosen converter. */
    ret = c2;
    hold_index = 0;
    while (hold_index < hold_count){
        c1 = hold_buf[hold_index++];
        if (nkf_char_unicode_p(c1)) {
            /* Unicode-flagged values bypass iconv. */
            (*oconv)(0, c1);
            continue;
        }
        else if (c1 <= DEL){
            (*iconv)(0, c1, 0);
            continue;
        }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
            /* Single-byte kana when the Shift_JIS converter was chosen. */
            (*iconv)(JIS_X_0201_1976_K, c1, 0);
            continue;
        }
        /* Multi-byte character: take the second byte from the hold buffer,
         * or from the stream once the buffer is exhausted. */
        if (hold_index < hold_count){
            c2 = hold_buf[hold_index++];
        }else{
            c2 = (*i_getc)(f);
            if (c2 == EOF){
                /* NOTE(review): c4 is not read after this break, and ret
                 * keeps the value assigned before the replay loop. */
                c4 = EOF;
                break;
            }
            code_status(c2);
        }
        c3 = 0;
        switch ((*iconv)(c1, c2, 0)) {
        case -2:
            /* Converter asks for two more bytes (c3 and c4). */
            if (hold_index < hold_count){
                c3 = hold_buf[hold_index++];
            } else if ((c3 = (*i_getc)(f)) == EOF) {
                ret = EOF;
                break;
            }
            code_status(c3);
            if (hold_index < hold_count){
                c4 = hold_buf[hold_index++];
            } else if ((c4 = (*i_getc)(f)) == EOF) {
                /* c3 doubles as the "stop replay" marker tested below. */
                c3 = ret = EOF;
                break;
            }
            code_status(c4);
            (*iconv)(c1, c2, (c3<<8)|c4);
            break;
        case -1:
            /* Converter asks for one more byte (c3). */
            if (hold_index < hold_count){
                c3 = hold_buf[hold_index++];
            } else if ((c3 = (*i_getc)(f)) == EOF) {
                ret = EOF;
                break;
            } else {
                code_status(c3);
            }
            (*iconv)(c1, c2, c3);
            break;
        }
        if (c3 == EOF) break;
    }
    return ret;
}
03188
03189
03190
03191
03192 static void
03193 check_bom(FILE *f)
03194 {
03195 int c2;
03196 switch(c2 = (*i_getc)(f)){
03197 case 0x00:
03198 if((c2 = (*i_getc)(f)) == 0x00){
03199 if((c2 = (*i_getc)(f)) == 0xFE){
03200 if((c2 = (*i_getc)(f)) == 0xFF){
03201 if(!input_encoding){
03202 set_iconv(TRUE, w_iconv32);
03203 }
03204 if (iconv == w_iconv32) {
03205 input_endian = ENDIAN_BIG;
03206 return;
03207 }
03208 (*i_ungetc)(0xFF,f);
03209 }else (*i_ungetc)(c2,f);
03210 (*i_ungetc)(0xFE,f);
03211 }else if(c2 == 0xFF){
03212 if((c2 = (*i_getc)(f)) == 0xFE){
03213 if(!input_encoding){
03214 set_iconv(TRUE, w_iconv32);
03215 }
03216 if (iconv == w_iconv32) {
03217 input_endian = ENDIAN_2143;
03218 return;
03219 }
03220 (*i_ungetc)(0xFF,f);
03221 }else (*i_ungetc)(c2,f);
03222 (*i_ungetc)(0xFF,f);
03223 }else (*i_ungetc)(c2,f);
03224 (*i_ungetc)(0x00,f);
03225 }else (*i_ungetc)(c2,f);
03226 (*i_ungetc)(0x00,f);
03227 break;
03228 case 0xEF:
03229 if((c2 = (*i_getc)(f)) == 0xBB){
03230 if((c2 = (*i_getc)(f)) == 0xBF){
03231 if(!input_encoding){
03232 set_iconv(TRUE, w_iconv);
03233 }
03234 if (iconv == w_iconv) {
03235 return;
03236 }
03237 (*i_ungetc)(0xBF,f);
03238 }else (*i_ungetc)(c2,f);
03239 (*i_ungetc)(0xBB,f);
03240 }else (*i_ungetc)(c2,f);
03241 (*i_ungetc)(0xEF,f);
03242 break;
03243 case 0xFE:
03244 if((c2 = (*i_getc)(f)) == 0xFF){
03245 if((c2 = (*i_getc)(f)) == 0x00){
03246 if((c2 = (*i_getc)(f)) == 0x00){
03247 if(!input_encoding){
03248 set_iconv(TRUE, w_iconv32);
03249 }
03250 if (iconv == w_iconv32) {
03251 input_endian = ENDIAN_3412;
03252 return;
03253 }
03254 (*i_ungetc)(0x00,f);
03255 }else (*i_ungetc)(c2,f);
03256 (*i_ungetc)(0x00,f);
03257 }else (*i_ungetc)(c2,f);
03258 if(!input_encoding){
03259 set_iconv(TRUE, w_iconv16);
03260 }
03261 if (iconv == w_iconv16) {
03262 input_endian = ENDIAN_BIG;
03263 return;
03264 }
03265 (*i_ungetc)(0xFF,f);
03266 }else (*i_ungetc)(c2,f);
03267 (*i_ungetc)(0xFE,f);
03268 break;
03269 case 0xFF:
03270 if((c2 = (*i_getc)(f)) == 0xFE){
03271 if((c2 = (*i_getc)(f)) == 0x00){
03272 if((c2 = (*i_getc)(f)) == 0x00){
03273 if(!input_encoding){
03274 set_iconv(TRUE, w_iconv32);
03275 }
03276 if (iconv == w_iconv32) {
03277 input_endian = ENDIAN_LITTLE;
03278 return;
03279 }
03280 (*i_ungetc)(0x00,f);
03281 }else (*i_ungetc)(c2,f);
03282 (*i_ungetc)(0x00,f);
03283 }else (*i_ungetc)(c2,f);
03284 if(!input_encoding){
03285 set_iconv(TRUE, w_iconv16);
03286 }
03287 if (iconv == w_iconv16) {
03288 input_endian = ENDIAN_LITTLE;
03289 return;
03290 }
03291 (*i_ungetc)(0xFE,f);
03292 }else (*i_ungetc)(c2,f);
03293 (*i_ungetc)(0xFF,f);
03294 break;
03295 default:
03296 (*i_ungetc)(c2,f);
03297 break;
03298 }
03299 }
03300
03301 static nkf_char
03302 broken_getc(FILE *f)
03303 {
03304 nkf_char c, c1;
03305
03306 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
03307 return nkf_buf_pop(nkf_state->broken_buf);
03308 }
03309 c = (*i_bgetc)(f);
03310 if (c=='$' && nkf_state->broken_state != ESC
03311 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
03312 c1= (*i_bgetc)(f);
03313 nkf_state->broken_state = 0;
03314 if (c1=='@'|| c1=='B') {
03315 nkf_buf_push(nkf_state->broken_buf, c1);
03316 nkf_buf_push(nkf_state->broken_buf, c);
03317 return ESC;
03318 } else {
03319 (*i_bungetc)(c1,f);
03320 return c;
03321 }
03322 } else if (c=='(' && nkf_state->broken_state != ESC
03323 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
03324 c1= (*i_bgetc)(f);
03325 nkf_state->broken_state = 0;
03326 if (c1=='J'|| c1=='B') {
03327 nkf_buf_push(nkf_state->broken_buf, c1);
03328 nkf_buf_push(nkf_state->broken_buf, c);
03329 return ESC;
03330 } else {
03331 (*i_bungetc)(c1,f);
03332 return c;
03333 }
03334 } else {
03335 nkf_state->broken_state = c;
03336 return c;
03337 }
03338 }
03339
03340 static nkf_char
03341 broken_ungetc(nkf_char c, FILE *f)
03342 {
03343 if (nkf_buf_length(nkf_state->broken_buf) < 2)
03344 nkf_buf_push(nkf_state->broken_buf, c);
03345 return c;
03346 }
03347
03348 static void
03349 eol_conv(nkf_char c2, nkf_char c1)
03350 {
03351 if (guess_f && input_eol != EOF) {
03352 if (c2 == 0 && c1 == LF) {
03353 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
03354 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
03355 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
03356 else if (!prev_cr);
03357 else if (!input_eol) input_eol = CR;
03358 else if (input_eol != CR) input_eol = EOF;
03359 }
03360 if (prev_cr || (c2 == 0 && c1 == LF)) {
03361 prev_cr = 0;
03362 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
03363 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
03364 }
03365 if (c2 == 0 && c1 == CR) prev_cr = CR;
03366 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
03367 }
03368
03369 static void
03370 put_newline(void (*func)(nkf_char))
03371 {
03372 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03373 case CRLF:
03374 (*func)(0x0D);
03375 (*func)(0x0A);
03376 break;
03377 case CR:
03378 (*func)(0x0D);
03379 break;
03380 case LF:
03381 (*func)(0x0A);
03382 break;
03383 }
03384 }
03385
03386 static void
03387 oconv_newline(void (*func)(nkf_char, nkf_char))
03388 {
03389 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03390 case CRLF:
03391 (*func)(0, 0x0D);
03392 (*func)(0, 0x0A);
03393 break;
03394 case CR:
03395 (*func)(0, 0x0D);
03396 break;
03397 case LF:
03398 (*func)(0, 0x0A);
03399 break;
03400 }
03401 }
03402
03403
03404
03405
03406
03407
03408
03409
03410
03411
03412
03413
03414
03415
03416
03417
03418
03419
03420
03421
03422
/* Display width used by fold_conv(): 2 columns when c2 is non-zero
 * (two-byte character), 1 otherwise. c1 is accepted for symmetry with
 * the converter signatures but not used. The argument is parenthesised
 * so that compound expressions expand correctly. */
#define char_size(c2,c1) ((c2) ? 2 : 1)
03424
/*
 * fold_conv - line-folding filter placed in front of o_fconv.
 *
 * Tracks the current column in f_line and the previous character in
 * f_prev (with 0x80 set when it was not a plain single byte), and decides
 * for each incoming character what to emit. The decision is recorded in
 * fold_state and acted on by the switch at the end:
 *     0       emit nothing
 *     LF      emit a newline, then the character
 *     CR      emit a newline only
 *     SP/TAB  emit a single space
 *     other   emit the character unchanged
 *
 * fold_len is the target width and fold_margin the extra columns within
 * which certain characters may still be appended to the current line.
 * fold_preserve_f keeps the input's own line breaks.
 */
static void
fold_conv(nkf_char c2, nkf_char c1)
{
    nkf_char prev0;
    nkf_char fold_state;

    if (c1== CR && !fold_preserve_f) {
        /* CR is dropped when input line breaks are not preserved. */
        fold_state=0;
    }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
        /* Second half of a CR LF pair; the CR already produced the break. */
        f_prev = LF;
        fold_state=0;
    } else if (c1== BS) {
        /* Backspace moves the column back but is still passed through. */
        if (f_line>0) f_line--;
        fold_state = 1;
    } else if (c2==EOF && f_line != 0) {
        /* End of input in the middle of a line: terminate the line. */
        fold_state = LF;
    } else if ((c1==LF && !fold_preserve_f)
               || ((c1==CR||(c1==LF&&f_prev!=CR))
                   && fold_preserve_f)) {
        /* A line break in the input. */
        if (fold_preserve_f) {
            f_prev = c1;
            f_line = 0;
            fold_state = CR;
        } else if ((f_prev == c1 && !fold_preserve_f)
                   || (f_prev == LF && fold_preserve_f)
                   ) {
            /* Two line breaks in a row. fold_preserve_f is always false
             * in this branch, so only the first alternative can match. */
            if (f_line) {
                f_line = 0;
                fold_state = LF;
            } else {
                f_line = 0;
                fold_state = 1;
            }
        } else {
            /* A single line break is joined to the next line. */
            if (f_prev&0x80) {
                /* After a non-single-byte character nothing is emitted. */
                f_prev = c1;
                fold_state = 0;
            } else if (f_prev==SP) {
                fold_state = 0;
            } else {
                /* Otherwise the break becomes a space, or a real break
                 * when the line is already full. */
                f_prev = c1;
                if (++f_line<=fold_len)
                    fold_state = SP;
                else {
                    f_line = 0;
                    fold_state = CR;
                }
            }
        }
    } else if (c1=='\f') {
        /* Form feed: start a new line and pass the form feed through. */
        f_prev = LF;
        f_line = 0;
        fold_state = LF;
    } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
        /* Blank, or the two-byte character 0x2121: runs collapse to one space. */
        if (f_prev == SP) {
            fold_state = 0;
        } else {
            f_prev = SP;
            if (++f_line<=fold_len)
                fold_state = SP;
            else {
                f_prev = SP; f_line = 0;
                fold_state = CR;
            }
        }
    } else {
        /* Ordinary character. */
        prev0 = f_prev;
        f_prev = c1;
        /* The second test is redundant: JIS_X_0201_1976_K is non-zero. */
        if (c2 || c2 == JIS_X_0201_1976_K)
            f_prev |= 0x80;
        f_line += char_size(c2,c1);
        if (f_line<=fold_len) {
            fold_state = 1;
        } else {
            if (f_line>fold_len+fold_margin) {
                /* Beyond the margin: always break before this character. */
                f_line = char_size(c2,c1);
                fold_state = LF;
            } else if (c2 == JIS_X_0201_1976_K) {
                /* Within the margin, these kana codes stay on the current
                 * line (presumably marks that must not start a line -
                 * TODO confirm). */
                if (c1==(0xde&0x7f)) fold_state = 1;
                else if (c1==(0xdf&0x7f)) fold_state = 1;
                else if (c1==(0xa4&0x7f)) fold_state = 1;
                else if (c1==(0xa3&0x7f)) fold_state = 1;
                else if (c1==(0xa1&0x7f)) fold_state = 1;
                else if (c1==(0xb0&0x7f)) fold_state = 1;
                else if (SP<=c1 && c1<=(0xdf&0x7f)) {
                    f_line = 1;
                    fold_state = LF;
                } else {
                    /* Same action as the branch above. */
                    f_line = 1;
                    fold_state = LF;
                }
            } else if (c2==0) {
                /* Single-byte character within the margin: closing
                 * punctuation stays on the current line. */
                if ( c1==')'||
                    c1==']'||
                    c1=='}'||
                    c1=='.'||
                    c1==','||
                    c1=='!'||
                    c1=='?'||
                    c1=='/'||
                    c1==':'||
                    c1==';') {
                    fold_state = 1;

                } else if (!is_alnum(prev0)) {
                    /* Previous character was not alphanumeric: break here. */
                    f_line = char_size(c2,c1);
                    fold_state = LF;
                } else if ((prev0==SP) ||
                           (prev0==LF)||
                           (prev0&0x80)) {
                    f_line = char_size(c2,c1);
                    fold_state = LF;
                } else {
                    /* Inside a run of alphanumerics: do not split it. */
                    fold_state = 1;
                }
            } else {
                if (c2=='!') {
                    /* Two-byte characters with lead byte 0x21 that stay on
                     * the current line (presumably punctuation that must
                     * not start a line - TODO confirm). */
                    if (c1=='"') fold_state = 1;
                    else if (c1=='#') fold_state = 1;
                    else if (c1=='W') fold_state = 1;
                    else if (c1=='K') fold_state = 1;
                    else if (c1=='$') fold_state = 1;
                    else if (c1=='%') fold_state = 1;
                    else if (c1=='\'') fold_state = 1;
                    else if (c1=='(') fold_state = 1;
                    else if (c1==')') fold_state = 1;
                    else if (c1=='*') fold_state = 1;
                    else if (c1=='+') fold_state = 1;
                    else if (c1==',') fold_state = 1;

                    else {
                        fold_state = LF;
                        f_line = char_size(c2,c1);

                    }
                } else {
                    f_line = char_size(c2,c1);
                    fold_state = LF;

                }
            }
        }
    }

    /* Act on the decision made above. */
    switch(fold_state) {
    case LF:
        oconv_newline(o_fconv);
        (*o_fconv)(c2,c1);
        break;
    case 0:
        return;
    case CR:
        oconv_newline(o_fconv);
        break;
    case TAB:
    case SP:
        (*o_fconv)(0,SP);
        break;
    default:
        (*o_fconv)(c2,c1);
    }
}
03591
03592 static nkf_char z_prev2=0,z_prev1=0;
03593
03594 static void
03595 z_conv(nkf_char c2, nkf_char c1)
03596 {
03597
03598
03599
03600 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
03601 (*o_zconv)(c2,c1);
03602 return;
03603 }
03604
03605 if (x0201_f) {
03606 if (z_prev2 == JIS_X_0201_1976_K) {
03607 if (c2 == JIS_X_0201_1976_K) {
03608 if (c1 == (0xde&0x7f)) {
03609 z_prev2 = 0;
03610 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
03611 return;
03612 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {
03613 z_prev2 = 0;
03614 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
03615 return;
03616 }
03617 }
03618 z_prev2 = 0;
03619 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
03620 }
03621 if (c2 == JIS_X_0201_1976_K) {
03622 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
03623
03624 z_prev1 = c1;
03625 z_prev2 = c2;
03626 return;
03627 } else {
03628 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
03629 return;
03630 }
03631 }
03632 }
03633
03634 if (c2 == EOF) {
03635 (*o_zconv)(c2, c1);
03636 return;
03637 }
03638
03639 if (alpha_f&1 && c2 == 0x23) {
03640
03641 c2 = 0;
03642 } else if (c2 == 0x21) {
03643
03644 if (0x21==c1) {
03645 if (alpha_f&2) {
03646 c2 = 0;
03647 c1 = SP;
03648 } else if (alpha_f&4) {
03649 (*o_zconv)(0, SP);
03650 (*o_zconv)(0, SP);
03651 return;
03652 }
03653 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
03654 c2 = 0;
03655 c1 = fv[c1-0x20];
03656 }
03657 }
03658
03659 if (alpha_f&8 && c2 == 0) {
03660
03661 const char *entity = 0;
03662 switch (c1){
03663 case '>': entity = ">"; break;
03664 case '<': entity = "<"; break;
03665 case '\"': entity = """; break;
03666 case '&': entity = "&"; break;
03667 }
03668 if (entity){
03669 while (*entity) (*o_zconv)(0, *entity++);
03670 return;
03671 }
03672 }
03673
03674 if (alpha_f & 16) {
03675
03676 if (c2 == 0x21) {
03677 nkf_char c = 0;
03678 switch (c1) {
03679 case 0x23:
03680
03681 c = 0xA1;
03682 break;
03683 case 0x56:
03684
03685 c = 0xA2;
03686 break;
03687 case 0x57:
03688
03689 c = 0xA3;
03690 break;
03691 case 0x22:
03692
03693 c = 0xA4;
03694 break;
03695 case 0x26:
03696
03697 c = 0xA5;
03698 break;
03699 case 0x3C:
03700
03701 c = 0xB0;
03702 break;
03703 case 0x2B:
03704
03705 c = 0xDE;
03706 break;
03707 case 0x2C:
03708
03709 c = 0xDF;
03710 break;
03711 }
03712 if (c) {
03713 (*o_zconv)(JIS_X_0201_1976_K, c);
03714 return;
03715 }
03716 } else if (c2 == 0x25) {
03717
03718 static const int fullwidth_to_halfwidth[] =
03719 {
03720 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
03721 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
03722 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
03723 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
03724 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
03725 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
03726 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
03727 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
03728 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
03729 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
03730 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
03731 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
03732 };
03733 if (fullwidth_to_halfwidth[c1-0x20]){
03734 c2 = fullwidth_to_halfwidth[c1-0x20];
03735 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
03736 if (c2 & 0xFF) {
03737 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
03738 }
03739 return;
03740 }
03741 }
03742 }
03743 (*o_zconv)(c2,c1);
03744 }
03745
03746
/*
 * ROT13: letters 'A'-'Z' and 'a'-'z' are rotated by 13 positions, every
 * other value is returned unchanged.
 *
 * The argument is parenthesized at every use so that an expression
 * argument (e.g. a conditional expression) groups as intended.  It is
 * still evaluated several times, so it must not have side effects.
 */
#define rot13(c)  ( \
    ((c) <  'A') ? (c) : \
    ((c) <= 'M') ? ((c) + 13) : \
    ((c) <= 'Z') ? ((c) - 13) : \
    ((c) <  'a') ? (c) : \
    ((c) <= 'm') ? ((c) + 13) : \
    ((c) <= 'z') ? ((c) - 13) : \
    (c) \
)
03756
/*
 * ROT47: values '!'..'~' are rotated by 47 positions within that range,
 * every other value is returned unchanged.
 *
 * The argument is parenthesized at every use so that an expression
 * argument groups as intended.  It is still evaluated several times, so
 * it must not have side effects.
 */
#define rot47(c) ( \
    ((c) <  '!') ? (c) : \
    ((c) <= 'O') ? ((c) + 47) : \
    ((c) <= '~') ? ((c) - 47) : \
    (c) \
)
03763
03764 static void
03765 rot_conv(nkf_char c2, nkf_char c1)
03766 {
03767 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
03768 c1 = rot13(c1);
03769 } else if (c2) {
03770 c1 = rot47(c1);
03771 c2 = rot47(c2);
03772 }
03773 (*o_rot_conv)(c2,c1);
03774 }
03775
03776 static void
03777 hira_conv(nkf_char c2, nkf_char c1)
03778 {
03779 if (hira_f & 1) {
03780 if (c2 == 0x25) {
03781 if (0x20 < c1 && c1 < 0x74) {
03782 c2 = 0x24;
03783 (*o_hira_conv)(c2,c1);
03784 return;
03785 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
03786 c2 = 0;
03787 c1 = nkf_char_unicode_new(0x3094);
03788 (*o_hira_conv)(c2,c1);
03789 return;
03790 }
03791 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
03792 c1 += 2;
03793 (*o_hira_conv)(c2,c1);
03794 return;
03795 }
03796 }
03797 if (hira_f & 2) {
03798 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
03799 c2 = 0x25;
03800 c1 = 0x74;
03801 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
03802 c2 = 0x25;
03803 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
03804 c1 -= 2;
03805 }
03806 }
03807 (*o_hira_conv)(c2,c1);
03808 }
03809
03810
03811 static void
03812 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
03813 {
03814 #define RANGE_NUM_MAX 18
03815 static const nkf_char range[RANGE_NUM_MAX][2] = {
03816 {0x222f, 0x2239,},
03817 {0x2242, 0x2249,},
03818 {0x2251, 0x225b,},
03819 {0x226b, 0x2271,},
03820 {0x227a, 0x227d,},
03821 {0x2321, 0x232f,},
03822 {0x233a, 0x2340,},
03823 {0x235b, 0x2360,},
03824 {0x237b, 0x237e,},
03825 {0x2474, 0x247e,},
03826 {0x2577, 0x257e,},
03827 {0x2639, 0x2640,},
03828 {0x2659, 0x267e,},
03829 {0x2742, 0x2750,},
03830 {0x2772, 0x277e,},
03831 {0x2841, 0x287e,},
03832 {0x4f54, 0x4f7e,},
03833 {0x7425, 0x747e},
03834 };
03835 nkf_char i;
03836 nkf_char start, end, c;
03837
03838 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
03839 c2 = GETA1;
03840 c1 = GETA2;
03841 }
03842 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
03843 c2 = GETA1;
03844 c1 = GETA2;
03845 }
03846
03847 for (i = 0; i < RANGE_NUM_MAX; i++) {
03848 start = range[i][0];
03849 end = range[i][1];
03850 c = (c2 << 8) + c1;
03851 if (c >= start && c <= end) {
03852 c2 = GETA1;
03853 c1 = GETA2;
03854 }
03855 }
03856 (*o_iso2022jp_check_conv)(c2,c1);
03857 }
03858
03859
03860
03861
/*
 * Encoded-word headers recognised on input and written on output.
 * "\075" is the octal escape for '='.  The list is NULL-terminated, and
 * mime_priority_func[], mime_encode[] and mime_encode_method[] are indexed
 * in parallel with it, so entries must be added to all four tables.
 * The ISO-2022-JP "B" header appears twice; the two slots carry different
 * values in mime_encode[].
 */
static const unsigned char *mime_pattern[] = {
    (const unsigned char *)"\075?EUC-JP?B?",
    (const unsigned char *)"\075?SHIFT_JIS?B?",
    (const unsigned char *)"\075?ISO-8859-1?Q?",
    (const unsigned char *)"\075?ISO-8859-1?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?Q?",
#if defined(UTF8_INPUT_ENABLE)
    (const unsigned char *)"\075?UTF-8?B?",
    (const unsigned char *)"\075?UTF-8?Q?",
#endif
    (const unsigned char *)"\075?US-ASCII?Q?",
    NULL
};
03877
03878
03879
03880 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
03881 e_iconv, s_iconv, 0, 0, 0, 0,
03882 #if defined(UTF8_INPUT_ENABLE)
03883 w_iconv, w_iconv,
03884 #endif
03885 0,
03886 };
03887
/*
 * Mode value for each mime_pattern[] entry (same index), terminated by 0.
 * open_mime() compares its mode argument against these values to pick the
 * header to write.
 */
static const nkf_char mime_encode[] = {
    EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
#if defined(UTF8_INPUT_ENABLE)
    UTF_8, UTF_8,
#endif
    ASCII,
    0
};
03896
/*
 * Encoding letter ('B' or 'Q') for each mime_pattern[] entry (same index),
 * terminated by 0.  open_mime() copies the selected entry into
 * mimeout_mode.
 */
static const nkf_char mime_encode_method[] = {
    'B', 'B','Q', 'B', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE)
    'B', 'Q',
#endif
    'Q',
    0
};
03905
03906
03907
03908
/* Size of the MIME input buffer and the mask used to wrap indices into it. */
#define MIME_BUF_SIZE   (1024)
#define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
/* Access element n of the buffer; the index is wrapped with MIME_BUF_MASK. */
#define mime_input_buf(n)        mime_input_state.buf[(n)&MIME_BUF_MASK]
static struct {
    unsigned char buf[MIME_BUF_SIZE];
    unsigned int  top;    /* next position mime_getc() returns; decremented by mime_ungetc() */
    unsigned int  last;   /* end of buffered data; nothing is buffered when top == last */
    unsigned int  input;  /* cursor used by mime_integrity(), mime_getc_buf(), mime_ungetc_buf() */
} mime_input_state;
/* Converter saved by mime_begin_strict() and restored by unswitch_mime_getc(). */
static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;

/* Bound on the characters examined while recognising an encoded-word header. */
#define MAXRECOVER 20
03921
03922 static void
03923 mime_input_buf_unshift(nkf_char c)
03924 {
03925 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
03926 }
03927
03928 static nkf_char
03929 mime_ungetc(nkf_char c, FILE *f)
03930 {
03931 mime_input_buf_unshift(c);
03932 return c;
03933 }
03934
03935 static nkf_char
03936 mime_ungetc_buf(nkf_char c, FILE *f)
03937 {
03938 if (mimebuf_f)
03939 (*i_mungetc_buf)(c,f);
03940 else
03941 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
03942 return c;
03943 }
03944
03945 static nkf_char
03946 mime_getc_buf(FILE *f)
03947 {
03948
03949
03950 return ((mimebuf_f)?
03951 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
03952 }
03953
03954 static void
03955 switch_mime_getc(void)
03956 {
03957 if (i_getc!=mime_getc) {
03958 i_mgetc = i_getc; i_getc = mime_getc;
03959 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
03960 if(mime_f==STRICT_MIME) {
03961 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
03962 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
03963 }
03964 }
03965 }
03966
03967 static void
03968 unswitch_mime_getc(void)
03969 {
03970 if(mime_f==STRICT_MIME) {
03971 i_mgetc = i_mgetc_buf;
03972 i_mungetc = i_mungetc_buf;
03973 }
03974 i_getc = i_mgetc;
03975 i_ungetc = i_mungetc;
03976 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
03977 mime_iconv_back = NULL;
03978 }
03979
03980 static nkf_char
03981 mime_integrity(FILE *f, const unsigned char *p)
03982 {
03983 nkf_char c,d;
03984 unsigned int q;
03985
03986
03987 mime_input_state.input = mime_input_state.top;
03988 mime_input_state.last = mime_input_state.top;
03989
03990 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
03991 d = 0;
03992 q = mime_input_state.input;
03993 while((c=(*i_getc)(f))!=EOF) {
03994 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
03995 break;
03996 }
03997 if (c=='=' && d=='?') {
03998
03999 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04000
04001 mime_input_state.input = q;
04002 switch_mime_getc();
04003 return 1;
04004 }
04005 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
04006 break;
04007
04008 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04009 d=c;
04010 }
04011
04012 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04013 mime_input_state.last = mime_input_state.input;
04014 mime_decode_mode = 1;
04015 switch_mime_getc();
04016 return 1;
04017 }
04018
04019 static nkf_char
04020 mime_begin_strict(FILE *f)
04021 {
04022 nkf_char c1 = 0;
04023 int i,j,k;
04024 const unsigned char *p,*q;
04025 nkf_char r[MAXRECOVER];
04026
04027 mime_decode_mode = FALSE;
04028
04029 j = 0;
04030 p = mime_pattern[j];
04031 r[0]='='; r[1]='?';
04032
04033 for(i=2;p[i]>SP;i++) {
04034 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
04035
04036 q = p;
04037 while (mime_pattern[++j]) {
04038 p = mime_pattern[j];
04039 for(k=2;k<i;k++)
04040 if (p[k]!=q[k]) break;
04041 if (k==i && nkf_toupper(c1)==p[k]) break;
04042 }
04043 p = mime_pattern[j];
04044 if (p) continue;
04045
04046 (*i_ungetc)(c1,f);
04047 for(j=0;j<i;j++) {
04048 (*oconv)(0,r[j]);
04049 }
04050 return c1;
04051 }
04052 }
04053 mime_decode_mode = p[i-2];
04054
04055 mime_iconv_back = iconv;
04056 set_iconv(FALSE, mime_priority_func[j]);
04057 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
04058
04059 if (mime_decode_mode=='B') {
04060 mimebuf_f = unbuf_f;
04061 if (!unbuf_f) {
04062
04063 return mime_integrity(f,mime_pattern[j]);
04064 }
04065 }
04066 switch_mime_getc();
04067 mimebuf_f = TRUE;
04068 return c1;
04069 }
04070
04071 static nkf_char
04072 mime_begin(FILE *f)
04073 {
04074 nkf_char c1;
04075 int i,k;
04076
04077
04078
04079
04080
04081 k = mime_input_state.last;
04082 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
04083 for(i=2;i<MAXRECOVER;i++) {
04084
04085 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04086 if (c1==LF||c1==SP||c1==CR||
04087 c1=='-'||c1=='_'||is_alnum(c1)) continue;
04088 if (c1=='=') {
04089
04090 (*i_ungetc)(c1,f);
04091 mime_input_state.last--;
04092 break;
04093 }
04094 if (c1!='?') break;
04095 else {
04096
04097 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04098 if (!(++i<MAXRECOVER) || c1==EOF) break;
04099 if (c1=='b'||c1=='B') {
04100 mime_decode_mode = 'B';
04101 } else if (c1=='q'||c1=='Q') {
04102 mime_decode_mode = 'Q';
04103 } else {
04104 break;
04105 }
04106 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04107 if (!(++i<MAXRECOVER) || c1==EOF) break;
04108 if (c1!='?') {
04109 mime_decode_mode = FALSE;
04110 }
04111 break;
04112 }
04113 }
04114 switch_mime_getc();
04115 if (!mime_decode_mode) {
04116
04117 mime_decode_mode = 1;
04118
04119
04120 return c1;
04121 }
04122
04123 mime_input_state.last = k;
04124
04125 return c1;
04126 }
04127
04128 #ifdef CHECK_OPTION
04129 static void
04130 no_putc(nkf_char c)
04131 {
04132 ;
04133 }
04134
04135 static void
04136 debug(const char *str)
04137 {
04138 if (debug_f){
04139 fprintf(stderr, "%s\n", str ? str : "NULL");
04140 }
04141 }
04142 #endif
04143
04144 static void
04145 set_input_codename(const char *codename)
04146 {
04147 if (!input_codename) {
04148 input_codename = codename;
04149 } else if (strcmp(codename, input_codename) != 0) {
04150 input_codename = "";
04151 }
04152 }
04153
04154 static const char*
04155 get_guessed_code(void)
04156 {
04157 if (input_codename && !*input_codename) {
04158 input_codename = "BINARY";
04159 } else {
04160 struct input_code *p = find_inputcode_byfunc(iconv);
04161 if (!input_codename) {
04162 input_codename = "ASCII";
04163 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
04164 if (p->score & (SCORE_DEPEND|SCORE_CP932))
04165 input_codename = "CP932";
04166 } else if (strcmp(input_codename, "EUC-JP") == 0) {
04167 if (p->score & (SCORE_X0212))
04168 input_codename = "EUCJP-MS";
04169 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04170 input_codename = "CP51932";
04171 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
04172 if (p->score & (SCORE_KANA))
04173 input_codename = "CP50221";
04174 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04175 input_codename = "CP50220";
04176 }
04177 }
04178 return input_codename;
04179 }
04180
04181 #if !defined(PERL_XS) && !defined(WIN32DLL)
04182 static void
04183 print_guessed_code(char *filename)
04184 {
04185 if (filename != NULL) printf("%s: ", filename);
04186 if (input_codename && !*input_codename) {
04187 printf("BINARY\n");
04188 } else {
04189 input_codename = get_guessed_code();
04190 if (guess_f == 1) {
04191 printf("%s\n", input_codename);
04192 } else {
04193 printf("%s%s\n",
04194 input_codename,
04195 input_eol == CR ? " (CR)" :
04196 input_eol == LF ? " (LF)" :
04197 input_eol == CRLF ? " (CRLF)" :
04198 input_eol == EOF ? " (MIXED NL)" :
04199 "");
04200 }
04201 }
04202 }
04203 #endif
04204
04205 #ifdef INPUT_OPTION
04206
04207 static nkf_char
04208 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
04209 {
04210 nkf_char c1, c2, c3;
04211 c1 = (*g)(f);
04212 if (c1 != ch){
04213 return c1;
04214 }
04215 c2 = (*g)(f);
04216 if (!nkf_isxdigit(c2)){
04217 (*u)(c2, f);
04218 return c1;
04219 }
04220 c3 = (*g)(f);
04221 if (!nkf_isxdigit(c3)){
04222 (*u)(c2, f);
04223 (*u)(c3, f);
04224 return c1;
04225 }
04226 return (hex2bin(c2) << 4) | hex2bin(c3);
04227 }
04228
04229 static nkf_char
04230 cap_getc(FILE *f)
04231 {
04232 return hex_getc(':', f, i_cgetc, i_cungetc);
04233 }
04234
04235 static nkf_char
04236 cap_ungetc(nkf_char c, FILE *f)
04237 {
04238 return (*i_cungetc)(c, f);
04239 }
04240
04241 static nkf_char
04242 url_getc(FILE *f)
04243 {
04244 return hex_getc('%', f, i_ugetc, i_uungetc);
04245 }
04246
04247 static nkf_char
04248 url_ungetc(nkf_char c, FILE *f)
04249 {
04250 return (*i_uungetc)(c, f);
04251 }
04252 #endif
04253
04254 #ifdef NUMCHAR_OPTION
04255 static nkf_char
04256 numchar_getc(FILE *f)
04257 {
04258 nkf_char (*g)(FILE *) = i_ngetc;
04259 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
04260 int i = 0, j;
04261 nkf_char buf[12];
04262 long c = -1;
04263
04264 buf[i] = (*g)(f);
04265 if (buf[i] == '&'){
04266 buf[++i] = (*g)(f);
04267 if (buf[i] == '#'){
04268 c = 0;
04269 buf[++i] = (*g)(f);
04270 if (buf[i] == 'x' || buf[i] == 'X'){
04271 for (j = 0; j < 7; j++){
04272 buf[++i] = (*g)(f);
04273 if (!nkf_isxdigit(buf[i])){
04274 if (buf[i] != ';'){
04275 c = -1;
04276 }
04277 break;
04278 }
04279 c <<= 4;
04280 c |= hex2bin(buf[i]);
04281 }
04282 }else{
04283 for (j = 0; j < 8; j++){
04284 if (j){
04285 buf[++i] = (*g)(f);
04286 }
04287 if (!nkf_isdigit(buf[i])){
04288 if (buf[i] != ';'){
04289 c = -1;
04290 }
04291 break;
04292 }
04293 c *= 10;
04294 c += hex2bin(buf[i]);
04295 }
04296 }
04297 }
04298 }
04299 if (c != -1){
04300 return nkf_char_unicode_new(c);
04301 }
04302 while (i > 0){
04303 (*u)(buf[i], f);
04304 --i;
04305 }
04306 return buf[0];
04307 }
04308
04309 static nkf_char
04310 numchar_ungetc(nkf_char c, FILE *f)
04311 {
04312 return (*i_nungetc)(c, f);
04313 }
04314 #endif
04315
04316 #ifdef UNICODE_NORMALIZATION
04317
/*
 * Input filter that replaces byte sequences found in the `nfd` column of
 * normalization_table[] by the corresponding `nfc` bytes.
 *
 * Bytes are collected in nkf_state->nfc_buf and compared against table
 * entries located by binary search; more input is read on demand while an
 * entry is being compared.  Whatever ends up in the buffer (the
 * replacement, or the unmatched bytes) is pushed back through
 * i_nfc_ungetc except for its first byte, which is returned.
 *
 * EOF, values above 0xFF and bytes of the form 10xxxxxx are returned
 * immediately without any lookup.
 */
static nkf_char
nfc_getc(FILE *f)
{
    nkf_char (*g)(FILE *f) = i_nfc_getc;
    nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
    nkf_buf_t *buf = nkf_state->nfc_buf;
    const unsigned char *array;
    int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
    nkf_char c = (*g)(f);

    if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;

    nkf_buf_push(buf, c);
    do {
        /* Binary search over the table, keyed on the bytes buffered so far. */
        while (lower <= upper) {
            int mid = (lower+upper) / 2;
            int len;
            array = normalization_table[mid].nfd;
            /* Compare entry `mid` byte by byte; len stays > 0 only on a full match. */
            for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
                if (len >= nkf_buf_length(buf)) {
                    /* Entry is longer than what is buffered: read one more byte. */
                    c = (*g)(f);
                    if (c == EOF) {
                        /* Make lower > upper so that both loops terminate. */
                        len = 0;
                        lower = 1, upper = 0;
                        break;
                    }
                    nkf_buf_push(buf, c);
                }
                if (array[len] != nkf_buf_at(buf, len)) {
                    if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
                    else  upper = mid - 1;
                    len = 0;
                    break;
                }
            }
            if (len > 0) {
                /* Full match: replace the buffer contents by the nfc bytes. */
                int i;
                array = normalization_table[mid].nfc;
                nkf_buf_clear(buf);
                for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
                    nkf_buf_push(buf, array[i]);
                break;
            }
        }
        /* After a match lower <= upper still holds, so the search runs again
         * on the replaced buffer contents. */
    } while (lower <= upper);

    /* Push back everything except the first buffered byte, last byte first. */
    while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
    c = nkf_buf_pop(buf);

    return c;
}
04369
04370 static nkf_char
04371 nfc_ungetc(nkf_char c, FILE *f)
04372 {
04373 return (*i_nfc_ungetc)(c, f);
04374 }
04375 #endif
04376
04377
04378 static nkf_char
04379 base64decode(nkf_char c)
04380 {
04381 int i;
04382 if (c > '@') {
04383 if (c < '[') {
04384 i = c - 'A';
04385 } else if (c == '_') {
04386 i = '?' ;
04387 } else {
04388 i = c - 'G' ;
04389 }
04390 } else if (c > '/') {
04391 i = c - '0' + '4' ;
04392 } else if (c == '+' || c == '-') {
04393 i = '>' ;
04394 } else {
04395 i = '?' ;
04396 }
04397 return (i);
04398 }
04399
/*
 * Input function installed while MIME decoding is active.
 *
 * Order of processing:
 *   1. Bytes queued in the MIME buffer (top != last) are returned first.
 *   2. With mime_decode_mode 1 or FALSE the MIME reader uninstalls itself
 *      and reads through the restored i_getc.
 *   3. mime_decode_mode 'Q': "=XX" is decoded from hexadecimal, '_' becomes
 *      SP (except with FIXED_MIME), "?=" ends the encoded-word.
 *   4. mime_decode_mode 'B': four characters are read and decoded with
 *      base64decode(); the decoded bytes are queued in the MIME buffer and
 *      the first one is returned.  "?=" ends the encoded-word.
 *
 * After "?=" the following white space is collected in a temporary buffer.
 * It is pushed back to the input unless the next non-blank character is
 * '='.
 *
 * exit_mode is the mode to fall back to when decoding stops: the current
 * mode for FIXED_MIME, FALSE otherwise.
 */
static nkf_char
mime_getc(FILE *f)
{
    nkf_char c1, c2, c3, c4, cc;
    nkf_char t1, t2, t3, t4, mode, exit_mode;
    nkf_char lwsp_count;
    char    *lwsp_buf;
    char    *lwsp_buf_new;
    nkf_char lwsp_size = 128;

    /* 1. Serve queued bytes. */
    if (mime_input_state.top != mime_input_state.last) {
        return  mime_input_buf(mime_input_state.top++);
    }
    /* 2. Not decoding (any more): hand control back to the plain reader. */
    if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
        mime_decode_mode=FALSE;
        unswitch_mime_getc();
        return (*i_getc)(f);
    }

    if (mimebuf_f == FIXED_MIME)
        exit_mode = mime_decode_mode;
    else
        exit_mode = FALSE;
    /* 3. 'Q' decoding. */
    if (mime_decode_mode == 'Q') {
        if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
      restart_mime_q:
        if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
        if (c1<=SP || DEL<=c1) {
            /* Control, space or non-ASCII character: leave 'Q' mode. */
            mime_decode_mode = exit_mode;
            return c1;
        }
        if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
            return c1;
        }

        /* c1 is '=' or '?': the meaning depends on the next character. */
        mime_decode_mode = exit_mode;
        if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
        if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
            /* "?=": end of the encoded-word; collect trailing white space. */
            input_mode = exit_mode;
            lwsp_count = 0;
            lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
            while ((c1=(*i_getc)(f))!=EOF) {
                switch (c1) {
                case LF:
                case CR:
                    if (c1==LF) {
                        /* LF followed by a blank is treated as one SP. */
                        if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                            i_ungetc(SP,f);
                            continue;
                        } else {
                            i_ungetc(c1,f);
                        }
                        c1 = LF;
                    } else {
                        /* CR LF followed by a blank is treated as one SP. */
                        if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
                            if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                                i_ungetc(SP,f);
                                continue;
                            } else {
                                i_ungetc(c1,f);
                            }
                            i_ungetc(LF,f);
                        } else {
                            i_ungetc(c1,f);
                        }
                        c1 = CR;
                    }
                    break;
                case SP:
                case TAB:
                    lwsp_buf[lwsp_count] = (unsigned char)c1;
                    /* Grow the buffer (doubling) once it is exceeded. */
                    if (lwsp_count++>lwsp_size){
                        lwsp_size <<= 1;
                        lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
                        lwsp_buf = lwsp_buf_new;
                    }
                    continue;
                }
                break;
            }
            /* Give the white space back unless a '=' directly follows it. */
            if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
                i_ungetc(c1,f);
                for(lwsp_count--;lwsp_count>0;lwsp_count--)
                    i_ungetc(lwsp_buf[lwsp_count],f);
                c1 = lwsp_buf[0];
            }
            nkf_xfree(lwsp_buf);
            return c1;
        }
        if (c1=='='&&c2<SP) {
            /* '=' followed by a control character: skip up to the next
             * character above SP and continue in 'Q' mode. */
            while((c1 =  (*i_mgetc)(f)) <=SP) {
                if (c1 == EOF) return (EOF);
            }
            mime_decode_mode = 'Q';
            goto restart_mime_q;
        }
        if (c1=='?') {
            /* A lone '?': return it and keep decoding. */
            mime_decode_mode = 'Q';
            (*i_mungetc)(c2,f);
            return c1;
        }
        /* "=XX": c2 and c3 are the two hexadecimal digits. */
        if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
        if (c2<=SP) return c2;
        mime_decode_mode = 'Q';
        return ((hex2bin(c2)<<4) + hex2bin(c3));
    }

    if (mime_decode_mode != 'B') {
        mime_decode_mode = FALSE;
        return (*i_mgetc)(f);
    }

    /* 4. 'B' decoding.  The mode is dropped to exit_mode while the four
     * input characters are read and restored once all four are there. */
    mode = mime_decode_mode;
    mime_decode_mode = exit_mode;

    /* Skip characters up to SP before the first one. */
    while ((c1 = (*i_mgetc)(f))<=SP) {
        if (c1==EOF)
            return (EOF);
    }
  mime_c2_retry:
    if ((c2 = (*i_mgetc)(f))<=SP) {
        if (c2==EOF)
            return (EOF);
        /* Non-strict mode skips the character and reads again. */
        if (mime_f != STRICT_MIME) goto mime_c2_retry;
        if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
        return c2;
    }
    if ((c1 == '?') && (c2 == '=')) {
        /* "?=": end of the encoded-word; collect trailing white space. */
        input_mode = ASCII;
        lwsp_count = 0;
        lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
        while ((c1=(*i_getc)(f))!=EOF) {
            switch (c1) {
            case LF:
            case CR:
                if (c1==LF) {
                    if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                        i_ungetc(SP,f);
                        continue;
                    } else {
                        i_ungetc(c1,f);
                    }
                    c1 = LF;
                } else {
                    /* NOTE(review): unlike the 'Q' branch above, the character
                     * after CR is not compared with LF here; when it is not
                     * SP, a further character is read and LF is pushed back
                     * in its place.  Confirm this is intended. */
                    if ((c1=(*i_getc)(f))!=EOF) {
                        if (c1==SP) {
                            i_ungetc(SP,f);
                            continue;
                        } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                            i_ungetc(SP,f);
                            continue;
                        } else {
                            i_ungetc(c1,f);
                        }
                        i_ungetc(LF,f);
                    } else {
                        i_ungetc(c1,f);
                    }
                    c1 = CR;
                }
                break;
            case SP:
            case TAB:
                lwsp_buf[lwsp_count] = (unsigned char)c1;
                /* Grow the buffer (doubling) once it is exceeded. */
                if (lwsp_count++>lwsp_size){
                    lwsp_size <<= 1;
                    lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
                    lwsp_buf = lwsp_buf_new;
                }
                continue;
            }
            break;
        }
        /* Give the white space back unless a '=' directly follows it. */
        if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
            i_ungetc(c1,f);
            for(lwsp_count--;lwsp_count>0;lwsp_count--)
                i_ungetc(lwsp_buf[lwsp_count],f);
            c1 = lwsp_buf[0];
        }
        nkf_xfree(lwsp_buf);
        return c1;
    }
  mime_c3_retry:
    if ((c3 = (*i_mgetc)(f))<=SP) {
        if (c3==EOF)
            return (EOF);
        if (mime_f != STRICT_MIME) goto mime_c3_retry;
        if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
        return c3;
    }
  mime_c4_retry:
    if ((c4 = (*i_mgetc)(f))<=SP) {
        if (c4==EOF)
            return (EOF);
        if (mime_f != STRICT_MIME) goto mime_c4_retry;
        if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
        return c4;
    }

    mime_decode_mode = mode;

    /* Four 6-bit values -> up to three bytes; '=' in c2..c4 marks padding. */
    t1 = 0x3f & base64decode(c1);
    t2 = 0x3f & base64decode(c2);
    t3 = 0x3f & base64decode(c3);
    t4 = 0x3f & base64decode(c4);
    cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
    if (c2 != '=') {
        mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
        cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
        if (c3 != '=') {
            mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
            cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
            if (c4 != '=')
                mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
        }
    } else {
        return c1;
    }
    /* Return the first of the bytes just queued. */
    return  mime_input_buf(mime_input_state.top++);
}
04632
/* Base64 alphabet, indexed by 6-bit value. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Capacity of the pending-output buffer filled by mime_putc(). */
#define MIMEOUT_BUF_LENGTH 74
static struct {
    char buf[MIMEOUT_BUF_LENGTH+1];   /* characters held back before they are emitted */
    int count;                        /* number of characters currently held in buf */
} mimeout_state;
04641
04642
04643
04644 static void
04645 open_mime(nkf_char mode)
04646 {
04647 const unsigned char *p;
04648 int i;
04649 int j;
04650 p = mime_pattern[0];
04651 for(i=0;mime_pattern[i];i++) {
04652 if (mode == mime_encode[i]) {
04653 p = mime_pattern[i];
04654 break;
04655 }
04656 }
04657 mimeout_mode = mime_encode_method[i];
04658 i = 0;
04659 if (base64_count>45) {
04660 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
04661 (*o_mputc)(mimeout_state.buf[i]);
04662 i++;
04663 }
04664 put_newline(o_mputc);
04665 (*o_mputc)(SP);
04666 base64_count = 1;
04667 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
04668 i++;
04669 }
04670 }
04671 for (;i<mimeout_state.count;i++) {
04672 if (nkf_isspace(mimeout_state.buf[i])) {
04673 (*o_mputc)(mimeout_state.buf[i]);
04674 base64_count ++;
04675 } else {
04676 break;
04677 }
04678 }
04679 while(*p) {
04680 (*o_mputc)(*p++);
04681 base64_count ++;
04682 }
04683 j = mimeout_state.count;
04684 mimeout_state.count = 0;
04685 for (;i<j;i++) {
04686 mime_putc(mimeout_state.buf[i]);
04687 }
04688 }
04689
04690 static void
04691 mime_prechar(nkf_char c2, nkf_char c1)
04692 {
04693 if (mimeout_mode > 0){
04694 if (c2 == EOF){
04695 if (base64_count + mimeout_state.count/3*4> 73){
04696 (*o_base64conv)(EOF,0);
04697 oconv_newline(o_base64conv);
04698 (*o_base64conv)(0,SP);
04699 base64_count = 1;
04700 }
04701 } else {
04702 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
04703 (*o_base64conv)(EOF,0);
04704 oconv_newline(o_base64conv);
04705 (*o_base64conv)(0,SP);
04706 base64_count = 1;
04707 mimeout_mode = -1;
04708 }
04709 }
04710 } else if (c2) {
04711 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
04712 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
04713 open_mime(output_mode);
04714 (*o_base64conv)(EOF,0);
04715 oconv_newline(o_base64conv);
04716 (*o_base64conv)(0,SP);
04717 base64_count = 1;
04718 mimeout_mode = -1;
04719 }
04720 }
04721 }
04722
04723 static void
04724 close_mime(void)
04725 {
04726 (*o_mputc)('?');
04727 (*o_mputc)('=');
04728 base64_count += 2;
04729 mimeout_mode = 0;
04730 }
04731
04732 static void
04733 eof_mime(void)
04734 {
04735 switch(mimeout_mode) {
04736 case 'Q':
04737 case 'B':
04738 break;
04739 case 2:
04740 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
04741 (*o_mputc)('=');
04742 (*o_mputc)('=');
04743 base64_count += 3;
04744 break;
04745 case 1:
04746 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
04747 (*o_mputc)('=');
04748 base64_count += 2;
04749 break;
04750 }
04751 if (mimeout_mode > 0) {
04752 if (mimeout_f!=FIXED_MIME) {
04753 close_mime();
04754 } else if (mimeout_mode != 'Q')
04755 mimeout_mode = 'B';
04756 }
04757 }
04758
04759 static void
04760 mimeout_addchar(nkf_char c)
04761 {
04762 switch(mimeout_mode) {
04763 case 'Q':
04764 if (c==CR||c==LF) {
04765 (*o_mputc)(c);
04766 base64_count = 0;
04767 } else if(!nkf_isalnum(c)) {
04768 (*o_mputc)('=');
04769 (*o_mputc)(bin2hex(((c>>4)&0xf)));
04770 (*o_mputc)(bin2hex((c&0xf)));
04771 base64_count += 3;
04772 } else {
04773 (*o_mputc)(c);
04774 base64_count++;
04775 }
04776 break;
04777 case 'B':
04778 nkf_state->mimeout_state=c;
04779 (*o_mputc)(basis_64[c>>2]);
04780 mimeout_mode=2;
04781 base64_count ++;
04782 break;
04783 case 2:
04784 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
04785 nkf_state->mimeout_state=c;
04786 mimeout_mode=1;
04787 base64_count ++;
04788 break;
04789 case 1:
04790 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
04791 (*o_mputc)(basis_64[c & 0x3F]);
04792 mimeout_mode='B';
04793 base64_count += 2;
04794 break;
04795 default:
04796 (*o_mputc)(c);
04797 base64_count++;
04798 break;
04799 }
04800 }
04801
04802 static void
04803 mime_putc(nkf_char c)
04804 {
04805 int i, j;
04806 nkf_char lastchar;
04807
04808 if (mimeout_f == FIXED_MIME){
04809 if (mimeout_mode == 'Q'){
04810 if (base64_count > 71){
04811 if (c!=CR && c!=LF) {
04812 (*o_mputc)('=');
04813 put_newline(o_mputc);
04814 }
04815 base64_count = 0;
04816 }
04817 }else{
04818 if (base64_count > 71){
04819 eof_mime();
04820 put_newline(o_mputc);
04821 base64_count = 0;
04822 }
04823 if (c == EOF) {
04824 eof_mime();
04825 }
04826 }
04827 if (c != EOF) {
04828 mimeout_addchar(c);
04829 }
04830 return;
04831 }
04832
04833
04834
04835 if (c == EOF) {
04836 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
04837 j = mimeout_state.count;
04838 mimeout_state.count = 0;
04839 i = 0;
04840 if (mimeout_mode > 0) {
04841 if (!nkf_isblank(mimeout_state.buf[j-1])) {
04842 for (;i<j;i++) {
04843 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
04844 break;
04845 }
04846 mimeout_addchar(mimeout_state.buf[i]);
04847 }
04848 eof_mime();
04849 for (;i<j;i++) {
04850 mimeout_addchar(mimeout_state.buf[i]);
04851 }
04852 } else {
04853 for (;i<j;i++) {
04854 mimeout_addchar(mimeout_state.buf[i]);
04855 }
04856 eof_mime();
04857 }
04858 } else {
04859 for (;i<j;i++) {
04860 mimeout_addchar(mimeout_state.buf[i]);
04861 }
04862 }
04863 return;
04864 }
04865
04866 if (mimeout_state.count > 0){
04867 lastchar = mimeout_state.buf[mimeout_state.count - 1];
04868 }else{
04869 lastchar = -1;
04870 }
04871
04872 if (mimeout_mode=='Q') {
04873 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
04874 if (c == CR || c == LF) {
04875 close_mime();
04876 (*o_mputc)(c);
04877 base64_count = 0;
04878 return;
04879 } else if (c <= SP) {
04880 close_mime();
04881 if (base64_count > 70) {
04882 put_newline(o_mputc);
04883 base64_count = 0;
04884 }
04885 if (!nkf_isblank(c)) {
04886 (*o_mputc)(SP);
04887 base64_count++;
04888 }
04889 } else {
04890 if (base64_count > 70) {
04891 close_mime();
04892 put_newline(o_mputc);
04893 (*o_mputc)(SP);
04894 base64_count = 1;
04895 open_mime(output_mode);
04896 }
04897 if (!nkf_noescape_mime(c)) {
04898 mimeout_addchar(c);
04899 return;
04900 }
04901 }
04902 if (c != 0x1B) {
04903 (*o_mputc)(c);
04904 base64_count++;
04905 return;
04906 }
04907 }
04908 }
04909
04910 if (mimeout_mode <= 0) {
04911 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
04912 output_mode == UTF_8)) {
04913 if (nkf_isspace(c)) {
04914 int flag = 0;
04915 if (mimeout_mode == -1) {
04916 flag = 1;
04917 }
04918 if (c==CR || c==LF) {
04919 if (flag) {
04920 open_mime(output_mode);
04921 output_mode = 0;
04922 } else {
04923 base64_count = 0;
04924 }
04925 }
04926 for (i=0;i<mimeout_state.count;i++) {
04927 (*o_mputc)(mimeout_state.buf[i]);
04928 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
04929 base64_count = 0;
04930 }else{
04931 base64_count++;
04932 }
04933 }
04934 if (flag) {
04935 eof_mime();
04936 base64_count = 0;
04937 mimeout_mode = 0;
04938 }
04939 mimeout_state.buf[0] = (char)c;
04940 mimeout_state.count = 1;
04941 }else{
04942 if (base64_count > 1
04943 && base64_count + mimeout_state.count > 76
04944 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
04945 static const char *str = "boundary=\"";
04946 static int len = 10;
04947 i = 0;
04948
04949 for (; i < mimeout_state.count - len; ++i) {
04950 if (!strncmp(mimeout_state.buf+i, str, len)) {
04951 i += len - 2;
04952 break;
04953 }
04954 }
04955
04956 if (i == 0 || i == mimeout_state.count - len) {
04957 put_newline(o_mputc);
04958 base64_count = 0;
04959 if (!nkf_isspace(mimeout_state.buf[0])){
04960 (*o_mputc)(SP);
04961 base64_count++;
04962 }
04963 }
04964 else {
04965 int j;
04966 for (j = 0; j <= i; ++j) {
04967 (*o_mputc)(mimeout_state.buf[j]);
04968 }
04969 put_newline(o_mputc);
04970 base64_count = 1;
04971 for (; j <= mimeout_state.count; ++j) {
04972 mimeout_state.buf[j - i] = mimeout_state.buf[j];
04973 }
04974 mimeout_state.count -= i;
04975 }
04976 }
04977 mimeout_state.buf[mimeout_state.count++] = (char)c;
04978 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
04979 open_mime(output_mode);
04980 }
04981 }
04982 return;
04983 }else{
04984 if (lastchar==CR || lastchar == LF){
04985 for (i=0;i<mimeout_state.count;i++) {
04986 (*o_mputc)(mimeout_state.buf[i]);
04987 }
04988 base64_count = 0;
04989 mimeout_state.count = 0;
04990 }
04991 if (lastchar==SP) {
04992 for (i=0;i<mimeout_state.count-1;i++) {
04993 (*o_mputc)(mimeout_state.buf[i]);
04994 base64_count++;
04995 }
04996 mimeout_state.buf[0] = SP;
04997 mimeout_state.count = 1;
04998 }
04999 open_mime(output_mode);
05000 }
05001 }else{
05002
05003 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05004 output_mode == UTF_8)) {
05005 if (lastchar == CR || lastchar == LF){
05006 if (nkf_isblank(c)) {
05007 for (i=0;i<mimeout_state.count;i++) {
05008 mimeout_addchar(mimeout_state.buf[i]);
05009 }
05010 mimeout_state.count = 0;
05011 } else {
05012 eof_mime();
05013 for (i=0;i<mimeout_state.count;i++) {
05014 (*o_mputc)(mimeout_state.buf[i]);
05015 }
05016 base64_count = 0;
05017 mimeout_state.count = 0;
05018 }
05019 mimeout_state.buf[mimeout_state.count++] = (char)c;
05020 return;
05021 }
05022 if (nkf_isspace(c)) {
05023 for (i=0;i<mimeout_state.count;i++) {
05024 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
05025 eof_mime();
05026 for (i=0;i<mimeout_state.count;i++) {
05027 (*o_mputc)(mimeout_state.buf[i]);
05028 base64_count++;
05029 }
05030 mimeout_state.count = 0;
05031 }
05032 }
05033 mimeout_state.buf[mimeout_state.count++] = (char)c;
05034 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05035 eof_mime();
05036 for (i=0;i<mimeout_state.count;i++) {
05037 (*o_mputc)(mimeout_state.buf[i]);
05038 base64_count++;
05039 }
05040 mimeout_state.count = 0;
05041 }
05042 return;
05043 }
05044 if (mimeout_state.count>0 && SP<c && c!='=') {
05045 mimeout_state.buf[mimeout_state.count++] = (char)c;
05046 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05047 j = mimeout_state.count;
05048 mimeout_state.count = 0;
05049 for (i=0;i<j;i++) {
05050 mimeout_addchar(mimeout_state.buf[i]);
05051 }
05052 }
05053 return;
05054 }
05055 }
05056 }
05057 if (mimeout_state.count>0) {
05058 j = mimeout_state.count;
05059 mimeout_state.count = 0;
05060 for (i=0;i<j;i++) {
05061 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
05062 break;
05063 mimeout_addchar(mimeout_state.buf[i]);
05064 }
05065 if (i<j) {
05066 eof_mime();
05067 base64_count=0;
05068 for (;i<j;i++) {
05069 (*o_mputc)(mimeout_state.buf[i]);
05070 }
05071 open_mime(output_mode);
05072 }
05073 }
05074 mimeout_addchar(c);
05075 }
05076
05077 static void
05078 base64_conv(nkf_char c2, nkf_char c1)
05079 {
05080 mime_prechar(c2, c1);
05081 (*o_base64conv)(c2,c1);
05082 }
05083
#ifdef HAVE_ICONV_H
/*
 * Streaming wrapper around iconv(3).
 *
 * cd                  conversion descriptor from iconv_open()
 * input_buffer        staging area for bytes read from the input stream
 * input_buffer_size   capacity of input_buffer in bytes
 * output_buffer       staging area for converted bytes
 * output_buffer_size  capacity of output_buffer in bytes
 */
typedef struct nkf_iconv_t {
    iconv_t cd;
    char *input_buffer;
    size_t input_buffer_size;
    char *output_buffer;
    size_t output_buffer_size;
} nkf_iconv_t;

/*
 * Create a converter from encoding `fromcode` to encoding `tocode`.
 *
 * Returns a heap-allocated converter that must be released with
 * nkf_iconv_close(), or NULL (after printing a diagnostic on stderr) when
 * iconv_open() fails.
 *
 * NOTE(review): the previous declaration returned the struct by value but
 * used `->` on it and returned -1, so it could not compile; a pointer is
 * returned here to match nkf_iconv_convert() and nkf_iconv_close().
 */
static nkf_iconv_t *
nkf_iconv_new(char *tocode, char *fromcode)
{
    nkf_iconv_t *converter = nkf_xmalloc(sizeof *converter);

    converter->input_buffer_size = IOBUF_SIZE;
    converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
    converter->output_buffer_size = IOBUF_SIZE * 2;
    converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
    converter->cd = iconv_open(tocode, fromcode);
    if (converter->cd == (iconv_t)-1) {
        if (errno == EINVAL) {
            fprintf(stderr, "iconv doesn't support %s to %s conversion.\n",
                    fromcode, tocode);
        } else {
            perror("can't iconv_open");
        }
        nkf_xfree(converter->output_buffer);
        nkf_xfree(converter->input_buffer);
        nkf_xfree(converter);
        return NULL;
    }
    return converter;
}

/*
 * Convert everything readable from `input` (through i_getc) and write the
 * result byte by byte through o_putc.
 *
 * Returns the number of non-reversible conversions reported by iconv(), or
 * (size_t)-1 after printing a diagnostic when the conversion fails.
 */
static size_t
nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
{
    size_t invalid = 0;
    size_t input_length = 0;    /* unconverted bytes at the buffer start */
    nkf_char c = 0;             /* last value returned by i_getc */

    for (;;) {
        char *input_ptr;
        char *output_ptr;
        size_t output_left;
        size_t produced;
        size_t k;
        size_t ret;
        int saved_errno;

        /* top up the input buffer until it is full or the stream ends */
        if (c != EOF) {
            while (input_length < converter->input_buffer_size &&
                   (c = (*i_getc)(input)) != EOF) {
                converter->input_buffer[input_length++] = (char)c;
            }
        }
        if (input_length == 0) {
            break;              /* stream ended and nothing is pending */
        }

        input_ptr = converter->input_buffer;
        output_ptr = converter->output_buffer;
        output_left = converter->output_buffer_size;
        errno = 0;
        ret = iconv(converter->cd, &input_ptr, &input_length,
                    &output_ptr, &output_left);
        saved_errno = errno;    /* o_putc below may disturb errno */

        /* emit what was converted, in order, from the buffer start */
        produced = converter->output_buffer_size - output_left;
        for (k = 0; k < produced; k++) {
            (*o_putc)((unsigned char)converter->output_buffer[k]);
        }

        /* keep the unconverted tail for the next round */
        if (input_length > 0 && input_ptr != converter->input_buffer) {
            memmove(converter->input_buffer, input_ptr, input_length);
        }

        if (ret != (size_t)-1) {
            invalid += ret;
            continue;
        }

        switch (saved_errno) {
        case EINVAL:
            /* incomplete multibyte sequence at the end of the buffer:
               read more, unless no more bytes can be added */
            if (c == EOF || input_length == converter->input_buffer_size) {
                errno = saved_errno;
                perror("can't iconv");
                return (size_t)-1;
            }
            break;
        case E2BIG:
            /* the output buffer has just been drained, so a retry normally
               succeeds; grow only if no progress was possible at all */
            if (produced == 0) {
                size_t grown_size = converter->output_buffer_size * 2;
                /* presumably nkf_xmalloc() wraps malloc(), as the original
                   code also passed this buffer to realloc() -- confirm */
                char *grown = realloc(converter->output_buffer, grown_size);
                if (grown == NULL) {
                    perror("can't realloc");
                    return (size_t)-1;
                }
                converter->output_buffer = grown;
                converter->output_buffer_size = grown_size;
            }
            break;
        default:
            errno = saved_errno;
            perror("can't iconv");
            return (size_t)-1;
        }
    }

    /* flush any pending shift sequence of a stateful target encoding */
    {
        char *output_ptr = converter->output_buffer;
        size_t output_left = converter->output_buffer_size;
        size_t produced;
        size_t k;

        if (iconv(converter->cd, NULL, NULL, &output_ptr, &output_left)
            == (size_t)-1) {
            perror("can't iconv");
            return (size_t)-1;
        }
        produced = converter->output_buffer_size - output_left;
        for (k = 0; k < produced; k++) {
            (*o_putc)((unsigned char)converter->output_buffer[k]);
        }
    }

    return invalid;
}


/*
 * Release a converter created by nkf_iconv_new(). NULL is accepted.
 */
static void
nkf_iconv_close(nkf_iconv_t *convert)
{
    if (convert == NULL) {
        return;
    }
    nkf_xfree(convert->input_buffer);
    nkf_xfree(convert->output_buffer);
    iconv_close(convert->cd);
    nkf_xfree(convert);
}
#endif
05173
05174
05175 static void
05176 reinit(void)
05177 {
05178 {
05179 struct input_code *p = input_code_list;
05180 while (p->name){
05181 status_reinit(p++);
05182 }
05183 }
05184 unbuf_f = FALSE;
05185 estab_f = FALSE;
05186 nop_f = FALSE;
05187 binmode_f = TRUE;
05188 rot_f = FALSE;
05189 hira_f = FALSE;
05190 alpha_f = FALSE;
05191 mime_f = MIME_DECODE_DEFAULT;
05192 mime_decode_f = FALSE;
05193 mimebuf_f = FALSE;
05194 broken_f = FALSE;
05195 iso8859_f = FALSE;
05196 mimeout_f = FALSE;
05197 x0201_f = NKF_UNSPECIFIED;
05198 iso2022jp_f = FALSE;
05199 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
05200 ms_ucs_map_f = UCS_MAP_ASCII;
05201 #endif
05202 #ifdef UTF8_INPUT_ENABLE
05203 no_cp932ext_f = FALSE;
05204 no_best_fit_chars_f = FALSE;
05205 encode_fallback = NULL;
05206 unicode_subchar = '?';
05207 input_endian = ENDIAN_BIG;
05208 #endif
05209 #ifdef UTF8_OUTPUT_ENABLE
05210 output_bom_f = FALSE;
05211 output_endian = ENDIAN_BIG;
05212 #endif
05213 #ifdef UNICODE_NORMALIZATION
05214 nfc_f = FALSE;
05215 #endif
05216 #ifdef INPUT_OPTION
05217 cap_f = FALSE;
05218 url_f = FALSE;
05219 numchar_f = FALSE;
05220 #endif
05221 #ifdef CHECK_OPTION
05222 noout_f = FALSE;
05223 debug_f = FALSE;
05224 #endif
05225 guess_f = 0;
05226 #ifdef EXEC_IO
05227 exec_f = 0;
05228 #endif
05229 #ifdef SHIFTJIS_CP932
05230 cp51932_f = TRUE;
05231 cp932inv_f = TRUE;
05232 #endif
05233 #ifdef X0212_ENABLE
05234 x0212_f = FALSE;
05235 x0213_f = FALSE;
05236 #endif
05237 {
05238 int i;
05239 for (i = 0; i < 256; i++){
05240 prefix_table[i] = 0;
05241 }
05242 }
05243 hold_count = 0;
05244 mimeout_state.count = 0;
05245 mimeout_mode = 0;
05246 base64_count = 0;
05247 f_line = 0;
05248 f_prev = 0;
05249 fold_preserve_f = FALSE;
05250 fold_f = FALSE;
05251 fold_len = 0;
05252 kanji_intro = DEFAULT_J;
05253 ascii_intro = DEFAULT_R;
05254 fold_margin = FOLD_MARGIN;
05255 o_zconv = no_connection;
05256 o_fconv = no_connection;
05257 o_eol_conv = no_connection;
05258 o_rot_conv = no_connection;
05259 o_hira_conv = no_connection;
05260 o_base64conv = no_connection;
05261 o_iso2022jp_check_conv = no_connection;
05262 o_putc = std_putc;
05263 i_getc = std_getc;
05264 i_ungetc = std_ungetc;
05265 i_bgetc = std_getc;
05266 i_bungetc = std_ungetc;
05267 o_mputc = std_putc;
05268 i_mgetc = std_getc;
05269 i_mungetc = std_ungetc;
05270 i_mgetc_buf = std_getc;
05271 i_mungetc_buf = std_ungetc;
05272 output_mode = ASCII;
05273 input_mode = ASCII;
05274 mime_decode_mode = FALSE;
05275 file_out_f = FALSE;
05276 eolmode_f = 0;
05277 input_eol = 0;
05278 prev_cr = 0;
05279 option_mode = 0;
05280 z_prev2=0,z_prev1=0;
05281 #ifdef CHECK_OPTION
05282 iconv_for_check = 0;
05283 #endif
05284 input_codename = NULL;
05285 input_encoding = NULL;
05286 output_encoding = NULL;
05287 nkf_state_init();
05288 #ifdef WIN32DLL
05289 reinitdll();
05290 #endif
05291 }
05292
05293 static int
05294 module_connection(void)
05295 {
05296 if (input_encoding) set_input_encoding(input_encoding);
05297 if (!output_encoding) {
05298 output_encoding = nkf_default_encoding();
05299 }
05300 if (!output_encoding) {
05301 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
05302 else return -1;
05303 }
05304 set_output_encoding(output_encoding);
05305 oconv = nkf_enc_to_oconv(output_encoding);
05306 o_putc = std_putc;
05307 if (nkf_enc_unicode_p(output_encoding))
05308 output_mode = UTF_8;
05309
05310 if (x0201_f == NKF_UNSPECIFIED) {
05311 x0201_f = X0201_DEFAULT;
05312 }
05313
05314
05315
05316
05317 #ifdef CHECK_OPTION
05318 if (noout_f || guess_f){
05319 o_putc = no_putc;
05320 }
05321 #endif
05322 if (mimeout_f) {
05323 o_mputc = o_putc;
05324 o_putc = mime_putc;
05325 if (mimeout_f == TRUE) {
05326 o_base64conv = oconv; oconv = base64_conv;
05327 }
05328
05329 }
05330
05331 if (eolmode_f || guess_f) {
05332 o_eol_conv = oconv; oconv = eol_conv;
05333 }
05334 if (rot_f) {
05335 o_rot_conv = oconv; oconv = rot_conv;
05336 }
05337 if (iso2022jp_f) {
05338 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
05339 }
05340 if (hira_f) {
05341 o_hira_conv = oconv; oconv = hira_conv;
05342 }
05343 if (fold_f) {
05344 o_fconv = oconv; oconv = fold_conv;
05345 f_line = 0;
05346 }
05347 if (alpha_f || x0201_f) {
05348 o_zconv = oconv; oconv = z_conv;
05349 }
05350
05351 i_getc = std_getc;
05352 i_ungetc = std_ungetc;
05353
05354 #ifdef INPUT_OPTION
05355 if (cap_f){
05356 i_cgetc = i_getc; i_getc = cap_getc;
05357 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
05358 }
05359 if (url_f){
05360 i_ugetc = i_getc; i_getc = url_getc;
05361 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
05362 }
05363 #endif
05364 #ifdef NUMCHAR_OPTION
05365 if (numchar_f){
05366 i_ngetc = i_getc; i_getc = numchar_getc;
05367 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
05368 }
05369 #endif
05370 #ifdef UNICODE_NORMALIZATION
05371 if (nfc_f){
05372 i_nfc_getc = i_getc; i_getc = nfc_getc;
05373 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
05374 }
05375 #endif
05376 if (mime_f && mimebuf_f==FIXED_MIME) {
05377 i_mgetc = i_getc; i_getc = mime_getc;
05378 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
05379 }
05380 if (broken_f & 1) {
05381 i_bgetc = i_getc; i_getc = broken_getc;
05382 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
05383 }
05384 if (input_encoding) {
05385 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
05386 } else {
05387 set_iconv(FALSE, e_iconv);
05388 }
05389
05390 {
05391 struct input_code *p = input_code_list;
05392 while (p->name){
05393 status_reinit(p++);
05394 }
05395 }
05396 return 0;
05397 }
05398
05399
05400
05401
05402
05403 #if !defined(PERL_XS) && !defined(WIN32DLL)
05404 static nkf_char
05405 noconvert(FILE *f)
05406 {
05407 nkf_char c;
05408
05409 if (nop_f == 2)
05410 module_connection();
05411 while ((c = (*i_getc)(f)) != EOF)
05412 (*o_putc)(c);
05413 (*o_putc)(EOF);
05414 return 1;
05415 }
05416 #endif
05417
/*
 * Control-flow shorthands for the read loop of kanji_convert().
 * They rely on that function's local variables c1 and c2.
 *
 * NEXT  start the next loop iteration; c2 is left as it is.
 * SKIP  clear c2, then start the next iteration.
 * MORE  store c1 in c2, then start the next iteration.
 * SEND  no-op statement; control continues after the current if-chain.
 * LAST  break out of the innermost enclosing loop.
 *
 * SKIP and MORE expand to TWO statements: as the unbraced body of an
 * `if`, only the assignment would be conditional.
 */
05418 #define NEXT continue
05419 #define SKIP c2=0;continue
05420 #define MORE c2=c1;continue
05421 #define SEND (void)0
05422 #define LAST break
/*
 * set_input_mode(mode): store `mode` in input_mode, clear the caller's
 * local shift_mode, and call set_input_codename() and debug() with
 * "ISO-2022-JP" whatever `mode` is.
 */
05423 #define set_input_mode(mode) do { \
05424 input_mode = mode; \
05425 shift_mode = 0; \
05426 set_input_codename("ISO-2022-JP"); \
05427 debug("ISO-2022-JP"); \
05428 } while (0)
05429
/*
 * Main conversion loop.
 *
 * Sets up the filter chain with module_connection(), then reads the stream
 * `f` through i_getc, interprets escape sequences, SI/SO, MIME starts and
 * line ends, and passes characters to (*iconv) or (*oconv) according to
 * input_mode.  Ends by calling (*iconv)(EOF, 0, 0).
 *
 * Returns 0 when the input has been consumed, or -1 when
 * module_connection() fails.
 */
05430 static int
05431 kanji_convert(FILE *f)
05432 {
/* c1 is the byte just read and c2 a pending first byte (0 = none);
   c3/c4 hold additional bytes of longer sequences. */
05433 nkf_char c1=0, c2=0, c3=0, c4=0;
/* set to 1 by SO and back to 0 by SI or set_input_mode() */
05434 int shift_mode = 0;
/* set to ISO_8859_1 by "ESC . A" and consulted by "ESC N" */
05435 int g2 = 0;
05436 int is_8bit = FALSE;
05437
/* With an explicit input encoding for which nkf_enc_asciicompat() is
   false, SI, SO and ESC are only interpreted while mime_decode_mode is
   set (see the tests on is_8bit below). */
05438 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
05439 is_8bit = TRUE;
05440 }
05441
05442 input_mode = ASCII;
05443 output_mode = ASCII;
05444
05445 if (module_connection() < 0) {
05446 #if !defined(PERL_XS) && !defined(WIN32DLL)
05447 fprintf(stderr, "no output encoding given\n");
05448 #endif
05449 return -1;
05450 }
05451 check_bom(f);
05452
05453 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: consume the stream four bytes at a time. */
05454 if(iconv == w_iconv32){
05455 while ((c1 = (*i_getc)(f)) != EOF &&
05456 (c2 = (*i_getc)(f)) != EOF &&
05457 (c3 = (*i_getc)(f)) != EOF &&
05458 (c4 = (*i_getc)(f)) != EOF) {
05459 nkf_iconv_utf_32(c1, c2, c3, c4);
05460 }
05461 goto finished;
05462 }
/* UTF-16 input: two bytes at a time; when the converter answers
   NKF_ICONV_NEED_TWO_MORE_BYTES, two more bytes are read and the call is
   repeated with all four. */
05463 else if (iconv == w_iconv16) {
05464 while ((c1 = (*i_getc)(f)) != EOF &&
05465 (c2 = (*i_getc)(f)) != EOF) {
05466 if (nkf_iconv_utf_16(c1, c2, 0, 0) == NKF_ICONV_NEED_TWO_MORE_BYTES &&
05467 (c3 = (*i_getc)(f)) != EOF &&
05468 (c4 = (*i_getc)(f)) != EOF) {
05469 nkf_iconv_utf_16(c1, c2, c3, c4);
05470 }
05471 }
05472 goto finished;
05473 }
05474 #endif
05475
/* Byte-oriented input. */
05476 while ((c1 = (*i_getc)(f)) != EOF) {
/* With INPUT_CODE_FIX, code_status() is only called when no input
   encoding was given. */
05477 #ifdef INPUT_CODE_FIX
05478 if (!input_encoding)
05479 #endif
05480 code_status(c1);
/* A first byte is pending in c2: c1 is the second byte. */
05481 if (c2) {
05482
05483 if (c2 > DEL) {
05484
/* 8-bit first byte while the input code is not established and no MIME
   decoding is active: h_conv() takes over. */
05485 if (!estab_f&&!mime_decode_mode) {
05486
05487
05488 if (h_conv(f, c2, c1)==EOF) {
05489 LAST;
05490 }
05491 else {
05492 SKIP;
05493 }
05494 }
05495 else {
05496
/* A second byte below 0x40 is dropped together with the pending byte. */
05497 if (c1 < 0x40) {
05498
05499 SKIP;
05500 } else {
05501 SEND;
05502 }
05503 }
05504 }
05505 else {
05506
/* 7-bit first byte: send the pair as it is. */
05507 SEND;
05508 }
05509 }
/* c1 satisfies nkf_char_unicode_p(): hand it to the output converter
   directly. */
05510 else if (nkf_char_unicode_p(c1)) {
05511 (*oconv)(0, c1);
05512 NEXT;
05513 }
05514 else {
05515
/* No byte pending: classify c1. */
05516 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
05517
/* In JIS_X_0208 mode a byte in [DEL, 0x92) is kept as a first byte. */
05518 MORE;
05519 }else if (input_codename && input_codename[0] == 'I' &&
05520 0xA1 <= c1 && c1 <= 0xDF) {
05521
/* Input code name starts with 'I' and c1 is in 0xA1..0xDF: send it as
   JIS_X_0201_1976_K with the high bit cleared. */
05522 c2 = JIS_X_0201_1976_K;
05523 c1 &= 0x7f;
05524 SEND;
05525 } else if (c1 > DEL) {
05526
/* Other 8-bit byte. */
05527 if (!estab_f && !iso8859_f) {
05528
05529 MORE;
05530 } else {
05531 if (iso8859_f) {
05532 c2 = ISO_8859_1;
05533 c1 &= 0x7f;
05534 SEND;
05535 }
/* 0xA0..0xDF with s_iconv active, or 0xFD/0xFE with the CP10001 map:
   single-byte JIS_X_0201_1976_K. */
05536 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
05537 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
05538
05539 c2 = JIS_X_0201_1976_K;
05540 c1 &= 0x7f;
05541 SEND;
05542 }
05543 else {
05544
05545 MORE;
05546 }
05547 }
05548 } else if (SP < c1 && c1 < DEL) {
05549
/* Printable 7-bit byte. */
05550 if (shift_mode) {
05551
/* Between SO and SI. */
05552 if (iso8859_f) {
05553 c2 = ISO_8859_1;
05554 SEND;
05555 } else if (nkf_byte_jisx0201_katakana_p(c1)){
05556
05557 c2 = JIS_X_0201_1976_K;
05558 SEND;
05559 } else {
05560
05561 SKIP;
05562 }
05563 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
05564 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
05565
/* In a two-byte input mode: keep c1 as the first byte. */
05566 MORE;
05567 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
05568
/* '=' may start a MIME encoded-word ("=?"). */
05569 if ((c1 = (*i_getc)(f)) == EOF) {
05570 (*oconv)(0, '=');
05571 LAST;
05572 } else if (c1 == '?') {
05573
05574 if(mime_f == STRICT_MIME) {
05575
05576 if (mime_begin_strict(f) == EOF)
05577 LAST;
05578 SKIP;
05579 } else if (mime_begin(f) == EOF)
05580 LAST;
05581 SKIP;
05582 } else {
/* Not "=?": output the '=' and push the following byte back. */
05583 (*oconv)(0, '=');
05584 (*i_ungetc)(c1,f);
05585 SKIP;
05586 }
05587 } else {
05588
05589 SEND;
05590 }
05591 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
05592 shift_mode = 0;
05593 SKIP;
05594 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
05595 shift_mode = 1;
05596 SKIP;
05597 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
/* Escape sequence: dispatch on the byte(s) after ESC. */
05598 if ((c1 = (*i_getc)(f)) == EOF) {
05599 (*oconv)(0, ESC);
05600 LAST;
05601 }
05602 else if (c1 == '&') {
05603
/* ESC & x: the following byte is read and discarded. */
05604 if ((c1 = (*i_getc)(f)) == EOF) {
05605 LAST;
05606 } else {
05607 SKIP;
05608 }
05609 }
05610 else if (c1 == '$') {
05611
05612 if ((c1 = (*i_getc)(f)) == EOF) {
05613
05614
05615
05616 LAST;
05617 } else if (c1 == '@' || c1 == 'B') {
05618
/* ESC $ @ or ESC $ B: input mode JIS_X_0208. */
05619 set_input_mode(JIS_X_0208);
05620 SKIP;
05621 } else if (c1 == '(') {
05622
/* ESC $ ( x: the final byte selects the two-byte set. */
05623 if ((c1 = (*i_getc)(f)) == EOF) {
05624
05625
05626
05627
05628
05629 LAST;
05630 } else if (c1 == '@'|| c1 == 'B') {
05631
05632 set_input_mode(JIS_X_0208);
05633 SKIP;
05634 #ifdef X0212_ENABLE
05635 } else if (c1 == 'D'){
05636 set_input_mode(JIS_X_0212);
05637 SKIP;
05638 #endif
05639 } else if (c1 == 'O' || c1 == 'Q'){
05640 set_input_mode(JIS_X_0213_1);
05641 SKIP;
05642 } else if (c1 == 'P'){
05643 set_input_mode(JIS_X_0213_2);
05644 SKIP;
05645 } else {
05646
/* Unrecognised final byte: output the whole sequence literally. */
05647 (*oconv)(0, ESC);
05648 (*oconv)(0, '$');
05649 (*oconv)(0, '(');
05650 (*oconv)(0, c1);
05651 SKIP;
05652 }
05653 } else if (broken_f&0x2) {
05654
/* Bit 0x2 of broken_f: any other byte after ESC $ also selects
   JIS_X_0208 (no code name is recorded on this path). */
05655 input_mode = JIS_X_0208;
05656 shift_mode = 0;
05657 SKIP;
05658 } else {
05659 (*oconv)(0, ESC);
05660 (*oconv)(0, '$');
05661 (*oconv)(0, c1);
05662 SKIP;
05663 }
05664 } else if (c1 == '(') {
05665
/* ESC ( x: single-byte set selection. */
05666 if ((c1 = (*i_getc)(f)) == EOF) {
05667
05668
05669
05670 LAST;
05671 }
05672 else if (c1 == 'I') {
05673
05674 set_input_mode(JIS_X_0201_1976_K);
05675 SKIP;
05676 }
05677 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
05678
05679 set_input_mode(ASCII);
05680 SKIP;
05681 }
/* Bit 0x2 of broken_f: any other final byte is treated as a return to
   ASCII. */
05682 else if (broken_f&0x2) {
05683 set_input_mode(ASCII);
05684 SKIP;
05685 }
05686 else {
05687 (*oconv)(0, ESC);
05688 (*oconv)(0, '(');
05689 SEND;
05690 }
05691 }
05692 else if (c1 == '.') {
05693
/* ESC . A records ISO_8859_1 in g2. */
05694 if ((c1 = (*i_getc)(f)) == EOF) {
05695 LAST;
05696 }
05697 else if (c1 == 'A') {
05698
05699 g2 = ISO_8859_1;
05700 SKIP;
05701 }
05702 else {
05703 (*oconv)(0, ESC);
05704 (*oconv)(0, '.');
05705 SEND;
05706 }
05707 }
05708 else if (c1 == 'N') {
05709
/* ESC N x: when g2 is ISO_8859_1, x is sent as an ISO_8859_1 character;
   otherwise x is pushed back and ESC is output.
   NOTE(review): the byte read here is not checked against EOF. */
05710 c1 = (*i_getc)(f);
05711 if (g2 == ISO_8859_1) {
05712 c2 = ISO_8859_1;
05713 SEND;
05714 }else{
05715 (*i_ungetc)(c1, f);
05716
05717 (*oconv)(0, ESC);
05718 SEND;
05719 }
05720 }
05721 else {
05722
/* Unknown escape: output ESC, then handle c1 normally. */
05723 (*oconv)(0, ESC);
05724 SEND;
05725 }
05726 } else if (c1 == ESC && iconv == s_iconv) {
05727
/* ESC while s_iconv is the input converter; reached only when the ESC
   branch above was disabled by is_8bit. */
05728 if ((c1 = (*i_getc)(f)) == EOF) {
05729 (*oconv)(0, ESC);
05730 LAST;
05731 } else if (c1 == '$') {
05732
05733 if ((c1 = (*i_getc)(f)) == EOF) {
05734 LAST;
05735 } else if (('E' <= c1 && c1 <= 'G') ||
05736 ('O' <= c1 && c1 <= 'Q')) {
05737
05738
05739
05740
05741
05742
05743
/* ESC $ [E-G,O-Q]: c1 % 7 selects a base value from the table (the six
   accepted bytes map to indices 6, 0, 1, 2, 3 and 4); each following
   byte in SP..'z' is added to that base and sent to oconv. */
05744 static const nkf_char jphone_emoji_first_table[7] =
05745 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
05746 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
05747 if ((c1 = (*i_getc)(f)) == EOF) LAST;
05748 while (SP <= c1 && c1 <= 'z') {
05749 (*oconv)(0, c1 + c3);
/* LAST here only leaves this inner while loop */
05750 if ((c1 = (*i_getc)(f)) == EOF) LAST;
05751 }
05752 SKIP;
05753 }
05754 else {
05755 (*oconv)(0, ESC);
05756 (*oconv)(0, '$');
05757 SEND;
05758 }
05759 }
05760 else {
05761
05762 (*oconv)(0, ESC);
05763 SEND;
05764 }
05765 } else if (c1 == LF || c1 == CR) {
/* Line end. */
05766 if (broken_f&4) {
/* Bit 4 of broken_f: every line end resets the input mode to ASCII and
   calls set_iconv(FALSE, 0). */
05767 input_mode = ASCII; set_iconv(FALSE, 0);
05768 SEND;
05769 } else if (mime_decode_f && !mime_decode_mode){
/* A line end directly followed by SP is dropped and the SP is pushed
   back; otherwise the bytes read ahead are pushed back and the line end
   is sent. */
05770 if (c1 == LF) {
05771 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
05772 i_ungetc(SP,f);
05773 continue;
05774 } else {
05775 i_ungetc(c1,f);
05776 }
05777 c1 = LF;
05778 SEND;
05779 } else {
/* CR: also looks past an LF that follows it. */
05780 if ((c1=(*i_getc)(f))!=EOF) {
05781 if (c1==SP) {
05782 i_ungetc(SP,f);
05783 continue;
05784 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
05785 i_ungetc(SP,f);
05786 continue;
05787 } else {
05788 i_ungetc(c1,f);
05789 }
05790 i_ungetc(LF,f);
05791 } else {
05792 i_ungetc(c1,f);
05793 }
05794 c1 = CR;
05795 SEND;
05796 }
05797 }
05798 } else
05799 SEND;
05800 }
05801
/* SEND paths arrive here: deliver (c2, c1) according to input_mode. */
05802 switch(input_mode){
05803 case ASCII:
/* (*iconv) returning -2 or -1 asks for two or one more byte(s); they are
   read and the call is repeated with them as the third argument. */
05804 switch ((*iconv)(c2, c1, 0)) {
05805 case -2:
05806
05807 if ((c3 = (*i_getc)(f)) != EOF) {
05808 code_status(c3);
05809 c3 <<= 8;
05810 if ((c4 = (*i_getc)(f)) != EOF) {
05811 code_status(c4);
05812 (*iconv)(c2, c1, c3|c4);
05813 }
05814 }
05815 break;
05816 case -1:
05817
05818 if ((c3 = (*i_getc)(f)) != EOF) {
05819 code_status(c3);
05820 (*iconv)(c2, c1, c3);
05821 }
05822 break;
05823 }
05824 break;
05825 case JIS_X_0208:
05826 case JIS_X_0213_1:
/* With ms_ucs_map_f set, first bytes 0x7F..0x92 combined with second
   bytes 0x21..0x7E are mapped linearly (94 per row) onto code points
   starting at 0xE000. */
05827 if (ms_ucs_map_f &&
05828 0x7F <= c2 && c2 <= 0x92 &&
05829 0x21 <= c1 && c1 <= 0x7E) {
05830
05831 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
05832 c2 = 0;
05833 }
05834 (*oconv)(c2, c1);
05835 break;
05836 #ifdef X0212_ENABLE
05837 case JIS_X_0212:
05838 (*oconv)(PREFIX_EUCG3 | c2, c1);
05839 break;
05840 #endif
05841 case JIS_X_0213_2:
05842 (*oconv)(PREFIX_EUCG3 | c2, c1);
05843 break;
05844 default:
/* Any other mode: the mode value itself is passed as the first argument. */
05845 (*oconv)(input_mode, c1);
05846 }
05847
05848 c2 = 0;
05849 c3 = 0;
05850 continue;
05851
05852 }
05853
05854 finished:
05855
/* Signal end of input to the input converter. */
05856 (*iconv)(EOF, 0, 0);
/* No code name recorded and is_8bit set: take the entry of
   input_code_list with the lowest score as the input code name. */
05857 if (!input_codename)
05858 {
05859 if (is_8bit) {
05860 struct input_code *p = input_code_list;
05861 struct input_code *result = p;
05862 while (p->name){
05863 if (p->score < result->score) result = p;
05864 ++p;
05865 }
05866 set_input_codename(result->name);
05867 #ifdef CHECK_OPTION
05868 debug(result->name);
05869 #endif
05870 }
05871 }
05872 return 0;
05873 }
05874
05875
05876
05877
05878
05879
05880
05881
/*
 * Parse one option string and set the corresponding global flags.
 *
 * cp points at text such as "-w16L0" or "--ic=...".  Everything up to and
 * including the first '-' is skipped; the remaining characters are handled
 * one option at a time.  Long options ("--name") are looked up in
 * long_option[]; an entry with a non-empty alias is replaced by its alias
 * string, which is then parsed as short options.
 *
 * Returns 0 on success and -1 for an unknown or unsupported option.
 * Unless PERL_XS is defined, "--help", -v and -V print and call exit().
 */
05882 static int
05883 options(unsigned char *cp)
05884 {
05885 nkf_char i, j;
05886 unsigned char *p;
/* position to resume at once an alias string has been consumed */
05887 unsigned char *cp_back = NULL;
05888 nkf_encoding *enc;
05889
/* option_mode is set to 1 by a bare "--" below; after that every option
   string is ignored. */
05890 if (option_mode==1)
05891 return 0;
/* skip up to and including the first '-' */
05892 while(*cp && *cp++!='-');
05893 while (*cp || cp_back) {
/* end of an alias string: continue behind the long option it replaced */
05894 if(!*cp){
05895 cp = cp_back;
05896 cp_back = NULL;
05897 continue;
05898 }
05899 p = 0;
05900 switch (*cp++) {
05901 case '-':
/* "--" followed by the end of the string or a space ends option
   processing */
05902 if (!*cp || *cp == SP) {
05903 option_mode = 1;
05904 return 0;
05905 }
/* Look the name up in long_option[]: compare up to a '=' in the table
   name.  On a match p points just behind the matched part of cp, i.e. at
   the option argument for "name=" entries; p stays 0 when nothing
   matched. */
05906 for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
05907 p = (unsigned char *)long_option[i].name;
05908 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
05909 if (*p == cp[j] || cp[j] == SP){
05910 p = &cp[j] + 1;
05911 break;
05912 }
05913 p = 0;
05914 }
05915 if (p == 0) {
05916 #if !defined(PERL_XS) && !defined(WIN32DLL)
05917 fprintf(stderr, "unknown long option: --%s\n", cp);
05918 #endif
05919 return -1;
05920 }
/* advance cp to the end of this word (space or end of string) */
05921 while(*cp && *cp != SP && cp++);
05922 if (long_option[i].alias[0]){
/* aliased long option: parse the alias text next, then resume at cp */
05923 cp_back = cp;
05924 cp = (unsigned char *)long_option[i].alias;
05925 }else{
/* long options without an alias are handled by name */
05926 #ifndef PERL_XS
05927 if (strcmp(long_option[i].name, "help") == 0){
05928 usage();
05929 exit(EXIT_SUCCESS);
05930 }
05931 #endif
/* --ic= / --oc=: a name nkf_enc_find() does not know is silently ignored */
05932 if (strcmp(long_option[i].name, "ic=") == 0){
05933 enc = nkf_enc_find((char *)p);
05934 if (!enc) continue;
05935 input_encoding = enc;
05936 continue;
05937 }
05938 if (strcmp(long_option[i].name, "oc=") == 0){
05939 enc = nkf_enc_find((char *)p);
05940
05941 if (!enc) continue;
05942 output_encoding = enc;
05943 continue;
05944 }
/* --guess=0 or --guess=1 sets guess_f to 1, any other value to 2 */
05945 if (strcmp(long_option[i].name, "guess=") == 0){
05946 if (p[0] == '0' || p[0] == '1') {
05947 guess_f = 1;
05948 } else {
05949 guess_f = 2;
05950 }
05951 continue;
05952 }
05953 #ifdef OVERWRITE
/* --overwrite[=suffix] sets preserve_time_f, --in-place[=suffix] clears
   it; the "=suffix" forms also set backup_f and point backup_suffix into
   the option string itself. */
05954 if (strcmp(long_option[i].name, "overwrite") == 0){
05955 file_out_f = TRUE;
05956 overwrite_f = TRUE;
05957 preserve_time_f = TRUE;
05958 continue;
05959 }
05960 if (strcmp(long_option[i].name, "overwrite=") == 0){
05961 file_out_f = TRUE;
05962 overwrite_f = TRUE;
05963 preserve_time_f = TRUE;
05964 backup_f = TRUE;
05965 backup_suffix = (char *)p;
05966 continue;
05967 }
05968 if (strcmp(long_option[i].name, "in-place") == 0){
05969 file_out_f = TRUE;
05970 overwrite_f = TRUE;
05971 preserve_time_f = FALSE;
05972 continue;
05973 }
05974 if (strcmp(long_option[i].name, "in-place=") == 0){
05975 file_out_f = TRUE;
05976 overwrite_f = TRUE;
05977 preserve_time_f = FALSE;
05978 backup_f = TRUE;
05979 backup_suffix = (char *)p;
05980 continue;
05981 }
05982 #endif
05983 #ifdef INPUT_OPTION
05984 if (strcmp(long_option[i].name, "cap-input") == 0){
05985 cap_f = TRUE;
05986 continue;
05987 }
05988 if (strcmp(long_option[i].name, "url-input") == 0){
05989 url_f = TRUE;
05990 continue;
05991 }
05992 #endif
05993 #ifdef NUMCHAR_OPTION
05994 if (strcmp(long_option[i].name, "numchar-input") == 0){
05995 numchar_f = TRUE;
05996 continue;
05997 }
05998 #endif
05999 #ifdef CHECK_OPTION
06000 if (strcmp(long_option[i].name, "no-output") == 0){
06001 noout_f = TRUE;
06002 continue;
06003 }
06004 if (strcmp(long_option[i].name, "debug") == 0){
06005 debug_f = TRUE;
06006 continue;
06007 }
06008 #endif
06009 if (strcmp(long_option[i].name, "cp932") == 0){
06010 #ifdef SHIFTJIS_CP932
06011 cp51932_f = TRUE;
06012 cp932inv_f = -TRUE;
06013 #endif
06014 #ifdef UTF8_OUTPUT_ENABLE
06015 ms_ucs_map_f = UCS_MAP_CP932;
06016 #endif
06017 continue;
06018 }
06019 if (strcmp(long_option[i].name, "no-cp932") == 0){
06020 #ifdef SHIFTJIS_CP932
06021 cp51932_f = FALSE;
06022 cp932inv_f = FALSE;
06023 #endif
06024 #ifdef UTF8_OUTPUT_ENABLE
06025 ms_ucs_map_f = UCS_MAP_ASCII;
06026 #endif
06027 continue;
06028 }
06029 #ifdef SHIFTJIS_CP932
06030 if (strcmp(long_option[i].name, "cp932inv") == 0){
06031 cp932inv_f = -TRUE;
06032 continue;
06033 }
06034 #endif
06035
06036 #ifdef X0212_ENABLE
06037 if (strcmp(long_option[i].name, "x0212") == 0){
06038 x0212_f = TRUE;
06039 continue;
06040 }
06041 #endif
06042
06043 #ifdef EXEC_IO
/* --exec-in / --exec-out stop option parsing at once; the caller acts on
   exec_f */
06044 if (strcmp(long_option[i].name, "exec-in") == 0){
06045 exec_f = 1;
06046 return 0;
06047 }
06048 if (strcmp(long_option[i].name, "exec-out") == 0){
06049 exec_f = -1;
06050 return 0;
06051 }
06052 #endif
06053 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
06054 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
06055 no_cp932ext_f = TRUE;
06056 continue;
06057 }
06058 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
06059 no_best_fit_chars_f = TRUE;
06060 continue;
06061 }
/* --fb-*: select the encode_fallback handler */
06062 if (strcmp(long_option[i].name, "fb-skip") == 0){
06063 encode_fallback = NULL;
06064 continue;
06065 }
06066 if (strcmp(long_option[i].name, "fb-html") == 0){
06067 encode_fallback = encode_fallback_html;
06068 continue;
06069 }
06070 if (strcmp(long_option[i].name, "fb-xml") == 0){
06071 encode_fallback = encode_fallback_xml;
06072 continue;
06073 }
06074 if (strcmp(long_option[i].name, "fb-java") == 0){
06075 encode_fallback = encode_fallback_java;
06076 continue;
06077 }
06078 if (strcmp(long_option[i].name, "fb-perl") == 0){
06079 encode_fallback = encode_fallback_perl;
06080 continue;
06081 }
06082 if (strcmp(long_option[i].name, "fb-subchar") == 0){
06083 encode_fallback = encode_fallback_subchar;
06084 continue;
06085 }
/* --fb-subchar=N: N is decimal, hexadecimal with a 0x/0X prefix, or octal
   with a leading 0.  From here on i is reused as a digit index and then
   as an output of w16e_conv(); this is safe only because `continue`
   follows and long_option[i] is not used again. */
06086 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
06087 encode_fallback = encode_fallback_subchar;
06088 unicode_subchar = 0;
06089 if (p[0] != '0'){
06090
06091 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
06092 unicode_subchar *= 10;
06093 unicode_subchar += hex2bin(p[i]);
06094 }
06095 }else if(p[1] == 'x' || p[1] == 'X'){
06096
06097 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
06098 unicode_subchar <<= 4;
06099 unicode_subchar |= hex2bin(p[i]);
06100 }
06101 }else{
06102
06103 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
06104 unicode_subchar *= 8;
06105 unicode_subchar += hex2bin(p[i]);
06106 }
06107 }
/* the parsed value is passed through w16e_conv() and the two results are
   packed as i<<8 | j */
06108 w16e_conv(unicode_subchar, &i, &j);
06109 unicode_subchar = i<<8 | j;
06110 continue;
06111 }
06112 #endif
06113 #ifdef UTF8_OUTPUT_ENABLE
06114 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
06115 ms_ucs_map_f = UCS_MAP_MS;
06116 continue;
06117 }
06118 #endif
06119 #ifdef UNICODE_NORMALIZATION
06120 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
06121 nfc_f = TRUE;
06122 continue;
06123 }
06124 #endif
/* --prefix=XYZ...: X is stored in prefix_table for each of the following
   graphic characters Y, Z, ... (i is reused as an index here too) */
06125 if (strcmp(long_option[i].name, "prefix=") == 0){
06126 if (nkf_isgraph(p[0])){
06127 for (i = 1; nkf_isgraph(p[i]); i++){
06128 prefix_table[p[i]] = p[0];
06129 }
06130 }
06131 continue;
06132 }
/* the name is in the table but not handled in this build */
06133 #if !defined(PERL_XS) && !defined(WIN32DLL)
06134 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
06135 #endif
06136 return -1;
06137 }
06138 continue;
/* ---- single-letter options ---- */
06139 case 'b':
06140 unbuf_f = FALSE;
06141 continue;
06142 case 'u':
06143 unbuf_f = TRUE;
06144 continue;
/* -t, -t1: nop_f = TRUE; -t2: nop_f = 2 */
06145 case 't':
06146 if (*cp=='1') {
06147
06148 cp++;
06149 nop_f = TRUE;
06150 } else if (*cp=='2') {
06151
06152
06153
06154
06155
06156
06157 cp++;
06158 nop_f = 2;
06159 } else
06160 nop_f = TRUE;
06161 continue;
/* -j / -n, -e, -s: output encoding */
06162 case 'j':
06163 case 'n':
06164 output_encoding = nkf_enc_from_index(ISO_2022_JP);
06165 continue;
06166 case 'e':
06167 output_encoding = nkf_enc_from_index(EUCJP_NKF);
06168 continue;
06169 case 's':
06170 output_encoding = nkf_enc_from_index(SHIFT_JIS);
06171 continue;
06172 case 'l':
06173 iso8859_f = TRUE;
06174 input_encoding = nkf_enc_from_index(ISO_8859_1);
06175 continue;
/* -i@ / -iB: store the letter in kanji_intro */
06176 case 'i':
06177 if (*cp=='@'||*cp=='B')
06178 kanji_intro = *cp++;
06179 continue;
/* -oJ / -oB / -oH: store the letter in ascii_intro */
06180 case 'o':
06181
06182 if (*cp=='J'||*cp=='B'||*cp=='H')
06183 ascii_intro = *cp++;
06184 continue;
/* -h[digit]: OR the digit's value (1 when absent) into hira_f */
06185 case 'h':
06186
06187
06188
06189
06190 if ('9'>= *cp && *cp>='0')
06191 hira_f |= (*cp++ -'0');
06192 else
06193 hira_f |= 1;
06194 continue;
06195 case 'r':
06196 rot_f = TRUE;
06197 continue;
06198 #if defined(MSDOS) || defined(__OS2__)
06199 case 'T':
06200 binmode_f = FALSE;
06201 continue;
06202 #endif
06203 #ifndef PERL_XS
06204 case 'V':
06205 show_configuration();
06206 exit(EXIT_SUCCESS);
06207 break;
06208 case 'v':
06209 version();
06210 exit(EXIT_SUCCESS);
06211 break;
06212 #endif
06213 #ifdef UTF8_OUTPUT_ENABLE
/* -w[8[0]|16[B|L][0]|32[B|L][0]]: Unicode output.  'L' selects little
   endian; 'B' and 'L' both set output_bom_f, and a trailing '0' clears it
   and selects the index without the _BOM suffix. */
06214 case 'w':
06215 if (cp[0] == '8') {
06216 cp++;
06217 if (cp[0] == '0'){
06218 cp++;
06219 output_encoding = nkf_enc_from_index(UTF_8N);
06220 } else {
06221 output_bom_f = TRUE;
06222 output_encoding = nkf_enc_from_index(UTF_8_BOM);
06223 }
06224 } else {
06225 int enc_idx;
06226 if ('1'== cp[0] && '6'==cp[1]) {
06227 cp += 2;
06228 enc_idx = UTF_16;
06229 } else if ('3'== cp[0] && '2'==cp[1]) {
06230 cp += 2;
06231 enc_idx = UTF_32;
06232 } else {
06233 output_encoding = nkf_enc_from_index(UTF_8);
06234 continue;
06235 }
06236 if (cp[0]=='L') {
06237 cp++;
06238 output_endian = ENDIAN_LITTLE;
06239 output_bom_f = TRUE;
06240 } else if (cp[0] == 'B') {
06241 cp++;
06242 output_bom_f = TRUE;
06243 }
06244 if (cp[0] == '0'){
06245 output_bom_f = FALSE;
06246 cp++;
06247 enc_idx = enc_idx == UTF_16
06248 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06249 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
06250 } else {
06251 enc_idx = enc_idx == UTF_16
06252 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
06253 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
06254 }
06255 output_encoding = nkf_enc_from_index(enc_idx);
06256 }
06257 continue;
06258 #endif
06259 #ifdef UTF8_INPUT_ENABLE
/* -W[8|16[B|L]|32[B|L]]: Unicode input; big endian unless 'L' follows */
06260 case 'W':
06261 if (cp[0] == '8') {
06262 cp++;
06263 input_encoding = nkf_enc_from_index(UTF_8);
06264 }else{
06265 int enc_idx;
06266 if ('1'== cp[0] && '6'==cp[1]) {
06267 cp += 2;
06268 input_endian = ENDIAN_BIG;
06269 enc_idx = UTF_16;
06270 } else if ('3'== cp[0] && '2'==cp[1]) {
06271 cp += 2;
06272 input_endian = ENDIAN_BIG;
06273 enc_idx = UTF_32;
06274 } else {
06275 input_encoding = nkf_enc_from_index(UTF_8);
06276 continue;
06277 }
06278 if (cp[0]=='L') {
06279 cp++;
06280 input_endian = ENDIAN_LITTLE;
06281 } else if (cp[0] == 'B') {
06282 cp++;
06283 input_endian = ENDIAN_BIG;
06284 }
06285 enc_idx = (enc_idx == UTF_16
06286 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06287 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
06288 input_encoding = nkf_enc_from_index(enc_idx);
06289 }
06290 continue;
06291 #endif
06292
/* -J, -E, -S: input encoding */
06293 case 'J':
06294 input_encoding = nkf_enc_from_index(ISO_2022_JP);
06295 continue;
06296 case 'E':
06297 input_encoding = nkf_enc_from_index(EUCJP_NKF);
06298 continue;
06299 case 'S':
06300 input_encoding = nkf_enc_from_index(SHIFT_JIS);
06301 continue;
/* -Z[0-4]...: each digit d sets bit d of alpha_f; bit 0 is always set */
06302 case 'Z':
06303
06304
06305
06306
06307
06308
06309
06310 while ('0'<= *cp && *cp <='4') {
06311 alpha_f |= 1 << (*cp++ - '0');
06312 }
06313 alpha_f |= 1;
06314 continue;
06315 case 'x':
06316 x0201_f = FALSE;
06317
06318
06319
06320
06321
06322
06323
06324
06325
06326
06327 continue;
06328 case 'X':
06329 x0201_f = TRUE;
06330 continue;
/* -F is -f plus fold_preserve_f: it deliberately falls through to 'f' */
06331 case 'F':
06332 fold_preserve_f = TRUE;
/* -f[len][-margin]: a length outside 1..BUFSIZ-1 (or none) becomes
   DEFAULT_FOLD; "-margin" replaces fold_margin */
06333 case 'f':
06334 fold_f = TRUE;
06335 fold_len = 0;
06336 while('0'<= *cp && *cp <='9') {
06337 fold_len *= 10;
06338 fold_len += *cp++ - '0';
06339 }
06340 if (!(0<fold_len && fold_len<BUFSIZ))
06341 fold_len = DEFAULT_FOLD;
06342 if (*cp=='-') {
06343 fold_margin = 0;
06344 cp++;
06345 while('0'<= *cp && *cp <='9') {
06346 fold_margin *= 10;
06347 fold_margin += *cp++ - '0';
06348 }
06349 }
06350 continue;
/* -m: MIME decoding.  -mB / -mQ store the letter in mime_decode_mode and
   set mimebuf_f to FIXED_MIME; -mN sets mime_f to TRUE; -mS and plain -m
   set it to STRICT_MIME; -m0 clears mime_f and mime_decode_f. */
06351 case 'm':
06352
06353 if (*cp=='B'||*cp=='Q') {
06354 mime_decode_mode = *cp++;
06355 mimebuf_f = FIXED_MIME;
06356 } else if (*cp=='N') {
06357 mime_f = TRUE; cp++;
06358 } else if (*cp=='S') {
06359 mime_f = STRICT_MIME; cp++;
06360 } else if (*cp=='0') {
06361 mime_decode_f = FALSE;
06362 mime_f = FALSE; cp++;
06363 } else {
06364 mime_f = STRICT_MIME;
06365 }
06366 continue;
/* -M: MIME output.  -MB / -MQ store the letter in mimeout_mode and set
   mimeout_f to FIXED_MIME; plain -M sets mimeout_f to TRUE. */
06367 case 'M':
06368 if (*cp=='B') {
06369 mimeout_mode = 'B';
06370 mimeout_f = FIXED_MIME; cp++;
06371 } else if (*cp=='Q') {
06372 mimeout_mode = 'Q';
06373 mimeout_f = FIXED_MIME; cp++;
06374 } else {
06375 mimeout_f = TRUE;
06376 }
06377 continue;
/* -B[digit]: set bit <digit> of broken_f (bit 0 when no digit follows) */
06378 case 'B':
06379
06380
06381
06382
06383 if ('9'>= *cp && *cp>='0')
06384 broken_f |= 1<<(*cp++ -'0');
06385 else
06386 broken_f |= TRUE;
06387 continue;
06388 #ifndef PERL_XS
06389 case 'O':
06390 file_out_f = TRUE;
06391 continue;
06392 #endif
/* -c, -d and -L[u|m|w|0] set eolmode_f to CRLF, LF, CR or 0 */
06393 case 'c':
06394 eolmode_f = CRLF;
06395 continue;
06396 case 'd':
06397 eolmode_f = LF;
06398 continue;
06399 case 'I':
06400 iso2022jp_f = TRUE;
06401 continue;
06402 case 'L':
06403 if (*cp=='u') {
06404 eolmode_f = LF; cp++;
06405 } else if (*cp=='m') {
06406 eolmode_f = CR; cp++;
06407 } else if (*cp=='w') {
06408 eolmode_f = CRLF; cp++;
06409 } else if (*cp=='0') {
06410 eolmode_f = 0; cp++;
06411 }
06412 continue;
06413 #ifndef PERL_XS
/* -g, -g0, -g1: guess_f = 1; -g2 .. -g9: guess_f = 2 */
06414 case 'g':
06415 if ('2' <= *cp && *cp <= '9') {
06416 guess_f = 2;
06417 cp++;
06418 } else if (*cp == '0' || *cp == '1') {
06419 guess_f = 1;
06420 cp++;
06421 } else {
06422 guess_f = 1;
06423 }
06424 continue;
06425 #endif
06426 case SP:
06427
/* a space separates option groups: skip to just after the next '-' */
06428 while(*cp && *cp++!='-');
06429 continue;
06430 default:
06431 #if !defined(PERL_XS) && !defined(WIN32DLL)
06432 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
06433 #endif
06434
06435 return -1;
06436 }
06437 }
06438 return 0;
06439 }
06440
06441 #ifdef WIN32DLL
06442 #include "nkf32dll.c"
06443 #elif defined(PERL_XS)
06444 #else
06445 int
06446 main(int argc, char **argv)
06447 {
06448 FILE *fin;
06449 unsigned char *cp;
06450
06451 char *outfname = NULL;
06452 char *origfname;
06453
06454 #ifdef EASYWIN
06455 _BufferSize.y = 400;
06456 #endif
06457 #ifdef DEFAULT_CODE_LOCALE
06458 setlocale(LC_CTYPE, "");
06459 #endif
06460 nkf_state_init();
06461
06462 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
06463 cp = (unsigned char *)*argv;
06464 options(cp);
06465 #ifdef EXEC_IO
06466 if (exec_f){
06467 int fds[2], pid;
06468 if (pipe(fds) < 0 || (pid = fork()) < 0){
06469 abort();
06470 }
06471 if (pid == 0){
06472 if (exec_f > 0){
06473 close(fds[0]);
06474 dup2(fds[1], 1);
06475 }else{
06476 close(fds[1]);
06477 dup2(fds[0], 0);
06478 }
06479 execvp(argv[1], &argv[1]);
06480 }
06481 if (exec_f > 0){
06482 close(fds[1]);
06483 dup2(fds[0], 0);
06484 }else{
06485 close(fds[0]);
06486 dup2(fds[1], 1);
06487 }
06488 argc = 0;
06489 break;
06490 }
06491 #endif
06492 }
06493
06494 if (guess_f) {
06495 #ifdef CHECK_OPTION
06496 int debug_f_back = debug_f;
06497 #endif
06498 #ifdef EXEC_IO
06499 int exec_f_back = exec_f;
06500 #endif
06501 #ifdef X0212_ENABLE
06502 int x0212_f_back = x0212_f;
06503 #endif
06504 int x0213_f_back = x0213_f;
06505 int guess_f_back = guess_f;
06506 reinit();
06507 guess_f = guess_f_back;
06508 mime_f = FALSE;
06509 #ifdef CHECK_OPTION
06510 debug_f = debug_f_back;
06511 #endif
06512 #ifdef EXEC_IO
06513 exec_f = exec_f_back;
06514 #endif
06515 x0212_f = x0212_f_back;
06516 x0213_f = x0213_f_back;
06517 }
06518
06519 if (binmode_f == TRUE)
06520 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06521 if (freopen("","wb",stdout) == NULL)
06522 return (-1);
06523 #else
06524 setbinmode(stdout);
06525 #endif
06526
06527 if (unbuf_f)
06528 setbuf(stdout, (char *) NULL);
06529 else
06530 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
06531
06532 if (argc == 0) {
06533 if (binmode_f == TRUE)
06534 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06535 if (freopen("","rb",stdin) == NULL) return (-1);
06536 #else
06537 setbinmode(stdin);
06538 #endif
06539 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
06540 if (nop_f)
06541 noconvert(stdin);
06542 else {
06543 kanji_convert(stdin);
06544 if (guess_f) print_guessed_code(NULL);
06545 }
06546 } else {
06547 int nfiles = argc;
06548 int is_argument_error = FALSE;
06549 while (argc--) {
06550 input_codename = NULL;
06551 input_eol = 0;
06552 #ifdef CHECK_OPTION
06553 iconv_for_check = 0;
06554 #endif
06555 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
06556 perror(*(argv-1));
06557 is_argument_error = TRUE;
06558 continue;
06559 } else {
06560 #ifdef OVERWRITE
06561 int fd = 0;
06562 int fd_backup = 0;
06563 #endif
06564
06565
06566 if (file_out_f == TRUE) {
06567 #ifdef OVERWRITE
06568 if (overwrite_f){
06569 outfname = nkf_xmalloc(strlen(origfname)
06570 + strlen(".nkftmpXXXXXX")
06571 + 1);
06572 strcpy(outfname, origfname);
06573 #ifdef MSDOS
06574 {
06575 int i;
06576 for (i = strlen(outfname); i; --i){
06577 if (outfname[i - 1] == '/'
06578 || outfname[i - 1] == '\\'){
06579 break;
06580 }
06581 }
06582 outfname[i] = '\0';
06583 }
06584 strcat(outfname, "ntXXXXXX");
06585 mktemp(outfname);
06586 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
06587 S_IREAD | S_IWRITE);
06588 #else
06589 strcat(outfname, ".nkftmpXXXXXX");
06590 fd = mkstemp(outfname);
06591 #endif
06592 if (fd < 0
06593 || (fd_backup = dup(fileno(stdout))) < 0
06594 || dup2(fd, fileno(stdout)) < 0
06595 ){
06596 perror(origfname);
06597 return -1;
06598 }
06599 }else
06600 #endif
06601 if(argc == 1) {
06602 outfname = *argv++;
06603 argc--;
06604 } else {
06605 outfname = "nkf.out";
06606 }
06607
06608 if(freopen(outfname, "w", stdout) == NULL) {
06609 perror (outfname);
06610 return (-1);
06611 }
06612 if (binmode_f == TRUE) {
06613 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06614 if (freopen("","wb",stdout) == NULL)
06615 return (-1);
06616 #else
06617 setbinmode(stdout);
06618 #endif
06619 }
06620 }
06621 if (binmode_f == TRUE)
06622 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06623 if (freopen("","rb",fin) == NULL)
06624 return (-1);
06625 #else
06626 setbinmode(fin);
06627 #endif
06628 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
06629 if (nop_f)
06630 noconvert(fin);
06631 else {
06632 char *filename = NULL;
06633 kanji_convert(fin);
06634 if (nfiles > 1) filename = origfname;
06635 if (guess_f) print_guessed_code(filename);
06636 }
06637 fclose(fin);
06638 #ifdef OVERWRITE
06639 if (overwrite_f) {
06640 struct stat sb;
06641 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
06642 time_t tb[2];
06643 #else
06644 struct utimbuf tb;
06645 #endif
06646
06647 fflush(stdout);
06648 close(fd);
06649 if (dup2(fd_backup, fileno(stdout)) < 0){
06650 perror("dup2");
06651 }
06652 if (stat(origfname, &sb)) {
06653 fprintf(stderr, "Can't stat %s\n", origfname);
06654 }
06655
06656 if (chmod(outfname, sb.st_mode)) {
06657 fprintf(stderr, "Can't set permission %s\n", outfname);
06658 }
06659
06660
06661 if(preserve_time_f){
06662 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
06663 tb[0] = tb[1] = sb.st_mtime;
06664 if (utime(outfname, tb)) {
06665 fprintf(stderr, "Can't set timestamp %s\n", outfname);
06666 }
06667 #else
06668 tb.actime = sb.st_atime;
06669 tb.modtime = sb.st_mtime;
06670 if (utime(outfname, &tb)) {
06671 fprintf(stderr, "Can't set timestamp %s\n", outfname);
06672 }
06673 #endif
06674 }
06675 if(backup_f){
06676 char *backup_filename = get_backup_filename(backup_suffix, origfname);
06677 #ifdef MSDOS
06678 unlink(backup_filename);
06679 #endif
06680 if (rename(origfname, backup_filename)) {
06681 perror(backup_filename);
06682 fprintf(stderr, "Can't rename %s to %s\n",
06683 origfname, backup_filename);
06684 }
06685 nkf_xfree(backup_filename);
06686 }else{
06687 #ifdef MSDOS
06688 if (unlink(origfname)){
06689 perror(origfname);
06690 }
06691 #endif
06692 }
06693 if (rename(outfname, origfname)) {
06694 perror(origfname);
06695 fprintf(stderr, "Can't rename %s to %s\n",
06696 outfname, origfname);
06697 }
06698 nkf_xfree(outfname);
06699 }
06700 #endif
06701 }
06702 }
06703 if (is_argument_error)
06704 return(-1);
06705 }
06706 #ifdef EASYWIN
06707 if (file_out_f == FALSE)
06708 scanf("%d",&end_check);
06709 else
06710 fclose(stdout);
06711 #else
06712 if (file_out_f == TRUE)
06713 fclose(stdout);
06714 #endif
06715 return (0);
06716 }
06717 #endif
06718