00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regenc.h"
00031
/*
 * Evaluates to 1 when the EncISO_8859_1_CtypeTable entry for `code` has the
 * bit selected by CTYPE_TO_BIT(ctype) set, and to 0 otherwise.
 * `code` indexes a 256-entry table directly, so the caller must pass a
 * value below 256.
 */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  (((EncISO_8859_1_CtypeTable[(code)]) & CTYPE_TO_BIT(ctype)) != 0)
00034
/*
 * Per-byte character-type flags for ISO-8859-1, indexed by the byte value.
 * Each entry is a bit mask that ENC_IS_ISO_8859_1_CTYPE tests against
 * CTYPE_TO_BIT(ctype).  The comment on each row gives the index of the
 * row's first entry; every row holds eight entries.
 */
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
00069
00070 static const OnigPairCaseFoldCodes CaseFoldMap[] = {
00071 { 0xc0, 0xe0 },
00072 { 0xc1, 0xe1 },
00073 { 0xc2, 0xe2 },
00074 { 0xc3, 0xe3 },
00075 { 0xc4, 0xe4 },
00076 { 0xc5, 0xe5 },
00077 { 0xc6, 0xe6 },
00078 { 0xc7, 0xe7 },
00079 { 0xc8, 0xe8 },
00080 { 0xc9, 0xe9 },
00081 { 0xca, 0xea },
00082 { 0xcb, 0xeb },
00083 { 0xcc, 0xec },
00084 { 0xcd, 0xed },
00085 { 0xce, 0xee },
00086 { 0xcf, 0xef },
00087
00088 { 0xd0, 0xf0 },
00089 { 0xd1, 0xf1 },
00090 { 0xd2, 0xf2 },
00091 { 0xd3, 0xf3 },
00092 { 0xd4, 0xf4 },
00093 { 0xd5, 0xf5 },
00094 { 0xd6, 0xf6 },
00095 { 0xd8, 0xf8 },
00096 { 0xd9, 0xf9 },
00097 { 0xda, 0xfa },
00098 { 0xdb, 0xfb },
00099 { 0xdc, 0xfc },
00100 { 0xdd, 0xfd },
00101 { 0xde, 0xfe }
00102 };
00103
00104 static int
00105 apply_all_case_fold(OnigCaseFoldType flag,
00106 OnigApplyAllCaseFoldFunc f, void* arg,
00107 OnigEncoding enc ARG_UNUSED)
00108 {
00109 return onigenc_apply_all_case_fold_with_map(
00110 sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
00111 flag, f, arg);
00112 }
00113
00114 static int
00115 get_case_fold_codes_by_str(OnigCaseFoldType flag,
00116 const OnigUChar* p, const OnigUChar* end,
00117 OnigCaseFoldCodeItem items[],
00118 OnigEncoding enc ARG_UNUSED)
00119 {
00120 if (0x41 <= *p && *p <= 0x5a) {
00121 items[0].byte_len = 1;
00122 items[0].code_len = 1;
00123 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00124 if (*p == 0x53 && end > p + 1
00125 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00126 items[1].byte_len = 2;
00127 items[1].code_len = 1;
00128 items[1].code[0] = (OnigCodePoint )0xdf;
00129 return 2;
00130 }
00131 else
00132 return 1;
00133 }
00134 else if (0x61 <= *p && *p <= 0x7a) {
00135 items[0].byte_len = 1;
00136 items[0].code_len = 1;
00137 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00138 if (*p == 0x73 && end > p + 1
00139 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00140 items[1].byte_len = 2;
00141 items[1].code_len = 1;
00142 items[1].code[0] = (OnigCodePoint )0xdf;
00143 return 2;
00144 }
00145 else
00146 return 1;
00147 }
00148 else if (0xc0 <= *p && *p <= 0xcf) {
00149 items[0].byte_len = 1;
00150 items[0].code_len = 1;
00151 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00152 return 1;
00153 }
00154 else if (0xd0 <= *p && *p <= 0xdf) {
00155 if (*p == 0xdf) {
00156 items[0].byte_len = 1;
00157 items[0].code_len = 2;
00158 items[0].code[0] = (OnigCodePoint )'s';
00159 items[0].code[1] = (OnigCodePoint )'s';
00160
00161 items[1].byte_len = 1;
00162 items[1].code_len = 2;
00163 items[1].code[0] = (OnigCodePoint )'S';
00164 items[1].code[1] = (OnigCodePoint )'S';
00165
00166 items[2].byte_len = 1;
00167 items[2].code_len = 2;
00168 items[2].code[0] = (OnigCodePoint )'s';
00169 items[2].code[1] = (OnigCodePoint )'S';
00170
00171 items[3].byte_len = 1;
00172 items[3].code_len = 2;
00173 items[3].code[0] = (OnigCodePoint )'S';
00174 items[3].code[1] = (OnigCodePoint )'s';
00175
00176 return 4;
00177 }
00178 else if (*p != 0xd7) {
00179 items[0].byte_len = 1;
00180 items[0].code_len = 1;
00181 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00182 return 1;
00183 }
00184 }
00185 else if (0xe0 <= *p && *p <= 0xef) {
00186 items[0].byte_len = 1;
00187 items[0].code_len = 1;
00188 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00189 return 1;
00190 }
00191 else if (0xf0 <= *p && *p <= 0xfe) {
00192 if (*p != 0xf7) {
00193 items[0].byte_len = 1;
00194 items[0].code_len = 1;
00195 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00196 return 1;
00197 }
00198 }
00199
00200 return 0;
00201 }
00202
00203 static int
00204 mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end ARG_UNUSED,
00205 UChar* lower, OnigEncoding enc ARG_UNUSED)
00206 {
00207 const UChar* p = *pp;
00208
00209 if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
00210 *lower++ = 's';
00211 *lower = 's';
00212 (*pp)++;
00213 return 2;
00214 }
00215
00216 *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
00217 (*pp)++;
00218 return 1;
00219 }
00220
#if 0
/*
 * Disabled code: reports whether the byte at *pp is affected by case
 * folding and advances *pp by one.
 *
 * Returns TRUE for 0xdf when `flag` contains
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR.  Otherwise the ctype table decides:
 * a byte flagged lower-case is ambiguous unless it lies in 0xaa..0xba, and
 * any other byte is ambiguous only if it is flagged upper-case.
 *
 * The lower-case test used to read `(v | BIT_CTYPE_LOWER) != 0`, which is
 * true for every input as long as BIT_CTYPE_LOWER is a nonzero bit, leaving
 * the final return unreachable.  It now masks with `&`.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* bytes in 0xaa..0xba never appear in CaseFoldMap */
    if (*p >= 0xaa && *p <= 0xba)
      return FALSE;
    else
      return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
00247
00248 static int
00249 is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
00250 {
00251 if (code < 256)
00252 return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
00253 else
00254 return FALSE;
00255 }
00256
00257 OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
00258 onigenc_single_byte_mbc_enc_len,
00259 "ISO-8859-1",
00260 1,
00261 1,
00262 onigenc_is_mbc_newline_0x0a,
00263 onigenc_single_byte_mbc_to_code,
00264 onigenc_single_byte_code_to_mbclen,
00265 onigenc_single_byte_code_to_mbc,
00266 mbc_case_fold,
00267 apply_all_case_fold,
00268 get_case_fold_codes_by_str,
00269 onigenc_minimum_property_name_to_ctype,
00270 is_code_ctype,
00271 onigenc_not_support_get_ctype_code_range,
00272 onigenc_single_byte_left_adjust_char_head,
00273 onigenc_always_true_is_allowed_reverse_match
00274 };
00275 ENC_ALIAS("ISO8859-1", "ISO-8859-1")
00276
00277
00278
00279
00280
00281
00282
00283
00284 ENC_REPLICATE("Windows-1252", "ISO-8859-1")
00285 ENC_ALIAS("CP1252", "Windows-1252")
00286