00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regenc.h"
00031
00032 static int
00033 utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
00034 OnigEncoding enc ARG_UNUSED)
00035 {
00036 return 4;
00037 }
00038
00039 static int
00040 utf32be_is_mbc_newline(const UChar* p, const UChar* end,
00041 OnigEncoding enc ARG_UNUSED)
00042 {
00043 if (p + 3 < end) {
00044 if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
00045 return 1;
00046 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
00047 if ((
00048 #ifndef USE_CRNL_AS_LINE_TERMINATOR
00049 *(p+3) == 0x0d ||
00050 #endif
00051 *(p+3) == 0x85)
00052 && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
00053 return 1;
00054 if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
00055 && *(p+1) == 0 && *p == 0)
00056 return 1;
00057 #endif
00058 }
00059 return 0;
00060 }
00061
00062 static OnigCodePoint
00063 utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00064 OnigEncoding enc ARG_UNUSED)
00065 {
00066 return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
00067 }
00068
00069 static int
00070 utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED,
00071 OnigEncoding enc ARG_UNUSED)
00072 {
00073 return 4;
00074 }
00075
00076 static int
00077 utf32be_code_to_mbc(OnigCodePoint code, UChar *buf,
00078 OnigEncoding enc ARG_UNUSED)
00079 {
00080 UChar* p = buf;
00081
00082 *p++ = (UChar )((code & 0xff000000) >>24);
00083 *p++ = (UChar )((code & 0xff0000) >>16);
00084 *p++ = (UChar )((code & 0xff00) >> 8);
00085 *p++ = (UChar ) (code & 0xff);
00086 return 4;
00087 }
00088
00089 static int
00090 utf32be_mbc_case_fold(OnigCaseFoldType flag,
00091 const UChar** pp, const UChar* end, UChar* fold,
00092 OnigEncoding enc)
00093 {
00094 const UChar* p = *pp;
00095
00096 if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) {
00097 *fold++ = 0;
00098 *fold++ = 0;
00099
00100 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
00101 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
00102 if (*(p+3) == 0x49) {
00103 *fold++ = 0x01;
00104 *fold = 0x31;
00105 (*pp) += 4;
00106 return 4;
00107 }
00108 }
00109 #endif
00110
00111 *fold++ = 0;
00112 *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3));
00113 *pp += 4;
00114 return 4;
00115 }
00116 else
00117 return onigenc_unicode_mbc_case_fold(enc, flag, pp,
00118 end, fold);
00119 }
00120
#if 0
/*
 * Compiled out by the surrounding #if 0.
 *
 * Advances *pp by 4, then reports TRUE/FALSE for the character that *pp
 * pointed at on entry.  Only characters whose three leading bytes are zero
 * can yield TRUE.  The end argument is not consulted.
 */
static int
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  const UChar* head = *pp;
  int c, v;

  (*pp) += 4;

  /* Only code points that fit in the last byte are examined further. */
  if (head[2] != 0 || head[1] != 0 || head[0] != 0)
    return FALSE;

  c = head[3];
  if (c == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0)
    return TRUE;

  v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

  /*
   * NOTE(review): the test below ORs v with BIT_CTYPE_LOWER, so it is
   * non-zero whenever BIT_CTYPE_LOWER itself is non-zero; in that case the
   * final return is never reached.  Possibly '&' was intended -- confirm
   * against the definition of the ctype macro before re-enabling this code.
   */
  if ((v | BIT_CTYPE_LOWER) != 0) {
    /* Values in 0xaa..0xba are reported as not ambiguous. */
    return (c >= 0xaa && c <= 0xba) ? FALSE : TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
00153
00154 static UChar*
00155 utf32be_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end,
00156 OnigEncoding enc ARG_UNUSED)
00157 {
00158 int rem;
00159
00160 if (s <= start) return (UChar* )s;
00161
00162 rem = (s - start) % 4;
00163 return (UChar* )(s - rem);
00164 }
00165
00166 static int
00167 utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
00168 const OnigUChar* p, const OnigUChar* end,
00169 OnigCaseFoldCodeItem items[],
00170 OnigEncoding enc)
00171 {
00172 return onigenc_unicode_get_case_fold_codes_by_str(enc,
00173 flag, p, end, items);
00174 }
00175
00176 OnigEncodingDefine(utf_32be, UTF_32BE) = {
00177 utf32be_mbc_enc_len,
00178 "UTF-32BE",
00179 4,
00180 4,
00181 utf32be_is_mbc_newline,
00182 utf32be_mbc_to_code,
00183 utf32be_code_to_mbclen,
00184 utf32be_code_to_mbc,
00185 utf32be_mbc_case_fold,
00186 onigenc_unicode_apply_all_case_fold,
00187 utf32be_get_case_fold_codes_by_str,
00188 onigenc_unicode_property_name_to_ctype,
00189 onigenc_unicode_is_code_ctype,
00190 onigenc_utf16_32_get_ctype_code_range,
00191 utf32be_left_adjust_char_head,
00192 onigenc_always_false_is_allowed_reverse_match
00193 };
00194 ENC_ALIAS("UCS-4BE", "UTF-32BE")
00195
00196