00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regparse.h"
00032
/* Size constant, presumably for warning-message buffers; its users are
 * outside this chunk -- TODO confirm. */
00033 #define WARN_BUFSIZE 256
00034
/* Compile-time switch with no value; presumably tested with #ifdef further
 * down the file (not visible here). */
00035 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
00036
00037
/*
 * Syntax definition for the Ruby flavour.  The initializer is positional:
 * a group of ONIG_SYN_OP_* flags, a group of ONIG_SYN_OP2_* flags, a group
 * of behaviour flags, a default option value and a table of meta
 * characters.  The field names of OnigSyntaxType are declared elsewhere.
 */
00038 const OnigSyntaxType OnigSyntaxRuby = {
/* ONIG_SYN_OP_* group: the GNU operator set plus extras, with
 * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out. */
00039 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00040 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00041 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00042 ONIG_SYN_OP_ESC_C_CONTROL )
00043 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
/* ONIG_SYN_OP2_* group. */
00044 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
00045 ONIG_SYN_OP2_OPTION_RUBY |
00046 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
00047 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
00048 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
00049 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
00050 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
00051 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
00052 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
00053 ONIG_SYN_OP2_ESC_H_XDIGIT )
/* Behaviour flags (these are the values IS_SYNTAX_BV() is queried with
 * later in this file). */
00054 , ( SYN_GNU_REGEX_BV |
00055 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
00056 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
00057 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
00058 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
00059 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
00060 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
00061 ONIG_SYN_WARN_CC_DUP |
00062 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
/* Default options: none. */
00063 , ONIG_OPTION_NONE
00064 ,
/* Meta character table: backslash in the first slot, every other slot
 * marked ineffective. */
00065 {
00066 (OnigCodePoint )'\\'
00067 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00068 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00069 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00070 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00071 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00072 }
00073 };
00074
/* Library-wide default syntax pointer, initialised to ONIG_SYNTAX_RUBY
 * (macro defined elsewhere). */
00075 const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
00076
00077 extern void onig_null_warn(const char* s ARG_UNUSED) { }
00078
/* Current warning callback.  Starts as DEFAULT_WARN_FUNCTION when the build
 * defines it, otherwise as the no-op onig_null_warn. */
00079 #ifdef DEFAULT_WARN_FUNCTION
00080 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
00081 #else
00082 static OnigWarnFunc onig_warn = onig_null_warn;
00083 #endif
00084
/* Current "verbose" warning callback, selected the same way from
 * DEFAULT_VERB_WARN_FUNCTION. */
00085 #ifdef DEFAULT_VERB_WARN_FUNCTION
00086 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
00087 #else
00088 static OnigWarnFunc onig_verb_warn = onig_null_warn;
00089 #endif
00090
00091 extern void onig_set_warn_func(OnigWarnFunc f)
00092 {
00093 onig_warn = f;
00094 }
00095
00096 extern void onig_set_verb_warn_func(OnigWarnFunc f)
00097 {
00098 onig_verb_warn = f;
00099 }
00100
/* Forward declaration: BITSET_SET_BIT_CHKDUP below calls this when a bit is
 * already set.  The definition is outside this chunk. */
00101 static void CC_DUP_WARN(ScanEnv *env);
00102
00103 static void
00104 bbuf_free(BBuf* bbuf)
00105 {
00106 if (IS_NOT_NULL(bbuf)) {
00107 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
00108 xfree(bbuf);
00109 }
00110 }
00111
00112 static int
00113 bbuf_clone(BBuf** rto, BBuf* from)
00114 {
00115 int r;
00116 BBuf *to;
00117
00118 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
00119 CHECK_NULL_RETURN_MEMERR(to);
00120 r = BBUF_INIT(to, from->alloc);
00121 if (r != 0) return r;
00122 to->used = from->used;
00123 xmemcpy(to->p, from->p, from->used);
00124 return 0;
00125 }
00126
/* Turn a relative back-reference number into an absolute one by adding it
 * to env->num_mem + 1. */
00127 #define BACKREF_REL_TO_ABS(rel_no, env) \
00128 ((env)->num_mem + 1 + (rel_no))
00129
/* Clear flag bits `f` in `v` when `negative` is true, set them otherwise.
 * NOTE(review): the expansion is not wrapped in outer parentheses, so it is
 * only safe as a full statement. */
00130 #define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
00131
/* First code point of the multi-byte range: 0 when the encoding's minimum
 * character length is greater than one byte, 0x80 otherwise. */
00132 #define MBCODE_START_POS(enc) \
00133 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
00134
/* Add the range [MBCODE_START_POS(enc), max code point] to `pbuf`.
 * Relies on a variable named `env` being in scope at the call site. */
00135 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
00136 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
00137
/* For encodings that are not single-byte, add the whole multi-byte range to
 * `mbuf`.  Assigns to a caller-scope `r` and returns it from the enclosing
 * function when non-zero. */
00138 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
00139 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
00140 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
00141 if (r) return r;\
00142 }\
00143 } while (0)
00144
00145
/* Set bit `pos` in `bs`, calling CC_DUP_WARN(env) first when the bit is
 * already set.  Needs `env` in scope at the call site. */
00146 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
00147 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
00148 BS_ROOM(bs, pos) |= BS_BIT(pos); \
00149 } while (0)
00150
/* Store 1 in `empty` when every word of `bs` is zero, 0 otherwise.
 * Declares its own local `i` inside the do-block. */
00151 #define BITSET_IS_EMPTY(bs,empty) do {\
00152 int i;\
00153 empty = 1;\
00154 for (i = 0; i < (int )BITSET_SIZE; i++) {\
00155 if ((bs)[i] != 0) {\
00156 empty = 0; break;\
00157 }\
00158 }\
00159 } while (0)
00160
00161 static void
00162 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
00163 {
00164 int i;
00165 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
00166 BITSET_SET_BIT_CHKDUP(bs, i);
00167 }
00168 }
00169
/* Compiled out by "#if 0": would set every bit of `bs`. */
00170 #if 0
00171 static void
00172 bitset_set_all(BitSetRef bs)
00173 {
00174 int i;
00175 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
00176 }
00177 #endif
00178
00179 static void
00180 bitset_invert(BitSetRef bs)
00181 {
00182 int i;
00183 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
00184 }
00185
00186 static void
00187 bitset_invert_to(BitSetRef from, BitSetRef to)
00188 {
00189 int i;
00190 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
00191 }
00192
00193 static void
00194 bitset_and(BitSetRef dest, BitSetRef bs)
00195 {
00196 int i;
00197 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
00198 }
00199
00200 static void
00201 bitset_or(BitSetRef dest, BitSetRef bs)
00202 {
00203 int i;
00204 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
00205 }
00206
00207 static void
00208 bitset_copy(BitSetRef dest, BitSetRef bs)
00209 {
00210 int i;
00211 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
00212 }
00213
00214 extern int
00215 onig_strncmp(const UChar* s1, const UChar* s2, int n)
00216 {
00217 int x;
00218
00219 while (n-- > 0) {
00220 x = *s2++ - *s1++;
00221 if (x) return x;
00222 }
00223 return 0;
00224 }
00225
00226 extern void
00227 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
00228 {
00229 ptrdiff_t len = end - src;
00230 if (len > 0) {
00231 xmemcpy(dest, src, len);
00232 dest[len] = (UChar )0;
00233 }
00234 }
00235
00236 #ifdef USE_NAMED_GROUP
00237 static UChar*
00238 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
00239 {
00240 ptrdiff_t slen;
00241 int term_len, i;
00242 UChar *r;
00243
00244 slen = end - s;
00245 term_len = ONIGENC_MBC_MINLEN(enc);
00246
00247 r = (UChar* )xmalloc(slen + term_len);
00248 CHECK_NULL_RETURN(r);
00249 xmemcpy(r, s, slen);
00250
00251 for (i = 0; i < term_len; i++)
00252 r[slen + i] = (UChar )0;
00253
00254 return r;
00255 }
00256 #endif
00257
00258
/*
 * Pattern-scanning helpers.  Every macro below expects the surrounding
 * function to have `p` (cursor), `end` (limit) and `enc` (encoding) in
 * scope, plus the `pfetch_prev` variable declared by PFETCH_READY.
 */
/* Value PPEEK yields when the cursor is at the end. */
00259 #define PEND_VALUE 0
00260
/* Declares the variable that remembers the cursor before the last fetch. */
00261 #define PFETCH_READY UChar* pfetch_prev
/* 1 when the cursor has reached `end`, 0 otherwise. */
00262 #define PEND (p < end ? 0 : 1)
/* Move the cursor back to where it was before the last PINC/PFETCH. */
00263 #define PUNFETCH p = pfetch_prev
/* Advance the cursor by one character without reading it. */
00264 #define PINC do { \
00265 pfetch_prev = p; \
00266 p += enclen(enc, p, end); \
00267 } while (0)
/* Read the character at the cursor into `c` (a plain byte read when
 * enc->max_enc_len is 1), then advance by one character. */
00268 #define PFETCH(c) do { \
00269 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
00270 pfetch_prev = p; \
00271 p += enclen(enc, p, end); \
00272 } while (0)
00273
/* Code point at the cursor without advancing; PEND_VALUE at the end. */
00274 #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* True when the next code point equals `c`. */
00275 #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
00276
00277 static UChar*
00278 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
00279 int capa)
00280 {
00281 UChar* r;
00282
00283 if (dest)
00284 r = (UChar* )xrealloc(dest, capa + 1);
00285 else
00286 r = (UChar* )xmalloc(capa + 1);
00287
00288 CHECK_NULL_RETURN(r);
00289 onig_strcpy(r + (dest_end - dest), src, src_end);
00290 return r;
00291 }
00292
00293
00294 static UChar*
00295 strcat_capa_from_static(UChar* dest, UChar* dest_end,
00296 const UChar* src, const UChar* src_end, int capa)
00297 {
00298 UChar* r;
00299
00300 r = (UChar* )xmalloc(capa + 1);
00301 CHECK_NULL_RETURN(r);
00302 onig_strcpy(r, dest, dest_end);
00303 onig_strcpy(r + (dest_end - dest), src, src_end);
00304 return r;
00305 }
00306
00307
00308 #ifdef USE_ST_LIBRARY
00309
00310 #include "ruby/st.h"
00311
/* Hash key describing a byte string by its bounds: [s, end). */
00312 typedef struct {
00313 const UChar* s;
00314 const UChar* end;
00315 } st_str_end_key;
00316
00317 static int
00318 str_end_cmp(st_data_t xp, st_data_t yp)
00319 {
00320 const st_str_end_key *x, *y;
00321 const UChar *p, *q;
00322 int c;
00323
00324 x = (const st_str_end_key *)xp;
00325 y = (const st_str_end_key *)yp;
00326 if ((x->end - x->s) != (y->end - y->s))
00327 return 1;
00328
00329 p = x->s;
00330 q = y->s;
00331 while (p < x->end) {
00332 c = (int )*p - (int )*q;
00333 if (c != 0) return c;
00334
00335 p++; q++;
00336 }
00337
00338 return 0;
00339 }
00340
00341 static st_index_t
00342 str_end_hash(st_data_t xp)
00343 {
00344 const st_str_end_key *x = (const st_str_end_key *)xp;
00345 const UChar *p;
00346 st_index_t val = 0;
00347
00348 p = x->s;
00349 while (p < x->end) {
00350 val = val * 997 + (int )*p++;
00351 }
00352
00353 return val + (val >> 5);
00354 }
00355
00356 extern hash_table_type*
00357 onig_st_init_strend_table_with_size(st_index_t size)
00358 {
00359 static const struct st_hash_type hashType = {
00360 str_end_cmp,
00361 str_end_hash,
00362 };
00363
00364 return (hash_table_type* )
00365 onig_st_init_table_with_size(&hashType, size);
00366 }
00367
00368 extern int
00369 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
00370 const UChar* end_key, hash_data_type *value)
00371 {
00372 st_str_end_key key;
00373
00374 key.s = (UChar* )str_key;
00375 key.end = (UChar* )end_key;
00376
00377 return onig_st_lookup(table, (st_data_t )(&key), value);
00378 }
00379
00380 extern int
00381 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
00382 const UChar* end_key, hash_data_type value)
00383 {
00384 st_str_end_key* key;
00385 int result;
00386
00387 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
00388 key->s = (UChar* )str_key;
00389 key->end = (UChar* )end_key;
00390 result = onig_st_insert(table, (st_data_t )key, value);
00391 if (result) {
00392 xfree(key);
00393 }
00394 return result;
00395 }
00396
00397 #endif
00398
00399
00400 #ifdef USE_NAMED_GROUP
00401
/* Initial capacity of NameEntry.back_refs when a name gets its second
 * group (see name_add). */
00402 #define INIT_NAME_BACKREFS_ALLOC_NUM 8
00403
/*
 * One group name and the group numbers defined under it.
 * With exactly one group the number lives in back_ref1; from the second
 * group on, all numbers (including the first) live in back_refs.
 */
00404 typedef struct {
00405 UChar* name;
00406 size_t name_len;
00407 int back_num;
00408 int back_alloc;
00409 int back_ref1;
00410 int* back_refs;
00411 } NameEntry;
00412
00413 #ifdef USE_ST_LIBRARY
00414
/* With USE_ST_LIBRARY the name table is an st hash table and values are
 * passed as st_data_t. */
00415 typedef st_table NameTable;
00416 typedef st_data_t HashDataType;
00417
/* Name buffer sizes; their users are outside this chunk. */
00418 #define NAMEBUF_SIZE 24
00419 #define NAMEBUF_SIZE_1 25
00420
00421 #ifdef ONIG_DEBUG
00422 static int
00423 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
00424 {
00425 int i;
00426 FILE* fp = (FILE* )arg;
00427
00428 fprintf(fp, "%s: ", e->name);
00429 if (e->back_num == 0)
00430 fputs("-", fp);
00431 else if (e->back_num == 1)
00432 fprintf(fp, "%d", e->back_ref1);
00433 else {
00434 for (i = 0; i < e->back_num; i++) {
00435 if (i > 0) fprintf(fp, ", ");
00436 fprintf(fp, "%d", e->back_refs[i]);
00437 }
00438 }
00439 fputs("\n", fp);
00440 return ST_CONTINUE;
00441 }
00442
00443 extern int
00444 onig_print_names(FILE* fp, regex_t* reg)
00445 {
00446 NameTable* t = (NameTable* )reg->name_table;
00447
00448 if (IS_NOT_NULL(t)) {
00449 fprintf(fp, "name table\n");
00450 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
00451 fputs("\n", fp);
00452 }
00453 return 0;
00454 }
00455 #endif
00456
00457 static int
00458 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
00459 {
00460 xfree(e->name);
00461 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00462 xfree(key);
00463 xfree(e);
00464 return ST_DELETE;
00465 }
00466
00467 static int
00468 names_clear(regex_t* reg)
00469 {
00470 NameTable* t = (NameTable* )reg->name_table;
00471
00472 if (IS_NOT_NULL(t)) {
00473 onig_st_foreach(t, i_free_name_entry, 0);
00474 }
00475 return 0;
00476 }
00477
00478 extern int
00479 onig_names_free(regex_t* reg)
00480 {
00481 int r;
00482 NameTable* t;
00483
00484 r = names_clear(reg);
00485 if (r) return r;
00486
00487 t = (NameTable* )reg->name_table;
00488 if (IS_NOT_NULL(t)) onig_st_free_table(t);
00489 reg->name_table = (void* )NULL;
00490 return 0;
00491 }
00492
00493 static NameEntry*
00494 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00495 {
00496 NameEntry* e;
00497 NameTable* t = (NameTable* )reg->name_table;
00498
00499 e = (NameEntry* )NULL;
00500 if (IS_NOT_NULL(t)) {
00501 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
00502 }
00503 return e;
00504 }
00505
/* State handed to i_names while walking the name table: the user callback
 * with its argument and regex, the first non-zero callback result (ret),
 * and the regex encoding. */
00506 typedef struct {
00507 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
00508 regex_t* reg;
00509 void* arg;
00510 int ret;
00511 OnigEncoding enc;
00512 } INamesArg;
00513
00514 static int
00515 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
00516 {
00517 int r = (*(arg->func))(e->name,
00518 e->name + e->name_len,
00519 e->back_num,
00520 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00521 arg->reg, arg->arg);
00522 if (r != 0) {
00523 arg->ret = r;
00524 return ST_STOP;
00525 }
00526 return ST_CONTINUE;
00527 }
00528
00529 extern int
00530 onig_foreach_name(regex_t* reg,
00531 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00532 {
00533 INamesArg narg;
00534 NameTable* t = (NameTable* )reg->name_table;
00535
00536 narg.ret = 0;
00537 if (IS_NOT_NULL(t)) {
00538 narg.func = func;
00539 narg.reg = reg;
00540 narg.arg = arg;
00541 narg.enc = reg->enc;
00542 onig_st_foreach(t, i_names, (HashDataType )&narg);
00543 }
00544 return narg.ret;
00545 }
00546
00547 static int
00548 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
00549 {
00550 int i;
00551
00552 if (e->back_num > 1) {
00553 for (i = 0; i < e->back_num; i++) {
00554 e->back_refs[i] = map[e->back_refs[i]].new_val;
00555 }
00556 }
00557 else if (e->back_num == 1) {
00558 e->back_ref1 = map[e->back_ref1].new_val;
00559 }
00560
00561 return ST_CONTINUE;
00562 }
00563
00564 extern int
00565 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
00566 {
00567 NameTable* t = (NameTable* )reg->name_table;
00568
00569 if (IS_NOT_NULL(t)) {
00570 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
00571 }
00572 return 0;
00573 }
00574
00575
00576 extern int
00577 onig_number_of_names(regex_t* reg)
00578 {
00579 NameTable* t = (NameTable* )reg->name_table;
00580
00581 if (IS_NOT_NULL(t))
00582 return t->num_entries;
00583 else
00584 return 0;
00585 }
00586
00587 #else
00588
/* Initial capacity of the entry array when the table is first created
 * (see name_add). */
00589 #define INIT_NAMES_ALLOC_NUM 8
00590
/* Name table used when USE_ST_LIBRARY is not defined: a growable array of
 * entries, with `num` slots in use out of `alloc`. */
00591 typedef struct {
00592 NameEntry* e;
00593 int num;
00594 int alloc;
00595 } NameTable;
00596
00597 #ifdef ONIG_DEBUG
00598 extern int
00599 onig_print_names(FILE* fp, regex_t* reg)
00600 {
00601 int i, j;
00602 NameEntry* e;
00603 NameTable* t = (NameTable* )reg->name_table;
00604
00605 if (IS_NOT_NULL(t) && t->num > 0) {
00606 fprintf(fp, "name table\n");
00607 for (i = 0; i < t->num; i++) {
00608 e = &(t->e[i]);
00609 fprintf(fp, "%s: ", e->name);
00610 if (e->back_num == 0) {
00611 fputs("-", fp);
00612 }
00613 else if (e->back_num == 1) {
00614 fprintf(fp, "%d", e->back_ref1);
00615 }
00616 else {
00617 for (j = 0; j < e->back_num; j++) {
00618 if (j > 0) fprintf(fp, ", ");
00619 fprintf(fp, "%d", e->back_refs[j]);
00620 }
00621 }
00622 fputs("\n", fp);
00623 }
00624 fputs("\n", fp);
00625 }
00626 return 0;
00627 }
00628 #endif
00629
00630 static int
00631 names_clear(regex_t* reg)
00632 {
00633 int i;
00634 NameEntry* e;
00635 NameTable* t = (NameTable* )reg->name_table;
00636
00637 if (IS_NOT_NULL(t)) {
00638 for (i = 0; i < t->num; i++) {
00639 e = &(t->e[i]);
00640 if (IS_NOT_NULL(e->name)) {
00641 xfree(e->name);
00642 e->name = NULL;
00643 e->name_len = 0;
00644 e->back_num = 0;
00645 e->back_alloc = 0;
00646 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00647 e->back_refs = (int* )NULL;
00648 }
00649 }
00650 if (IS_NOT_NULL(t->e)) {
00651 xfree(t->e);
00652 t->e = NULL;
00653 }
00654 t->num = 0;
00655 }
00656 return 0;
00657 }
00658
00659 extern int
00660 onig_names_free(regex_t* reg)
00661 {
00662 int r;
00663 NameTable* t;
00664
00665 r = names_clear(reg);
00666 if (r) return r;
00667
00668 t = (NameTable* )reg->name_table;
00669 if (IS_NOT_NULL(t)) xfree(t);
00670 reg->name_table = NULL;
00671 return 0;
00672 }
00673
00674 static NameEntry*
00675 name_find(regex_t* reg, UChar* name, UChar* name_end)
00676 {
00677 int i, len;
00678 NameEntry* e;
00679 NameTable* t = (NameTable* )reg->name_table;
00680
00681 if (IS_NOT_NULL(t)) {
00682 len = name_end - name;
00683 for (i = 0; i < t->num; i++) {
00684 e = &(t->e[i]);
00685 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
00686 return e;
00687 }
00688 }
00689 return (NameEntry* )NULL;
00690 }
00691
00692 extern int
00693 onig_foreach_name(regex_t* reg,
00694 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00695 {
00696 int i, r;
00697 NameEntry* e;
00698 NameTable* t = (NameTable* )reg->name_table;
00699
00700 if (IS_NOT_NULL(t)) {
00701 for (i = 0; i < t->num; i++) {
00702 e = &(t->e[i]);
00703 r = (*func)(e->name, e->name + e->name_len, e->back_num,
00704 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00705 reg, arg);
00706 if (r != 0) return r;
00707 }
00708 }
00709 return 0;
00710 }
00711
00712 extern int
00713 onig_number_of_names(regex_t* reg)
00714 {
00715 NameTable* t = (NameTable* )reg->name_table;
00716
00717 if (IS_NOT_NULL(t))
00718 return t->num;
00719 else
00720 return 0;
00721 }
00722
00723 #endif
00724
00725 static int
00726 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
00727 {
00728 int alloc;
00729 NameEntry* e;
00730 NameTable* t = (NameTable* )reg->name_table;
00731
00732 if (name_end - name <= 0)
00733 return ONIGERR_EMPTY_GROUP_NAME;
00734
00735 e = name_find(reg, name, name_end);
00736 if (IS_NULL(e)) {
00737 #ifdef USE_ST_LIBRARY
00738 if (IS_NULL(t)) {
00739 t = onig_st_init_strend_table_with_size(5);
00740 reg->name_table = (void* )t;
00741 }
00742 e = (NameEntry* )xmalloc(sizeof(NameEntry));
00743 CHECK_NULL_RETURN_MEMERR(e);
00744
00745 e->name = strdup_with_null(reg->enc, name, name_end);
00746 if (IS_NULL(e->name)) {
00747 xfree(e);
00748 return ONIGERR_MEMORY;
00749 }
00750 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
00751 (HashDataType )e);
00752
00753 e->name_len = name_end - name;
00754 e->back_num = 0;
00755 e->back_alloc = 0;
00756 e->back_refs = (int* )NULL;
00757
00758 #else
00759
00760 if (IS_NULL(t)) {
00761 alloc = INIT_NAMES_ALLOC_NUM;
00762 t = (NameTable* )xmalloc(sizeof(NameTable));
00763 CHECK_NULL_RETURN_MEMERR(t);
00764 t->e = NULL;
00765 t->alloc = 0;
00766 t->num = 0;
00767
00768 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
00769 if (IS_NULL(t->e)) {
00770 xfree(t);
00771 return ONIGERR_MEMORY;
00772 }
00773 t->alloc = alloc;
00774 reg->name_table = t;
00775 goto clear;
00776 }
00777 else if (t->num == t->alloc) {
00778 int i;
00779
00780 alloc = t->alloc * 2;
00781 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
00782 CHECK_NULL_RETURN_MEMERR(t->e);
00783 t->alloc = alloc;
00784
00785 clear:
00786 for (i = t->num; i < t->alloc; i++) {
00787 t->e[i].name = NULL;
00788 t->e[i].name_len = 0;
00789 t->e[i].back_num = 0;
00790 t->e[i].back_alloc = 0;
00791 t->e[i].back_refs = (int* )NULL;
00792 }
00793 }
00794 e = &(t->e[t->num]);
00795 t->num++;
00796 e->name = strdup_with_null(reg->enc, name, name_end);
00797 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
00798 e->name_len = name_end - name;
00799 #endif
00800 }
00801
00802 if (e->back_num >= 1 &&
00803 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
00804 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
00805 name, name_end);
00806 return ONIGERR_MULTIPLEX_DEFINED_NAME;
00807 }
00808
00809 e->back_num++;
00810 if (e->back_num == 1) {
00811 e->back_ref1 = backref;
00812 }
00813 else {
00814 if (e->back_num == 2) {
00815 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
00816 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
00817 CHECK_NULL_RETURN_MEMERR(e->back_refs);
00818 e->back_alloc = alloc;
00819 e->back_refs[0] = e->back_ref1;
00820 e->back_refs[1] = backref;
00821 }
00822 else {
00823 if (e->back_num > e->back_alloc) {
00824 alloc = e->back_alloc * 2;
00825 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
00826 CHECK_NULL_RETURN_MEMERR(e->back_refs);
00827 e->back_alloc = alloc;
00828 }
00829 e->back_refs[e->back_num - 1] = backref;
00830 }
00831 }
00832
00833 return 0;
00834 }
00835
00836 extern int
00837 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00838 const UChar* name_end, int** nums)
00839 {
00840 NameEntry* e = name_find(reg, name, name_end);
00841
00842 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
00843
00844 switch (e->back_num) {
00845 case 0:
00846 *nums = 0;
00847 break;
00848 case 1:
00849 *nums = &(e->back_ref1);
00850 break;
00851 default:
00852 *nums = e->back_refs;
00853 break;
00854 }
00855 return e->back_num;
00856 }
00857
00858 extern int
00859 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00860 const UChar* name_end, OnigRegion *region)
00861 {
00862 int i, n, *nums;
00863
00864 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
00865 if (n < 0)
00866 return n;
00867 else if (n == 0)
00868 return ONIGERR_PARSER_BUG;
00869 else if (n == 1)
00870 return nums[0];
00871 else {
00872 if (IS_NOT_NULL(region)) {
00873 for (i = n - 1; i >= 0; i--) {
00874 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
00875 return nums[i];
00876 }
00877 }
00878 return nums[n - 1];
00879 }
00880 }
00881
00882 #else
00883
00884 extern int
00885 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00886 const UChar* name_end, int** nums)
00887 {
00888 return ONIG_NO_SUPPORT_CONFIG;
00889 }
00890
00891 extern int
00892 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00893 const UChar* name_end, OnigRegion* region)
00894 {
00895 return ONIG_NO_SUPPORT_CONFIG;
00896 }
00897
00898 extern int
00899 onig_foreach_name(regex_t* reg,
00900 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00901 {
00902 return ONIG_NO_SUPPORT_CONFIG;
00903 }
00904
00905 extern int
00906 onig_number_of_names(regex_t* reg)
00907 {
00908 return 0;
00909 }
00910 #endif
00911
00912 extern int
00913 onig_noname_group_capture_is_active(regex_t* reg)
00914 {
00915 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
00916 return 0;
00917
00918 #ifdef USE_NAMED_GROUP
00919 if (onig_number_of_names(reg) > 0 &&
00920 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
00921 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
00922 return 0;
00923 }
00924 #endif
00925
00926 return 1;
00927 }
00928
00929
/* Capacity of the first heap-allocated mem-node array in
 * scan_env_add_mem_entry. */
00930 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
00931
00932 static void
00933 scan_env_clear(ScanEnv* env)
00934 {
00935 int i;
00936
00937 BIT_STATUS_CLEAR(env->capture_history);
00938 BIT_STATUS_CLEAR(env->bt_mem_start);
00939 BIT_STATUS_CLEAR(env->bt_mem_end);
00940 BIT_STATUS_CLEAR(env->backrefed_mem);
00941 env->error = (UChar* )NULL;
00942 env->error_end = (UChar* )NULL;
00943 env->num_call = 0;
00944 env->num_mem = 0;
00945 #ifdef USE_NAMED_GROUP
00946 env->num_named = 0;
00947 #endif
00948 env->mem_alloc = 0;
00949 env->mem_nodes_dynamic = (Node** )NULL;
00950
00951 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
00952 env->mem_nodes_static[i] = NULL_NODE;
00953
00954 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00955 env->num_comb_exp_check = 0;
00956 env->comb_exp_max_regnum = 0;
00957 env->curr_max_regnum = 0;
00958 env->has_recursion = 0;
00959 #endif
00960 env->warnings_flag = 0;
00961 }
00962
00963 static int
00964 scan_env_add_mem_entry(ScanEnv* env)
00965 {
00966 int i, need, alloc;
00967 Node** p;
00968
00969 need = env->num_mem + 1;
00970 if (need >= SCANENV_MEMNODES_SIZE) {
00971 if (env->mem_alloc <= need) {
00972 if (IS_NULL(env->mem_nodes_dynamic)) {
00973 alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
00974 p = (Node** )xmalloc(sizeof(Node*) * alloc);
00975 xmemcpy(p, env->mem_nodes_static,
00976 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
00977 }
00978 else {
00979 alloc = env->mem_alloc * 2;
00980 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
00981 }
00982 CHECK_NULL_RETURN_MEMERR(p);
00983
00984 for (i = env->num_mem + 1; i < alloc; i++)
00985 p[i] = NULL_NODE;
00986
00987 env->mem_nodes_dynamic = p;
00988 env->mem_alloc = alloc;
00989 }
00990 }
00991
00992 env->num_mem++;
00993 return env->num_mem;
00994 }
00995
00996 static int
00997 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
00998 {
00999 if (env->num_mem >= num)
01000 SCANENV_MEM_NODES(env)[num] = node;
01001 else
01002 return ONIGERR_PARSER_BUG;
01003 return 0;
01004 }
01005
01006
01007 #ifdef USE_PARSE_TREE_NODE_RECYCLE
/* Link overlaid on a released Node so it can sit on the recycle list. */
01008 typedef struct _FreeNode {
01009 struct _FreeNode* next;
01010 } FreeNode;
01011
/* Head of the singly linked list of released nodes; node_new() takes from
 * it, onig_node_free() pushes onto it. */
01012 static FreeNode* FreeNodeList = (FreeNode* )NULL;
01013 #endif
01014
01015 extern void
01016 onig_node_free(Node* node)
01017 {
01018 start:
01019 if (IS_NULL(node)) return ;
01020
01021 switch (NTYPE(node)) {
01022 case NT_STR:
01023 if (NSTR(node)->capa != 0 &&
01024 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01025 xfree(NSTR(node)->s);
01026 }
01027 break;
01028
01029 case NT_LIST:
01030 case NT_ALT:
01031 onig_node_free(NCAR(node));
01032 {
01033 Node* next_node = NCDR(node);
01034
01035 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01036 {
01037 FreeNode* n = (FreeNode* )node;
01038
01039 THREAD_ATOMIC_START;
01040 n->next = FreeNodeList;
01041 FreeNodeList = n;
01042 THREAD_ATOMIC_END;
01043 }
01044 #else
01045 xfree(node);
01046 #endif
01047 node = next_node;
01048 goto start;
01049 }
01050 break;
01051
01052 case NT_CCLASS:
01053 {
01054 CClassNode* cc = NCCLASS(node);
01055
01056 if (IS_NCCLASS_SHARE(cc)) return ;
01057 if (cc->mbuf)
01058 bbuf_free(cc->mbuf);
01059 }
01060 break;
01061
01062 case NT_QTFR:
01063 if (NQTFR(node)->target)
01064 onig_node_free(NQTFR(node)->target);
01065 break;
01066
01067 case NT_ENCLOSE:
01068 if (NENCLOSE(node)->target)
01069 onig_node_free(NENCLOSE(node)->target);
01070 break;
01071
01072 case NT_BREF:
01073 if (IS_NOT_NULL(NBREF(node)->back_dynamic))
01074 xfree(NBREF(node)->back_dynamic);
01075 break;
01076
01077 case NT_ANCHOR:
01078 if (NANCHOR(node)->target)
01079 onig_node_free(NANCHOR(node)->target);
01080 break;
01081 }
01082
01083 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01084 {
01085 FreeNode* n = (FreeNode* )node;
01086
01087 THREAD_ATOMIC_START;
01088 n->next = FreeNodeList;
01089 FreeNodeList = n;
01090 THREAD_ATOMIC_END;
01091 }
01092 #else
01093 xfree(node);
01094 #endif
01095 }
01096
01097 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01098 extern int
01099 onig_free_node_list(void)
01100 {
01101 FreeNode* n;
01102
01103
01104 while (IS_NOT_NULL(FreeNodeList)) {
01105 n = FreeNodeList;
01106 FreeNodeList = FreeNodeList->next;
01107 xfree(n);
01108 }
01109
01110 return 0;
01111 }
01112 #endif
01113
01114 static Node*
01115 node_new(void)
01116 {
01117 Node* node;
01118
01119 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01120 THREAD_ATOMIC_START;
01121 if (IS_NOT_NULL(FreeNodeList)) {
01122 node = (Node* )FreeNodeList;
01123 FreeNodeList = FreeNodeList->next;
01124 THREAD_ATOMIC_END;
01125 return node;
01126 }
01127 THREAD_ATOMIC_END;
01128 #endif
01129
01130 node = (Node* )xmalloc(sizeof(Node));
01131
01132 return node;
01133 }
01134
01135
01136 static void
01137 initialize_cclass(CClassNode* cc)
01138 {
01139 BITSET_CLEAR(cc->bs);
01140
01141 cc->flags = 0;
01142 cc->mbuf = NULL;
01143 }
01144
01145 static Node*
01146 node_new_cclass(void)
01147 {
01148 Node* node = node_new();
01149 CHECK_NULL_RETURN(node);
01150
01151 SET_NTYPE(node, NT_CCLASS);
01152 initialize_cclass(NCCLASS(node));
01153 return node;
01154 }
01155
01156 static Node*
01157 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
01158 const OnigCodePoint ranges[])
01159 {
01160 int n, i;
01161 CClassNode* cc;
01162 OnigCodePoint j;
01163
01164 Node* node = node_new_cclass();
01165 CHECK_NULL_RETURN(node);
01166
01167 cc = NCCLASS(node);
01168 if (not != 0) NCCLASS_SET_NOT(cc);
01169
01170 BITSET_CLEAR(cc->bs);
01171 if (sb_out > 0 && IS_NOT_NULL(ranges)) {
01172 n = ONIGENC_CODE_RANGE_NUM(ranges);
01173 for (i = 0; i < n; i++) {
01174 for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
01175 j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
01176 if (j >= sb_out) goto sb_end;
01177
01178 BITSET_SET_BIT(cc->bs, j);
01179 }
01180 }
01181 }
01182
01183 sb_end:
01184 if (IS_NULL(ranges)) {
01185 is_null:
01186 cc->mbuf = NULL;
01187 }
01188 else {
01189 BBuf* bbuf;
01190
01191 n = ONIGENC_CODE_RANGE_NUM(ranges);
01192 if (n == 0) goto is_null;
01193
01194 bbuf = (BBuf* )xmalloc(sizeof(BBuf));
01195 CHECK_NULL_RETURN(bbuf);
01196 bbuf->alloc = n + 1;
01197 bbuf->used = n + 1;
01198 bbuf->p = (UChar* )((void* )ranges);
01199
01200 cc->mbuf = bbuf;
01201 }
01202
01203 return node;
01204 }
01205
01206 static Node*
01207 node_new_ctype(int type, int not)
01208 {
01209 Node* node = node_new();
01210 CHECK_NULL_RETURN(node);
01211
01212 SET_NTYPE(node, NT_CTYPE);
01213 NCTYPE(node)->ctype = type;
01214 NCTYPE(node)->not = not;
01215 return node;
01216 }
01217
01218 static Node*
01219 node_new_anychar(void)
01220 {
01221 Node* node = node_new();
01222 CHECK_NULL_RETURN(node);
01223
01224 SET_NTYPE(node, NT_CANY);
01225 return node;
01226 }
01227
01228 static Node*
01229 node_new_list(Node* left, Node* right)
01230 {
01231 Node* node = node_new();
01232 CHECK_NULL_RETURN(node);
01233
01234 SET_NTYPE(node, NT_LIST);
01235 NCAR(node) = left;
01236 NCDR(node) = right;
01237 return node;
01238 }
01239
01240 extern Node*
01241 onig_node_new_list(Node* left, Node* right)
01242 {
01243 return node_new_list(left, right);
01244 }
01245
01246 extern Node*
01247 onig_node_list_add(Node* list, Node* x)
01248 {
01249 Node *n;
01250
01251 n = onig_node_new_list(x, NULL);
01252 if (IS_NULL(n)) return NULL_NODE;
01253
01254 if (IS_NOT_NULL(list)) {
01255 while (IS_NOT_NULL(NCDR(list)))
01256 list = NCDR(list);
01257
01258 NCDR(list) = n;
01259 }
01260
01261 return n;
01262 }
01263
01264 extern Node*
01265 onig_node_new_alt(Node* left, Node* right)
01266 {
01267 Node* node = node_new();
01268 CHECK_NULL_RETURN(node);
01269
01270 SET_NTYPE(node, NT_ALT);
01271 NCAR(node) = left;
01272 NCDR(node) = right;
01273 return node;
01274 }
01275
01276 extern Node*
01277 onig_node_new_anchor(int type)
01278 {
01279 Node* node = node_new();
01280 CHECK_NULL_RETURN(node);
01281
01282 SET_NTYPE(node, NT_ANCHOR);
01283 NANCHOR(node)->type = type;
01284 NANCHOR(node)->target = NULL;
01285 NANCHOR(node)->char_len = -1;
01286 return node;
01287 }
01288
01289 static Node*
01290 node_new_backref(int back_num, int* backrefs, int by_name,
01291 #ifdef USE_BACKREF_WITH_LEVEL
01292 int exist_level, int nest_level,
01293 #endif
01294 ScanEnv* env)
01295 {
01296 int i;
01297 Node* node = node_new();
01298
01299 CHECK_NULL_RETURN(node);
01300
01301 SET_NTYPE(node, NT_BREF);
01302 NBREF(node)->state = 0;
01303 NBREF(node)->back_num = back_num;
01304 NBREF(node)->back_dynamic = (int* )NULL;
01305 if (by_name != 0)
01306 NBREF(node)->state |= NST_NAME_REF;
01307
01308 #ifdef USE_BACKREF_WITH_LEVEL
01309 if (exist_level != 0) {
01310 NBREF(node)->state |= NST_NEST_LEVEL;
01311 NBREF(node)->nest_level = nest_level;
01312 }
01313 #endif
01314
01315 for (i = 0; i < back_num; i++) {
01316 if (backrefs[i] <= env->num_mem &&
01317 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
01318 NBREF(node)->state |= NST_RECURSION;
01319 break;
01320 }
01321 }
01322
01323 if (back_num <= NODE_BACKREFS_SIZE) {
01324 for (i = 0; i < back_num; i++)
01325 NBREF(node)->back_static[i] = backrefs[i];
01326 }
01327 else {
01328 int* p = (int* )xmalloc(sizeof(int) * back_num);
01329 if (IS_NULL(p)) {
01330 onig_node_free(node);
01331 return NULL;
01332 }
01333 NBREF(node)->back_dynamic = p;
01334 for (i = 0; i < back_num; i++)
01335 p[i] = backrefs[i];
01336 }
01337 return node;
01338 }
01339
#ifdef USE_SUBEXP_CALL
/* Create an NT_CALL node for a subexpression call.  The name span
   [name, name_end) is stored by pointer (not copied) together with the
   group number `gnum`; the call target is left unresolved (NULL_NODE).
   Returns NULL when node_new() fails. */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  CHECK_NULL_RETURN(call);

  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;
  NCALL(call)->name_end  = name_end;
  NCALL(call)->name      = name;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
01356
01357 static Node*
01358 node_new_quantifier(int lower, int upper, int by_number)
01359 {
01360 Node* node = node_new();
01361 CHECK_NULL_RETURN(node);
01362
01363 SET_NTYPE(node, NT_QTFR);
01364 NQTFR(node)->state = 0;
01365 NQTFR(node)->target = NULL;
01366 NQTFR(node)->lower = lower;
01367 NQTFR(node)->upper = upper;
01368 NQTFR(node)->greedy = 1;
01369 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
01370 NQTFR(node)->head_exact = NULL_NODE;
01371 NQTFR(node)->next_head_exact = NULL_NODE;
01372 NQTFR(node)->is_refered = 0;
01373 if (by_number != 0)
01374 NQTFR(node)->state |= NST_BY_NUMBER;
01375
01376 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01377 NQTFR(node)->comb_exp_check_num = 0;
01378 #endif
01379
01380 return node;
01381 }
01382
01383 static Node*
01384 node_new_enclose(int type)
01385 {
01386 Node* node = node_new();
01387 CHECK_NULL_RETURN(node);
01388
01389 SET_NTYPE(node, NT_ENCLOSE);
01390 NENCLOSE(node)->type = type;
01391 NENCLOSE(node)->state = 0;
01392 NENCLOSE(node)->regnum = 0;
01393 NENCLOSE(node)->option = 0;
01394 NENCLOSE(node)->target = NULL;
01395 NENCLOSE(node)->call_addr = -1;
01396 NENCLOSE(node)->opt_count = 0;
01397 return node;
01398 }
01399
01400 extern Node*
01401 onig_node_new_enclose(int type)
01402 {
01403 return node_new_enclose(type);
01404 }
01405
01406 static Node*
01407 node_new_enclose_memory(OnigOptionType option, int is_named)
01408 {
01409 Node* node = node_new_enclose(ENCLOSE_MEMORY);
01410 CHECK_NULL_RETURN(node);
01411 if (is_named != 0)
01412 SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
01413
01414 #ifdef USE_SUBEXP_CALL
01415 NENCLOSE(node)->option = option;
01416 #endif
01417 return node;
01418 }
01419
01420 static Node*
01421 node_new_option(OnigOptionType option)
01422 {
01423 Node* node = node_new_enclose(ENCLOSE_OPTION);
01424 CHECK_NULL_RETURN(node);
01425 NENCLOSE(node)->option = option;
01426 return node;
01427 }
01428
01429 extern int
01430 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
01431 {
01432 ptrdiff_t addlen = end - s;
01433
01434 if (addlen > 0) {
01435 ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
01436
01437 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
01438 UChar* p;
01439 ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
01440
01441 if (capa <= NSTR(node)->capa) {
01442 onig_strcpy(NSTR(node)->s + len, s, end);
01443 }
01444 else {
01445 if (NSTR(node)->s == NSTR(node)->buf)
01446 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
01447 s, end, capa);
01448 else
01449 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
01450
01451 CHECK_NULL_RETURN_MEMERR(p);
01452 NSTR(node)->s = p;
01453 NSTR(node)->capa = capa;
01454 }
01455 }
01456 else {
01457 onig_strcpy(NSTR(node)->s + len, s, end);
01458 }
01459 NSTR(node)->end = NSTR(node)->s + len + addlen;
01460 }
01461
01462 return 0;
01463 }
01464
01465 extern int
01466 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
01467 {
01468 onig_node_str_clear(node);
01469 return onig_node_str_cat(node, s, end);
01470 }
01471
01472 static int
01473 node_str_cat_char(Node* node, UChar c)
01474 {
01475 UChar s[1];
01476
01477 s[0] = c;
01478 return onig_node_str_cat(node, s, s + 1);
01479 }
01480
01481 extern void
01482 onig_node_conv_to_str_node(Node* node, int flag)
01483 {
01484 SET_NTYPE(node, NT_STR);
01485 NSTR(node)->flag = flag;
01486 NSTR(node)->capa = 0;
01487 NSTR(node)->s = NSTR(node)->buf;
01488 NSTR(node)->end = NSTR(node)->buf;
01489 }
01490
01491 extern void
01492 onig_node_str_clear(Node* node)
01493 {
01494 if (NSTR(node)->capa != 0 &&
01495 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01496 xfree(NSTR(node)->s);
01497 }
01498
01499 NSTR(node)->capa = 0;
01500 NSTR(node)->flag = 0;
01501 NSTR(node)->s = NSTR(node)->buf;
01502 NSTR(node)->end = NSTR(node)->buf;
01503 }
01504
01505 static Node*
01506 node_new_str(const UChar* s, const UChar* end)
01507 {
01508 Node* node = node_new();
01509 CHECK_NULL_RETURN(node);
01510
01511 SET_NTYPE(node, NT_STR);
01512 NSTR(node)->capa = 0;
01513 NSTR(node)->flag = 0;
01514 NSTR(node)->s = NSTR(node)->buf;
01515 NSTR(node)->end = NSTR(node)->buf;
01516 if (onig_node_str_cat(node, s, end)) {
01517 onig_node_free(node);
01518 return NULL;
01519 }
01520 return node;
01521 }
01522
01523 extern Node*
01524 onig_node_new_str(const UChar* s, const UChar* end)
01525 {
01526 return node_new_str(s, end);
01527 }
01528
01529 static Node*
01530 node_new_str_raw(UChar* s, UChar* end)
01531 {
01532 Node* node = node_new_str(s, end);
01533 NSTRING_SET_RAW(node);
01534 return node;
01535 }
01536
01537 static Node*
01538 node_new_empty(void)
01539 {
01540 return node_new_str(NULL, NULL);
01541 }
01542
01543 static Node*
01544 node_new_str_raw_char(UChar c)
01545 {
01546 UChar p[1];
01547
01548 p[0] = c;
01549 return node_new_str_raw(p, p + 1);
01550 }
01551
01552 static Node*
01553 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
01554 {
01555 const UChar *p;
01556 Node* n = NULL_NODE;
01557
01558 if (sn->end > sn->s) {
01559 p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
01560 if (p && p > sn->s) {
01561 n = node_new_str(p, sn->end);
01562 if ((sn->flag & NSTR_RAW) != 0)
01563 NSTRING_SET_RAW(n);
01564 sn->end = (UChar* )p;
01565 }
01566 }
01567 return n;
01568 }
01569
01570 static int
01571 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
01572 {
01573 if (sn->end > sn->s) {
01574 return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
01575 }
01576 return 0;
01577 }
01578
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/* Prepend `num` bytes of value `val` to the string `sn`, shifting the
   existing contents right via a temporary copy.
   Always returns 0.
   NOTE(review): the temporary is NODE_STR_BUF_SIZE bytes and the shift
   writes `num` bytes past the current end; no visible check guarantees
   that the string and its storage are small/large enough -- confirm at
   the call sites. */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = sn->end - sn->s;
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  /* The function is declared to return int; falling off the end would
     hand an indeterminate value to any caller that reads the result. */
  return 0;
}
#endif
01596
01597 extern int
01598 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
01599 {
01600 unsigned int num, val;
01601 OnigCodePoint c;
01602 UChar* p = *src;
01603 PFETCH_READY;
01604
01605 num = 0;
01606 while (!PEND) {
01607 PFETCH(c);
01608 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
01609 val = (unsigned int )DIGITVAL(c);
01610 if ((INT_MAX_LIMIT - val) / 10UL < num)
01611 return -1;
01612
01613 num = num * 10 + val;
01614 }
01615 else {
01616 PUNFETCH;
01617 break;
01618 }
01619 }
01620 *src = p;
01621 return num;
01622 }
01623
01624 static int
01625 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
01626 OnigEncoding enc)
01627 {
01628 OnigCodePoint c;
01629 unsigned int num, val;
01630 UChar* p = *src;
01631 PFETCH_READY;
01632
01633 num = 0;
01634 while (!PEND && maxlen-- != 0) {
01635 PFETCH(c);
01636 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
01637 val = (unsigned int )XDIGITVAL(enc,c);
01638 if ((INT_MAX_LIMIT - val) / 16UL < num)
01639 return -1;
01640
01641 num = (num << 4) + XDIGITVAL(enc,c);
01642 }
01643 else {
01644 PUNFETCH;
01645 break;
01646 }
01647 }
01648 *src = p;
01649 return num;
01650 }
01651
01652 static int
01653 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
01654 OnigEncoding enc)
01655 {
01656 OnigCodePoint c;
01657 unsigned int num, val;
01658 UChar* p = *src;
01659 PFETCH_READY;
01660
01661 num = 0;
01662 while (!PEND && maxlen-- != 0) {
01663 PFETCH(c);
01664 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
01665 val = ODIGITVAL(c);
01666 if ((INT_MAX_LIMIT - val) / 8UL < num)
01667 return -1;
01668
01669 num = (num << 3) + val;
01670 }
01671 else {
01672 PUNFETCH;
01673 break;
01674 }
01675 }
01676 *src = p;
01677 return num;
01678 }
01679
01680
/* Write SIZE_CODE_POINT bytes taken from the object `code` at byte offset
   `pos` of `bbuf`.  `code` must be an lvalue: its address is taken. */
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code) \
    BBUF_WRITE((bbuf), (pos), &(code), SIZE_CODE_POINT)
01683
01684
01685
01686
01687
01688 static int
01689 new_code_range(BBuf** pbuf)
01690 {
01691 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
01692 int r;
01693 OnigCodePoint n;
01694 BBuf* bbuf;
01695
01696 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
01697 CHECK_NULL_RETURN_MEMERR(*pbuf);
01698 r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
01699 if (r) return r;
01700
01701 n = 0;
01702 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01703 return 0;
01704 }
01705
01706 static int
01707 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
01708 int checkdup)
01709 {
01710 int r, inc_n, pos;
01711 int low, high, bound, x;
01712 OnigCodePoint n, *data;
01713 BBuf* bbuf;
01714
01715 if (from > to) {
01716 n = from; from = to; to = n;
01717 }
01718
01719 if (IS_NULL(*pbuf)) {
01720 r = new_code_range(pbuf);
01721 if (r) return r;
01722 bbuf = *pbuf;
01723 n = 0;
01724 }
01725 else {
01726 bbuf = *pbuf;
01727 GET_CODE_POINT(n, bbuf->p);
01728 }
01729 data = (OnigCodePoint* )(bbuf->p);
01730 data++;
01731
01732 for (low = 0, bound = n; low < bound; ) {
01733 x = (low + bound) >> 1;
01734 if (from > data[x*2 + 1])
01735 low = x + 1;
01736 else
01737 bound = x;
01738 }
01739
01740 for (high = low, bound = n; high < bound; ) {
01741 x = (high + bound) >> 1;
01742 if (to >= data[x*2] - 1)
01743 high = x + 1;
01744 else
01745 bound = x;
01746 }
01747
01748 inc_n = low + 1 - high;
01749 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
01750 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
01751
01752 if (inc_n != 1) {
01753 if (checkdup && to >= data[low*2]) CC_DUP_WARN(env);
01754 if (from > data[low*2])
01755 from = data[low*2];
01756 if (to < data[(high - 1)*2 + 1])
01757 to = data[(high - 1)*2 + 1];
01758 }
01759
01760 if (inc_n != 0 && (OnigCodePoint )high < n) {
01761 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
01762 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
01763 int size = (n - high) * 2 * SIZE_CODE_POINT;
01764
01765 if (inc_n > 0) {
01766 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
01767 }
01768 else {
01769 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
01770 }
01771 }
01772
01773 pos = SIZE_CODE_POINT * (1 + low * 2);
01774 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
01775 BBUF_WRITE_CODE_POINT(bbuf, pos, from);
01776 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
01777 n += inc_n;
01778 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01779
01780 return 0;
01781 }
01782
01783 static int
01784 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01785 {
01786 return add_code_range_to_buf0(pbuf, env, from, to, 1);
01787 }
01788
01789 static int
01790 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
01791 {
01792 if (from > to) {
01793 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
01794 return 0;
01795 else
01796 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
01797 }
01798
01799 return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
01800 }
01801
01802 static int
01803 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01804 {
01805 return add_code_range0(pbuf, env, from, to, 1);
01806 }
01807
01808 static int
01809 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
01810 {
01811 int r, i, n;
01812 OnigCodePoint pre, from, *data, to = 0;
01813
01814 *pbuf = (BBuf* )NULL;
01815 if (IS_NULL(bbuf)) {
01816 set_all:
01817 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01818 }
01819
01820 data = (OnigCodePoint* )(bbuf->p);
01821 GET_CODE_POINT(n, data);
01822 data++;
01823 if (n <= 0) goto set_all;
01824
01825 r = 0;
01826 pre = MBCODE_START_POS(enc);
01827 for (i = 0; i < n; i++) {
01828 from = data[i*2];
01829 to = data[i*2+1];
01830 if (pre <= from - 1) {
01831 r = add_code_range_to_buf(pbuf, env, pre, from - 1);
01832 if (r != 0) return r;
01833 }
01834 if (to == ~((OnigCodePoint )0)) break;
01835 pre = to + 1;
01836 }
01837 if (to < ~((OnigCodePoint )0)) {
01838 r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
01839 }
01840 return r;
01841 }
01842
/* Exchange the (buffer, negation flag) pair 1 with pair 2.
   All four arguments must be modifiable lvalues. */
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
  int   swap_not; \
  BBuf *swap_buf; \
  swap_buf = bbuf1; bbuf1 = bbuf2; bbuf2 = swap_buf; \
  swap_not = not1;  not1  = not2;  not2  = swap_not; \
} while (0)
01849
01850 static int
01851 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
01852 BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01853 {
01854 int r;
01855 OnigCodePoint i, n1, *data1;
01856 OnigCodePoint from, to;
01857
01858 *pbuf = (BBuf* )NULL;
01859 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
01860 if (not1 != 0 || not2 != 0)
01861 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01862 return 0;
01863 }
01864
01865 r = 0;
01866 if (IS_NULL(bbuf2))
01867 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01868
01869 if (IS_NULL(bbuf1)) {
01870 if (not1 != 0) {
01871 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01872 }
01873 else {
01874 if (not2 == 0) {
01875 return bbuf_clone(pbuf, bbuf2);
01876 }
01877 else {
01878 return not_code_range_buf(enc, bbuf2, pbuf, env);
01879 }
01880 }
01881 }
01882
01883 if (not1 != 0)
01884 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01885
01886 data1 = (OnigCodePoint* )(bbuf1->p);
01887 GET_CODE_POINT(n1, data1);
01888 data1++;
01889
01890 if (not2 == 0 && not1 == 0) {
01891 r = bbuf_clone(pbuf, bbuf2);
01892 }
01893 else if (not1 == 0) {
01894 r = not_code_range_buf(enc, bbuf2, pbuf, env);
01895 }
01896 if (r != 0) return r;
01897
01898 for (i = 0; i < n1; i++) {
01899 from = data1[i*2];
01900 to = data1[i*2+1];
01901 r = add_code_range_to_buf(pbuf, env, from, to);
01902 if (r != 0) return r;
01903 }
01904 return 0;
01905 }
01906
01907 static int
01908 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
01909 OnigCodePoint* data, int n)
01910 {
01911 int i, r;
01912 OnigCodePoint from2, to2;
01913
01914 for (i = 0; i < n; i++) {
01915 from2 = data[i*2];
01916 to2 = data[i*2+1];
01917 if (from2 < from1) {
01918 if (to2 < from1) continue;
01919 else {
01920 from1 = to2 + 1;
01921 }
01922 }
01923 else if (from2 <= to1) {
01924 if (to2 < to1) {
01925 if (from1 <= from2 - 1) {
01926 r = add_code_range_to_buf(pbuf, env, from1, from2-1);
01927 if (r != 0) return r;
01928 }
01929 from1 = to2 + 1;
01930 }
01931 else {
01932 to1 = from2 - 1;
01933 }
01934 }
01935 else {
01936 from1 = from2;
01937 }
01938 if (from1 > to1) break;
01939 }
01940 if (from1 <= to1) {
01941 r = add_code_range_to_buf(pbuf, env, from1, to1);
01942 if (r != 0) return r;
01943 }
01944 return 0;
01945 }
01946
01947 static int
01948 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01949 {
01950 int r;
01951 OnigCodePoint i, j, n1, n2, *data1, *data2;
01952 OnigCodePoint from, to, from1, to1, from2, to2;
01953
01954 *pbuf = (BBuf* )NULL;
01955 if (IS_NULL(bbuf1)) {
01956 if (not1 != 0 && IS_NOT_NULL(bbuf2))
01957 return bbuf_clone(pbuf, bbuf2);
01958 return 0;
01959 }
01960 else if (IS_NULL(bbuf2)) {
01961 if (not2 != 0)
01962 return bbuf_clone(pbuf, bbuf1);
01963 return 0;
01964 }
01965
01966 if (not1 != 0)
01967 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01968
01969 data1 = (OnigCodePoint* )(bbuf1->p);
01970 data2 = (OnigCodePoint* )(bbuf2->p);
01971 GET_CODE_POINT(n1, data1);
01972 GET_CODE_POINT(n2, data2);
01973 data1++;
01974 data2++;
01975
01976 if (not2 == 0 && not1 == 0) {
01977 for (i = 0; i < n1; i++) {
01978 from1 = data1[i*2];
01979 to1 = data1[i*2+1];
01980 for (j = 0; j < n2; j++) {
01981 from2 = data2[j*2];
01982 to2 = data2[j*2+1];
01983 if (from2 > to1) break;
01984 if (to2 < from1) continue;
01985 from = MAX(from1, from2);
01986 to = MIN(to1, to2);
01987 r = add_code_range_to_buf(pbuf, env, from, to);
01988 if (r != 0) return r;
01989 }
01990 }
01991 }
01992 else if (not1 == 0) {
01993 for (i = 0; i < n1; i++) {
01994 from1 = data1[i*2];
01995 to1 = data1[i*2+1];
01996 r = and_code_range1(pbuf, env, from1, to1, data2, n2);
01997 if (r != 0) return r;
01998 }
01999 }
02000
02001 return 0;
02002 }
02003
02004 static int
02005 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02006 {
02007 OnigEncoding enc = env->enc;
02008 int r, not1, not2;
02009 BBuf *buf1, *buf2, *pbuf;
02010 BitSetRef bsr1, bsr2;
02011 BitSet bs1, bs2;
02012
02013 not1 = IS_NCCLASS_NOT(dest);
02014 bsr1 = dest->bs;
02015 buf1 = dest->mbuf;
02016 not2 = IS_NCCLASS_NOT(cc);
02017 bsr2 = cc->bs;
02018 buf2 = cc->mbuf;
02019
02020 if (not1 != 0) {
02021 bitset_invert_to(bsr1, bs1);
02022 bsr1 = bs1;
02023 }
02024 if (not2 != 0) {
02025 bitset_invert_to(bsr2, bs2);
02026 bsr2 = bs2;
02027 }
02028 bitset_and(bsr1, bsr2);
02029 if (bsr1 != dest->bs) {
02030 bitset_copy(dest->bs, bsr1);
02031 bsr1 = dest->bs;
02032 }
02033 if (not1 != 0) {
02034 bitset_invert(dest->bs);
02035 }
02036
02037 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02038 if (not1 != 0 && not2 != 0) {
02039 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
02040 }
02041 else {
02042 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
02043 if (r == 0 && not1 != 0) {
02044 BBuf *tbuf;
02045 r = not_code_range_buf(enc, pbuf, &tbuf, env);
02046 if (r != 0) {
02047 bbuf_free(pbuf);
02048 return r;
02049 }
02050 bbuf_free(pbuf);
02051 pbuf = tbuf;
02052 }
02053 }
02054 if (r != 0) return r;
02055
02056 dest->mbuf = pbuf;
02057 bbuf_free(buf1);
02058 return r;
02059 }
02060 return 0;
02061 }
02062
02063 static int
02064 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02065 {
02066 OnigEncoding enc = env->enc;
02067 int r, not1, not2;
02068 BBuf *buf1, *buf2, *pbuf;
02069 BitSetRef bsr1, bsr2;
02070 BitSet bs1, bs2;
02071
02072 not1 = IS_NCCLASS_NOT(dest);
02073 bsr1 = dest->bs;
02074 buf1 = dest->mbuf;
02075 not2 = IS_NCCLASS_NOT(cc);
02076 bsr2 = cc->bs;
02077 buf2 = cc->mbuf;
02078
02079 if (not1 != 0) {
02080 bitset_invert_to(bsr1, bs1);
02081 bsr1 = bs1;
02082 }
02083 if (not2 != 0) {
02084 bitset_invert_to(bsr2, bs2);
02085 bsr2 = bs2;
02086 }
02087 bitset_or(bsr1, bsr2);
02088 if (bsr1 != dest->bs) {
02089 bitset_copy(dest->bs, bsr1);
02090 bsr1 = dest->bs;
02091 }
02092 if (not1 != 0) {
02093 bitset_invert(dest->bs);
02094 }
02095
02096 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02097 if (not1 != 0 && not2 != 0) {
02098 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
02099 }
02100 else {
02101 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
02102 if (r == 0 && not1 != 0) {
02103 BBuf *tbuf;
02104 r = not_code_range_buf(enc, pbuf, &tbuf, env);
02105 if (r != 0) {
02106 bbuf_free(pbuf);
02107 return r;
02108 }
02109 bbuf_free(pbuf);
02110 pbuf = tbuf;
02111 }
02112 }
02113 if (r != 0) return r;
02114
02115 dest->mbuf = pbuf;
02116 bbuf_free(buf1);
02117 return r;
02118 }
02119 else
02120 return 0;
02121 }
02122
02123 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
02124
02125 static int
02126 conv_backslash_value(int c, ScanEnv* env)
02127 {
02128 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
02129 switch (c) {
02130 case 'n': return '\n';
02131 case 't': return '\t';
02132 case 'r': return '\r';
02133 case 'f': return '\f';
02134 case 'a': return '\007';
02135 case 'b': return '\010';
02136 case 'e': return '\033';
02137 case 'v':
02138 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
02139 return '\v';
02140 break;
02141
02142 default:
02143 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
02144 UNKNOWN_ESC_WARN(env, c);
02145 break;
02146 }
02147 }
02148 return c;
02149 }
02150
/* The real check is compiled out: the active definition is the macro in
   the #else branch, which treats every node as a valid quantifier target. */
#if 0
static int
is_invalid_quantifier_target(Node* node)
{
  int type = NTYPE(node);

  if (type == NT_ANCHOR) {
    return 1;
  }

  if (type == NT_LIST) {
    do {
      if (! is_invalid_quantifier_target(NCAR(node))) return 0;
    } while (IS_NOT_NULL(node = NCDR(node)));
    return 0;
  }

  if (type == NT_ALT) {
    do {
      if (is_invalid_quantifier_target(NCAR(node))) return 1;
    } while (IS_NOT_NULL(node = NCDR(node)));
  }

  /* NT_ENCLOSE and every other node type are accepted. */
  return 0;
}
#else
#define is_invalid_quantifier_target(node) 0
#endif
02186
02187
02188 static int
02189 popular_quantifier_num(QtfrNode* q)
02190 {
02191 if (q->greedy) {
02192 if (q->lower == 0) {
02193 if (q->upper == 1) return 0;
02194 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
02195 }
02196 else if (q->lower == 1) {
02197 if (IS_REPEAT_INFINITE(q->upper)) return 2;
02198 }
02199 }
02200 else {
02201 if (q->lower == 0) {
02202 if (q->upper == 1) return 3;
02203 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
02204 }
02205 else if (q->lower == 1) {
02206 if (IS_REPEAT_INFINITE(q->upper)) return 5;
02207 }
02208 }
02209 return -1;
02210 }
02211
02212
/* Actions for collapsing a quantifier applied directly to another
   quantifier; see onig_reduce_nested_quantifier() for what each does. */
enum ReduceType {
  RQ_ASIS = 0,  /* keep both quantifiers as they are */
  RQ_DEL  = 1,  /* drop the outer quantifier, keep the inner one */
  RQ_A    = 2,  /* result: {0,inf} greedy */
  RQ_AQ   = 3,  /* result: {0,inf} non-greedy */
  RQ_QQ   = 4,  /* result: {0,1} non-greedy */
  RQ_P_QQ = 5,  /* result: ({1,inf} greedy){0,1} non-greedy */
  RQ_PQ_Q = 6   /* result: ({1,inf} non-greedy){0,1} greedy */
};
02222
02223 static enum ReduceType const ReduceTypeTable[6][6] = {
02224 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS},
02225 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},
02226 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},
02227 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ},
02228 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL},
02229 {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL}
02230 };
02231
02232 extern void
02233 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
02234 {
02235 int pnum, cnum;
02236 QtfrNode *p, *c;
02237
02238 p = NQTFR(pnode);
02239 c = NQTFR(cnode);
02240 pnum = popular_quantifier_num(p);
02241 cnum = popular_quantifier_num(c);
02242 if (pnum < 0 || cnum < 0) return ;
02243
02244 switch(ReduceTypeTable[cnum][pnum]) {
02245 case RQ_DEL:
02246 *pnode = *cnode;
02247 break;
02248 case RQ_A:
02249 p->target = c->target;
02250 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
02251 break;
02252 case RQ_AQ:
02253 p->target = c->target;
02254 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
02255 break;
02256 case RQ_QQ:
02257 p->target = c->target;
02258 p->lower = 0; p->upper = 1; p->greedy = 0;
02259 break;
02260 case RQ_P_QQ:
02261 p->target = cnode;
02262 p->lower = 0; p->upper = 1; p->greedy = 0;
02263 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
02264 return ;
02265 break;
02266 case RQ_PQ_Q:
02267 p->target = cnode;
02268 p->lower = 0; p->upper = 1; p->greedy = 1;
02269 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
02270 return ;
02271 break;
02272 case RQ_ASIS:
02273 p->target = cnode;
02274 return ;
02275 break;
02276 }
02277
02278 c->target = NULL_NODE;
02279 onig_node_free(cnode);
02280 }
02281
02282
/* Token kinds stored in OnigToken.type.  The values are consecutive,
   starting at 0. */
enum TokenSyms {
  TK_EOT      = 0,   /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */

  /* the following are used only inside a character class */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,             /* && */
  TK_CC_CC_OPEN          /* [ */
};
02310
02311 typedef struct {
02312 enum TokenSyms type;
02313 int escaped;
02314 int base;
02315 UChar* backp;
02316 union {
02317 UChar* s;
02318 int c;
02319 OnigCodePoint code;
02320 int anchor;
02321 int subtype;
02322 struct {
02323 int lower;
02324 int upper;
02325 int greedy;
02326 int possessive;
02327 } repeat;
02328 struct {
02329 int num;
02330 int ref1;
02331 int* refs;
02332 int by_name;
02333 #ifdef USE_BACKREF_WITH_LEVEL
02334 int exist_level;
02335 int level;
02336 #endif
02337 } backref;
02338 struct {
02339 UChar* name;
02340 UChar* name_end;
02341 int gnum;
02342 } call;
02343 struct {
02344 int ctype;
02345 int not;
02346 } prop;
02347 } u;
02348 } OnigToken;
02349
02350
02351 static int
02352 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
02353 {
02354 int low, up, syn_allow, non_low = 0;
02355 int r = 0;
02356 OnigCodePoint c;
02357 OnigEncoding enc = env->enc;
02358 UChar* p = *src;
02359 PFETCH_READY;
02360
02361 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
02362
02363 if (PEND) {
02364 if (syn_allow)
02365 return 1;
02366 else
02367 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02368 }
02369
02370 if (! syn_allow) {
02371 c = PPEEK;
02372 if (c == ')' || c == '(' || c == '|') {
02373 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02374 }
02375 }
02376
02377 low = onig_scan_unsigned_number(&p, end, env->enc);
02378 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02379 if (low > ONIG_MAX_REPEAT_NUM)
02380 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02381
02382 if (p == *src) {
02383 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
02384
02385 low = 0;
02386 non_low = 1;
02387 }
02388 else
02389 goto invalid;
02390 }
02391
02392 if (PEND) goto invalid;
02393 PFETCH(c);
02394 if (c == ',') {
02395 UChar* prev = p;
02396 up = onig_scan_unsigned_number(&p, end, env->enc);
02397 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02398 if (up > ONIG_MAX_REPEAT_NUM)
02399 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02400
02401 if (p == prev) {
02402 if (non_low != 0)
02403 goto invalid;
02404 up = REPEAT_INFINITE;
02405 }
02406 }
02407 else {
02408 if (non_low != 0)
02409 goto invalid;
02410
02411 PUNFETCH;
02412 up = low;
02413 r = 2;
02414 }
02415
02416 if (PEND) goto invalid;
02417 PFETCH(c);
02418 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
02419 if (c != MC_ESC(env->syntax)) goto invalid;
02420 PFETCH(c);
02421 }
02422 if (c != '}') goto invalid;
02423
02424 if (!IS_REPEAT_INFINITE(up) && low > up) {
02425 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
02426 }
02427
02428 tok->type = TK_INTERVAL;
02429 tok->u.repeat.lower = low;
02430 tok->u.repeat.upper = up;
02431 *src = p;
02432 return r;
02433
02434 invalid:
02435 if (syn_allow)
02436 return 1;
02437 else
02438 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
02439 }
02440
02441
02442 static int
02443 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
02444 {
02445 int v;
02446 OnigCodePoint c;
02447 OnigEncoding enc = env->enc;
02448 UChar* p = *src;
02449 PFETCH_READY;
02450
02451 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02452
02453 PFETCH(c);
02454 switch (c) {
02455 case 'M':
02456 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
02457 if (PEND) return ONIGERR_END_PATTERN_AT_META;
02458 PFETCH(c);
02459 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
02460 if (PEND) return ONIGERR_END_PATTERN_AT_META;
02461 PFETCH(c);
02462 if (c == MC_ESC(env->syntax)) {
02463 v = fetch_escaped_value(&p, end, env);
02464 if (v < 0) return v;
02465 c = (OnigCodePoint )v;
02466 }
02467 c = ((c & 0xff) | 0x80);
02468 }
02469 else
02470 goto backslash;
02471 break;
02472
02473 case 'C':
02474 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
02475 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02476 PFETCH(c);
02477 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
02478 goto control;
02479 }
02480 else
02481 goto backslash;
02482
02483 case 'c':
02484 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
02485 control:
02486 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02487 PFETCH(c);
02488 if (c == '?') {
02489 c = 0177;
02490 }
02491 else {
02492 if (c == MC_ESC(env->syntax)) {
02493 v = fetch_escaped_value(&p, end, env);
02494 if (v < 0) return v;
02495 c = (OnigCodePoint )v;
02496 }
02497 c &= 0x9f;
02498 }
02499 break;
02500 }
02501
02502
02503 default:
02504 {
02505 backslash:
02506 c = conv_backslash_value(c, env);
02507 }
02508 break;
02509 }
02510
02511 *src = p;
02512 return c;
02513 }
02514
02515 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
02516
02517 static OnigCodePoint
02518 get_name_end_code_point(OnigCodePoint start)
02519 {
02520 switch (start) {
02521 case '<': return (OnigCodePoint )'>'; break;
02522 case '\'': return (OnigCodePoint )'\''; break;
02523 default:
02524 break;
02525 }
02526
02527 return (OnigCodePoint )0;
02528 }
02529
02530 #ifdef USE_NAMED_GROUP
#ifdef USE_BACKREF_WITH_LEVEL
/*
   Parse a back-reference name with an optional nest level:
     \k<name+n>, \k<name-n>
     \k<num+n>,  \k<num-n>
     \k<-num+n>, \k<-num-n>

   start_code is the opening delimiter ('<' or '\''); *src points just
   after it.  On success *rname_end is set to the end of the name, *src is
   advanced past the closing delimiter, *rback_num receives the signed
   group number for numeric references (0 for a name) and *rlevel the
   signed level when one is present.
   Returns 1 when a level was given, 0 when not, and a negative ONIGERR_*
   code on error (the offending text is recorded through
   onig_scan_env_set_error_string() except for the early returns).
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel)
{
  int r, sign, is_num, exist_level;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  is_num = exist_level = 0;
  sign = 1;
  pnum_head = *src;

  end_code = get_name_end_code_point(start_code);

  name_end = end;
  r = 0;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    PFETCH(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
      is_num = 1;
    }
    else if (c == '-') {
      is_num = 2;   /* a sign was seen but no digit yet */
      sign = -1;
      pnum_head = p;
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (is_num != 0) {
      if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
        is_num = 1;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        is_num = 0;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      /* The sign may be the last byte of the pattern: never fetch
         past `end`. */
      if (PEND) goto err;
      PFETCH(c);
      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
      PUNFETCH;
      level = onig_scan_unsigned_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      /* Likewise, the level digits may run up to the end of the
         pattern without a closing delimiter. */
      if (PEND) goto err;
      PFETCH(c);
      if (c == end_code)
        goto end;
    }

  err:
    r = ONIGERR_INVALID_GROUP_NAME;
    name_end = end;
  }

 end:
  if (r == 0) {
    if (is_num != 0) {
      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) goto err;

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return (exist_level ? 1 : 0);
  }
  else {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
#endif /* USE_BACKREF_WITH_LEVEL */
02646
02647
02648
02649
02650
02651 static int
02652 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02653 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02654 {
02655 int r, is_num, sign;
02656 OnigCodePoint end_code;
02657 OnigCodePoint c = 0;
02658 OnigEncoding enc = env->enc;
02659 UChar *name_end;
02660 UChar *pnum_head;
02661 UChar *p = *src;
02662 PFETCH_READY;
02663
02664 *rback_num = 0;
02665
02666 end_code = get_name_end_code_point(start_code);
02667
02668 name_end = end;
02669 pnum_head = *src;
02670 r = 0;
02671 is_num = 0;
02672 sign = 1;
02673 if (PEND) {
02674 return ONIGERR_EMPTY_GROUP_NAME;
02675 }
02676 else {
02677 PFETCH(c);
02678 if (c == end_code)
02679 return ONIGERR_EMPTY_GROUP_NAME;
02680
02681 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02682 if (ref == 1)
02683 is_num = 1;
02684 else {
02685 r = ONIGERR_INVALID_GROUP_NAME;
02686 is_num = 0;
02687 }
02688 }
02689 else if (c == '-') {
02690 if (ref == 1) {
02691 is_num = 2;
02692 sign = -1;
02693 pnum_head = p;
02694 }
02695 else {
02696 r = ONIGERR_INVALID_GROUP_NAME;
02697 is_num = 0;
02698 }
02699 }
02700 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02701 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02702 }
02703 }
02704
02705 if (r == 0) {
02706 while (!PEND) {
02707 name_end = p;
02708 PFETCH(c);
02709 if (c == end_code || c == ')') {
02710 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
02711 break;
02712 }
02713
02714 if (is_num != 0) {
02715 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02716 is_num = 1;
02717 }
02718 else {
02719 if (!ONIGENC_IS_CODE_WORD(enc, c))
02720 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02721 else
02722 r = ONIGERR_INVALID_GROUP_NAME;
02723
02724 is_num = 0;
02725 }
02726 }
02727 else {
02728 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02729 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02730 }
02731 }
02732 }
02733
02734 if (c != end_code) {
02735 r = ONIGERR_INVALID_GROUP_NAME;
02736 name_end = end;
02737 }
02738
02739 if (is_num != 0) {
02740 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02741 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02742 else if (*rback_num == 0) {
02743 r = ONIGERR_INVALID_GROUP_NAME;
02744 goto err;
02745 }
02746
02747 *rback_num *= sign;
02748 }
02749
02750 *rname_end = name_end;
02751 *src = p;
02752 return 0;
02753 }
02754 else {
02755 while (!PEND) {
02756 name_end = p;
02757 PFETCH(c);
02758 if (c == end_code || c == ')')
02759 break;
02760 }
02761 if (PEND)
02762 name_end = end;
02763
02764 err:
02765 onig_scan_env_set_error_string(env, r, *src, name_end);
02766 return r;
02767 }
02768 }
02769 #else
02770 static int
02771 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02772 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02773 {
02774 int r, is_num, sign;
02775 OnigCodePoint end_code;
02776 OnigCodePoint c = 0;
02777 UChar *name_end;
02778 OnigEncoding enc = env->enc;
02779 UChar *pnum_head;
02780 UChar *p = *src;
02781 PFETCH_READY;
02782
02783 *rback_num = 0;
02784
02785 end_code = get_name_end_code_point(start_code);
02786
02787 *rname_end = name_end = end;
02788 r = 0;
02789 pnum_head = *src;
02790 is_num = 0;
02791 sign = 1;
02792
02793 if (PEND) {
02794 return ONIGERR_EMPTY_GROUP_NAME;
02795 }
02796 else {
02797 PFETCH(c);
02798 if (c == end_code)
02799 return ONIGERR_EMPTY_GROUP_NAME;
02800
02801 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02802 is_num = 1;
02803 }
02804 else if (c == '-') {
02805 is_num = 2;
02806 sign = -1;
02807 pnum_head = p;
02808 }
02809 else {
02810 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02811 }
02812 }
02813
02814 while (!PEND) {
02815 name_end = p;
02816
02817 PFETCH(c);
02818 if (c == end_code || c == ')') break;
02819 if (! ONIGENC_IS_CODE_DIGIT(enc, c))
02820 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02821 }
02822 if (r == 0 && c != end_code) {
02823 r = ONIGERR_INVALID_GROUP_NAME;
02824 name_end = end;
02825 }
02826
02827 if (r == 0) {
02828 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02829 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02830 else if (*rback_num == 0) {
02831 r = ONIGERR_INVALID_GROUP_NAME;
02832 goto err;
02833 }
02834 *rback_num *= sign;
02835
02836 *rname_end = name_end;
02837 *src = p;
02838 return 0;
02839 }
02840 else {
02841 err:
02842 onig_scan_env_set_error_string(env, r, *src, name_end);
02843 return r;
02844 }
02845 }
02846 #endif
02847
02848 void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
02849 UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
02850
02851 static void
02852 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
02853 {
02854 va_list args;
02855 UChar buf[WARN_BUFSIZE];
02856 va_start(args, fmt);
02857 onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
02858 env->pattern, env->pattern_end,
02859 (const UChar *)fmt, args);
02860 va_end(args);
02861 if (env->sourcefile == NULL)
02862 rb_warn("%s", (char *)buf);
02863 else
02864 rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
02865 }
02866
02867 static void
02868 CC_ESC_WARN(ScanEnv *env, UChar *c)
02869 {
02870 if (onig_warn == onig_null_warn) return ;
02871
02872 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
02873 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
02874 onig_syntax_warn(env, "character class has '%s' without escape", c);
02875 }
02876 }
02877
02878 static void
02879 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
02880 {
02881 if (onig_warn == onig_null_warn) return ;
02882
02883 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
02884 onig_syntax_warn(env, "regular expression has '%s' without escape", c);
02885 }
02886 }
02887
02888 static void
02889 CC_DUP_WARN(ScanEnv *env)
02890 {
02891 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02892
02893 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
02894 !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
02895 (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
02896 onig_syntax_warn(env, "character class has duplicated range");
02897 }
02898 }
02899
02900 static void
02901 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
02902 {
02903 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02904 onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
02905 }
02906
02907 static UChar*
02908 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
02909 UChar **next, OnigEncoding enc)
02910 {
02911 int i;
02912 OnigCodePoint x;
02913 UChar *q;
02914 UChar *p = from;
02915
02916 while (p < to) {
02917 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02918 q = p + enclen(enc, p, to);
02919 if (x == s[0]) {
02920 for (i = 1; i < n && q < to; i++) {
02921 x = ONIGENC_MBC_TO_CODE(enc, q, to);
02922 if (x != s[i]) break;
02923 q += enclen(enc, q, to);
02924 }
02925 if (i >= n) {
02926 if (IS_NOT_NULL(next))
02927 *next = q;
02928 return p;
02929 }
02930 }
02931 p = q;
02932 }
02933 return NULL_UCHARP;
02934 }
02935
02936 static int
02937 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
02938 OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
02939 {
02940 int i, in_esc;
02941 OnigCodePoint x;
02942 UChar *q;
02943 UChar *p = from;
02944
02945 in_esc = 0;
02946 while (p < to) {
02947 if (in_esc) {
02948 in_esc = 0;
02949 p += enclen(enc, p, to);
02950 }
02951 else {
02952 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02953 q = p + enclen(enc, p, to);
02954 if (x == s[0]) {
02955 for (i = 1; i < n && q < to; i++) {
02956 x = ONIGENC_MBC_TO_CODE(enc, q, to);
02957 if (x != s[i]) break;
02958 q += enclen(enc, q, to);
02959 }
02960 if (i >= n) return 1;
02961 p += enclen(enc, p, to);
02962 }
02963 else {
02964 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02965 if (x == bad) return 0;
02966 else if (x == MC_ESC(syn)) in_esc = 1;
02967 p = q;
02968 }
02969 }
02970 }
02971 return 0;
02972 }
02973
02974 static int
02975 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
02976 {
02977 int num;
02978 OnigCodePoint c, c2;
02979 const OnigSyntaxType* syn = env->syntax;
02980 OnigEncoding enc = env->enc;
02981 UChar* prev;
02982 UChar* p = *src;
02983 PFETCH_READY;
02984
02985 if (PEND) {
02986 tok->type = TK_EOT;
02987 return tok->type;
02988 }
02989
02990 PFETCH(c);
02991 tok->type = TK_CHAR;
02992 tok->base = 0;
02993 tok->u.c = c;
02994 tok->escaped = 0;
02995
02996 if (c == ']') {
02997 tok->type = TK_CC_CLOSE;
02998 }
02999 else if (c == '-') {
03000 tok->type = TK_CC_RANGE;
03001 }
03002 else if (c == MC_ESC(syn)) {
03003 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
03004 goto end;
03005
03006 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03007
03008 PFETCH(c);
03009 tok->escaped = 1;
03010 tok->u.c = c;
03011 switch (c) {
03012 case 'w':
03013 tok->type = TK_CHAR_TYPE;
03014 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03015 tok->u.prop.not = 0;
03016 break;
03017 case 'W':
03018 tok->type = TK_CHAR_TYPE;
03019 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03020 tok->u.prop.not = 1;
03021 break;
03022 case 'd':
03023 tok->type = TK_CHAR_TYPE;
03024 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03025 tok->u.prop.not = 0;
03026 break;
03027 case 'D':
03028 tok->type = TK_CHAR_TYPE;
03029 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03030 tok->u.prop.not = 1;
03031 break;
03032 case 's':
03033 tok->type = TK_CHAR_TYPE;
03034 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03035 tok->u.prop.not = 0;
03036 break;
03037 case 'S':
03038 tok->type = TK_CHAR_TYPE;
03039 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03040 tok->u.prop.not = 1;
03041 break;
03042 case 'h':
03043 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03044 tok->type = TK_CHAR_TYPE;
03045 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03046 tok->u.prop.not = 0;
03047 break;
03048 case 'H':
03049 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03050 tok->type = TK_CHAR_TYPE;
03051 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03052 tok->u.prop.not = 1;
03053 break;
03054
03055 case 'p':
03056 case 'P':
03057 c2 = PPEEK;
03058 if (c2 == '{' &&
03059 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03060 PINC;
03061 tok->type = TK_CHAR_PROPERTY;
03062 tok->u.prop.not = (c == 'P' ? 1 : 0);
03063
03064 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03065 PFETCH(c2);
03066 if (c2 == '^') {
03067 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03068 }
03069 else
03070 PUNFETCH;
03071 }
03072 }
03073 else {
03074 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03075 }
03076 break;
03077
03078 case 'x':
03079 if (PEND) break;
03080
03081 prev = p;
03082 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03083 PINC;
03084 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03085 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03086 if (!PEND) {
03087 c2 = PPEEK;
03088 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
03089 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03090 }
03091
03092 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
03093 PINC;
03094 tok->type = TK_CODE_POINT;
03095 tok->base = 16;
03096 tok->u.code = (OnigCodePoint )num;
03097 }
03098 else {
03099
03100 p = prev;
03101 }
03102 }
03103 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03104 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03105 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03106 if (p == prev) {
03107 num = 0;
03108 }
03109 tok->type = TK_RAW_BYTE;
03110 tok->base = 16;
03111 tok->u.c = num;
03112 }
03113 break;
03114
03115 case 'u':
03116 if (PEND) break;
03117
03118 prev = p;
03119 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03120 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03121 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03122 if (p == prev) {
03123 num = 0;
03124 }
03125 tok->type = TK_CODE_POINT;
03126 tok->base = 16;
03127 tok->u.code = (OnigCodePoint )num;
03128 }
03129 break;
03130
03131 case '0':
03132 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
03133 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03134 PUNFETCH;
03135 prev = p;
03136 num = scan_unsigned_octal_number(&p, end, 3, enc);
03137 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03138 if (p == prev) {
03139 num = 0;
03140 }
03141 tok->type = TK_RAW_BYTE;
03142 tok->base = 8;
03143 tok->u.c = num;
03144 }
03145 break;
03146
03147 default:
03148 PUNFETCH;
03149 num = fetch_escaped_value(&p, end, env);
03150 if (num < 0) return num;
03151 if (tok->u.c != num) {
03152 tok->u.code = (OnigCodePoint )num;
03153 tok->type = TK_CODE_POINT;
03154 }
03155 break;
03156 }
03157 }
03158 else if (c == '[') {
03159 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
03160 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
03161 tok->backp = p;
03162 PINC;
03163 if (str_exist_check_with_esc(send, 2, p, end,
03164 (OnigCodePoint )']', enc, syn)) {
03165 tok->type = TK_POSIX_BRACKET_OPEN;
03166 }
03167 else {
03168 PUNFETCH;
03169 goto cc_in_cc;
03170 }
03171 }
03172 else {
03173 cc_in_cc:
03174 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
03175 tok->type = TK_CC_CC_OPEN;
03176 }
03177 else {
03178 CC_ESC_WARN(env, (UChar* )"[");
03179 }
03180 }
03181 }
03182 else if (c == '&') {
03183 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
03184 !PEND && (PPEEK_IS('&'))) {
03185 PINC;
03186 tok->type = TK_CC_AND;
03187 }
03188 }
03189
03190 end:
03191 *src = p;
03192 return tok->type;
03193 }
03194
03195 static int
03196 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
03197 {
03198 int r, num;
03199 OnigCodePoint c;
03200 OnigEncoding enc = env->enc;
03201 const OnigSyntaxType* syn = env->syntax;
03202 UChar* prev;
03203 UChar* p = *src;
03204 PFETCH_READY;
03205
03206 start:
03207 if (PEND) {
03208 tok->type = TK_EOT;
03209 return tok->type;
03210 }
03211
03212 tok->type = TK_STRING;
03213 tok->base = 0;
03214 tok->backp = p;
03215
03216 PFETCH(c);
03217 if (IS_MC_ESC_CODE(c, syn)) {
03218 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03219
03220 tok->backp = p;
03221 PFETCH(c);
03222
03223 tok->u.c = c;
03224 tok->escaped = 1;
03225 switch (c) {
03226 case '*':
03227 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
03228 tok->type = TK_OP_REPEAT;
03229 tok->u.repeat.lower = 0;
03230 tok->u.repeat.upper = REPEAT_INFINITE;
03231 goto greedy_check;
03232 break;
03233
03234 case '+':
03235 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
03236 tok->type = TK_OP_REPEAT;
03237 tok->u.repeat.lower = 1;
03238 tok->u.repeat.upper = REPEAT_INFINITE;
03239 goto greedy_check;
03240 break;
03241
03242 case '?':
03243 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
03244 tok->type = TK_OP_REPEAT;
03245 tok->u.repeat.lower = 0;
03246 tok->u.repeat.upper = 1;
03247 greedy_check:
03248 if (!PEND && PPEEK_IS('?') &&
03249 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
03250 PFETCH(c);
03251 tok->u.repeat.greedy = 0;
03252 tok->u.repeat.possessive = 0;
03253 }
03254 else {
03255 possessive_check:
03256 if (!PEND && PPEEK_IS('+') &&
03257 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
03258 tok->type != TK_INTERVAL) ||
03259 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
03260 tok->type == TK_INTERVAL))) {
03261 PFETCH(c);
03262 tok->u.repeat.greedy = 1;
03263 tok->u.repeat.possessive = 1;
03264 }
03265 else {
03266 tok->u.repeat.greedy = 1;
03267 tok->u.repeat.possessive = 0;
03268 }
03269 }
03270 break;
03271
03272 case '{':
03273 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
03274 r = fetch_range_quantifier(&p, end, tok, env);
03275 if (r < 0) return r;
03276 if (r == 0) goto greedy_check;
03277 else if (r == 2) {
03278 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03279 goto possessive_check;
03280
03281 goto greedy_check;
03282 }
03283
03284 break;
03285
03286 case '|':
03287 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
03288 tok->type = TK_ALT;
03289 break;
03290
03291 case '(':
03292 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03293 tok->type = TK_SUBEXP_OPEN;
03294 break;
03295
03296 case ')':
03297 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03298 tok->type = TK_SUBEXP_CLOSE;
03299 break;
03300
03301 case 'w':
03302 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03303 tok->type = TK_CHAR_TYPE;
03304 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03305 tok->u.prop.not = 0;
03306 break;
03307
03308 case 'W':
03309 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03310 tok->type = TK_CHAR_TYPE;
03311 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03312 tok->u.prop.not = 1;
03313 break;
03314
03315 case 'b':
03316 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03317 tok->type = TK_ANCHOR;
03318 tok->u.anchor = ANCHOR_WORD_BOUND;
03319 break;
03320
03321 case 'B':
03322 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03323 tok->type = TK_ANCHOR;
03324 tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
03325 break;
03326
03327 #ifdef USE_WORD_BEGIN_END
03328 case '<':
03329 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03330 tok->type = TK_ANCHOR;
03331 tok->u.anchor = ANCHOR_WORD_BEGIN;
03332 break;
03333
03334 case '>':
03335 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03336 tok->type = TK_ANCHOR;
03337 tok->u.anchor = ANCHOR_WORD_END;
03338 break;
03339 #endif
03340
03341 case 's':
03342 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03343 tok->type = TK_CHAR_TYPE;
03344 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03345 tok->u.prop.not = 0;
03346 break;
03347
03348 case 'S':
03349 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03350 tok->type = TK_CHAR_TYPE;
03351 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03352 tok->u.prop.not = 1;
03353 break;
03354
03355 case 'd':
03356 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03357 tok->type = TK_CHAR_TYPE;
03358 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03359 tok->u.prop.not = 0;
03360 break;
03361
03362 case 'D':
03363 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03364 tok->type = TK_CHAR_TYPE;
03365 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03366 tok->u.prop.not = 1;
03367 break;
03368
03369 case 'h':
03370 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03371 tok->type = TK_CHAR_TYPE;
03372 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03373 tok->u.prop.not = 0;
03374 break;
03375
03376 case 'H':
03377 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03378 tok->type = TK_CHAR_TYPE;
03379 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03380 tok->u.prop.not = 1;
03381 break;
03382
03383 case 'A':
03384 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03385 begin_buf:
03386 tok->type = TK_ANCHOR;
03387 tok->u.subtype = ANCHOR_BEGIN_BUF;
03388 break;
03389
03390 case 'Z':
03391 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03392 tok->type = TK_ANCHOR;
03393 tok->u.subtype = ANCHOR_SEMI_END_BUF;
03394 break;
03395
03396 case 'z':
03397 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03398 end_buf:
03399 tok->type = TK_ANCHOR;
03400 tok->u.subtype = ANCHOR_END_BUF;
03401 break;
03402
03403 case 'G':
03404 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
03405 tok->type = TK_ANCHOR;
03406 tok->u.subtype = ANCHOR_BEGIN_POSITION;
03407 break;
03408
03409 case '`':
03410 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03411 goto begin_buf;
03412 break;
03413
03414 case '\'':
03415 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03416 goto end_buf;
03417 break;
03418
03419 case 'x':
03420 if (PEND) break;
03421
03422 prev = p;
03423 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03424 PINC;
03425 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03426 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03427 if (!PEND) {
03428 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
03429 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03430 }
03431
03432 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
03433 PINC;
03434 tok->type = TK_CODE_POINT;
03435 tok->u.code = (OnigCodePoint )num;
03436 }
03437 else {
03438
03439 p = prev;
03440 }
03441 }
03442 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03443 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03444 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03445 if (p == prev) {
03446 num = 0;
03447 }
03448 tok->type = TK_RAW_BYTE;
03449 tok->base = 16;
03450 tok->u.c = num;
03451 }
03452 break;
03453
03454 case 'u':
03455 if (PEND) break;
03456
03457 prev = p;
03458 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03459 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03460 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03461 if (p == prev) {
03462 num = 0;
03463 }
03464 tok->type = TK_CODE_POINT;
03465 tok->base = 16;
03466 tok->u.code = (OnigCodePoint )num;
03467 }
03468 break;
03469
03470 case '1': case '2': case '3': case '4':
03471 case '5': case '6': case '7': case '8': case '9':
03472 PUNFETCH;
03473 prev = p;
03474 num = onig_scan_unsigned_number(&p, end, enc);
03475 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
03476 goto skip_backref;
03477 }
03478
03479 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
03480 (num <= env->num_mem || num <= 9)) {
03481 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03482 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
03483 return ONIGERR_INVALID_BACKREF;
03484 }
03485
03486 tok->type = TK_BACKREF;
03487 tok->u.backref.num = 1;
03488 tok->u.backref.ref1 = num;
03489 tok->u.backref.by_name = 0;
03490 #ifdef USE_BACKREF_WITH_LEVEL
03491 tok->u.backref.exist_level = 0;
03492 #endif
03493 break;
03494 }
03495
03496 skip_backref:
03497 if (c == '8' || c == '9') {
03498
03499 p = prev; PINC;
03500 break;
03501 }
03502
03503 p = prev;
03504
03505 case '0':
03506 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03507 prev = p;
03508 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
03509 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03510 if (p == prev) {
03511 num = 0;
03512 }
03513 tok->type = TK_RAW_BYTE;
03514 tok->base = 8;
03515 tok->u.c = num;
03516 }
03517 else if (c != '0') {
03518 PINC;
03519 }
03520 break;
03521
03522 #ifdef USE_NAMED_GROUP
03523 case 'k':
03524 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
03525 PFETCH(c);
03526 if (c == '<' || c == '\'') {
03527 UChar* name_end;
03528 int* backs;
03529 int back_num;
03530
03531 prev = p;
03532
03533 #ifdef USE_BACKREF_WITH_LEVEL
03534 name_end = NULL_UCHARP;
03535 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
03536 env, &back_num, &tok->u.backref.level);
03537 if (r == 1) tok->u.backref.exist_level = 1;
03538 else tok->u.backref.exist_level = 0;
03539 #else
03540 r = fetch_name(&p, end, &name_end, env, &back_num, 1);
03541 #endif
03542 if (r < 0) return r;
03543
03544 if (back_num != 0) {
03545 if (back_num < 0) {
03546 back_num = BACKREF_REL_TO_ABS(back_num, env);
03547 if (back_num <= 0)
03548 return ONIGERR_INVALID_BACKREF;
03549 }
03550
03551 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03552 if (back_num > env->num_mem ||
03553 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
03554 return ONIGERR_INVALID_BACKREF;
03555 }
03556 tok->type = TK_BACKREF;
03557 tok->u.backref.by_name = 0;
03558 tok->u.backref.num = 1;
03559 tok->u.backref.ref1 = back_num;
03560 }
03561 else {
03562 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
03563 if (num <= 0) {
03564 onig_scan_env_set_error_string(env,
03565 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
03566 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03567 }
03568 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03569 int i;
03570 for (i = 0; i < num; i++) {
03571 if (backs[i] > env->num_mem ||
03572 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
03573 return ONIGERR_INVALID_BACKREF;
03574 }
03575 }
03576
03577 tok->type = TK_BACKREF;
03578 tok->u.backref.by_name = 1;
03579 if (num == 1) {
03580 tok->u.backref.num = 1;
03581 tok->u.backref.ref1 = backs[0];
03582 }
03583 else {
03584 tok->u.backref.num = num;
03585 tok->u.backref.refs = backs;
03586 }
03587 }
03588 }
03589 else {
03590 PUNFETCH;
03591 onig_syntax_warn(env, "invalid back reference");
03592 }
03593 }
03594 break;
03595 #endif
03596
03597 #ifdef USE_SUBEXP_CALL
03598 case 'g':
03599 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
03600 PFETCH(c);
03601 if (c == '<' || c == '\'') {
03602 int gnum;
03603 UChar* name_end;
03604
03605 prev = p;
03606 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
03607 if (r < 0) return r;
03608
03609 tok->type = TK_CALL;
03610 tok->u.call.name = prev;
03611 tok->u.call.name_end = name_end;
03612 tok->u.call.gnum = gnum;
03613 }
03614 else {
03615 onig_syntax_warn(env, "invalid subexp call");
03616 PUNFETCH;
03617 }
03618 }
03619 break;
03620 #endif
03621
03622 case 'Q':
03623 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
03624 tok->type = TK_QUOTE_OPEN;
03625 }
03626 break;
03627
03628 case 'p':
03629 case 'P':
03630 if (PPEEK_IS('{') &&
03631 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03632 PINC;
03633 tok->type = TK_CHAR_PROPERTY;
03634 tok->u.prop.not = (c == 'P' ? 1 : 0);
03635
03636 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03637 PFETCH(c);
03638 if (c == '^') {
03639 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03640 }
03641 else
03642 PUNFETCH;
03643 }
03644 }
03645 else {
03646 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03647 }
03648 break;
03649
03650 default:
03651 PUNFETCH;
03652 num = fetch_escaped_value(&p, end, env);
03653 if (num < 0) return num;
03654
03655 if (tok->u.c != num) {
03656 tok->type = TK_CODE_POINT;
03657 tok->u.code = (OnigCodePoint )num;
03658 }
03659 else {
03660 p = tok->backp + enclen(enc, tok->backp, end);
03661 }
03662 break;
03663 }
03664 }
03665 else {
03666 tok->u.c = c;
03667 tok->escaped = 0;
03668
03669 #ifdef USE_VARIABLE_META_CHARS
03670 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
03671 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
03672 if (c == MC_ANYCHAR(syn))
03673 goto any_char;
03674 else if (c == MC_ANYTIME(syn))
03675 goto anytime;
03676 else if (c == MC_ZERO_OR_ONE_TIME(syn))
03677 goto zero_or_one_time;
03678 else if (c == MC_ONE_OR_MORE_TIME(syn))
03679 goto one_or_more_time;
03680 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
03681 tok->type = TK_ANYCHAR_ANYTIME;
03682 goto out;
03683 }
03684 }
03685 #endif
03686
03687 switch (c) {
03688 case '.':
03689 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
03690 #ifdef USE_VARIABLE_META_CHARS
03691 any_char:
03692 #endif
03693 tok->type = TK_ANYCHAR;
03694 break;
03695
03696 case '*':
03697 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
03698 #ifdef USE_VARIABLE_META_CHARS
03699 anytime:
03700 #endif
03701 tok->type = TK_OP_REPEAT;
03702 tok->u.repeat.lower = 0;
03703 tok->u.repeat.upper = REPEAT_INFINITE;
03704 goto greedy_check;
03705 break;
03706
03707 case '+':
03708 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
03709 #ifdef USE_VARIABLE_META_CHARS
03710 one_or_more_time:
03711 #endif
03712 tok->type = TK_OP_REPEAT;
03713 tok->u.repeat.lower = 1;
03714 tok->u.repeat.upper = REPEAT_INFINITE;
03715 goto greedy_check;
03716 break;
03717
03718 case '?':
03719 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
03720 #ifdef USE_VARIABLE_META_CHARS
03721 zero_or_one_time:
03722 #endif
03723 tok->type = TK_OP_REPEAT;
03724 tok->u.repeat.lower = 0;
03725 tok->u.repeat.upper = 1;
03726 goto greedy_check;
03727 break;
03728
03729 case '{':
03730 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
03731 r = fetch_range_quantifier(&p, end, tok, env);
03732 if (r < 0) return r;
03733 if (r == 0) goto greedy_check;
03734 else if (r == 2) {
03735 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03736 goto possessive_check;
03737
03738 goto greedy_check;
03739 }
03740
03741 break;
03742
03743 case '|':
03744 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
03745 tok->type = TK_ALT;
03746 break;
03747
03748 case '(':
03749 if (PPEEK_IS('?') &&
03750 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
03751 PINC;
03752 if (PPEEK_IS('#')) {
03753 PFETCH(c);
03754 while (1) {
03755 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
03756 PFETCH(c);
03757 if (c == MC_ESC(syn)) {
03758 if (!PEND) PFETCH(c);
03759 }
03760 else {
03761 if (c == ')') break;
03762 }
03763 }
03764 goto start;
03765 }
03766 PUNFETCH;
03767 }
03768
03769 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03770 tok->type = TK_SUBEXP_OPEN;
03771 break;
03772
03773 case ')':
03774 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03775 tok->type = TK_SUBEXP_CLOSE;
03776 break;
03777
03778 case '^':
03779 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03780 tok->type = TK_ANCHOR;
03781 tok->u.subtype = (IS_SINGLELINE(env->option)
03782 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
03783 break;
03784
03785 case '$':
03786 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03787 tok->type = TK_ANCHOR;
03788 tok->u.subtype = (IS_SINGLELINE(env->option)
03789 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
03790 break;
03791
03792 case '[':
03793 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
03794 tok->type = TK_CC_OPEN;
03795 break;
03796
03797 case ']':
03798 if (*src > env->pattern)
03799 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
03800 break;
03801
03802 case '#':
03803 if (IS_EXTEND(env->option)) {
03804 while (!PEND) {
03805 PFETCH(c);
03806 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
03807 break;
03808 }
03809 goto start;
03810 break;
03811 }
03812 break;
03813
03814 case ' ': case '\t': case '\n': case '\r': case '\f':
03815 if (IS_EXTEND(env->option))
03816 goto start;
03817 break;
03818
03819 default:
03820
03821 break;
03822 }
03823 }
03824
03825 #ifdef USE_VARIABLE_META_CHARS
03826 out:
03827 #endif
03828 *src = p;
03829 return tok->type;
03830 }
03831
03832 static int
03833 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
03834 ScanEnv* env,
03835 OnigCodePoint sb_out, const OnigCodePoint mbr[])
03836 {
03837 int i, r;
03838 OnigCodePoint j;
03839
03840 int n = ONIGENC_CODE_RANGE_NUM(mbr);
03841
03842 if (not == 0) {
03843 for (i = 0; i < n; i++) {
03844 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
03845 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
03846 if (j >= sb_out) {
03847 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03848 r = add_code_range_to_buf(&(cc->mbuf), env, j,
03849 ONIGENC_CODE_RANGE_TO(mbr, i));
03850 if (r != 0) return r;
03851 i++;
03852 }
03853
03854 goto sb_end;
03855 }
03856 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03857 }
03858 }
03859
03860 sb_end:
03861 for ( ; i < n; i++) {
03862 r = add_code_range_to_buf(&(cc->mbuf), env,
03863 ONIGENC_CODE_RANGE_FROM(mbr, i),
03864 ONIGENC_CODE_RANGE_TO(mbr, i));
03865 if (r != 0) return r;
03866 }
03867 }
03868 else {
03869 OnigCodePoint prev = 0;
03870
03871 for (i = 0; i < n; i++) {
03872 for (j = prev;
03873 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
03874 if (j >= sb_out) {
03875 goto sb_end2;
03876 }
03877 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03878 }
03879 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03880 }
03881 for (j = prev; j < sb_out; j++) {
03882 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03883 }
03884
03885 sb_end2:
03886 prev = sb_out;
03887
03888 for (i = 0; i < n; i++) {
03889 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03890 r = add_code_range_to_buf(&(cc->mbuf), env, prev,
03891 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
03892 if (r != 0) return r;
03893 }
03894 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03895 }
03896 if (prev < 0x7fffffff) {
03897 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
03898 if (r != 0) return r;
03899 }
03900 }
03901
03902 return 0;
03903 }
03904
03905 static int
03906 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
03907 {
03908 int c, r;
03909 const OnigCodePoint *ranges;
03910 OnigCodePoint sb_out;
03911 OnigEncoding enc = env->enc;
03912
03913 switch (ctype) {
03914 case ONIGENC_CTYPE_D:
03915 case ONIGENC_CTYPE_S:
03916 case ONIGENC_CTYPE_W:
03917 ctype ^= ONIGENC_CTYPE_SPECIAL_MASK;
03918 if (not != 0) {
03919 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03920 if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03921 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03922 }
03923 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03924 }
03925 else {
03926 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03927 if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03928 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03929 }
03930 }
03931 return 0;
03932 break;
03933 }
03934
03935 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
03936 if (r == 0) {
03937 return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
03938 }
03939 else if (r != ONIG_NO_SUPPORT_CONFIG) {
03940 return r;
03941 }
03942
03943 r = 0;
03944 switch (ctype) {
03945 case ONIGENC_CTYPE_ALPHA:
03946 case ONIGENC_CTYPE_BLANK:
03947 case ONIGENC_CTYPE_CNTRL:
03948 case ONIGENC_CTYPE_DIGIT:
03949 case ONIGENC_CTYPE_LOWER:
03950 case ONIGENC_CTYPE_PUNCT:
03951 case ONIGENC_CTYPE_SPACE:
03952 case ONIGENC_CTYPE_UPPER:
03953 case ONIGENC_CTYPE_XDIGIT:
03954 case ONIGENC_CTYPE_ASCII:
03955 case ONIGENC_CTYPE_ALNUM:
03956 if (not != 0) {
03957 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03958 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03959 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03960 }
03961 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03962 }
03963 else {
03964 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03965 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03966 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03967 }
03968 }
03969 break;
03970
03971 case ONIGENC_CTYPE_GRAPH:
03972 case ONIGENC_CTYPE_PRINT:
03973 if (not != 0) {
03974 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03975 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03976 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03977 }
03978 }
03979 else {
03980 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03981 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03982 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03983 }
03984 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03985 }
03986 break;
03987
03988 case ONIGENC_CTYPE_WORD:
03989 if (not == 0) {
03990 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03991 if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
03992 }
03993 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03994 }
03995 else {
03996 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03997 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
03998 && ! ONIGENC_IS_CODE_WORD(enc, c))
03999 BITSET_SET_BIT_CHKDUP(cc->bs, c);
04000 }
04001 }
04002 break;
04003
04004 default:
04005 return ONIGERR_PARSER_BUG;
04006 break;
04007 }
04008
04009 return r;
04010 }
04011
04012 static int
04013 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
04014 {
04015 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
04016 #define POSIX_BRACKET_NAME_MIN_LEN 4
04017
04018 static const PosixBracketEntryType PBS[] = {
04019 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
04020 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
04021 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
04022 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
04023 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
04024 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
04025 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
04026 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
04027 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
04028 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
04029 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
04030 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
04031 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
04032 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
04033 { (UChar* )NULL, -1, 0 }
04034 };
04035
04036 const PosixBracketEntryType *pb;
04037 int not, i, r;
04038 OnigCodePoint c;
04039 OnigEncoding enc = env->enc;
04040 UChar *p = *src;
04041 PFETCH_READY;
04042
04043 if (PPEEK_IS('^')) {
04044 PINC;
04045 not = 1;
04046 }
04047 else
04048 not = 0;
04049
04050 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
04051 goto not_posix_bracket;
04052
04053 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
04054 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
04055 p = (UChar* )onigenc_step(enc, p, end, pb->len);
04056 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
04057 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04058
04059 r = add_ctype_to_cc(cc, pb->ctype, not, env);
04060 if (r != 0) return r;
04061
04062 PINC; PINC;
04063 *src = p;
04064 return 0;
04065 }
04066 }
04067
04068 not_posix_bracket:
04069 c = 0;
04070 i = 0;
04071 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
04072 PINC;
04073 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
04074 }
04075 if (c == ':' && ! PEND) {
04076 PINC;
04077 if (! PEND) {
04078 PFETCH(c);
04079 if (c == ']')
04080 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04081 }
04082 }
04083
04084 return 1;
04085 }
04086
04087 static int
04088 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
04089 {
04090 int r;
04091 OnigCodePoint c;
04092 OnigEncoding enc = env->enc;
04093 UChar *prev, *start, *p = *src;
04094 PFETCH_READY;
04095
04096 r = 0;
04097 start = prev = p;
04098
04099 while (!PEND) {
04100 prev = p;
04101 PFETCH(c);
04102 if (c == '}') {
04103 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
04104 if (r < 0) break;
04105
04106 *src = p;
04107 return r;
04108 }
04109 else if (c == '(' || c == ')' || c == '{' || c == '|') {
04110 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
04111 break;
04112 }
04113 }
04114
04115 onig_scan_env_set_error_string(env, r, *src, prev);
04116 return r;
04117 }
04118
04119 static int
04120 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
04121 ScanEnv* env)
04122 {
04123 int r, ctype;
04124 CClassNode* cc;
04125
04126 ctype = fetch_char_property_to_ctype(src, end, env);
04127 if (ctype < 0) return ctype;
04128
04129 *np = node_new_cclass();
04130 CHECK_NULL_RETURN_MEMERR(*np);
04131 cc = NCCLASS(*np);
04132 r = add_ctype_to_cc(cc, ctype, 0, env);
04133 if (r != 0) return r;
04134 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
04135
04136 return 0;
04137 }
04138
04139
/* State of the character class parser (see parse_char_class(),
   next_state_val() and next_state_class()). */
04140 enum CCSTATE {
04141 CCS_VALUE,     /* a value or class was read; a value is held until the next item flushes it */
04142 CCS_RANGE,     /* a range operator was accepted after a value; the range end is expected */
04143 CCS_COMPLETE,  /* a range has just been completed */
04144 CCS_START      /* initial state; also set again after the "&&" operator */
04145 };
04146
/* Kind of the most recently read character class item. */
04147 enum CCVALTYPE {
04148 CCV_SB,          /* single value that is stored in the bitset of the class */
04149 CCV_CODE_POINT,  /* single value that is stored in the code range buffer (mbuf) */
04150 CCV_CLASS        /* a whole class (set by next_state_class()); no single value is held */
04151 };
04152
04153 static int
04154 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
04155 enum CCSTATE* state, ScanEnv* env)
04156 {
04157 int r;
04158
04159 if (*state == CCS_RANGE)
04160 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
04161
04162 if (*state == CCS_VALUE && *type != CCV_CLASS) {
04163 if (*type == CCV_SB)
04164 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04165 else if (*type == CCV_CODE_POINT) {
04166 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04167 if (r < 0) return r;
04168 }
04169 }
04170
04171 *state = CCS_VALUE;
04172 *type = CCV_CLASS;
04173 return 0;
04174 }
04175
04176 static int
04177 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
04178 int* vs_israw, int v_israw,
04179 enum CCVALTYPE intype, enum CCVALTYPE* type,
04180 enum CCSTATE* state, ScanEnv* env)
04181 {
04182 int r;
04183
04184 switch (*state) {
04185 case CCS_VALUE:
04186 if (*type == CCV_SB)
04187 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04188 else if (*type == CCV_CODE_POINT) {
04189 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04190 if (r < 0) return r;
04191 }
04192 break;
04193
04194 case CCS_RANGE:
04195 if (intype == *type) {
04196 if (intype == CCV_SB) {
04197 if (*vs > 0xff || v > 0xff)
04198 return ONIGERR_INVALID_CODE_POINT_VALUE;
04199
04200 if (*vs > v) {
04201 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04202 goto ccs_range_end;
04203 else
04204 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04205 }
04206 bitset_set_range(env, cc->bs, (int )*vs, (int )v);
04207 }
04208 else {
04209 r = add_code_range(&(cc->mbuf), env, *vs, v);
04210 if (r < 0) return r;
04211 }
04212 }
04213 else {
04214 #if 0
04215 if (intype == CCV_CODE_POINT && *type == CCV_SB) {
04216 #endif
04217 if (*vs > v) {
04218 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04219 goto ccs_range_end;
04220 else
04221 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04222 }
04223 bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
04224 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
04225 if (r < 0) return r;
04226 #if 0
04227 }
04228 else
04229 return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
04230 #endif
04231 }
04232 ccs_range_end:
04233 *state = CCS_COMPLETE;
04234 break;
04235
04236 case CCS_COMPLETE:
04237 case CCS_START:
04238 *state = CCS_VALUE;
04239 break;
04240
04241 default:
04242 break;
04243 }
04244
04245 *vs_israw = v_israw;
04246 *vs = v;
04247 *type = intype;
04248 return 0;
04249 }
04250
04251 static int
04252 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
04253 ScanEnv* env)
04254 {
04255 int in_esc;
04256 OnigCodePoint code;
04257 OnigEncoding enc = env->enc;
04258 UChar* p = from;
04259 PFETCH_READY;
04260
04261 in_esc = 0;
04262 while (! PEND) {
04263 if (ignore_escaped && in_esc) {
04264 in_esc = 0;
04265 }
04266 else {
04267 PFETCH(code);
04268 if (code == c) return 1;
04269 if (code == MC_ESC(env->syntax)) in_esc = 1;
04270 }
04271 }
04272 return 0;
04273 }
04274
04275 static int
04276 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
04277 ScanEnv* env)
04278 {
04279 int r, neg, len, fetched, and_start;
04280 OnigCodePoint v, vs;
04281 UChar *p;
04282 Node* node;
04283 CClassNode *cc, *prev_cc;
04284 CClassNode work_cc;
04285
04286 enum CCSTATE state;
04287 enum CCVALTYPE val_type, in_type;
04288 int val_israw, in_israw;
04289
04290 prev_cc = (CClassNode* )NULL;
04291 *np = NULL_NODE;
04292 r = fetch_token_in_cc(tok, src, end, env);
04293 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
04294 neg = 1;
04295 r = fetch_token_in_cc(tok, src, end, env);
04296 }
04297 else {
04298 neg = 0;
04299 }
04300
04301 if (r < 0) return r;
04302 if (r == TK_CC_CLOSE) {
04303 if (! code_exist_check((OnigCodePoint )']',
04304 *src, env->pattern_end, 1, env))
04305 return ONIGERR_EMPTY_CHAR_CLASS;
04306
04307 CC_ESC_WARN(env, (UChar* )"]");
04308 r = tok->type = TK_CHAR;
04309 }
04310
04311 *np = node = node_new_cclass();
04312 CHECK_NULL_RETURN_MEMERR(node);
04313 cc = NCCLASS(node);
04314
04315 and_start = 0;
04316 state = CCS_START;
04317 p = *src;
04318 while (r != TK_CC_CLOSE) {
04319 fetched = 0;
04320 switch (r) {
04321 case TK_CHAR:
04322 if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
04323 (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
04324 in_type = CCV_CODE_POINT;
04325 }
04326 else if (len < 0) {
04327 r = len;
04328 goto err;
04329 }
04330 else {
04331 sb_char:
04332 in_type = CCV_SB;
04333 }
04334 v = (OnigCodePoint )tok->u.c;
04335 in_israw = 0;
04336 goto val_entry2;
04337 break;
04338
04339 case TK_RAW_BYTE:
04340
04341 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
04342 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04343 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
04344 UChar* psave = p;
04345 int i, base = tok->base;
04346
04347 buf[0] = tok->u.c;
04348 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
04349 r = fetch_token_in_cc(tok, &p, end, env);
04350 if (r < 0) goto err;
04351 if (r != TK_RAW_BYTE || tok->base != base) {
04352 fetched = 1;
04353 break;
04354 }
04355 buf[i] = tok->u.c;
04356 }
04357
04358 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
04359 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04360 goto err;
04361 }
04362
04363 len = enclen(env->enc, buf, buf+i);
04364 if (i < len) {
04365 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04366 goto err;
04367 }
04368 else if (i > len) {
04369 p = psave;
04370 for (i = 1; i < len; i++) {
04371 r = fetch_token_in_cc(tok, &p, end, env);
04372 }
04373 fetched = 0;
04374 }
04375
04376 if (i == 1) {
04377 v = (OnigCodePoint )buf[0];
04378 goto raw_single;
04379 }
04380 else {
04381 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
04382 in_type = CCV_CODE_POINT;
04383 }
04384 }
04385 else {
04386 v = (OnigCodePoint )tok->u.c;
04387 raw_single:
04388 in_type = CCV_SB;
04389 }
04390 in_israw = 1;
04391 goto val_entry2;
04392 break;
04393
04394 case TK_CODE_POINT:
04395 v = tok->u.code;
04396 in_israw = 1;
04397 val_entry:
04398 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
04399 if (len < 0) {
04400 r = len;
04401 goto err;
04402 }
04403 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
04404 val_entry2:
04405 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
04406 &state, env);
04407 if (r != 0) goto err;
04408 break;
04409
04410 case TK_POSIX_BRACKET_OPEN:
04411 r = parse_posix_bracket(cc, &p, end, env);
04412 if (r < 0) goto err;
04413 if (r == 1) {
04414 CC_ESC_WARN(env, (UChar* )"[");
04415 p = tok->backp;
04416 v = (OnigCodePoint )tok->u.c;
04417 in_israw = 0;
04418 goto val_entry;
04419 }
04420 goto next_class;
04421 break;
04422
04423 case TK_CHAR_TYPE:
04424 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
04425 if (r != 0) return r;
04426
04427 next_class:
04428 r = next_state_class(cc, &vs, &val_type, &state, env);
04429 if (r != 0) goto err;
04430 break;
04431
04432 case TK_CHAR_PROPERTY:
04433 {
04434 int ctype;
04435
04436 ctype = fetch_char_property_to_ctype(&p, end, env);
04437 if (ctype < 0) return ctype;
04438 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
04439 if (r != 0) return r;
04440 goto next_class;
04441 }
04442 break;
04443
04444 case TK_CC_RANGE:
04445 if (state == CCS_VALUE) {
04446 r = fetch_token_in_cc(tok, &p, end, env);
04447 if (r < 0) goto err;
04448 fetched = 1;
04449 if (r == TK_CC_CLOSE) {
04450 range_end_val:
04451 v = (OnigCodePoint )'-';
04452 in_israw = 0;
04453 goto val_entry;
04454 }
04455 else if (r == TK_CC_AND) {
04456 CC_ESC_WARN(env, (UChar* )"-");
04457 goto range_end_val;
04458 }
04459 state = CCS_RANGE;
04460 }
04461 else if (state == CCS_START) {
04462
04463 v = (OnigCodePoint )tok->u.c;
04464 in_israw = 0;
04465
04466 r = fetch_token_in_cc(tok, &p, end, env);
04467 if (r < 0) goto err;
04468 fetched = 1;
04469
04470 if (r == TK_CC_RANGE || and_start != 0)
04471 CC_ESC_WARN(env, (UChar* )"-");
04472
04473 goto val_entry;
04474 }
04475 else if (state == CCS_RANGE) {
04476 CC_ESC_WARN(env, (UChar* )"-");
04477 goto sb_char;
04478 }
04479 else {
04480 r = fetch_token_in_cc(tok, &p, end, env);
04481 if (r < 0) goto err;
04482 fetched = 1;
04483 if (r == TK_CC_CLOSE) goto range_end_val;
04484 else if (r == TK_CC_AND) {
04485 CC_ESC_WARN(env, (UChar* )"-");
04486 goto range_end_val;
04487 }
04488
04489 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
04490 CC_ESC_WARN(env, (UChar* )"-");
04491 goto sb_char;
04492 }
04493 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
04494 goto err;
04495 }
04496 break;
04497
04498 case TK_CC_CC_OPEN:
04499 {
04500 Node *anode;
04501 CClassNode* acc;
04502
04503 r = parse_char_class(&anode, tok, &p, end, env);
04504 if (r == 0) {
04505 acc = NCCLASS(anode);
04506 r = or_cclass(cc, acc, env);
04507 }
04508 onig_node_free(anode);
04509 if (r != 0) goto err;
04510 }
04511 break;
04512
04513 case TK_CC_AND:
04514 {
04515 if (state == CCS_VALUE) {
04516 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04517 &val_type, &state, env);
04518 if (r != 0) goto err;
04519 }
04520
04521 and_start = 1;
04522 state = CCS_START;
04523
04524 if (IS_NOT_NULL(prev_cc)) {
04525 r = and_cclass(prev_cc, cc, env);
04526 if (r != 0) goto err;
04527 bbuf_free(cc->mbuf);
04528 }
04529 else {
04530 prev_cc = cc;
04531 cc = &work_cc;
04532 }
04533 initialize_cclass(cc);
04534 }
04535 break;
04536
04537 case TK_EOT:
04538 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
04539 goto err;
04540 break;
04541 default:
04542 r = ONIGERR_PARSER_BUG;
04543 goto err;
04544 break;
04545 }
04546
04547 if (fetched)
04548 r = tok->type;
04549 else {
04550 r = fetch_token_in_cc(tok, &p, end, env);
04551 if (r < 0) goto err;
04552 }
04553 }
04554
04555 if (state == CCS_VALUE) {
04556 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04557 &val_type, &state, env);
04558 if (r != 0) goto err;
04559 }
04560
04561 if (IS_NOT_NULL(prev_cc)) {
04562 r = and_cclass(prev_cc, cc, env);
04563 if (r != 0) goto err;
04564 bbuf_free(cc->mbuf);
04565 cc = prev_cc;
04566 }
04567
04568 if (neg != 0)
04569 NCCLASS_SET_NOT(cc);
04570 else
04571 NCCLASS_CLEAR_NOT(cc);
04572 if (IS_NCCLASS_NOT(cc) &&
04573 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
04574 int is_empty;
04575
04576 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
04577 if (is_empty != 0)
04578 BITSET_IS_EMPTY(cc->bs, is_empty);
04579
04580 if (is_empty == 0) {
04581 #define NEWLINE_CODE 0x0a
04582
04583 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
04584 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
04585 BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
04586 else
04587 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
04588 }
04589 }
04590 }
04591 *src = p;
04592 return 0;
04593
04594 err:
04595 if (cc != NCCLASS(*np))
04596 bbuf_free(cc->mbuf);
04597 return r;
04598 }
04599
/* Forward declaration: parse_enclose() below calls parse_subexp() before
   its definition is seen. */
04600 static int parse_subexp(Node** top, OnigToken* tok, int term,
04601 UChar** src, UChar* end, ScanEnv* env);
04602
04603 static int
04604 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
04605 ScanEnv* env)
04606 {
04607 int r, num;
04608 Node *target;
04609 OnigOptionType option;
04610 OnigCodePoint c;
04611 OnigEncoding enc = env->enc;
04612
04613 #ifdef USE_NAMED_GROUP
04614 int list_capture;
04615 #endif
04616
04617 UChar* p = *src;
04618 PFETCH_READY;
04619
04620 *np = NULL;
04621 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
04622
04623 option = env->option;
04624 if (PPEEK_IS('?') &&
04625 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
04626 PINC;
04627 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04628
04629 PFETCH(c);
04630 switch (c) {
04631 case ':':
04632 group:
04633 r = fetch_token(tok, &p, end, env);
04634 if (r < 0) return r;
04635 r = parse_subexp(np, tok, term, &p, end, env);
04636 if (r < 0) return r;
04637 *src = p;
04638 return 1;
04639 break;
04640
04641 case '=':
04642 *np = onig_node_new_anchor(ANCHOR_PREC_READ);
04643 break;
04644 case '!':
04645 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
04646 break;
04647 case '>':
04648 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
04649 break;
04650
04651 #ifdef USE_NAMED_GROUP
04652 case '\'':
04653 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04654 goto named_group1;
04655 }
04656 else
04657 return ONIGERR_UNDEFINED_GROUP_OPTION;
04658 break;
04659 #endif
04660
04661 case '<':
04662 PFETCH(c);
04663 if (c == '=')
04664 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
04665 else if (c == '!')
04666 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
04667 #ifdef USE_NAMED_GROUP
04668 else {
04669 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04670 UChar *name;
04671 UChar *name_end;
04672
04673 PUNFETCH;
04674 c = '<';
04675
04676 named_group1:
04677 list_capture = 0;
04678
04679 named_group2:
04680 name = p;
04681 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
04682 if (r < 0) return r;
04683
04684 num = scan_env_add_mem_entry(env);
04685 if (num < 0) return num;
04686 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
04687 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04688
04689 r = name_add(env->reg, name, name_end, num, env);
04690 if (r != 0) return r;
04691 *np = node_new_enclose_memory(env->option, 1);
04692 CHECK_NULL_RETURN_MEMERR(*np);
04693 NENCLOSE(*np)->regnum = num;
04694 if (list_capture != 0)
04695 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04696 env->num_named++;
04697 }
04698 else {
04699 return ONIGERR_UNDEFINED_GROUP_OPTION;
04700 }
04701 }
04702 #else
04703 else {
04704 return ONIGERR_UNDEFINED_GROUP_OPTION;
04705 }
04706 #endif
04707 break;
04708
04709 case '@':
04710 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
04711 #ifdef USE_NAMED_GROUP
04712 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04713 PFETCH(c);
04714 if (c == '<' || c == '\'') {
04715 list_capture = 1;
04716 goto named_group2;
04717 }
04718 PUNFETCH;
04719 }
04720 #endif
04721 *np = node_new_enclose_memory(env->option, 0);
04722 CHECK_NULL_RETURN_MEMERR(*np);
04723 num = scan_env_add_mem_entry(env);
04724 if (num < 0) {
04725 onig_node_free(*np);
04726 return num;
04727 }
04728 else if (num >= (int )BIT_STATUS_BITS_NUM) {
04729 onig_node_free(*np);
04730 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04731 }
04732 NENCLOSE(*np)->regnum = num;
04733 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04734 }
04735 else {
04736 return ONIGERR_UNDEFINED_GROUP_OPTION;
04737 }
04738 break;
04739
04740 #ifdef USE_POSIXLINE_OPTION
04741 case 'p':
04742 #endif
04743 case '-': case 'i': case 'm': case 's': case 'x':
04744 {
04745 int neg = 0;
04746
04747 while (1) {
04748 switch (c) {
04749 case ':':
04750 case ')':
04751 break;
04752
04753 case '-': neg = 1; break;
04754 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
04755 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
04756 case 's':
04757 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04758 ONOFF(option, ONIG_OPTION_MULTILINE, neg);
04759 }
04760 else
04761 return ONIGERR_UNDEFINED_GROUP_OPTION;
04762 break;
04763
04764 case 'm':
04765 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04766 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
04767 }
04768 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
04769 ONOFF(option, ONIG_OPTION_MULTILINE, neg);
04770 }
04771 else
04772 return ONIGERR_UNDEFINED_GROUP_OPTION;
04773 break;
04774 #ifdef USE_POSIXLINE_OPTION
04775 case 'p':
04776 ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
04777 break;
04778 #endif
04779 default:
04780 return ONIGERR_UNDEFINED_GROUP_OPTION;
04781 }
04782
04783 if (c == ')') {
04784 *np = node_new_option(option);
04785 CHECK_NULL_RETURN_MEMERR(*np);
04786 *src = p;
04787 return 2;
04788 }
04789 else if (c == ':') {
04790 OnigOptionType prev = env->option;
04791
04792 env->option = option;
04793 r = fetch_token(tok, &p, end, env);
04794 if (r < 0) return r;
04795 r = parse_subexp(&target, tok, term, &p, end, env);
04796 env->option = prev;
04797 if (r < 0) return r;
04798 *np = node_new_option(option);
04799 CHECK_NULL_RETURN_MEMERR(*np);
04800 NENCLOSE(*np)->target = target;
04801 *src = p;
04802 return 0;
04803 }
04804
04805 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04806 PFETCH(c);
04807 }
04808 }
04809 break;
04810
04811 default:
04812 return ONIGERR_UNDEFINED_GROUP_OPTION;
04813 }
04814 }
04815 else {
04816 if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
04817 goto group;
04818
04819 *np = node_new_enclose_memory(env->option, 0);
04820 CHECK_NULL_RETURN_MEMERR(*np);
04821 num = scan_env_add_mem_entry(env);
04822 if (num < 0) return num;
04823 NENCLOSE(*np)->regnum = num;
04824 }
04825
04826 CHECK_NULL_RETURN_MEMERR(*np);
04827 r = fetch_token(tok, &p, end, env);
04828 if (r < 0) return r;
04829 r = parse_subexp(&target, tok, term, &p, end, env);
04830 if (r < 0) {
04831 onig_node_free(target);
04832 return r;
04833 }
04834
04835 if (NTYPE(*np) == NT_ANCHOR)
04836 NANCHOR(*np)->target = target;
04837 else {
04838 NENCLOSE(*np)->target = target;
04839 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
04840
04841 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
04842 if (r != 0) return r;
04843 }
04844 }
04845
04846 *src = p;
04847 return 0;
04848 }
04849
/* Spelling of the quantifiers used in the nested-repeat warning of
   set_quantifier(); indexed by the value of popular_quantifier_num(). */
04850 static const char* const PopularQStr[] = {
04851 "?", "*", "+", "??", "*?", "+?"
04852 };
04853
/* Text describing the replacement in the nested-repeat warning of
   set_quantifier(); indexed by an entry of ReduceTypeTable. */
04854 static const char* const ReduceQStr[] = {
04855 "", "", "*", "*?", "??", "+ and ??", "+? and ?"
04856 };
04857
04858 static int
04859 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
04860 {
04861 QtfrNode* qn;
04862
04863 qn = NQTFR(qnode);
04864 if (qn->lower == 1 && qn->upper == 1) {
04865 return 1;
04866 }
04867
04868 switch (NTYPE(target)) {
04869 case NT_STR:
04870 if (! group) {
04871 StrNode* sn = NSTR(target);
04872 if (str_node_can_be_split(sn, env->enc)) {
04873 Node* n = str_node_split_last_char(sn, env->enc);
04874 if (IS_NOT_NULL(n)) {
04875 qn->target = n;
04876 return 2;
04877 }
04878 }
04879 }
04880 break;
04881
04882 case NT_QTFR:
04883 {
04884
04885 QtfrNode* qnt = NQTFR(target);
04886 int nestq_num = popular_quantifier_num(qn);
04887 int targetq_num = popular_quantifier_num(qnt);
04888
04889 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
04890 if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
04891 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
04892 UChar buf[WARN_BUFSIZE];
04893
04894 switch(ReduceTypeTable[targetq_num][nestq_num]) {
04895 case RQ_ASIS:
04896 break;
04897
04898 case RQ_DEL:
04899 if (onig_verb_warn != onig_null_warn) {
04900 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04901 env->pattern, env->pattern_end,
04902 (UChar* )"redundant nested repeat operator");
04903 (*onig_verb_warn)((char* )buf);
04904 }
04905 goto warn_exit;
04906 break;
04907
04908 default:
04909 if (onig_verb_warn != onig_null_warn) {
04910 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04911 env->pattern, env->pattern_end,
04912 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
04913 PopularQStr[targetq_num], PopularQStr[nestq_num],
04914 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
04915 (*onig_verb_warn)((char* )buf);
04916 }
04917 goto warn_exit;
04918 break;
04919 }
04920 }
04921
04922 warn_exit:
04923 #endif
04924 if (targetq_num >= 0) {
04925 if (nestq_num >= 0) {
04926 onig_reduce_nested_quantifier(qnode, target);
04927 goto q_exit;
04928 }
04929 else if (targetq_num == 1 || targetq_num == 2) {
04930
04931 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
04932 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
04933 }
04934 }
04935 }
04936 }
04937 break;
04938
04939 default:
04940 break;
04941 }
04942
04943 qn->target = target;
04944 q_exit:
04945 return 0;
04946 }
04947
04948
04949 #ifdef USE_SHARED_CCLASS_TABLE
04950
04951 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
04952
04953
04954
/* Key type used by type_cclass_cmp() and type_cclass_hash(): two keys are
   equal when all three fields are equal. */
04955 typedef struct {
04956 OnigEncoding enc;
04957 int not;
04958 int type;
04959 } type_cclass_key;
04960
04961 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
04962 {
04963 if (x->type != y->type) return 1;
04964 if (x->enc != y->enc) return 1;
04965 if (x->not != y->not) return 1;
04966 return 0;
04967 }
04968
04969 static st_index_t type_cclass_hash(type_cclass_key* key)
04970 {
04971 int i, val;
04972 UChar *p;
04973
04974 val = 0;
04975
04976 p = (UChar* )&(key->enc);
04977 for (i = 0; i < (int )sizeof(key->enc); i++) {
04978 val = val * 997 + (int )*p++;
04979 }
04980
04981 p = (UChar* )(&key->type);
04982 for (i = 0; i < (int )sizeof(key->type); i++) {
04983 val = val * 997 + (int )*p++;
04984 }
04985
04986 val += key->not;
04987 return val + (val >> 5);
04988 }
04989
/* Hash type descriptor pairing the comparison and hash functions for
   type_cclass_key. */
04990 static const struct st_hash_type type_type_cclass_hash = {
04991 type_cclass_cmp,
04992 type_cclass_hash,
04993 };
04994
/* Table of shared character classes; released and reset to NULL by
   onig_free_shared_cclass_table(). */
04995 static st_table* OnigTypeCClassTable;
04996
04997
04998 static int
04999 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
05000 {
05001 if (IS_NOT_NULL(node)) {
05002 CClassNode* cc = NCCLASS(node);
05003 if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
05004 xfree(node);
05005 }
05006
05007 if (IS_NOT_NULL(key)) xfree(key);
05008 return ST_DELETE;
05009 }
05010
05011 extern int
05012 onig_free_shared_cclass_table(void)
05013 {
05014 THREAD_ATOMIC_START;
05015 if (IS_NOT_NULL(OnigTypeCClassTable)) {
05016 onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
05017 onig_st_free_table(OnigTypeCClassTable);
05018 OnigTypeCClassTable = NULL;
05019 }
05020 THREAD_ATOMIC_END;
05021
05022 return 0;
05023 }
05024
05025 #endif
05026
05027
05028 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05029 static int
05030 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
05031 {
05032 BBuf *tbuf;
05033 int r;
05034
05035 if (IS_NCCLASS_NOT(cc)) {
05036 bitset_invert(cc->bs);
05037
05038 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
05039 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
05040 if (r != 0) return r;
05041
05042 bbuf_free(cc->mbuf);
05043 cc->mbuf = tbuf;
05044 }
05045
05046 NCCLASS_CLEAR_NOT(cc);
05047 }
05048
05049 return 0;
05050 }
05051 #endif
05052
/* Argument block handed to i_apply_case_fold() through its void* parameter. */
05053 typedef struct {
05054 ScanEnv* env;     /* scan environment; supplies the encoding */
05055 CClassNode* cc;   /* character class that receives the folded codes */
05056 Node* alt_root;   /* not touched by i_apply_case_fold(); presumably the head of the alt list -- TODO confirm against the caller */
05057 Node** ptail;     /* where the next alternative node is linked */
05058 } IApplyCaseFoldArg;
05059
05060 static int
05061 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
05062 int to_len, void* arg)
05063 {
05064 IApplyCaseFoldArg* iarg;
05065 ScanEnv* env;
05066 CClassNode* cc;
05067 BitSetRef bs;
05068
05069 iarg = (IApplyCaseFoldArg* )arg;
05070 env = iarg->env;
05071 cc = iarg->cc;
05072 bs = cc->bs;
05073
05074 if (to_len == 1) {
05075 int is_in = onig_is_code_in_cc(env->enc, from, cc);
05076 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05077 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
05078 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
05079 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05080 add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05081 }
05082 else {
05083 BITSET_SET_BIT(bs, *to);
05084 }
05085 }
05086 #else
05087 if (is_in != 0) {
05088 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05089 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
05090 add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05091 }
05092 else {
05093 if (IS_NCCLASS_NOT(cc)) {
05094 BITSET_CLEAR_BIT(bs, *to);
05095 }
05096 else
05097 BITSET_SET_BIT(bs, *to);
05098 }
05099 }
05100 #endif
05101 }
05102 else {
05103 int r, i, len;
05104 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05105 Node *snode = NULL_NODE;
05106
05107 if (onig_is_code_in_cc(env->enc, from, cc)
05108 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05109 && !IS_NCCLASS_NOT(cc)
05110 #endif
05111 ) {
05112 for (i = 0; i < to_len; i++) {
05113 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
05114 if (i == 0) {
05115 snode = onig_node_new_str(buf, buf + len);
05116 CHECK_NULL_RETURN_MEMERR(snode);
05117
05118
05119
05120 NSTRING_SET_AMBIG(snode);
05121 }
05122 else {
05123 r = onig_node_str_cat(snode, buf, buf + len);
05124 if (r < 0) {
05125 onig_node_free(snode);
05126 return r;
05127 }
05128 }
05129 }
05130
05131 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
05132 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
05133 iarg->ptail = &(NCDR((*(iarg->ptail))));
05134 }
05135 }
05136
05137 return 0;
05138 }
05139
05140 static int
05141 parse_exp(Node** np, OnigToken* tok, int term,
05142 UChar** src, UChar* end, ScanEnv* env)
05143 {
05144 int r, len, group = 0;
05145 Node* qn;
05146 Node** targetp;
05147
05148 *np = NULL;
05149 if (tok->type == (enum TokenSyms )term)
05150 goto end_of_token;
05151
05152 switch (tok->type) {
05153 case TK_ALT:
05154 case TK_EOT:
05155 end_of_token:
05156 *np = node_new_empty();
05157 return tok->type;
05158
05159 case TK_SUBEXP_OPEN:
05160 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
05161 if (r < 0) return r;
05162 if (r == 1) group = 1;
05163 else if (r == 2) {
05164 Node* target;
05165 OnigOptionType prev = env->option;
05166
05167 env->option = NENCLOSE(*np)->option;
05168 r = fetch_token(tok, src, end, env);
05169 if (r < 0) return r;
05170 r = parse_subexp(&target, tok, term, src, end, env);
05171 env->option = prev;
05172 if (r < 0) {
05173 onig_node_free(target);
05174 return r;
05175 }
05176 NENCLOSE(*np)->target = target;
05177 return tok->type;
05178 }
05179 break;
05180
05181 case TK_SUBEXP_CLOSE:
05182 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
05183 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
05184
05185 if (tok->escaped) goto tk_raw_byte;
05186 else goto tk_byte;
05187 break;
05188
05189 case TK_STRING:
05190 tk_byte:
05191 {
05192 *np = node_new_str(tok->backp, *src);
05193 CHECK_NULL_RETURN_MEMERR(*np);
05194
05195 while (1) {
05196 r = fetch_token(tok, src, end, env);
05197 if (r < 0) return r;
05198 if (r != TK_STRING) break;
05199
05200 r = onig_node_str_cat(*np, tok->backp, *src);
05201 if (r < 0) return r;
05202 }
05203
05204 string_end:
05205 targetp = np;
05206 goto repeat;
05207 }
05208 break;
05209
05210 case TK_RAW_BYTE:
05211 tk_raw_byte:
05212 {
05213 *np = node_new_str_raw_char((UChar )tok->u.c);
05214 CHECK_NULL_RETURN_MEMERR(*np);
05215 len = 1;
05216 while (1) {
05217 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
05218 if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
05219 r = fetch_token(tok, src, end, env);
05220 NSTRING_CLEAR_RAW(*np);
05221 goto string_end;
05222 }
05223 }
05224
05225 r = fetch_token(tok, src, end, env);
05226 if (r < 0) return r;
05227 if (r != TK_RAW_BYTE) {
05228
05229 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
05230 int rem;
05231 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
05232 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
05233 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
05234 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
05235 NSTRING_CLEAR_RAW(*np);
05236 goto string_end;
05237 }
05238 }
05239 #endif
05240 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
05241 }
05242
05243 r = node_str_cat_char(*np, (UChar )tok->u.c);
05244 if (r < 0) return r;
05245
05246 len++;
05247 }
05248 }
05249 break;
05250
05251 case TK_CODE_POINT:
05252 {
05253 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05254 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
05255 if (num < 0) return num;
05256 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
05257 *np = node_new_str_raw(buf, buf + num);
05258 #else
05259 *np = node_new_str(buf, buf + num);
05260 #endif
05261 CHECK_NULL_RETURN_MEMERR(*np);
05262 }
05263 break;
05264
05265 case TK_QUOTE_OPEN:
05266 {
05267 OnigCodePoint end_op[2];
05268 UChar *qstart, *qend, *nextp;
05269
05270 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
05271 end_op[1] = (OnigCodePoint )'E';
05272 qstart = *src;
05273 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
05274 if (IS_NULL(qend)) {
05275 nextp = qend = end;
05276 }
05277 *np = node_new_str(qstart, qend);
05278 CHECK_NULL_RETURN_MEMERR(*np);
05279 *src = nextp;
05280 }
05281 break;
05282
05283 case TK_CHAR_TYPE:
05284 {
05285 switch (tok->u.prop.ctype) {
05286 case ONIGENC_CTYPE_D:
05287 case ONIGENC_CTYPE_S:
05288 case ONIGENC_CTYPE_W:
05289 {
05290 CClassNode* cc;
05291 *np = node_new_cclass();
05292 CHECK_NULL_RETURN_MEMERR(*np);
05293 cc = NCCLASS(*np);
05294 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05295 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05296 }
05297 break;
05298
05299 case ONIGENC_CTYPE_WORD:
05300 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
05301 CHECK_NULL_RETURN_MEMERR(*np);
05302 break;
05303
05304 case ONIGENC_CTYPE_SPACE:
05305 case ONIGENC_CTYPE_DIGIT:
05306 case ONIGENC_CTYPE_XDIGIT:
05307 {
05308 CClassNode* cc;
05309
05310 #ifdef USE_SHARED_CCLASS_TABLE
05311 const OnigCodePoint *mbr;
05312 OnigCodePoint sb_out;
05313
05314 r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
05315 &sb_out, &mbr);
05316 if (r == 0 &&
05317 ONIGENC_CODE_RANGE_NUM(mbr)
05318 >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
05319 type_cclass_key key;
05320 type_cclass_key* new_key;
05321
05322 key.enc = env->enc;
05323 key.not = tok->u.prop.not;
05324 key.type = tok->u.prop.ctype;
05325
05326 THREAD_ATOMIC_START;
05327
05328 if (IS_NULL(OnigTypeCClassTable)) {
05329 OnigTypeCClassTable
05330 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
05331 if (IS_NULL(OnigTypeCClassTable)) {
05332 THREAD_ATOMIC_END;
05333 return ONIGERR_MEMORY;
05334 }
05335 }
05336 else {
05337 if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
05338 (st_data_t* )np)) {
05339 THREAD_ATOMIC_END;
05340 break;
05341 }
05342 }
05343
05344 *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
05345 sb_out, mbr);
05346 if (IS_NULL(*np)) {
05347 THREAD_ATOMIC_END;
05348 return ONIGERR_MEMORY;
05349 }
05350
05351 cc = NCCLASS(*np);
05352 NCCLASS_SET_SHARE(cc);
05353 new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
05354 xmemcpy(new_key, &key, sizeof(type_cclass_key));
05355 onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
05356 (st_data_t )*np);
05357
05358 THREAD_ATOMIC_END;
05359 }
05360 else {
05361 #endif
05362 *np = node_new_cclass();
05363 CHECK_NULL_RETURN_MEMERR(*np);
05364 cc = NCCLASS(*np);
05365 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05366 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05367 #ifdef USE_SHARED_CCLASS_TABLE
05368 }
05369 #endif
05370 }
05371 break;
05372
05373 default:
05374 return ONIGERR_PARSER_BUG;
05375 break;
05376 }
05377 }
05378 break;
05379
05380 case TK_CHAR_PROPERTY:
05381 r = parse_char_property(np, tok, src, end, env);
05382 if (r != 0) return r;
05383 break;
05384
05385 case TK_CC_OPEN:
05386 {
05387 CClassNode* cc;
05388
05389 r = parse_char_class(np, tok, src, end, env);
05390 if (r != 0) return r;
05391
05392 cc = NCCLASS(*np);
05393 if (IS_IGNORECASE(env->option)) {
05394 IApplyCaseFoldArg iarg;
05395
05396 iarg.env = env;
05397 iarg.cc = cc;
05398 iarg.alt_root = NULL_NODE;
05399 iarg.ptail = &(iarg.alt_root);
05400
05401 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
05402 i_apply_case_fold, &iarg);
05403 if (r != 0) {
05404 onig_node_free(iarg.alt_root);
05405 return r;
05406 }
05407 if (IS_NOT_NULL(iarg.alt_root)) {
05408 Node* work = onig_node_new_alt(*np, iarg.alt_root);
05409 if (IS_NULL(work)) {
05410 onig_node_free(iarg.alt_root);
05411 return ONIGERR_MEMORY;
05412 }
05413 *np = work;
05414 }
05415 }
05416 }
05417 break;
05418
05419 case TK_ANYCHAR:
05420 *np = node_new_anychar();
05421 CHECK_NULL_RETURN_MEMERR(*np);
05422 break;
05423
05424 case TK_ANYCHAR_ANYTIME:
05425 *np = node_new_anychar();
05426 CHECK_NULL_RETURN_MEMERR(*np);
05427 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
05428 CHECK_NULL_RETURN_MEMERR(qn);
05429 NQTFR(qn)->target = *np;
05430 *np = qn;
05431 break;
05432
05433 case TK_BACKREF:
05434 len = tok->u.backref.num;
05435 *np = node_new_backref(len,
05436 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
05437 tok->u.backref.by_name,
05438 #ifdef USE_BACKREF_WITH_LEVEL
05439 tok->u.backref.exist_level,
05440 tok->u.backref.level,
05441 #endif
05442 env);
05443 CHECK_NULL_RETURN_MEMERR(*np);
05444 break;
05445
05446 #ifdef USE_SUBEXP_CALL
05447 case TK_CALL:
05448 {
05449 int gnum = tok->u.call.gnum;
05450
05451 if (gnum < 0) {
05452 gnum = BACKREF_REL_TO_ABS(gnum, env);
05453 if (gnum <= 0)
05454 return ONIGERR_INVALID_BACKREF;
05455 }
05456 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
05457 CHECK_NULL_RETURN_MEMERR(*np);
05458 env->num_call++;
05459 }
05460 break;
05461 #endif
05462
05463 case TK_ANCHOR:
05464 *np = onig_node_new_anchor(tok->u.anchor);
05465 break;
05466
05467 case TK_OP_REPEAT:
05468 case TK_INTERVAL:
05469 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
05470 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
05471 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
05472 else
05473 *np = node_new_empty();
05474 }
05475 else {
05476 goto tk_byte;
05477 }
05478 break;
05479
05480 default:
05481 return ONIGERR_PARSER_BUG;
05482 break;
05483 }
05484
05485 {
05486 targetp = np;
05487
05488 re_entry:
05489 r = fetch_token(tok, src, end, env);
05490 if (r < 0) return r;
05491
05492 repeat:
05493 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
05494 if (is_invalid_quantifier_target(*targetp))
05495 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
05496
05497 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
05498 (r == TK_INTERVAL ? 1 : 0));
05499 CHECK_NULL_RETURN_MEMERR(qn);
05500 NQTFR(qn)->greedy = tok->u.repeat.greedy;
05501 r = set_quantifier(qn, *targetp, group, env);
05502 if (r < 0) {
05503 onig_node_free(qn);
05504 return r;
05505 }
05506
05507 if (tok->u.repeat.possessive != 0) {
05508 Node* en;
05509 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
05510 if (IS_NULL(en)) {
05511 onig_node_free(qn);
05512 return ONIGERR_MEMORY;
05513 }
05514 NENCLOSE(en)->target = qn;
05515 qn = en;
05516 }
05517
05518 if (r == 0) {
05519 *targetp = qn;
05520 }
05521 else if (r == 1) {
05522 onig_node_free(qn);
05523 }
05524 else if (r == 2) {
05525 Node *tmp;
05526
05527 *targetp = node_new_list(*targetp, NULL);
05528 if (IS_NULL(*targetp)) {
05529 onig_node_free(qn);
05530 return ONIGERR_MEMORY;
05531 }
05532 tmp = NCDR(*targetp) = node_new_list(qn, NULL);
05533 if (IS_NULL(tmp)) {
05534 onig_node_free(qn);
05535 return ONIGERR_MEMORY;
05536 }
05537 targetp = &(NCAR(tmp));
05538 }
05539 goto re_entry;
05540 }
05541 }
05542
05543 return r;
05544 }
05545
05546 static int
05547 parse_branch(Node** top, OnigToken* tok, int term,
05548 UChar** src, UChar* end, ScanEnv* env)
05549 {
05550 int r;
05551 Node *node, **headp;
05552
05553 *top = NULL;
05554 r = parse_exp(&node, tok, term, src, end, env);
05555 if (r < 0) {
05556 onig_node_free(node);
05557 return r;
05558 }
05559
05560 if (r == TK_EOT || r == term || r == TK_ALT) {
05561 *top = node;
05562 }
05563 else {
05564 *top = node_new_list(node, NULL);
05565 headp = &(NCDR(*top));
05566 while (r != TK_EOT && r != term && r != TK_ALT) {
05567 r = parse_exp(&node, tok, term, src, end, env);
05568 if (r < 0) {
05569 onig_node_free(node);
05570 return r;
05571 }
05572
05573 if (NTYPE(node) == NT_LIST) {
05574 *headp = node;
05575 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
05576 headp = &(NCDR(node));
05577 }
05578 else {
05579 *headp = node_new_list(node, NULL);
05580 headp = &(NCDR(*headp));
05581 }
05582 }
05583 }
05584
05585 return r;
05586 }
05587
05588
05589 static int
05590 parse_subexp(Node** top, OnigToken* tok, int term,
05591 UChar** src, UChar* end, ScanEnv* env)
05592 {
05593 int r;
05594 Node *node, **headp;
05595
05596 *top = NULL;
05597 r = parse_branch(&node, tok, term, src, end, env);
05598 if (r < 0) {
05599 onig_node_free(node);
05600 return r;
05601 }
05602
05603 if (r == term) {
05604 *top = node;
05605 }
05606 else if (r == TK_ALT) {
05607 *top = onig_node_new_alt(node, NULL);
05608 headp = &(NCDR(*top));
05609 while (r == TK_ALT) {
05610 r = fetch_token(tok, src, end, env);
05611 if (r < 0) return r;
05612 r = parse_branch(&node, tok, term, src, end, env);
05613 if (r < 0) {
05614 onig_node_free(node);
05615 return r;
05616 }
05617
05618 *headp = onig_node_new_alt(node, NULL);
05619 headp = &(NCDR(*headp));
05620 }
05621
05622 if (tok->type != (enum TokenSyms )term)
05623 goto err;
05624 }
05625 else {
05626 onig_node_free(node);
05627 err:
05628 if (term == TK_SUBEXP_CLOSE)
05629 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
05630 else
05631 return ONIGERR_PARSER_BUG;
05632 }
05633
05634 return r;
05635 }
05636
05637 static int
05638 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
05639 {
05640 int r;
05641 OnigToken tok;
05642
05643 r = fetch_token(&tok, src, end, env);
05644 if (r < 0) return r;
05645 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
05646 if (r < 0) return r;
05647 return 0;
05648 }
05649
05650 extern int
05651 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
05652 regex_t* reg, ScanEnv* env)
05653 {
05654 int r;
05655 UChar* p;
05656
05657 #ifdef USE_NAMED_GROUP
05658 names_clear(reg);
05659 #endif
05660
05661 scan_env_clear(env);
05662 env->option = reg->options;
05663 env->case_fold_flag = reg->case_fold_flag;
05664 env->enc = reg->enc;
05665 env->syntax = reg->syntax;
05666 env->pattern = (UChar* )pattern;
05667 env->pattern_end = (UChar* )end;
05668 env->reg = reg;
05669
05670 *root = NULL;
05671 p = (UChar* )pattern;
05672 r = parse_regexp(root, &p, (UChar* )end, env);
05673 reg->num_mem = env->num_mem;
05674 return r;
05675 }
05676
05677 extern void
05678 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
05679 UChar* arg, UChar* arg_end)
05680 {
05681 env->error = arg;
05682 env->error_end = arg_end;
05683 }
05684