• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

regparse.c

Go to the documentation of this file.
00001 /* -*- mode:c; c-file-style:"gnu" -*- */
00002 /**********************************************************************
00003   regparse.c -  Oniguruma (regular expression library)
00004 **********************************************************************/
00005 /*-
00006  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00007  * All rights reserved.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00019  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00022  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00023  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00024  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00025  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00026  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00027  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00028  * SUCH DAMAGE.
00029  */
00030 
00031 #include "regparse.h"
00032 
00033 #define WARN_BUFSIZE    256
00034 
00035 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
00036 
00037 
00038 const OnigSyntaxType OnigSyntaxRuby = {
00039   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00040      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00041      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00042      ONIG_SYN_OP_ESC_C_CONTROL )
00043    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00044   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
00045       ONIG_SYN_OP2_OPTION_RUBY |
00046       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
00047       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
00048       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
00049       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
00050       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
00051       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
00052       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
00053       ONIG_SYN_OP2_ESC_H_XDIGIT )
00054   , ( SYN_GNU_REGEX_BV |
00055       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
00056       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
00057       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
00058       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
00059       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
00060       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
00061       ONIG_SYN_WARN_CC_DUP |
00062       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
00063   , ONIG_OPTION_NONE
00064   ,
00065   {
00066       (OnigCodePoint )'\\'                       /* esc */
00067     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00068     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00069     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00070     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00071     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00072   }
00073 };
00074 
00075 const OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
00076 
00077 extern void onig_null_warn(const char* s ARG_UNUSED) { }
00078 
00079 #ifdef DEFAULT_WARN_FUNCTION
00080 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
00081 #else
00082 static OnigWarnFunc onig_warn = onig_null_warn;
00083 #endif
00084 
00085 #ifdef DEFAULT_VERB_WARN_FUNCTION
00086 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
00087 #else
00088 static OnigWarnFunc onig_verb_warn = onig_null_warn;
00089 #endif
00090 
00091 extern void onig_set_warn_func(OnigWarnFunc f)
00092 {
00093   onig_warn = f;
00094 }
00095 
00096 extern void onig_set_verb_warn_func(OnigWarnFunc f)
00097 {
00098   onig_verb_warn = f;
00099 }
00100 
00101 static void CC_DUP_WARN(ScanEnv *env);
00102 
00103 static void
00104 bbuf_free(BBuf* bbuf)
00105 {
00106   if (IS_NOT_NULL(bbuf)) {
00107     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
00108     xfree(bbuf);
00109   }
00110 }
00111 
00112 static int
00113 bbuf_clone(BBuf** rto, BBuf* from)
00114 {
00115   int r;
00116   BBuf *to;
00117 
00118   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
00119   CHECK_NULL_RETURN_MEMERR(to);
00120   r = BBUF_INIT(to, from->alloc);
00121   if (r != 0) return r;
00122   to->used = from->used;
00123   xmemcpy(to->p, from->p, from->used);
00124   return 0;
00125 }
00126 
/* Turn a relative group number into an absolute one, counted from the
   number of groups registered so far in env. */
#define BACKREF_REL_TO_ABS(rel_no, env) \
  ((env)->num_mem + 1 + (rel_no))

/* Clear flag bits f in v when `negative` is true, set them otherwise.
   The whole expansion is parenthesized so it behaves as one expression
   wherever it is used. */
#define ONOFF(v,f,negative) \
  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))

/* First code point of the multi-byte range: 0 when the encoding's minimum
   character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), max code point] to *pbuf.
   Requires a variable named `env` in the calling scope. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, add the whole multi-byte range
   to mbuf.  Requires `r` and `env` in the calling scope and returns from
   the calling function when the addition fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r) return r;\
  }\
} while (0)
00144 
00145 
/* Set bit `pos` in bitset `bs`, reporting a duplicate through CC_DUP_WARN
   when the bit is already set.  Requires a variable named `env` in the
   calling scope. */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
  BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)

/* Store 1 in `empty` when every word of `bs` is zero, 0 otherwise.
   The loop counter has a macro-private name so that an `i` appearing in
   the caller's arguments is not captured by the expansion. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bitset_is_empty_idx_;\
  (empty) = 1;\
  for (bitset_is_empty_idx_ = 0;\
       bitset_is_empty_idx_ < (int )BITSET_SIZE;\
       bitset_is_empty_idx_++) {\
    if ((bs)[bitset_is_empty_idx_] != 0) {\
      (empty) = 0; break;\
    }\
  }\
} while (0)
00160 
00161 static void
00162 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
00163 {
00164   int i;
00165   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
00166     BITSET_SET_BIT_CHKDUP(bs, i);
00167   }
00168 }
00169 
#if 0
/* Disabled: would set every bit of the bitset. */
static void
bitset_set_all(BitSetRef bs)
{
  int idx = 0;

  while (idx < BITSET_SIZE) {
    bs[idx] = ~((Bits )0);
    idx++;
  }
}
#endif
00178 
00179 static void
00180 bitset_invert(BitSetRef bs)
00181 {
00182   int i;
00183   for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
00184 }
00185 
00186 static void
00187 bitset_invert_to(BitSetRef from, BitSetRef to)
00188 {
00189   int i;
00190   for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
00191 }
00192 
00193 static void
00194 bitset_and(BitSetRef dest, BitSetRef bs)
00195 {
00196   int i;
00197   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
00198 }
00199 
00200 static void
00201 bitset_or(BitSetRef dest, BitSetRef bs)
00202 {
00203   int i;
00204   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
00205 }
00206 
00207 static void
00208 bitset_copy(BitSetRef dest, BitSetRef bs)
00209 {
00210   int i;
00211   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
00212 }
00213 
00214 extern int
00215 onig_strncmp(const UChar* s1, const UChar* s2, int n)
00216 {
00217   int x;
00218 
00219   while (n-- > 0) {
00220     x = *s2++ - *s1++;
00221     if (x) return x;
00222   }
00223   return 0;
00224 }
00225 
00226 extern void
00227 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
00228 {
00229   ptrdiff_t len = end - src;
00230   if (len > 0) {
00231     xmemcpy(dest, src, len);
00232     dest[len] = (UChar )0;
00233   }
00234 }
00235 
#ifdef USE_NAMED_GROUP
/* Duplicate the bytes [s, end) into fresh storage and terminate the copy
   with ONIGENC_MBC_MINLEN(enc) zero bytes.
   Returns the new string, or the CHECK_NULL_RETURN result when the
   allocation fails.  The caller owns the returned storage. */
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
  ptrdiff_t body_len = end - s;
  int nul_len = ONIGENC_MBC_MINLEN(enc);
  UChar *copy;
  UChar *tail;

  copy = (UChar* )xmalloc(body_len + nul_len);
  CHECK_NULL_RETURN(copy);
  xmemcpy(copy, s, body_len);

  /* terminator: one zero byte per minimum-character byte */
  tail = copy + body_len;
  while (nul_len-- > 0) {
    *tail++ = (UChar )0;
  }

  return copy;
}
#endif
00257 
/* scan pattern methods
 *
 * These macros operate on variables of the calling scope: the cursor `p`,
 * the limit `end`, the encoding `enc`, and `pfetch_prev` (declared with
 * PFETCH_READY).  Macro arguments and statement-like expansions are
 * parenthesized so they stay intact inside larger expressions.
 */
#define PEND_VALUE   0

/* Declares the variable that remembers the position before the last fetch. */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the cursor has reached the end of the pattern, else 0. */
#define PEND         (p < end ?  0 : 1)
/* Step back to the position saved by the last PINC/PFETCH. */
#define PUNFETCH     (p = pfetch_prev)
/* Advance over one character without reading it. */
#define PINC       do { \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)
/* Read the character at the cursor into c and advance past it. */
#define PFETCH(c)  do { \
  (c) = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)

/* Character at the cursor without advancing; PEND_VALUE at the end. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
00276 
00277 static UChar*
00278 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
00279               int capa)
00280 {
00281   UChar* r;
00282 
00283   if (dest)
00284     r = (UChar* )xrealloc(dest, capa + 1);
00285   else
00286     r = (UChar* )xmalloc(capa + 1);
00287 
00288   CHECK_NULL_RETURN(r);
00289   onig_strcpy(r + (dest_end - dest), src, src_end);
00290   return r;
00291 }
00292 
00293 /* dest on static area */
00294 static UChar*
00295 strcat_capa_from_static(UChar* dest, UChar* dest_end,
00296                         const UChar* src, const UChar* src_end, int capa)
00297 {
00298   UChar* r;
00299 
00300   r = (UChar* )xmalloc(capa + 1);
00301   CHECK_NULL_RETURN(r);
00302   onig_strcpy(r, dest, dest_end);
00303   onig_strcpy(r + (dest_end - dest), src, src_end);
00304   return r;
00305 }
00306 
00307 
00308 #ifdef USE_ST_LIBRARY
00309 
00310 #include "ruby/st.h"
00311 
00312 typedef struct {
00313   const UChar* s;
00314   const UChar* end;
00315 } st_str_end_key;
00316 
00317 static int
00318 str_end_cmp(st_data_t xp, st_data_t yp)
00319 {
00320   const st_str_end_key *x, *y;
00321   const UChar *p, *q;
00322   int c;
00323 
00324   x = (const st_str_end_key *)xp;
00325   y = (const st_str_end_key *)yp;
00326   if ((x->end - x->s) != (y->end - y->s))
00327     return 1;
00328 
00329   p = x->s;
00330   q = y->s;
00331   while (p < x->end) {
00332     c = (int )*p - (int )*q;
00333     if (c != 0) return c;
00334 
00335     p++; q++;
00336   }
00337 
00338   return 0;
00339 }
00340 
00341 static st_index_t
00342 str_end_hash(st_data_t xp)
00343 {
00344   const st_str_end_key *x = (const st_str_end_key *)xp;
00345   const UChar *p;
00346   st_index_t val = 0;
00347 
00348   p = x->s;
00349   while (p < x->end) {
00350     val = val * 997 + (int )*p++;
00351   }
00352 
00353   return val + (val >> 5);
00354 }
00355 
00356 extern hash_table_type*
00357 onig_st_init_strend_table_with_size(st_index_t size)
00358 {
00359   static const struct st_hash_type hashType = {
00360     str_end_cmp,
00361     str_end_hash,
00362   };
00363 
00364   return (hash_table_type* )
00365            onig_st_init_table_with_size(&hashType, size);
00366 }
00367 
00368 extern int
00369 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
00370                       const UChar* end_key, hash_data_type *value)
00371 {
00372   st_str_end_key key;
00373 
00374   key.s   = (UChar* )str_key;
00375   key.end = (UChar* )end_key;
00376 
00377   return onig_st_lookup(table, (st_data_t )(&key), value);
00378 }
00379 
00380 extern int
00381 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
00382                       const UChar* end_key, hash_data_type value)
00383 {
00384   st_str_end_key* key;
00385   int result;
00386 
00387   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
00388   key->s   = (UChar* )str_key;
00389   key->end = (UChar* )end_key;
00390   result = onig_st_insert(table, (st_data_t )key, value);
00391   if (result) {
00392     xfree(key);
00393   }
00394   return result;
00395 }
00396 
00397 #endif /* USE_ST_LIBRARY */
00398 
00399 
00400 #ifdef USE_NAMED_GROUP
00401 
00402 #define INIT_NAME_BACKREFS_ALLOC_NUM   8
00403 
00404 typedef struct {
00405   UChar* name;
00406   size_t name_len;   /* byte length */
00407   int    back_num;   /* number of backrefs */
00408   int    back_alloc;
00409   int    back_ref1;
00410   int*   back_refs;
00411 } NameEntry;
00412 
00413 #ifdef USE_ST_LIBRARY
00414 
00415 typedef st_table  NameTable;
00416 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
00417 
00418 #define NAMEBUF_SIZE    24
00419 #define NAMEBUF_SIZE_1  25
00420 
00421 #ifdef ONIG_DEBUG
00422 static int
00423 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
00424 {
00425   int i;
00426   FILE* fp = (FILE* )arg;
00427 
00428   fprintf(fp, "%s: ", e->name);
00429   if (e->back_num == 0)
00430     fputs("-", fp);
00431   else if (e->back_num == 1)
00432     fprintf(fp, "%d", e->back_ref1);
00433   else {
00434     for (i = 0; i < e->back_num; i++) {
00435       if (i > 0) fprintf(fp, ", ");
00436       fprintf(fp, "%d", e->back_refs[i]);
00437     }
00438   }
00439   fputs("\n", fp);
00440   return ST_CONTINUE;
00441 }
00442 
00443 extern int
00444 onig_print_names(FILE* fp, regex_t* reg)
00445 {
00446   NameTable* t = (NameTable* )reg->name_table;
00447 
00448   if (IS_NOT_NULL(t)) {
00449     fprintf(fp, "name table\n");
00450     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
00451     fputs("\n", fp);
00452   }
00453   return 0;
00454 }
00455 #endif /* ONIG_DEBUG */
00456 
00457 static int
00458 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
00459 {
00460   xfree(e->name);
00461   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00462   xfree(key);
00463   xfree(e);
00464   return ST_DELETE;
00465 }
00466 
00467 static int
00468 names_clear(regex_t* reg)
00469 {
00470   NameTable* t = (NameTable* )reg->name_table;
00471 
00472   if (IS_NOT_NULL(t)) {
00473     onig_st_foreach(t, i_free_name_entry, 0);
00474   }
00475   return 0;
00476 }
00477 
00478 extern int
00479 onig_names_free(regex_t* reg)
00480 {
00481   int r;
00482   NameTable* t;
00483 
00484   r = names_clear(reg);
00485   if (r) return r;
00486 
00487   t = (NameTable* )reg->name_table;
00488   if (IS_NOT_NULL(t)) onig_st_free_table(t);
00489   reg->name_table = (void* )NULL;
00490   return 0;
00491 }
00492 
00493 static NameEntry*
00494 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00495 {
00496   NameEntry* e;
00497   NameTable* t = (NameTable* )reg->name_table;
00498 
00499   e = (NameEntry* )NULL;
00500   if (IS_NOT_NULL(t)) {
00501     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
00502   }
00503   return e;
00504 }
00505 
00506 typedef struct {
00507   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
00508   regex_t* reg;
00509   void* arg;
00510   int ret;
00511   OnigEncoding enc;
00512 } INamesArg;
00513 
00514 static int
00515 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
00516 {
00517   int r = (*(arg->func))(e->name,
00518                          e->name + e->name_len,
00519                          e->back_num,
00520                          (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00521                          arg->reg, arg->arg);
00522   if (r != 0) {
00523     arg->ret = r;
00524     return ST_STOP;
00525   }
00526   return ST_CONTINUE;
00527 }
00528 
00529 extern int
00530 onig_foreach_name(regex_t* reg,
00531   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00532 {
00533   INamesArg narg;
00534   NameTable* t = (NameTable* )reg->name_table;
00535 
00536   narg.ret = 0;
00537   if (IS_NOT_NULL(t)) {
00538     narg.func = func;
00539     narg.reg  = reg;
00540     narg.arg  = arg;
00541     narg.enc  = reg->enc; /* should be pattern encoding. */
00542     onig_st_foreach(t, i_names, (HashDataType )&narg);
00543   }
00544   return narg.ret;
00545 }
00546 
00547 static int
00548 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
00549 {
00550   int i;
00551 
00552   if (e->back_num > 1) {
00553     for (i = 0; i < e->back_num; i++) {
00554       e->back_refs[i] = map[e->back_refs[i]].new_val;
00555     }
00556   }
00557   else if (e->back_num == 1) {
00558     e->back_ref1 = map[e->back_ref1].new_val;
00559   }
00560 
00561   return ST_CONTINUE;
00562 }
00563 
00564 extern int
00565 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
00566 {
00567   NameTable* t = (NameTable* )reg->name_table;
00568 
00569   if (IS_NOT_NULL(t)) {
00570     onig_st_foreach(t, i_renumber_name, (HashDataType )map);
00571   }
00572   return 0;
00573 }
00574 
00575 
00576 extern int
00577 onig_number_of_names(regex_t* reg)
00578 {
00579   NameTable* t = (NameTable* )reg->name_table;
00580 
00581   if (IS_NOT_NULL(t))
00582     return t->num_entries;
00583   else
00584     return 0;
00585 }
00586 
00587 #else  /* USE_ST_LIBRARY */
00588 
00589 #define INIT_NAMES_ALLOC_NUM    8
00590 
00591 typedef struct {
00592   NameEntry* e;
00593   int        num;
00594   int        alloc;
00595 } NameTable;
00596 
#ifdef ONIG_DEBUG
/* Debug helper: dump the name table of reg to fp, one line per name as
   "name: n1, n2, ..." ("-" when the name has no groups).  Prints nothing
   when the table is missing or empty.  Always returns 0. */
extern int
onig_print_names(FILE* fp, regex_t* reg)
{
  int row, col;
  NameTable* t = (NameTable* )reg->name_table;

  if (IS_NULL(t) || t->num <= 0) return 0;

  fprintf(fp, "name table\n");
  for (row = 0; row < t->num; row++) {
    NameEntry* ent = &(t->e[row]);

    fprintf(fp, "%s: ", ent->name);
    switch (ent->back_num) {
    case 0:
      fputs("-", fp);
      break;
    case 1:
      fprintf(fp, "%d", ent->back_ref1);
      break;
    default:
      for (col = 0; col < ent->back_num; col++) {
        if (col > 0) fprintf(fp, ", ");
        fprintf(fp, "%d", ent->back_refs[col]);
      }
      break;
    }
    fputs("\n", fp);
  }
  fputs("\n", fp);
  return 0;
}
#endif
00629 
00630 static int
00631 names_clear(regex_t* reg)
00632 {
00633   int i;
00634   NameEntry* e;
00635   NameTable* t = (NameTable* )reg->name_table;
00636 
00637   if (IS_NOT_NULL(t)) {
00638     for (i = 0; i < t->num; i++) {
00639       e = &(t->e[i]);
00640       if (IS_NOT_NULL(e->name)) {
00641         xfree(e->name);
00642         e->name       = NULL;
00643         e->name_len   = 0;
00644         e->back_num   = 0;
00645         e->back_alloc = 0;
00646         if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00647         e->back_refs = (int* )NULL;
00648       }
00649     }
00650     if (IS_NOT_NULL(t->e)) {
00651       xfree(t->e);
00652       t->e = NULL;
00653     }
00654     t->num = 0;
00655   }
00656   return 0;
00657 }
00658 
00659 extern int
00660 onig_names_free(regex_t* reg)
00661 {
00662   int r;
00663   NameTable* t;
00664 
00665   r = names_clear(reg);
00666   if (r) return r;
00667 
00668   t = (NameTable* )reg->name_table;
00669   if (IS_NOT_NULL(t)) xfree(t);
00670   reg->name_table = NULL;
00671   return 0;
00672 }
00673 
00674 static NameEntry*
00675 name_find(regex_t* reg, UChar* name, UChar* name_end)
00676 {
00677   int i, len;
00678   NameEntry* e;
00679   NameTable* t = (NameTable* )reg->name_table;
00680 
00681   if (IS_NOT_NULL(t)) {
00682     len = name_end - name;
00683     for (i = 0; i < t->num; i++) {
00684       e = &(t->e[i]);
00685       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
00686         return e;
00687     }
00688   }
00689   return (NameEntry* )NULL;
00690 }
00691 
00692 extern int
00693 onig_foreach_name(regex_t* reg,
00694   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00695 {
00696   int i, r;
00697   NameEntry* e;
00698   NameTable* t = (NameTable* )reg->name_table;
00699 
00700   if (IS_NOT_NULL(t)) {
00701     for (i = 0; i < t->num; i++) {
00702       e = &(t->e[i]);
00703       r = (*func)(e->name, e->name + e->name_len, e->back_num,
00704                   (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00705                   reg, arg);
00706       if (r != 0) return r;
00707     }
00708   }
00709   return 0;
00710 }
00711 
00712 extern int
00713 onig_number_of_names(regex_t* reg)
00714 {
00715   NameTable* t = (NameTable* )reg->name_table;
00716 
00717   if (IS_NOT_NULL(t))
00718     return t->num;
00719   else
00720     return 0;
00721 }
00722 
00723 #endif /* else USE_ST_LIBRARY */
00724 
00725 static int
00726 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
00727 {
00728   int alloc;
00729   NameEntry* e;
00730   NameTable* t = (NameTable* )reg->name_table;
00731 
00732   if (name_end - name <= 0)
00733     return ONIGERR_EMPTY_GROUP_NAME;
00734 
00735   e = name_find(reg, name, name_end);
00736   if (IS_NULL(e)) {
00737 #ifdef USE_ST_LIBRARY
00738     if (IS_NULL(t)) {
00739       t = onig_st_init_strend_table_with_size(5);
00740       reg->name_table = (void* )t;
00741     }
00742     e = (NameEntry* )xmalloc(sizeof(NameEntry));
00743     CHECK_NULL_RETURN_MEMERR(e);
00744 
00745     e->name = strdup_with_null(reg->enc, name, name_end);
00746     if (IS_NULL(e->name)) {
00747       xfree(e);
00748       return ONIGERR_MEMORY;
00749     }
00750     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
00751                           (HashDataType )e);
00752 
00753     e->name_len   = name_end - name;
00754     e->back_num   = 0;
00755     e->back_alloc = 0;
00756     e->back_refs  = (int* )NULL;
00757 
00758 #else
00759 
00760     if (IS_NULL(t)) {
00761       alloc = INIT_NAMES_ALLOC_NUM;
00762       t = (NameTable* )xmalloc(sizeof(NameTable));
00763       CHECK_NULL_RETURN_MEMERR(t);
00764       t->e     = NULL;
00765       t->alloc = 0;
00766       t->num   = 0;
00767 
00768       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
00769       if (IS_NULL(t->e)) {
00770         xfree(t);
00771         return ONIGERR_MEMORY;
00772       }
00773       t->alloc = alloc;
00774       reg->name_table = t;
00775       goto clear;
00776     }
00777     else if (t->num == t->alloc) {
00778       int i;
00779 
00780       alloc = t->alloc * 2;
00781       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
00782       CHECK_NULL_RETURN_MEMERR(t->e);
00783       t->alloc = alloc;
00784 
00785     clear:
00786       for (i = t->num; i < t->alloc; i++) {
00787         t->e[i].name       = NULL;
00788         t->e[i].name_len   = 0;
00789         t->e[i].back_num   = 0;
00790         t->e[i].back_alloc = 0;
00791         t->e[i].back_refs  = (int* )NULL;
00792       }
00793     }
00794     e = &(t->e[t->num]);
00795     t->num++;
00796     e->name = strdup_with_null(reg->enc, name, name_end);
00797     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
00798     e->name_len = name_end - name;
00799 #endif
00800   }
00801 
00802   if (e->back_num >= 1 &&
00803       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
00804     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
00805                                     name, name_end);
00806     return ONIGERR_MULTIPLEX_DEFINED_NAME;
00807   }
00808 
00809   e->back_num++;
00810   if (e->back_num == 1) {
00811     e->back_ref1 = backref;
00812   }
00813   else {
00814     if (e->back_num == 2) {
00815       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
00816       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
00817       CHECK_NULL_RETURN_MEMERR(e->back_refs);
00818       e->back_alloc = alloc;
00819       e->back_refs[0] = e->back_ref1;
00820       e->back_refs[1] = backref;
00821     }
00822     else {
00823       if (e->back_num > e->back_alloc) {
00824         alloc = e->back_alloc * 2;
00825         e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
00826         CHECK_NULL_RETURN_MEMERR(e->back_refs);
00827         e->back_alloc = alloc;
00828       }
00829       e->back_refs[e->back_num - 1] = backref;
00830     }
00831   }
00832 
00833   return 0;
00834 }
00835 
00836 extern int
00837 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00838                            const UChar* name_end, int** nums)
00839 {
00840   NameEntry* e = name_find(reg, name, name_end);
00841 
00842   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
00843 
00844   switch (e->back_num) {
00845   case 0:
00846     *nums = 0;
00847     break;
00848   case 1:
00849     *nums = &(e->back_ref1);
00850     break;
00851   default:
00852     *nums = e->back_refs;
00853     break;
00854   }
00855   return e->back_num;
00856 }
00857 
00858 extern int
00859 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00860                             const UChar* name_end, OnigRegion *region)
00861 {
00862   int i, n, *nums;
00863 
00864   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
00865   if (n < 0)
00866     return n;
00867   else if (n == 0)
00868     return ONIGERR_PARSER_BUG;
00869   else if (n == 1)
00870     return nums[0];
00871   else {
00872     if (IS_NOT_NULL(region)) {
00873       for (i = n - 1; i >= 0; i--) {
00874         if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
00875           return nums[i];
00876       }
00877     }
00878     return nums[n - 1];
00879   }
00880 }
00881 
00882 #else /* USE_NAMED_GROUP */
00883 
00884 extern int
00885 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00886                            const UChar* name_end, int** nums)
00887 {
00888   return ONIG_NO_SUPPORT_CONFIG;
00889 }
00890 
00891 extern int
00892 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00893                             const UChar* name_end, OnigRegion* region)
00894 {
00895   return ONIG_NO_SUPPORT_CONFIG;
00896 }
00897 
00898 extern int
00899 onig_foreach_name(regex_t* reg,
00900   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00901 {
00902   return ONIG_NO_SUPPORT_CONFIG;
00903 }
00904 
00905 extern int
00906 onig_number_of_names(regex_t* reg)
00907 {
00908   return 0;
00909 }
00910 #endif /* else USE_NAMED_GROUP */
00911 
00912 extern int
00913 onig_noname_group_capture_is_active(regex_t* reg)
00914 {
00915   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
00916     return 0;
00917 
00918 #ifdef USE_NAMED_GROUP
00919   if (onig_number_of_names(reg) > 0 &&
00920       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
00921       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
00922     return 0;
00923   }
00924 #endif
00925 
00926   return 1;
00927 }
00928 
00929 
00930 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
00931 
00932 static void
00933 scan_env_clear(ScanEnv* env)
00934 {
00935   int i;
00936 
00937   BIT_STATUS_CLEAR(env->capture_history);
00938   BIT_STATUS_CLEAR(env->bt_mem_start);
00939   BIT_STATUS_CLEAR(env->bt_mem_end);
00940   BIT_STATUS_CLEAR(env->backrefed_mem);
00941   env->error      = (UChar* )NULL;
00942   env->error_end  = (UChar* )NULL;
00943   env->num_call   = 0;
00944   env->num_mem    = 0;
00945 #ifdef USE_NAMED_GROUP
00946   env->num_named  = 0;
00947 #endif
00948   env->mem_alloc         = 0;
00949   env->mem_nodes_dynamic = (Node** )NULL;
00950 
00951   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
00952     env->mem_nodes_static[i] = NULL_NODE;
00953 
00954 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00955   env->num_comb_exp_check  = 0;
00956   env->comb_exp_max_regnum = 0;
00957   env->curr_max_regnum     = 0;
00958   env->has_recursion       = 0;
00959 #endif
00960   env->warnings_flag       = 0;
00961 }
00962 
00963 static int
00964 scan_env_add_mem_entry(ScanEnv* env)
00965 {
00966   int i, need, alloc;
00967   Node** p;
00968 
00969   need = env->num_mem + 1;
00970   if (need >= SCANENV_MEMNODES_SIZE) {
00971     if (env->mem_alloc <= need) {
00972       if (IS_NULL(env->mem_nodes_dynamic)) {
00973         alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
00974         p = (Node** )xmalloc(sizeof(Node*) * alloc);
00975         xmemcpy(p, env->mem_nodes_static,
00976                 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
00977       }
00978       else {
00979         alloc = env->mem_alloc * 2;
00980         p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
00981       }
00982       CHECK_NULL_RETURN_MEMERR(p);
00983 
00984       for (i = env->num_mem + 1; i < alloc; i++)
00985         p[i] = NULL_NODE;
00986 
00987       env->mem_nodes_dynamic = p;
00988       env->mem_alloc = alloc;
00989     }
00990   }
00991 
00992   env->num_mem++;
00993   return env->num_mem;
00994 }
00995 
00996 static int
00997 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
00998 {
00999   if (env->num_mem >= num)
01000     SCANENV_MEM_NODES(env)[num] = node;
01001   else
01002     return ONIGERR_PARSER_BUG;
01003   return 0;
01004 }
01005 
01006 
#ifdef USE_PARSE_TREE_NODE_RECYCLE
/* Link overlaid on a released parse node while it waits on the free list. */
typedef struct _FreeNode {
  struct _FreeNode* next;   /* next released node, or NULL */
} FreeNode;

/* Head of the list of released nodes available for reuse by node_new. */
static FreeNode* FreeNodeList = (FreeNode* )NULL;
#endif
01014 
01015 extern void
01016 onig_node_free(Node* node)
01017 {
01018  start:
01019   if (IS_NULL(node)) return ;
01020 
01021   switch (NTYPE(node)) {
01022   case NT_STR:
01023     if (NSTR(node)->capa != 0 &&
01024         IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01025       xfree(NSTR(node)->s);
01026     }
01027     break;
01028 
01029   case NT_LIST:
01030   case NT_ALT:
01031     onig_node_free(NCAR(node));
01032     {
01033       Node* next_node = NCDR(node);
01034 
01035 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01036       {
01037         FreeNode* n = (FreeNode* )node;
01038 
01039         THREAD_ATOMIC_START;
01040         n->next = FreeNodeList;
01041         FreeNodeList = n;
01042         THREAD_ATOMIC_END;
01043       }
01044 #else
01045       xfree(node);
01046 #endif
01047       node = next_node;
01048       goto start;
01049     }
01050     break;
01051 
01052   case NT_CCLASS:
01053     {
01054       CClassNode* cc = NCCLASS(node);
01055 
01056       if (IS_NCCLASS_SHARE(cc)) return ;
01057       if (cc->mbuf)
01058         bbuf_free(cc->mbuf);
01059     }
01060     break;
01061 
01062   case NT_QTFR:
01063     if (NQTFR(node)->target)
01064       onig_node_free(NQTFR(node)->target);
01065     break;
01066 
01067   case NT_ENCLOSE:
01068     if (NENCLOSE(node)->target)
01069       onig_node_free(NENCLOSE(node)->target);
01070     break;
01071 
01072   case NT_BREF:
01073     if (IS_NOT_NULL(NBREF(node)->back_dynamic))
01074       xfree(NBREF(node)->back_dynamic);
01075     break;
01076 
01077   case NT_ANCHOR:
01078     if (NANCHOR(node)->target)
01079       onig_node_free(NANCHOR(node)->target);
01080     break;
01081   }
01082 
01083 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01084   {
01085     FreeNode* n = (FreeNode* )node;
01086 
01087     THREAD_ATOMIC_START;
01088     n->next = FreeNodeList;
01089     FreeNodeList = n;
01090     THREAD_ATOMIC_END;
01091   }
01092 #else
01093   xfree(node);
01094 #endif
01095 }
01096 
#ifdef USE_PARSE_TREE_NODE_RECYCLE
/* Free every node waiting on the recycle list and leave the list empty.
   Always returns 0.  The list is walked without THREAD_ATOMIC_START /
   THREAD_ATOMIC_END; those calls are commented out in the original. */
extern int
onig_free_node_list(void)
{
  FreeNode* cur = FreeNodeList;

  while (IS_NOT_NULL(cur)) {
    FreeNode* rest = cur->next;

    FreeNodeList = rest;
    xfree(cur);
    cur = rest;
  }
  return 0;
}
#endif
01113 
01114 static Node*
01115 node_new(void)
01116 {
01117   Node* node;
01118 
01119 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01120   THREAD_ATOMIC_START;
01121   if (IS_NOT_NULL(FreeNodeList)) {
01122     node = (Node* )FreeNodeList;
01123     FreeNodeList = FreeNodeList->next;
01124     THREAD_ATOMIC_END;
01125     return node;
01126   }
01127   THREAD_ATOMIC_END;
01128 #endif
01129 
01130   node = (Node* )xmalloc(sizeof(Node));
01131   /* xmemset(node, 0, sizeof(Node)); */
01132   return node;
01133 }
01134 
01135 
01136 static void
01137 initialize_cclass(CClassNode* cc)
01138 {
01139   BITSET_CLEAR(cc->bs);
01140   /* cc->base.flags = 0; */
01141   cc->flags = 0;
01142   cc->mbuf  = NULL;
01143 }
01144 
01145 static Node*
01146 node_new_cclass(void)
01147 {
01148   Node* node = node_new();
01149   CHECK_NULL_RETURN(node);
01150 
01151   SET_NTYPE(node, NT_CCLASS);
01152   initialize_cclass(NCCLASS(node));
01153   return node;
01154 }
01155 
01156 static Node*
01157 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
01158                                    const OnigCodePoint ranges[])
01159 {
01160   int n, i;
01161   CClassNode* cc;
01162   OnigCodePoint j;
01163 
01164   Node* node = node_new_cclass();
01165   CHECK_NULL_RETURN(node);
01166 
01167   cc = NCCLASS(node);
01168   if (not != 0) NCCLASS_SET_NOT(cc);
01169 
01170   BITSET_CLEAR(cc->bs);
01171   if (sb_out > 0 && IS_NOT_NULL(ranges)) {
01172     n = ONIGENC_CODE_RANGE_NUM(ranges);
01173     for (i = 0; i < n; i++) {
01174       for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
01175            j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
01176         if (j >= sb_out) goto sb_end;
01177 
01178         BITSET_SET_BIT(cc->bs, j);
01179       }
01180     }
01181   }
01182 
01183  sb_end:
01184   if (IS_NULL(ranges)) {
01185   is_null:
01186     cc->mbuf = NULL;
01187   }
01188   else {
01189     BBuf* bbuf;
01190 
01191     n = ONIGENC_CODE_RANGE_NUM(ranges);
01192     if (n == 0) goto is_null;
01193 
01194     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
01195     CHECK_NULL_RETURN(bbuf);
01196     bbuf->alloc = n + 1;
01197     bbuf->used  = n + 1;
01198     bbuf->p     = (UChar* )((void* )ranges);
01199 
01200     cc->mbuf = bbuf;
01201   }
01202 
01203   return node;
01204 }
01205 
01206 static Node*
01207 node_new_ctype(int type, int not)
01208 {
01209   Node* node = node_new();
01210   CHECK_NULL_RETURN(node);
01211 
01212   SET_NTYPE(node, NT_CTYPE);
01213   NCTYPE(node)->ctype = type;
01214   NCTYPE(node)->not   = not;
01215   return node;
01216 }
01217 
01218 static Node*
01219 node_new_anychar(void)
01220 {
01221   Node* node = node_new();
01222   CHECK_NULL_RETURN(node);
01223 
01224   SET_NTYPE(node, NT_CANY);
01225   return node;
01226 }
01227 
01228 static Node*
01229 node_new_list(Node* left, Node* right)
01230 {
01231   Node* node = node_new();
01232   CHECK_NULL_RETURN(node);
01233 
01234   SET_NTYPE(node, NT_LIST);
01235   NCAR(node)  = left;
01236   NCDR(node) = right;
01237   return node;
01238 }
01239 
01240 extern Node*
01241 onig_node_new_list(Node* left, Node* right)
01242 {
01243   return node_new_list(left, right);
01244 }
01245 
01246 extern Node*
01247 onig_node_list_add(Node* list, Node* x)
01248 {
01249   Node *n;
01250 
01251   n = onig_node_new_list(x, NULL);
01252   if (IS_NULL(n)) return NULL_NODE;
01253 
01254   if (IS_NOT_NULL(list)) {
01255     while (IS_NOT_NULL(NCDR(list)))
01256       list = NCDR(list);
01257 
01258     NCDR(list) = n;
01259   }
01260 
01261   return n;
01262 }
01263 
01264 extern Node*
01265 onig_node_new_alt(Node* left, Node* right)
01266 {
01267   Node* node = node_new();
01268   CHECK_NULL_RETURN(node);
01269 
01270   SET_NTYPE(node, NT_ALT);
01271   NCAR(node)  = left;
01272   NCDR(node) = right;
01273   return node;
01274 }
01275 
01276 extern Node*
01277 onig_node_new_anchor(int type)
01278 {
01279   Node* node = node_new();
01280   CHECK_NULL_RETURN(node);
01281 
01282   SET_NTYPE(node, NT_ANCHOR);
01283   NANCHOR(node)->type     = type;
01284   NANCHOR(node)->target   = NULL;
01285   NANCHOR(node)->char_len = -1;
01286   return node;
01287 }
01288 
/*
 * Create an NT_BREF node that refers to the `back_num` group numbers
 * listed in `backrefs`.
 *
 *   by_name     : non-zero sets NST_NAME_REF in the node state.
 *   exist_level,
 *   nest_level  : (USE_BACKREF_WITH_LEVEL only) when exist_level is
 *                 non-zero, NST_NEST_LEVEL is set and nest_level stored.
 *   env         : scan environment; used to look up the memory nodes of
 *                 the referenced groups.
 *
 * The group numbers are copied into the node: into back_static when
 * back_num <= NODE_BACKREFS_SIZE, otherwise into a heap array stored in
 * back_dynamic.
 * Returns the node, or NULL if the heap array cannot be allocated (the
 * node is freed in that case).
 */
01289 static Node*
01290 node_new_backref(int back_num, int* backrefs, int by_name,
01291 #ifdef USE_BACKREF_WITH_LEVEL
01292                  int exist_level, int nest_level,
01293 #endif
01294                  ScanEnv* env)
01295 {
01296   int i;
01297   Node* node = node_new();
01298 
01299   CHECK_NULL_RETURN(node);
01300 
01301   SET_NTYPE(node, NT_BREF);
01302   NBREF(node)->state    = 0;
01303   NBREF(node)->back_num = back_num;
01304   NBREF(node)->back_dynamic = (int* )NULL;
01305   if (by_name != 0)
01306     NBREF(node)->state |= NST_NAME_REF;
01307 
01308 #ifdef USE_BACKREF_WITH_LEVEL
01309   if (exist_level != 0) {
01310     NBREF(node)->state |= NST_NEST_LEVEL;
01311     NBREF(node)->nest_level  = nest_level;
01312   }
01313 #endif
01314 
  /* A referenced group whose number is within env->num_mem but whose
     memory node entry is still NULL marks the reference NST_RECURSION. */
01315   for (i = 0; i < back_num; i++) {
01316     if (backrefs[i] <= env->num_mem &&
01317         IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
01318       NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
01319       break;
01320     }
01321   }
01322 
01323   if (back_num <= NODE_BACKREFS_SIZE) {
01324     for (i = 0; i < back_num; i++)
01325       NBREF(node)->back_static[i] = backrefs[i];
01326   }
01327   else {
01328     int* p = (int* )xmalloc(sizeof(int) * back_num);
01329     if (IS_NULL(p)) {
      /* back_dynamic is still NULL here, so only the node is released. */
01330       onig_node_free(node);
01331       return NULL;
01332     }
01333     NBREF(node)->back_dynamic = p;
01334     for (i = 0; i < back_num; i++)
01335       p[i] = backrefs[i];
01336   }
01337   return node;
01338 }
01339 
#ifdef USE_SUBEXP_CALL
/*
 * Create an NT_CALL node.  The name pointers are stored as given (the
 * name is not copied); the call target is left unset (NULL_NODE).
 * A non-zero `gnum` means call by number.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  CHECK_NULL_RETURN(call);
  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;  /* call by number if gnum != 0 */
  NCALL(call)->name_end  = name_end;
  NCALL(call)->name      = name;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
01356 
01357 static Node*
01358 node_new_quantifier(int lower, int upper, int by_number)
01359 {
01360   Node* node = node_new();
01361   CHECK_NULL_RETURN(node);
01362 
01363   SET_NTYPE(node, NT_QTFR);
01364   NQTFR(node)->state  = 0;
01365   NQTFR(node)->target = NULL;
01366   NQTFR(node)->lower  = lower;
01367   NQTFR(node)->upper  = upper;
01368   NQTFR(node)->greedy = 1;
01369   NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
01370   NQTFR(node)->head_exact        = NULL_NODE;
01371   NQTFR(node)->next_head_exact   = NULL_NODE;
01372   NQTFR(node)->is_refered        = 0;
01373   if (by_number != 0)
01374     NQTFR(node)->state |= NST_BY_NUMBER;
01375 
01376 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01377   NQTFR(node)->comb_exp_check_num = 0;
01378 #endif
01379 
01380   return node;
01381 }
01382 
01383 static Node*
01384 node_new_enclose(int type)
01385 {
01386   Node* node = node_new();
01387   CHECK_NULL_RETURN(node);
01388 
01389   SET_NTYPE(node, NT_ENCLOSE);
01390   NENCLOSE(node)->type      = type;
01391   NENCLOSE(node)->state     =  0;
01392   NENCLOSE(node)->regnum    =  0;
01393   NENCLOSE(node)->option    =  0;
01394   NENCLOSE(node)->target    = NULL;
01395   NENCLOSE(node)->call_addr = -1;
01396   NENCLOSE(node)->opt_count =  0;
01397   return node;
01398 }
01399 
01400 extern Node*
01401 onig_node_new_enclose(int type)
01402 {
01403   return node_new_enclose(type);
01404 }
01405 
01406 static Node*
01407 node_new_enclose_memory(OnigOptionType option, int is_named)
01408 {
01409   Node* node = node_new_enclose(ENCLOSE_MEMORY);
01410   CHECK_NULL_RETURN(node);
01411   if (is_named != 0)
01412     SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
01413 
01414 #ifdef USE_SUBEXP_CALL
01415   NENCLOSE(node)->option = option;
01416 #endif
01417   return node;
01418 }
01419 
01420 static Node*
01421 node_new_option(OnigOptionType option)
01422 {
01423   Node* node = node_new_enclose(ENCLOSE_OPTION);
01424   CHECK_NULL_RETURN(node);
01425   NENCLOSE(node)->option = option;
01426   return node;
01427 }
01428 
/*
 * Append the bytes [s, end) to the string held by `node`.
 *
 * Nothing happens when the range is empty.  Short strings stay in the
 * node's embedded buffer (buf); once the node already has a heap
 * capacity (capa > 0) or the result would exceed NODE_STR_BUF_SIZE - 1
 * bytes, the data lives in a separately allocated buffer that is grown
 * to len + addlen + NODE_STR_MARGIN when the current capacity is too
 * small.
 *
 * Returns 0 on success.  On allocation failure the function leaves via
 * CHECK_NULL_RETURN_MEMERR (presumably a memory error code -- the macro
 * is defined outside this view).
 */
01429 extern int
01430 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
01431 {
01432   ptrdiff_t addlen = end - s;
01433 
01434   if (addlen > 0) {
01435     ptrdiff_t len  = NSTR(node)->end - NSTR(node)->s;
01436 
01437     if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
01438       UChar* p;
01439       ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
01440 
01441       if (capa <= NSTR(node)->capa) {
        /* existing heap buffer is large enough: append in place */
01442         onig_strcpy(NSTR(node)->s + len, s, end);
01443       }
01444       else {
        /* s == buf means the data is still in the embedded buffer */
01445         if (NSTR(node)->s == NSTR(node)->buf)
01446           p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
01447                                       s, end, capa);
01448         else
01449           p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
01450 
01451         CHECK_NULL_RETURN_MEMERR(p);
01452         NSTR(node)->s    = p;
01453         NSTR(node)->capa = capa;
01454       }
01455     }
01456     else {
01457       onig_strcpy(NSTR(node)->s + len, s, end);
01458     }
01459     NSTR(node)->end = NSTR(node)->s + len + addlen;
01460   }
01461 
01462   return 0;
01463 }
01464 
01465 extern int
01466 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
01467 {
01468   onig_node_str_clear(node);
01469   return onig_node_str_cat(node, s, end);
01470 }
01471 
01472 static int
01473 node_str_cat_char(Node* node, UChar c)
01474 {
01475   UChar s[1];
01476 
01477   s[0] = c;
01478   return onig_node_str_cat(node, s, s + 1);
01479 }
01480 
01481 extern void
01482 onig_node_conv_to_str_node(Node* node, int flag)
01483 {
01484   SET_NTYPE(node, NT_STR);
01485   NSTR(node)->flag = flag;
01486   NSTR(node)->capa = 0;
01487   NSTR(node)->s    = NSTR(node)->buf;
01488   NSTR(node)->end  = NSTR(node)->buf;
01489 }
01490 
01491 extern void
01492 onig_node_str_clear(Node* node)
01493 {
01494   if (NSTR(node)->capa != 0 &&
01495       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01496     xfree(NSTR(node)->s);
01497   }
01498 
01499   NSTR(node)->capa = 0;
01500   NSTR(node)->flag = 0;
01501   NSTR(node)->s    = NSTR(node)->buf;
01502   NSTR(node)->end  = NSTR(node)->buf;
01503 }
01504 
01505 static Node*
01506 node_new_str(const UChar* s, const UChar* end)
01507 {
01508   Node* node = node_new();
01509   CHECK_NULL_RETURN(node);
01510 
01511   SET_NTYPE(node, NT_STR);
01512   NSTR(node)->capa = 0;
01513   NSTR(node)->flag = 0;
01514   NSTR(node)->s    = NSTR(node)->buf;
01515   NSTR(node)->end  = NSTR(node)->buf;
01516   if (onig_node_str_cat(node, s, end)) {
01517     onig_node_free(node);
01518     return NULL;
01519   }
01520   return node;
01521 }
01522 
01523 extern Node*
01524 onig_node_new_str(const UChar* s, const UChar* end)
01525 {
01526   return node_new_str(s, end);
01527 }
01528 
01529 static Node*
01530 node_new_str_raw(UChar* s, UChar* end)
01531 {
01532   Node* node = node_new_str(s, end);
01533   NSTRING_SET_RAW(node);
01534   return node;
01535 }
01536 
01537 static Node*
01538 node_new_empty(void)
01539 {
01540   return node_new_str(NULL, NULL);
01541 }
01542 
01543 static Node*
01544 node_new_str_raw_char(UChar c)
01545 {
01546   UChar p[1];
01547 
01548   p[0] = c;
01549   return node_new_str_raw(p, p + 1);
01550 }
01551 
01552 static Node*
01553 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
01554 {
01555   const UChar *p;
01556   Node* n = NULL_NODE;
01557 
01558   if (sn->end > sn->s) {
01559     p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
01560     if (p && p > sn->s) { /* can be splitted. */
01561       n = node_new_str(p, sn->end);
01562       if ((sn->flag & NSTR_RAW) != 0)
01563         NSTRING_SET_RAW(n);
01564       sn->end = (UChar* )p;
01565     }
01566   }
01567   return n;
01568 }
01569 
01570 static int
01571 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
01572 {
01573   if (sn->end > sn->s) {
01574     return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s)  ?  1 : 0);
01575   }
01576   return 0;
01577 }
01578 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the string held by `sn` right by `num` bytes and fill the
 * vacated leading bytes with `val`.
 *
 * The contents are staged in a scratch buffer of NODE_STR_BUF_SIZE
 * bytes and then written back starting at sn->s[num]; no bounds are
 * checked here, so the caller must guarantee that the string fits the
 * scratch buffer and that sn->s has room for `num` more bytes.
 *
 * Returns 0.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = sn->end - sn->s;
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;
}
#endif
01596 
/*
 * Parse a run of decimal digits starting at *src, reading no further
 * than `end`.  Scanning stops at the first non-digit, which is pushed
 * back with PUNFETCH.
 *
 * On success *src is advanced past the digits and the value is returned
 * (0 when there were no digits).  Returns -1 when the value would exceed
 * INT_MAX_LIMIT; *src is not updated in that case.
 */
01597 extern int
01598 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
01599 {
01600   unsigned int num, val;
01601   OnigCodePoint c;
01602   UChar* p = *src;
01603   PFETCH_READY;
01604 
01605   num = 0;
01606   while (!PEND) {
01607     PFETCH(c);
01608     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
01609       val = (unsigned int )DIGITVAL(c);
      /* reject before num * 10 + val could pass INT_MAX_LIMIT */
01610       if ((INT_MAX_LIMIT - val) / 10UL < num)
01611         return -1;  /* overflow */
01612 
01613       num = num * 10 + val;
01614     }
01615     else {
01616       PUNFETCH;
01617       break;
01618     }
01619   }
01620   *src = p;
01621   return num;
01622 }
01623 
/*
 * Parse at most `maxlen` hexadecimal digits starting at *src, reading no
 * further than `end`.  A negative `maxlen` never reaches 0 in the loop
 * test, so only `end` limits the scan then.  Scanning stops at the first
 * non-hex-digit, which is pushed back with PUNFETCH.
 *
 * On success *src is advanced past the digits and the value is returned
 * (0 when there were no digits).  Returns -1 when the value would exceed
 * INT_MAX_LIMIT; *src is not updated in that case.
 */
01624 static int
01625 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
01626                                  OnigEncoding enc)
01627 {
01628   OnigCodePoint c;
01629   unsigned int num, val;
01630   UChar* p = *src;
01631   PFETCH_READY;
01632 
01633   num = 0;
01634   while (!PEND && maxlen-- != 0) {
01635     PFETCH(c);
01636     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
01637       val = (unsigned int )XDIGITVAL(enc,c);
01638       if ((INT_MAX_LIMIT - val) / 16UL < num)
01639         return -1;  /* overflow */
01640 
      /* the digit value is computed a second time here instead of
         reusing val */
01641       num = (num << 4) + XDIGITVAL(enc,c);
01642     }
01643     else {
01644       PUNFETCH;
01645       break;
01646     }
01647   }
01648   *src = p;
01649   return num;
01650 }
01651 
/*
 * Parse at most `maxlen` octal digits ('0'..'7') starting at *src,
 * reading no further than `end`.  Scanning stops at the first character
 * that is not an octal digit, which is pushed back with PUNFETCH.
 *
 * On success *src is advanced past the digits and the value is returned
 * (0 when there were no digits).  Returns -1 when the value would exceed
 * INT_MAX_LIMIT; *src is not updated in that case.
 */
01652 static int
01653 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
01654                            OnigEncoding enc)
01655 {
01656   OnigCodePoint c;
01657   unsigned int num, val;
01658   UChar* p = *src;
01659   PFETCH_READY;
01660 
01661   num = 0;
01662   while (!PEND && maxlen-- != 0) {
01663     PFETCH(c);
01664     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
01665       val = ODIGITVAL(c);
01666       if ((INT_MAX_LIMIT - val) / 8UL < num)
01667         return -1;  /* overflow */
01668 
01669       num = (num << 3) + val;
01670     }
01671     else {
01672       PUNFETCH;
01673       break;
01674     }
01675   }
01676   *src = p;
01677   return num;
01678 }
01679 
01680 
/* Write one code point (SIZE_CODE_POINT bytes) at byte offset `pos` of
   `bbuf`.  `code` must be an lvalue because its address is taken. */
01681 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
01682     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
01683 
01684 /* data format:
01685      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
01686      (all data size is OnigCodePoint)
01687  */
01688 static int
01689 new_code_range(BBuf** pbuf)
01690 {
01691 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
01692   int r;
01693   OnigCodePoint n;
01694   BBuf* bbuf;
01695 
01696   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
01697   CHECK_NULL_RETURN_MEMERR(*pbuf);
01698   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
01699   if (r) return r;
01700 
01701   n = 0;
01702   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01703   return 0;
01704 }
01705 
/*
 * Insert the code point range [from, to] into the range buffer *pbuf,
 * merging it with the stored ranges it overlaps or directly follows or
 * precedes.  The bounds are swapped first when from > to.  A NULL *pbuf
 * is replaced by a new empty buffer.
 *
 *   checkdup : when non-zero, CC_DUP_WARN(env) is issued if the new
 *              range overlaps an existing one.
 *
 * Returns 0 on success, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the
 * range count would exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or the error
 * from new_code_range().
 * NOTE(review): the BBUF_* macros are defined outside this view and may
 * return from this function on allocation failure -- confirm.
 */
01706 static int
01707 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
01708         int checkdup)
01709 {
01710   int r, inc_n, pos;
01711   int low, high, bound, x;
01712   OnigCodePoint n, *data;
01713   BBuf* bbuf;
01714 
01715   if (from > to) {
01716     n = from; from = to; to = n;
01717   }
01718 
01719   if (IS_NULL(*pbuf)) {
01720     r = new_code_range(pbuf);
01721     if (r) return r;
01722     bbuf = *pbuf;
01723     n = 0;
01724   }
01725   else {
01726     bbuf = *pbuf;
01727     GET_CODE_POINT(n, bbuf->p);
01728   }
  /* skip the leading count; data[2*i] / data[2*i+1] are from / to */
01729   data = (OnigCodePoint* )(bbuf->p);
01730   data++;
01731 
  /* low: index of the first stored range whose `to` is >= from */
01732   for (low = 0, bound = n; low < bound; ) {
01733     x = (low + bound) >> 1;
01734     if (from > data[x*2 + 1])
01735       low = x + 1;
01736     else
01737       bound = x;
01738   }
01739 
  /* high: index of the first stored range starting beyond to + 1 */
01740   for (high = low, bound = n; high < bound; ) {
01741     x = (high + bound) >> 1;
01742     if (to >= data[x*2] - 1)
01743       high = x + 1;
01744     else
01745       bound = x;
01746   }
01747 
  /* ranges [low, high) are absorbed by the new one; inc_n is the net
     change of the range count (1 means a plain insertion) */
01748   inc_n = low + 1 - high;
01749   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
01750     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
01751 
01752   if (inc_n != 1) {
01753     if (checkdup && to >= data[low*2]) CC_DUP_WARN(env);
    /* widen the new range to cover everything it absorbs */
01754     if (from > data[low*2])
01755       from = data[low*2];
01756     if (to < data[(high - 1)*2 + 1])
01757       to = data[(high - 1)*2 + 1];
01758   }
01759 
  /* move the ranges from index `high` on so that they follow slot `low` */
01760   if (inc_n != 0 && (OnigCodePoint )high < n) {
01761     int from_pos = SIZE_CODE_POINT * (1 + high * 2);
01762     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
01763     int size = (n - high) * 2 * SIZE_CODE_POINT;
01764 
01765     if (inc_n > 0) {
01766       BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
01767     }
01768     else {
01769       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
01770     }
01771   }
01772 
  /* store the merged range in slot `low` and update the count */
01773   pos = SIZE_CODE_POINT * (1 + low * 2);
01774   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
01775   BBUF_WRITE_CODE_POINT(bbuf, pos, from);
01776   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
01777   n += inc_n;
01778   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01779 
01780   return 0;
01781 }
01782 
01783 static int
01784 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01785 {
01786   return add_code_range_to_buf0(pbuf, env, from, to, 1);
01787 }
01788 
01789 static int
01790 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
01791 {
01792   if (from > to) {
01793     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
01794       return 0;
01795     else
01796       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
01797   }
01798 
01799   return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
01800 }
01801 
01802 static int
01803 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01804 {
01805     return add_code_range0(pbuf, env, from, to, 1);
01806 }
01807 
/*
 * Build in *pbuf the complement of the ranges stored in `bbuf`, within
 * MBCODE_START_POS(enc) .. ~0.  *pbuf is reset to NULL first.
 * A NULL `bbuf`, or one holding no ranges, yields
 * SET_ALL_MULTI_BYTE_RANGE(enc, pbuf).
 * Returns 0 on success or the error from adding a range.
 */
01808 static int
01809 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
01810 {
01811   int r, i, n;
01812   OnigCodePoint pre, from, *data, to = 0;
01813 
01814   *pbuf = (BBuf* )NULL;
01815   if (IS_NULL(bbuf)) {
01816   set_all:
01817     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01818   }
01819 
01820   data = (OnigCodePoint* )(bbuf->p);
01821   GET_CODE_POINT(n, data);
01822   data++;
01823   if (n <= 0) goto set_all;
01824 
01825   r = 0;
  /* pre: first code point not yet covered by an emitted gap */
01826   pre = MBCODE_START_POS(enc);
01827   for (i = 0; i < n; i++) {
01828     from = data[i*2];
01829     to   = data[i*2+1];
    /* NOTE(review): from - 1 wraps around if a stored range starts at 0 */
01830     if (pre <= from - 1) {
01831       r = add_code_range_to_buf(pbuf, env, pre, from - 1);
01832       if (r != 0) return r;
01833     }
01834     if (to == ~((OnigCodePoint )0)) break;
01835     pre = to + 1;
01836   }
  /* emit the gap after the last stored range, if there is one */
01837   if (to < ~((OnigCodePoint )0)) {
01838     r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
01839   }
01840   return r;
01841 }
01842 
/* Exchange two (buffer, negation flag) pairs.  All four arguments are
   assigned to, so each must be a modifiable lvalue. */
01843 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
01844   BBuf *tbuf; \
01845   int  tnot; \
01846   tnot = not1;  not1  = not2;  not2  = tnot; \
01847   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
01848 } while (0)
01849 
/*
 * Compute in *pbuf the union of two multibyte range sets, each given as
 * a buffer plus a negation flag (non-zero means "complement of the
 * buffer").  *pbuf is reset to NULL first; a NULL buffer stands for an
 * empty set.
 * Returns 0 on success or the first error reported by a helper.
 * NOTE(review): no result is built when both flags are non-zero and
 * both buffers are non-NULL; the only call visible here (or_cclass)
 * routes that case to and_code_range_buf() instead.
 */
01850 static int
01851 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
01852                   BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01853 {
01854   int r;
01855   OnigCodePoint i, n1, *data1;
01856   OnigCodePoint from, to;
01857 
01858   *pbuf = (BBuf* )NULL;
01859   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
01860     if (not1 != 0 || not2 != 0)
01861       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01862     return 0;
01863   }
01864 
01865   r = 0;
  /* make sure that, if only one buffer is NULL, it is bbuf1 */
01866   if (IS_NULL(bbuf2))
01867     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01868 
01869   if (IS_NULL(bbuf1)) {
01870     if (not1 != 0) {
01871       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01872     }
01873     else {
01874       if (not2 == 0) {
01875         return bbuf_clone(pbuf, bbuf2);
01876       }
01877       else {
01878         return not_code_range_buf(enc, bbuf2, pbuf, env);
01879       }
01880     }
01881   }
01882 
  /* make sure that, if only one side is negated, it is side 2 */
01883   if (not1 != 0)
01884     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01885 
01886   data1 = (OnigCodePoint* )(bbuf1->p);
01887   GET_CODE_POINT(n1, data1);
01888   data1++;
01889 
01890   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
01891     r = bbuf_clone(pbuf, bbuf2);
01892   }
01893   else if (not1 == 0) { /* 1 OR (not 2) */
01894     r = not_code_range_buf(enc, bbuf2, pbuf, env);
01895   }
01896   if (r != 0) return r;
01897 
  /* add every range of set 1 on top of the (possibly inverted) set 2 */
01898   for (i = 0; i < n1; i++) {
01899     from = data1[i*2];
01900     to   = data1[i*2+1];
01901     r = add_code_range_to_buf(pbuf, env, from, to);
01902     if (r != 0) return r;
01903   }
01904   return 0;
01905 }
01906 
/*
 * Add to *pbuf the parts of [from1, to1] that are not covered by any of
 * the `n` ranges in `data` (pairs of from/to values).
 * The loop assumes the ranges in `data` are in ascending order --
 * presumably guaranteed by how the buffers are built; confirm.
 * Returns 0 on success or the error from adding a range.
 */
01907 static int
01908 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
01909                 OnigCodePoint* data, int n)
01910 {
01911   int i, r;
01912   OnigCodePoint from2, to2;
01913 
01914   for (i = 0; i < n; i++) {
01915     from2 = data[i*2];
01916     to2   = data[i*2+1];
01917     if (from2 < from1) {
01918       if (to2 < from1) continue;
01919       else {
        /* range 2 covers the start of range 1: cut it off */
01920         from1 = to2 + 1;
01921       }
01922     }
01923     else if (from2 <= to1) {
01924       if (to2 < to1) {
        /* range 2 lies inside range 1: emit the part before it */
01925         if (from1 <= from2 - 1) {
01926           r = add_code_range_to_buf(pbuf, env, from1, from2-1);
01927           if (r != 0) return r;
01928         }
01929         from1 = to2 + 1;
01930       }
01931       else {
        /* range 2 covers the end of range 1: cut it off */
01932         to1 = from2 - 1;
01933       }
01934     }
01935     else {
01936       from1 = from2;
01937     }
01938     if (from1 > to1) break;
01939   }
01940   if (from1 <= to1) {
01941     r = add_code_range_to_buf(pbuf, env, from1, to1);
01942     if (r != 0) return r;
01943   }
01944   return 0;
01945 }
01946 
/*
 * Compute in *pbuf the intersection of two multibyte range sets, each
 * given as a buffer plus a negation flag.  *pbuf is reset to NULL
 * first; a NULL buffer stands for an empty set.
 * Returns 0 on success or the first error reported by a helper.
 * NOTE(review): no result is built when both flags are non-zero and
 * both buffers are non-NULL; the only call visible here (and_cclass)
 * routes that case to or_code_range_buf() instead.
 */
01947 static int
01948 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01949 {
01950   int r;
01951   OnigCodePoint i, j, n1, n2, *data1, *data2;
01952   OnigCodePoint from, to, from1, to1, from2, to2;
01953 
01954   *pbuf = (BBuf* )NULL;
01955   if (IS_NULL(bbuf1)) {
01956     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
01957       return bbuf_clone(pbuf, bbuf2);
01958     return 0;
01959   }
01960   else if (IS_NULL(bbuf2)) {
01961     if (not2 != 0)
01962       return bbuf_clone(pbuf, bbuf1);
01963     return 0;
01964   }
01965 
  /* make sure that, if only one side is negated, it is side 2 */
01966   if (not1 != 0)
01967     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01968 
01969   data1 = (OnigCodePoint* )(bbuf1->p);
01970   data2 = (OnigCodePoint* )(bbuf2->p);
01971   GET_CODE_POINT(n1, data1);
01972   GET_CODE_POINT(n2, data2);
01973   data1++;
01974   data2++;
01975 
01976   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
01977     for (i = 0; i < n1; i++) {
01978       from1 = data1[i*2];
01979       to1   = data1[i*2+1];
01980       for (j = 0; j < n2; j++) {
01981         from2 = data2[j*2];
01982         to2   = data2[j*2+1];
01983         if (from2 > to1) break;
01984         if (to2 < from1) continue;
        /* the two ranges overlap: keep the common part */
01985         from = MAX(from1, from2);
01986         to   = MIN(to1, to2);
01987         r = add_code_range_to_buf(pbuf, env, from, to);
01988         if (r != 0) return r;
01989       }
01990     }
01991   }
01992   else if (not1 == 0) { /* 1 AND (not 2) */
01993     for (i = 0; i < n1; i++) {
01994       from1 = data1[i*2];
01995       to1   = data1[i*2+1];
01996       r = and_code_range1(pbuf, env, from1, to1, data2, n2);
01997       if (r != 0) return r;
01998     }
01999   }
02000 
02001   return 0;
02002 }
02003 
/*
 * Intersect character class `dest` with `cc`, storing the result in
 * `dest`.  `cc` is only read.
 *
 * The bitset part is combined on the effective (negation applied)
 * bitsets and inverted back when `dest` is negated, so the stored
 * bitset stays relative to dest's NOT flag.  For encodings that are not
 * single byte the multibyte buffers are combined the same way, dest's
 * previous buffer is freed and replaced by the result.
 *
 * Returns 0 on success or the error from a range helper.
 * NOTE(review): when the first range helper call fails, a partially
 * built pbuf is not freed before returning -- confirm whether the
 * helpers can leave one behind.
 */
02004 static int
02005 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02006 {
02007   OnigEncoding enc = env->enc;
02008   int r, not1, not2;
02009   BBuf *buf1, *buf2, *pbuf;
02010   BitSetRef bsr1, bsr2;
02011   BitSet bs1, bs2;
02012 
02013   not1 = IS_NCCLASS_NOT(dest);
02014   bsr1 = dest->bs;
02015   buf1 = dest->mbuf;
02016   not2 = IS_NCCLASS_NOT(cc);
02017   bsr2 = cc->bs;
02018   buf2 = cc->mbuf;
02019 
  /* work on inverted copies for negated classes; cc->bs is untouched */
02020   if (not1 != 0) {
02021     bitset_invert_to(bsr1, bs1);
02022     bsr1 = bs1;
02023   }
02024   if (not2 != 0) {
02025     bitset_invert_to(bsr2, bs2);
02026     bsr2 = bs2;
02027   }
02028   bitset_and(bsr1, bsr2);
02029   if (bsr1 != dest->bs) {
02030     bitset_copy(dest->bs, bsr1);
02031     bsr1 = dest->bs;
02032   }
02033   if (not1 != 0) {
02034     bitset_invert(dest->bs);
02035   }
02036 
02037   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02038     if (not1 != 0 && not2 != 0) {
      /* (not A) AND (not B) == not (A OR B); dest keeps its NOT flag */
02039       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
02040     }
02041     else {
02042       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
02043       if (r == 0 && not1 != 0) {
        /* dest is negated: store the complement of the result */
02044         BBuf *tbuf;
02045         r = not_code_range_buf(enc, pbuf, &tbuf, env);
02046         if (r != 0) {
02047           bbuf_free(pbuf);
02048           return r;
02049         }
02050         bbuf_free(pbuf);
02051         pbuf = tbuf;
02052       }
02053     }
02054     if (r != 0) return r;
02055 
02056     dest->mbuf = pbuf;
02057     bbuf_free(buf1);
02058     return r;
02059   }
02060   return 0;
02061 }
02062 
/*
 * Unite character class `dest` with `cc`, storing the result in `dest`.
 * `cc` is only read.
 *
 * The bitset part is combined on the effective (negation applied)
 * bitsets and inverted back when `dest` is negated, so the stored
 * bitset stays relative to dest's NOT flag.  For encodings that are not
 * single byte the multibyte buffers are combined the same way, dest's
 * previous buffer is freed and replaced by the result.
 *
 * Returns 0 on success or the error from a range helper.
 * NOTE(review): when the first range helper call fails, a partially
 * built pbuf is not freed before returning -- confirm whether the
 * helpers can leave one behind.
 */
02063 static int
02064 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02065 {
02066   OnigEncoding enc = env->enc;
02067   int r, not1, not2;
02068   BBuf *buf1, *buf2, *pbuf;
02069   BitSetRef bsr1, bsr2;
02070   BitSet bs1, bs2;
02071 
02072   not1 = IS_NCCLASS_NOT(dest);
02073   bsr1 = dest->bs;
02074   buf1 = dest->mbuf;
02075   not2 = IS_NCCLASS_NOT(cc);
02076   bsr2 = cc->bs;
02077   buf2 = cc->mbuf;
02078 
  /* work on inverted copies for negated classes; cc->bs is untouched */
02079   if (not1 != 0) {
02080     bitset_invert_to(bsr1, bs1);
02081     bsr1 = bs1;
02082   }
02083   if (not2 != 0) {
02084     bitset_invert_to(bsr2, bs2);
02085     bsr2 = bs2;
02086   }
02087   bitset_or(bsr1, bsr2);
02088   if (bsr1 != dest->bs) {
02089     bitset_copy(dest->bs, bsr1);
02090     bsr1 = dest->bs;
02091   }
02092   if (not1 != 0) {
02093     bitset_invert(dest->bs);
02094   }
02095 
02096   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02097     if (not1 != 0 && not2 != 0) {
      /* (not A) OR (not B) == not (A AND B); dest keeps its NOT flag */
02098       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
02099     }
02100     else {
02101       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
02102       if (r == 0 && not1 != 0) {
        /* dest is negated: store the complement of the result */
02103         BBuf *tbuf;
02104         r = not_code_range_buf(enc, pbuf, &tbuf, env);
02105         if (r != 0) {
02106           bbuf_free(pbuf);
02107           return r;
02108         }
02109         bbuf_free(pbuf);
02110         pbuf = tbuf;
02111       }
02112     }
02113     if (r != 0) return r;
02114 
02115     dest->mbuf = pbuf;
02116     bbuf_free(buf1);
02117     return r;
02118   }
02119   else
02120     return 0;
02121 }
02122 
02123 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
02124 
02125 static int
02126 conv_backslash_value(int c, ScanEnv* env)
02127 {
02128   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
02129     switch (c) {
02130     case 'n': return '\n';
02131     case 't': return '\t';
02132     case 'r': return '\r';
02133     case 'f': return '\f';
02134     case 'a': return '\007';
02135     case 'b': return '\010';
02136     case 'e': return '\033';
02137     case 'v':
02138       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
02139         return '\v';
02140       break;
02141 
02142     default:
02143       if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
02144           UNKNOWN_ESC_WARN(env, c);
02145       break;
02146     }
02147   }
02148   return c;
02149 }
02150 
/*
 * is_invalid_quantifier_target(node)
 *
 * The function form below sits in an "#if 0" branch and is therefore
 * never compiled; the macro in the #else branch is what the rest of the
 * file sees, and it always evaluates to 0.
 */
02151 #if 0                           /* no invalid quantifier */
02152 static int
02153 is_invalid_quantifier_target(Node* node)
02154 {
02155   switch (NTYPE(node)) {
02156   case NT_ANCHOR:
02157     return 1;
02158     break;
02159 
02160   case NT_ENCLOSE:
02161     /* allow enclosed elements */
02162     /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
02163     break;
02164 
02165   case NT_LIST:
02166     do {
02167       if (! is_invalid_quantifier_target(NCAR(node))) return 0;
02168     } while (IS_NOT_NULL(node = NCDR(node)));
02169     return 0;
02170     break;
02171 
02172   case NT_ALT:
02173     do {
02174       if (is_invalid_quantifier_target(NCAR(node))) return 1;
02175     } while (IS_NOT_NULL(node = NCDR(node)));
02176     break;
02177 
02178   default:
02179     break;
02180   }
02181   return 0;
02182 }
02183 #else
02184 #define is_invalid_quantifier_target(node) 0
02185 #endif
02186 
02187 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
02188 static int
02189 popular_quantifier_num(QtfrNode* q)
02190 {
02191   if (q->greedy) {
02192     if (q->lower == 0) {
02193       if (q->upper == 1) return 0;
02194       else if (IS_REPEAT_INFINITE(q->upper)) return 1;
02195     }
02196     else if (q->lower == 1) {
02197       if (IS_REPEAT_INFINITE(q->upper)) return 2;
02198     }
02199   }
02200   else {
02201     if (q->lower == 0) {
02202       if (q->upper == 1) return 3;
02203       else if (IS_REPEAT_INFINITE(q->upper)) return 4;
02204     }
02205     else if (q->lower == 1) {
02206       if (IS_REPEAT_INFINITE(q->upper)) return 5;
02207     }
02208   }
02209   return -1;
02210 }
02211 
02212 
/* Actions onig_reduce_nested_quantifier() can take for a quantifier
   (parent) applied directly to another quantifier (child). */
02213 enum ReduceType {
02214   RQ_ASIS = 0, /* as is */
02215   RQ_DEL  = 1, /* delete parent */
02216   RQ_A,        /* to '*'    */
02217   RQ_AQ,       /* to '*?'   */
02218   RQ_QQ,       /* to '??'   */
02219   RQ_P_QQ,     /* to '+)??' */
02220   RQ_PQ_Q      /* to '+?)?' */
02221 };
02222 
/* Reduction to apply for a nested quantifier pair, indexed as
   [child][parent] with the numbers from popular_quantifier_num()
   (?:0, *:1, +:2, ??:3, *?:4, +?:5).  The comment on each row names the
   child quantifier. */
02223 static enum ReduceType const ReduceTypeTable[6][6] = {
02224   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
02225   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
02226   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
02227   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
02228   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
02229   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
02230 };
02231 
/*
 * Simplify a quantifier `pnode` applied directly to another quantifier
 * `cnode`, using ReduceTypeTable.
 *
 * Nothing is changed unless both are one of the six forms recognised by
 * popular_quantifier_num().  Depending on the table entry the parent is
 * overwritten with the child, re-parameterised to point at the child's
 * target, or the two are rewritten as a nested pair with cnode becoming
 * pnode's target.  When the child node itself is no longer needed it is
 * freed here, after its target pointer has been detached.
 */
02232 extern void
02233 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
02234 {
02235   int pnum, cnum;
02236   QtfrNode *p, *c;
02237 
02238   p = NQTFR(pnode);
02239   c = NQTFR(cnode);
02240   pnum = popular_quantifier_num(p);
02241   cnum = popular_quantifier_num(c);
02242   if (pnum < 0 || cnum < 0) return ;
02243 
02244   switch(ReduceTypeTable[cnum][pnum]) {
02245   case RQ_DEL:
    /* the parent takes over the child's whole content */
02246     *pnode = *cnode;
02247     break;
02248   case RQ_A:
02249     p->target = c->target;
02250     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
02251     break;
02252   case RQ_AQ:
02253     p->target = c->target;
02254     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
02255     break;
02256   case RQ_QQ:
02257     p->target = c->target;
02258     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02259     break;
  /* the remaining cases keep cnode alive as pnode's target, so they
     return instead of reaching the free at the bottom */
02260   case RQ_P_QQ:
02261     p->target = cnode;
02262     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02263     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
02264     return ;
02265     break;
02266   case RQ_PQ_Q:
02267     p->target = cnode;
02268     p->lower  = 0;  p->upper = 1;  p->greedy = 1;
02269     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
02270     return ;
02271     break;
02272   case RQ_ASIS:
02273     p->target = cnode;
02274     return ;
02275     break;
02276   }
02277 
  /* the child's target now belongs to the parent: detach it so that
     freeing the child does not free the target too */
02278   c->target = NULL_NODE;
02279   onig_node_free(cnode);
02280 }
02281 
02282 
/* Token kinds stored in OnigToken.type by the tokenizer functions. */
enum TokenSyms {
  TK_EOT                = 0,   /* end of token */
  TK_RAW_BYTE           = 1,
  TK_CHAR               = 2,
  TK_STRING             = 3,
  TK_CODE_POINT         = 4,
  TK_ANYCHAR            = 5,
  TK_CHAR_TYPE          = 6,
  TK_BACKREF            = 7,
  TK_CALL               = 8,
  TK_ANCHOR             = 9,
  TK_OP_REPEAT          = 10,
  TK_INTERVAL           = 11,
  TK_ANYCHAR_ANYTIME    = 12,  /* SQL '%' == .* */
  TK_ALT                = 13,
  TK_SUBEXP_OPEN        = 14,
  TK_SUBEXP_CLOSE       = 15,
  TK_CC_OPEN            = 16,
  TK_QUOTE_OPEN         = 17,
  TK_CHAR_PROPERTY      = 18,  /* \p{...}, \P{...} */

  /* the kinds below are only produced inside a character class */
  TK_CC_CLOSE           = 19,
  TK_CC_RANGE           = 20,
  TK_POSIX_BRACKET_OPEN = 21,
  TK_CC_AND             = 22,  /* && */
  TK_CC_CC_OPEN         = 23   /* [ */
};
02310 
02311 typedef struct {
02312   enum TokenSyms type;
02313   int escaped;
02314   int base;   /* is number: 8, 16 (used in [....]) */
02315   UChar* backp;
02316   union {
02317     UChar* s;
02318     int   c;
02319     OnigCodePoint code;
02320     int   anchor;
02321     int   subtype;
02322     struct {
02323       int lower;
02324       int upper;
02325       int greedy;
02326       int possessive;
02327     } repeat;
02328     struct {
02329       int  num;
02330       int  ref1;
02331       int* refs;
02332       int  by_name;
02333 #ifdef USE_BACKREF_WITH_LEVEL
02334       int  exist_level;
02335       int  level;   /* \k<name+n> */
02336 #endif
02337     } backref;
02338     struct {
02339       UChar* name;
02340       UChar* name_end;
02341       int    gnum;
02342     } call;
02343     struct {
02344       int ctype;
02345       int not;
02346     } prop;
02347   } u;
02348 } OnigToken;
02349 
02350 
02351 static int
02352 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
02353 {
02354   int low, up, syn_allow, non_low = 0;
02355   int r = 0;
02356   OnigCodePoint c;
02357   OnigEncoding enc = env->enc;
02358   UChar* p = *src;
02359   PFETCH_READY;
02360 
02361   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
02362 
02363   if (PEND) {
02364     if (syn_allow)
02365       return 1;  /* "....{" : OK! */
02366     else
02367       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
02368   }
02369 
02370   if (! syn_allow) {
02371     c = PPEEK;
02372     if (c == ')' || c == '(' || c == '|') {
02373       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02374     }
02375   }
02376 
02377   low = onig_scan_unsigned_number(&p, end, env->enc);
02378   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02379   if (low > ONIG_MAX_REPEAT_NUM)
02380     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02381 
02382   if (p == *src) { /* can't read low */
02383     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
02384       /* allow {,n} as {0,n} */
02385       low = 0;
02386       non_low = 1;
02387     }
02388     else
02389       goto invalid;
02390   }
02391 
02392   if (PEND) goto invalid;
02393   PFETCH(c);
02394   if (c == ',') {
02395     UChar* prev = p;
02396     up = onig_scan_unsigned_number(&p, end, env->enc);
02397     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02398     if (up > ONIG_MAX_REPEAT_NUM)
02399       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02400 
02401     if (p == prev) {
02402       if (non_low != 0)
02403         goto invalid;
02404       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
02405     }
02406   }
02407   else {
02408     if (non_low != 0)
02409       goto invalid;
02410 
02411     PUNFETCH;
02412     up = low;  /* {n} : exact n times */
02413     r = 2;     /* fixed */
02414   }
02415 
02416   if (PEND) goto invalid;
02417   PFETCH(c);
02418   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
02419     if (c != MC_ESC(env->syntax)) goto invalid;
02420     PFETCH(c);
02421   }
02422   if (c != '}') goto invalid;
02423 
02424   if (!IS_REPEAT_INFINITE(up) && low > up) {
02425     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
02426   }
02427 
02428   tok->type = TK_INTERVAL;
02429   tok->u.repeat.lower = low;
02430   tok->u.repeat.upper = up;
02431   *src = p;
02432   return r; /* 0: normal {n,m}, 2: fixed {n} */
02433 
02434  invalid:
02435   if (syn_allow)
02436     return 1;  /* OK */
02437   else
02438     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
02439 }
02440 
02441 /* \M-, \C-, \c, or \... */
02442 static int
02443 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
02444 {
02445   int v;
02446   OnigCodePoint c;
02447   OnigEncoding enc = env->enc;
02448   UChar* p = *src;
02449   PFETCH_READY;
02450 
02451   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02452 
02453   PFETCH(c);
02454   switch (c) {
02455   case 'M':
02456     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
02457       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02458       PFETCH(c);
02459       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
02460       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02461       PFETCH(c);
02462       if (c == MC_ESC(env->syntax)) {
02463         v = fetch_escaped_value(&p, end, env);
02464         if (v < 0) return v;
02465         c = (OnigCodePoint )v;
02466       }
02467       c = ((c & 0xff) | 0x80);
02468     }
02469     else
02470       goto backslash;
02471     break;
02472 
02473   case 'C':
02474     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
02475       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02476       PFETCH(c);
02477       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
02478       goto control;
02479     }
02480     else
02481       goto backslash;
02482 
02483   case 'c':
02484     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
02485     control:
02486       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02487       PFETCH(c);
02488       if (c == '?') {
02489         c = 0177;
02490       }
02491       else {
02492         if (c == MC_ESC(env->syntax)) {
02493           v = fetch_escaped_value(&p, end, env);
02494           if (v < 0) return v;
02495           c = (OnigCodePoint )v;
02496         }
02497         c &= 0x9f;
02498       }
02499       break;
02500     }
02501     /* fall through */
02502 
02503   default:
02504     {
02505     backslash:
02506       c = conv_backslash_value(c, env);
02507     }
02508     break;
02509   }
02510 
02511   *src = p;
02512   return c;
02513 }
02514 
02515 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
02516 
02517 static OnigCodePoint
02518 get_name_end_code_point(OnigCodePoint start)
02519 {
02520   switch (start) {
02521   case '<':  return (OnigCodePoint )'>'; break;
02522   case '\'': return (OnigCodePoint )'\''; break;
02523   default:
02524     break;
02525   }
02526 
02527   return (OnigCodePoint )0;
02528 }
02529 
02530 #ifdef USE_NAMED_GROUP
02531 #ifdef USE_BACKREF_WITH_LEVEL
02532 /*
02533    \k<name+n>, \k<name-n>
02534    \k<num+n>,  \k<num-n>
02535    \k<-num+n>, \k<-num-n>
02536 */
02537 static int
02538 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
02539                       UChar** rname_end, ScanEnv* env,
02540                       int* rback_num, int* rlevel)
02541 {
02542   int r, sign, is_num, exist_level;
02543   OnigCodePoint end_code;
02544   OnigCodePoint c = 0;
02545   OnigEncoding enc = env->enc;
02546   UChar *name_end;
02547   UChar *pnum_head;
02548   UChar *p = *src;
02549   PFETCH_READY;
02550 
02551   *rback_num = 0;
02552   is_num = exist_level = 0;
02553   sign = 1;
02554   pnum_head = *src;
02555 
02556   end_code = get_name_end_code_point(start_code);
02557 
02558   name_end = end;
02559   r = 0;
02560   if (PEND) {
02561     return ONIGERR_EMPTY_GROUP_NAME;
02562   }
02563   else {
02564     PFETCH(c);
02565     if (c == end_code)
02566       return ONIGERR_EMPTY_GROUP_NAME;
02567 
02568     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02569       is_num = 1;
02570     }
02571     else if (c == '-') {
02572       is_num = 2;
02573       sign = -1;
02574       pnum_head = p;
02575     }
02576     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02577       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02578     }
02579   }
02580 
02581   while (!PEND) {
02582     name_end = p;
02583     PFETCH(c);
02584     if (c == end_code || c == ')' || c == '+' || c == '-') {
02585       if (is_num == 2)  r = ONIGERR_INVALID_GROUP_NAME;
02586       break;
02587     }
02588 
02589     if (is_num != 0) {
02590       if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02591         is_num = 1;
02592       }
02593       else {
02594         r = ONIGERR_INVALID_GROUP_NAME;
02595         is_num = 0;
02596       }
02597     }
02598     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02599       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02600     }
02601   }
02602 
02603   if (r == 0 && c != end_code) {
02604     if (c == '+' || c == '-') {
02605       int level;
02606       int flag = (c == '-' ? -1 : 1);
02607 
02608       PFETCH(c);
02609       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
02610       PUNFETCH;
02611       level = onig_scan_unsigned_number(&p, end, enc);
02612       if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
02613       *rlevel = (level * flag);
02614       exist_level = 1;
02615 
02616       PFETCH(c);
02617       if (c == end_code)
02618         goto end;
02619     }
02620 
02621   err:
02622     r = ONIGERR_INVALID_GROUP_NAME;
02623     name_end = end;
02624   }
02625 
02626  end:
02627   if (r == 0) {
02628     if (is_num != 0) {
02629       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02630       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02631       else if (*rback_num == 0) goto err;
02632 
02633       *rback_num *= sign;
02634     }
02635 
02636     *rname_end = name_end;
02637     *src = p;
02638     return (exist_level ? 1 : 0);
02639   }
02640   else {
02641     onig_scan_env_set_error_string(env, r, *src, name_end);
02642     return r;
02643   }
02644 }
02645 #endif /* USE_BACKREF_WITH_LEVEL */
02646 
02647 /*
02648   def: 0 -> define name    (don't allow number name)
02649        1 -> reference name (allow number name)
02650 */
02651 static int
02652 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02653            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02654 {
02655   int r, is_num, sign;
02656   OnigCodePoint end_code;
02657   OnigCodePoint c = 0;
02658   OnigEncoding enc = env->enc;
02659   UChar *name_end;
02660   UChar *pnum_head;
02661   UChar *p = *src;
02662   PFETCH_READY;
02663 
02664   *rback_num = 0;
02665 
02666   end_code = get_name_end_code_point(start_code);
02667 
02668   name_end = end;
02669   pnum_head = *src;
02670   r = 0;
02671   is_num = 0;
02672   sign = 1;
02673   if (PEND) {
02674     return ONIGERR_EMPTY_GROUP_NAME;
02675   }
02676   else {
02677     PFETCH(c);
02678     if (c == end_code)
02679       return ONIGERR_EMPTY_GROUP_NAME;
02680 
02681     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02682       if (ref == 1)
02683         is_num = 1;
02684       else {
02685         r = ONIGERR_INVALID_GROUP_NAME;
02686         is_num = 0;
02687       }
02688     }
02689     else if (c == '-') {
02690       if (ref == 1) {
02691         is_num = 2;
02692         sign = -1;
02693         pnum_head = p;
02694       }
02695       else {
02696         r = ONIGERR_INVALID_GROUP_NAME;
02697         is_num = 0;
02698       }
02699     }
02700     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02701       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02702     }
02703   }
02704 
02705   if (r == 0) {
02706     while (!PEND) {
02707       name_end = p;
02708       PFETCH(c);
02709       if (c == end_code || c == ')') {
02710         if (is_num == 2)        r = ONIGERR_INVALID_GROUP_NAME;
02711         break;
02712       }
02713 
02714       if (is_num != 0) {
02715         if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02716           is_num = 1;
02717         }
02718         else {
02719           if (!ONIGENC_IS_CODE_WORD(enc, c))
02720             r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02721           else
02722             r = ONIGERR_INVALID_GROUP_NAME;
02723 
02724           is_num = 0;
02725         }
02726       }
02727       else {
02728         if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02729           r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02730         }
02731       }
02732     }
02733 
02734     if (c != end_code) {
02735       r = ONIGERR_INVALID_GROUP_NAME;
02736       name_end = end;
02737     }
02738 
02739     if (is_num != 0) {
02740       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02741       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02742       else if (*rback_num == 0) {
02743         r = ONIGERR_INVALID_GROUP_NAME;
02744         goto err;
02745       }
02746 
02747       *rback_num *= sign;
02748     }
02749 
02750     *rname_end = name_end;
02751     *src = p;
02752     return 0;
02753   }
02754   else {
02755     while (!PEND) {
02756       name_end = p;
02757       PFETCH(c);
02758       if (c == end_code || c == ')')
02759         break;
02760     }
02761     if (PEND)
02762       name_end = end;
02763 
02764   err:
02765     onig_scan_env_set_error_string(env, r, *src, name_end);
02766     return r;
02767   }
02768 }
02769 #else
02770 static int
02771 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02772            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02773 {
02774   int r, is_num, sign;
02775   OnigCodePoint end_code;
02776   OnigCodePoint c = 0;
02777   UChar *name_end;
02778   OnigEncoding enc = env->enc;
02779   UChar *pnum_head;
02780   UChar *p = *src;
02781   PFETCH_READY;
02782 
02783   *rback_num = 0;
02784 
02785   end_code = get_name_end_code_point(start_code);
02786 
02787   *rname_end = name_end = end;
02788   r = 0;
02789   pnum_head = *src;
02790   is_num = 0;
02791   sign = 1;
02792 
02793   if (PEND) {
02794     return ONIGERR_EMPTY_GROUP_NAME;
02795   }
02796   else {
02797     PFETCH(c);
02798     if (c == end_code)
02799       return ONIGERR_EMPTY_GROUP_NAME;
02800 
02801     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02802       is_num = 1;
02803     }
02804     else if (c == '-') {
02805       is_num = 2;
02806       sign = -1;
02807       pnum_head = p;
02808     }
02809     else {
02810       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02811     }
02812   }
02813 
02814   while (!PEND) {
02815     name_end = p;
02816 
02817     PFETCH(c);
02818     if (c == end_code || c == ')') break;
02819     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
02820       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02821   }
02822   if (r == 0 && c != end_code) {
02823     r = ONIGERR_INVALID_GROUP_NAME;
02824     name_end = end;
02825   }
02826 
02827   if (r == 0) {
02828     *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02829     if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02830     else if (*rback_num == 0) {
02831       r = ONIGERR_INVALID_GROUP_NAME;
02832       goto err;
02833     }
02834     *rback_num *= sign;
02835 
02836     *rname_end = name_end;
02837     *src = p;
02838     return 0;
02839   }
02840   else {
02841   err:
02842     onig_scan_env_set_error_string(env, r, *src, name_end);
02843     return r;
02844   }
02845 }
02846 #endif /* USE_NAMED_GROUP */
02847 
02848 void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
02849                            UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
02850 
02851 static void
02852 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
02853 {
02854     va_list args;
02855     UChar buf[WARN_BUFSIZE];
02856     va_start(args, fmt);
02857     onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
02858                 env->pattern, env->pattern_end,
02859                 (const UChar *)fmt, args);
02860     va_end(args);
02861     if (env->sourcefile == NULL)
02862       rb_warn("%s", (char *)buf);
02863     else
02864       rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
02865 }
02866 
02867 static void
02868 CC_ESC_WARN(ScanEnv *env, UChar *c)
02869 {
02870   if (onig_warn == onig_null_warn) return ;
02871 
02872   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
02873       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
02874     onig_syntax_warn(env, "character class has '%s' without escape", c);
02875   }
02876 }
02877 
02878 static void
02879 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
02880 {
02881   if (onig_warn == onig_null_warn) return ;
02882 
02883   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
02884       onig_syntax_warn(env, "regular expression has '%s' without escape", c);
02885   }
02886 }
02887 
02888 static void
02889 CC_DUP_WARN(ScanEnv *env)
02890 {
02891   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02892 
02893   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
02894     !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
02895     (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
02896     onig_syntax_warn(env, "character class has duplicated range");
02897   }
02898 }
02899 
02900 static void
02901 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
02902 {
02903   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02904   onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
02905 }
02906 
02907 static UChar*
02908 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
02909                   UChar **next, OnigEncoding enc)
02910 {
02911   int i;
02912   OnigCodePoint x;
02913   UChar *q;
02914   UChar *p = from;
02915 
02916   while (p < to) {
02917     x = ONIGENC_MBC_TO_CODE(enc, p, to);
02918     q = p + enclen(enc, p, to);
02919     if (x == s[0]) {
02920       for (i = 1; i < n && q < to; i++) {
02921         x = ONIGENC_MBC_TO_CODE(enc, q, to);
02922         if (x != s[i]) break;
02923         q += enclen(enc, q, to);
02924       }
02925       if (i >= n) {
02926         if (IS_NOT_NULL(next))
02927           *next = q;
02928         return p;
02929       }
02930     }
02931     p = q;
02932   }
02933   return NULL_UCHARP;
02934 }
02935 
02936 static int
02937 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
02938                  OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
02939 {
02940   int i, in_esc;
02941   OnigCodePoint x;
02942   UChar *q;
02943   UChar *p = from;
02944 
02945   in_esc = 0;
02946   while (p < to) {
02947     if (in_esc) {
02948       in_esc = 0;
02949       p += enclen(enc, p, to);
02950     }
02951     else {
02952       x = ONIGENC_MBC_TO_CODE(enc, p, to);
02953       q = p + enclen(enc, p, to);
02954       if (x == s[0]) {
02955         for (i = 1; i < n && q < to; i++) {
02956           x = ONIGENC_MBC_TO_CODE(enc, q, to);
02957           if (x != s[i]) break;
02958           q += enclen(enc, q, to);
02959         }
02960         if (i >= n) return 1;
02961         p += enclen(enc, p, to);
02962       }
02963       else {
02964         x = ONIGENC_MBC_TO_CODE(enc, p, to);
02965         if (x == bad) return 0;
02966         else if (x == MC_ESC(syn)) in_esc = 1;
02967         p = q;
02968       }
02969     }
02970   }
02971   return 0;
02972 }
02973 
02974 static int
02975 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
02976 {
02977   int num;
02978   OnigCodePoint c, c2;
02979   const OnigSyntaxType* syn = env->syntax;
02980   OnigEncoding enc = env->enc;
02981   UChar* prev;
02982   UChar* p = *src;
02983   PFETCH_READY;
02984 
02985   if (PEND) {
02986     tok->type = TK_EOT;
02987     return tok->type;
02988   }
02989 
02990   PFETCH(c);
02991   tok->type = TK_CHAR;
02992   tok->base = 0;
02993   tok->u.c  = c;
02994   tok->escaped = 0;
02995 
02996   if (c == ']') {
02997     tok->type = TK_CC_CLOSE;
02998   }
02999   else if (c == '-') {
03000     tok->type = TK_CC_RANGE;
03001   }
03002   else if (c == MC_ESC(syn)) {
03003     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
03004       goto end;
03005 
03006     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03007 
03008     PFETCH(c);
03009     tok->escaped = 1;
03010     tok->u.c = c;
03011     switch (c) {
03012     case 'w':
03013       tok->type = TK_CHAR_TYPE;
03014       tok->u.prop.ctype = ONIGENC_CTYPE_W;
03015       tok->u.prop.not   = 0;
03016       break;
03017     case 'W':
03018       tok->type = TK_CHAR_TYPE;
03019       tok->u.prop.ctype = ONIGENC_CTYPE_W;
03020       tok->u.prop.not   = 1;
03021       break;
03022     case 'd':
03023       tok->type = TK_CHAR_TYPE;
03024       tok->u.prop.ctype = ONIGENC_CTYPE_D;
03025       tok->u.prop.not   = 0;
03026       break;
03027     case 'D':
03028       tok->type = TK_CHAR_TYPE;
03029       tok->u.prop.ctype = ONIGENC_CTYPE_D;
03030       tok->u.prop.not   = 1;
03031       break;
03032     case 's':
03033       tok->type = TK_CHAR_TYPE;
03034       tok->u.prop.ctype = ONIGENC_CTYPE_S;
03035       tok->u.prop.not   = 0;
03036       break;
03037     case 'S':
03038       tok->type = TK_CHAR_TYPE;
03039       tok->u.prop.ctype = ONIGENC_CTYPE_S;
03040       tok->u.prop.not   = 1;
03041       break;
03042     case 'h':
03043       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03044       tok->type = TK_CHAR_TYPE;
03045       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03046       tok->u.prop.not   = 0;
03047       break;
03048     case 'H':
03049       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03050       tok->type = TK_CHAR_TYPE;
03051       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03052       tok->u.prop.not   = 1;
03053       break;
03054 
03055     case 'p':
03056     case 'P':
03057       c2 = PPEEK;
03058       if (c2 == '{' &&
03059           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03060         PINC;
03061         tok->type = TK_CHAR_PROPERTY;
03062         tok->u.prop.not = (c == 'P' ? 1 : 0);
03063 
03064         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03065           PFETCH(c2);
03066           if (c2 == '^') {
03067             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03068           }
03069           else
03070             PUNFETCH;
03071         }
03072       }
03073       else {
03074           onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03075       }
03076       break;
03077 
03078     case 'x':
03079       if (PEND) break;
03080 
03081       prev = p;
03082       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03083         PINC;
03084         num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03085         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03086         if (!PEND) {
03087           c2 = PPEEK;
03088           if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
03089             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03090         }
03091 
03092         if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
03093           PINC;
03094           tok->type   = TK_CODE_POINT;
03095           tok->base   = 16;
03096           tok->u.code = (OnigCodePoint )num;
03097         }
03098         else {
03099           /* can't read nothing or invalid format */
03100           p = prev;
03101         }
03102       }
03103       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03104         num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03105         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03106         if (p == prev) {  /* can't read nothing. */
03107           num = 0; /* but, it's not error */
03108         }
03109         tok->type = TK_RAW_BYTE;
03110         tok->base = 16;
03111         tok->u.c  = num;
03112       }
03113       break;
03114 
03115     case 'u':
03116       if (PEND) break;
03117 
03118       prev = p;
03119       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03120         num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03121         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03122         if (p == prev) {  /* can't read nothing. */
03123           num = 0; /* but, it's not error */
03124         }
03125         tok->type   = TK_CODE_POINT;
03126         tok->base   = 16;
03127         tok->u.code = (OnigCodePoint )num;
03128       }
03129       break;
03130 
03131     case '0':
03132     case '1': case '2': case '3': case '4': case '5': case '6': case '7':
03133       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03134         PUNFETCH;
03135         prev = p;
03136         num = scan_unsigned_octal_number(&p, end, 3, enc);
03137         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03138         if (p == prev) {  /* can't read nothing. */
03139           num = 0; /* but, it's not error */
03140         }
03141         tok->type = TK_RAW_BYTE;
03142         tok->base = 8;
03143         tok->u.c  = num;
03144       }
03145       break;
03146 
03147     default:
03148       PUNFETCH;
03149       num = fetch_escaped_value(&p, end, env);
03150       if (num < 0) return num;
03151       if (tok->u.c != num) {
03152         tok->u.code = (OnigCodePoint )num;
03153         tok->type   = TK_CODE_POINT;
03154       }
03155       break;
03156     }
03157   }
03158   else if (c == '[') {
03159     if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
03160       OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
03161       tok->backp = p; /* point at '[' is readed */
03162       PINC;
03163       if (str_exist_check_with_esc(send, 2, p, end,
03164                                    (OnigCodePoint )']', enc, syn)) {
03165         tok->type = TK_POSIX_BRACKET_OPEN;
03166       }
03167       else {
03168         PUNFETCH;
03169         goto cc_in_cc;
03170       }
03171     }
03172     else {
03173     cc_in_cc:
03174       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
03175         tok->type = TK_CC_CC_OPEN;
03176       }
03177       else {
03178         CC_ESC_WARN(env, (UChar* )"[");
03179       }
03180     }
03181   }
03182   else if (c == '&') {
03183     if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
03184         !PEND && (PPEEK_IS('&'))) {
03185       PINC;
03186       tok->type = TK_CC_AND;
03187     }
03188   }
03189 
03190  end:
03191   *src = p;
03192   return tok->type;
03193 }
03194 
03195 static int
03196 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
03197 {
03198   int r, num;
03199   OnigCodePoint c;
03200   OnigEncoding enc = env->enc;
03201   const OnigSyntaxType* syn = env->syntax;
03202   UChar* prev;
03203   UChar* p = *src;
03204   PFETCH_READY;
03205 
03206  start:
03207   if (PEND) {
03208     tok->type = TK_EOT;
03209     return tok->type;
03210   }
03211 
03212   tok->type  = TK_STRING;
03213   tok->base  = 0;
03214   tok->backp = p;
03215 
03216   PFETCH(c);
03217   if (IS_MC_ESC_CODE(c, syn)) {
03218     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03219 
03220     tok->backp = p;
03221     PFETCH(c);
03222 
03223     tok->u.c = c;
03224     tok->escaped = 1;
03225     switch (c) {
03226     case '*':
03227       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
03228       tok->type = TK_OP_REPEAT;
03229       tok->u.repeat.lower = 0;
03230       tok->u.repeat.upper = REPEAT_INFINITE;
03231       goto greedy_check;
03232       break;
03233 
03234     case '+':
03235       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
03236       tok->type = TK_OP_REPEAT;
03237       tok->u.repeat.lower = 1;
03238       tok->u.repeat.upper = REPEAT_INFINITE;
03239       goto greedy_check;
03240       break;
03241 
03242     case '?':
03243       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
03244       tok->type = TK_OP_REPEAT;
03245       tok->u.repeat.lower = 0;
03246       tok->u.repeat.upper = 1;
03247     greedy_check:
03248       if (!PEND && PPEEK_IS('?') &&
03249           IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
03250         PFETCH(c);
03251         tok->u.repeat.greedy     = 0;
03252         tok->u.repeat.possessive = 0;
03253       }
03254       else {
03255       possessive_check:
03256         if (!PEND && PPEEK_IS('+') &&
03257             ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
03258               tok->type != TK_INTERVAL)  ||
03259              (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
03260               tok->type == TK_INTERVAL))) {
03261           PFETCH(c);
03262           tok->u.repeat.greedy     = 1;
03263           tok->u.repeat.possessive = 1;
03264         }
03265         else {
03266           tok->u.repeat.greedy     = 1;
03267           tok->u.repeat.possessive = 0;
03268         }
03269       }
03270       break;
03271 
03272     case '{':
03273       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
03274       r = fetch_range_quantifier(&p, end, tok, env);
03275       if (r < 0) return r;  /* error */
03276       if (r == 0) goto greedy_check;
03277       else if (r == 2) { /* {n} */
03278         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03279           goto possessive_check;
03280 
03281         goto greedy_check;
03282       }
03283       /* r == 1 : normal char */
03284       break;
03285 
03286     case '|':
03287       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
03288       tok->type = TK_ALT;
03289       break;
03290 
03291     case '(':
03292       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03293       tok->type = TK_SUBEXP_OPEN;
03294       break;
03295 
03296     case ')':
03297       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03298       tok->type = TK_SUBEXP_CLOSE;
03299       break;
03300 
03301     case 'w':
03302       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03303       tok->type = TK_CHAR_TYPE;
03304       tok->u.prop.ctype = ONIGENC_CTYPE_W;
03305       tok->u.prop.not   = 0;
03306       break;
03307 
03308     case 'W':
03309       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03310       tok->type = TK_CHAR_TYPE;
03311       tok->u.prop.ctype = ONIGENC_CTYPE_W;
03312       tok->u.prop.not   = 1;
03313       break;
03314 
03315     case 'b':
03316       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03317       tok->type = TK_ANCHOR;
03318       tok->u.anchor = ANCHOR_WORD_BOUND;
03319       break;
03320 
03321     case 'B':
03322       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03323       tok->type = TK_ANCHOR;
03324       tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
03325       break;
03326 
03327 #ifdef USE_WORD_BEGIN_END
03328     case '<':
03329       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03330       tok->type = TK_ANCHOR;
03331       tok->u.anchor = ANCHOR_WORD_BEGIN;
03332       break;
03333 
03334     case '>':
03335       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03336       tok->type = TK_ANCHOR;
03337       tok->u.anchor = ANCHOR_WORD_END;
03338       break;
03339 #endif
03340 
03341     case 's':
03342       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03343       tok->type = TK_CHAR_TYPE;
03344       tok->u.prop.ctype = ONIGENC_CTYPE_S;
03345       tok->u.prop.not   = 0;
03346       break;
03347 
03348     case 'S':
03349       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03350       tok->type = TK_CHAR_TYPE;
03351       tok->u.prop.ctype = ONIGENC_CTYPE_S;
03352       tok->u.prop.not   = 1;
03353       break;
03354 
03355     case 'd':
03356       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03357       tok->type = TK_CHAR_TYPE;
03358       tok->u.prop.ctype = ONIGENC_CTYPE_D;
03359       tok->u.prop.not   = 0;
03360       break;
03361 
03362     case 'D':
03363       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03364       tok->type = TK_CHAR_TYPE;
03365       tok->u.prop.ctype = ONIGENC_CTYPE_D;
03366       tok->u.prop.not   = 1;
03367       break;
03368 
03369     case 'h':
03370       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03371       tok->type = TK_CHAR_TYPE;
03372       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03373       tok->u.prop.not   = 0;
03374       break;
03375 
03376     case 'H':
03377       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03378       tok->type = TK_CHAR_TYPE;
03379       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03380       tok->u.prop.not   = 1;
03381       break;
03382 
03383     case 'A':
03384       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03385     begin_buf:
03386       tok->type = TK_ANCHOR;
03387       tok->u.subtype = ANCHOR_BEGIN_BUF;
03388       break;
03389 
03390     case 'Z':
03391       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03392       tok->type = TK_ANCHOR;
03393       tok->u.subtype = ANCHOR_SEMI_END_BUF;
03394       break;
03395 
03396     case 'z':
03397       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03398     end_buf:
03399       tok->type = TK_ANCHOR;
03400       tok->u.subtype = ANCHOR_END_BUF;
03401       break;
03402 
03403     case 'G':
03404       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
03405       tok->type = TK_ANCHOR;
03406       tok->u.subtype = ANCHOR_BEGIN_POSITION;
03407       break;
03408 
03409     case '`':
03410       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03411       goto begin_buf;
03412       break;
03413 
03414     case '\'':
03415       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03416       goto end_buf;
03417       break;
03418 
03419     case 'x':
03420       if (PEND) break;
03421 
03422       prev = p;
03423       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03424         PINC;
03425         num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03426         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03427         if (!PEND) {
03428           if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
03429             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03430         }
03431 
03432         if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
03433           PINC;
03434           tok->type   = TK_CODE_POINT;
03435           tok->u.code = (OnigCodePoint )num;
03436         }
03437         else {
03438           /* can't read nothing or invalid format */
03439           p = prev;
03440         }
03441       }
03442       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03443         num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03444         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03445         if (p == prev) {  /* can't read nothing. */
03446           num = 0; /* but, it's not error */
03447         }
03448         tok->type = TK_RAW_BYTE;
03449         tok->base = 16;
03450         tok->u.c  = num;
03451       }
03452       break;
03453 
03454     case 'u':
03455       if (PEND) break;
03456 
03457       prev = p;
03458       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03459         num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03460         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03461         if (p == prev) {  /* can't read nothing. */
03462           num = 0; /* but, it's not error */
03463         }
03464         tok->type   = TK_CODE_POINT;
03465         tok->base   = 16;
03466         tok->u.code = (OnigCodePoint )num;
03467       }
03468       break;
03469 
03470     case '1': case '2': case '3': case '4':
03471     case '5': case '6': case '7': case '8': case '9':
03472       PUNFETCH;
03473       prev = p;
03474       num = onig_scan_unsigned_number(&p, end, enc);
03475       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
03476         goto skip_backref;
03477       }
03478 
03479       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
03480           (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
03481         if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03482           if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
03483             return ONIGERR_INVALID_BACKREF;
03484         }
03485 
03486         tok->type = TK_BACKREF;
03487         tok->u.backref.num     = 1;
03488         tok->u.backref.ref1    = num;
03489         tok->u.backref.by_name = 0;
03490 #ifdef USE_BACKREF_WITH_LEVEL
03491         tok->u.backref.exist_level = 0;
03492 #endif
03493         break;
03494       }
03495 
03496     skip_backref:
03497       if (c == '8' || c == '9') {
03498         /* normal char */
03499         p = prev; PINC;
03500         break;
03501       }
03502 
03503       p = prev;
03504       /* fall through */
03505     case '0':
03506       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03507         prev = p;
03508         num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
03509         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03510         if (p == prev) {  /* can't read nothing. */
03511           num = 0; /* but, it's not error */
03512         }
03513         tok->type = TK_RAW_BYTE;
03514         tok->base = 8;
03515         tok->u.c  = num;
03516       }
03517       else if (c != '0') {
03518         PINC;
03519       }
03520       break;
03521 
03522 #ifdef USE_NAMED_GROUP
03523     case 'k':
03524       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
03525         PFETCH(c);
03526         if (c == '<' || c == '\'') {
03527           UChar* name_end;
03528           int* backs;
03529           int back_num;
03530 
03531           prev = p;
03532 
03533 #ifdef USE_BACKREF_WITH_LEVEL
03534           name_end = NULL_UCHARP; /* no need. escape gcc warning. */
03535           r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
03536                                     env, &back_num, &tok->u.backref.level);
03537           if (r == 1) tok->u.backref.exist_level = 1;
03538           else        tok->u.backref.exist_level = 0;
03539 #else
03540           r = fetch_name(&p, end, &name_end, env, &back_num, 1);
03541 #endif
03542           if (r < 0) return r;
03543 
03544           if (back_num != 0) {
03545             if (back_num < 0) {
03546               back_num = BACKREF_REL_TO_ABS(back_num, env);
03547               if (back_num <= 0)
03548                 return ONIGERR_INVALID_BACKREF;
03549             }
03550 
03551             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03552               if (back_num > env->num_mem ||
03553                   IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
03554                 return ONIGERR_INVALID_BACKREF;
03555             }
03556             tok->type = TK_BACKREF;
03557             tok->u.backref.by_name = 0;
03558             tok->u.backref.num  = 1;
03559             tok->u.backref.ref1 = back_num;
03560           }
03561           else {
03562             num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
03563             if (num <= 0) {
03564               onig_scan_env_set_error_string(env,
03565                              ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
03566               return ONIGERR_UNDEFINED_NAME_REFERENCE;
03567             }
03568             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03569               int i;
03570               for (i = 0; i < num; i++) {
03571                 if (backs[i] > env->num_mem ||
03572                     IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
03573                   return ONIGERR_INVALID_BACKREF;
03574               }
03575             }
03576 
03577             tok->type = TK_BACKREF;
03578             tok->u.backref.by_name = 1;
03579             if (num == 1) {
03580               tok->u.backref.num  = 1;
03581               tok->u.backref.ref1 = backs[0];
03582             }
03583             else {
03584               tok->u.backref.num  = num;
03585               tok->u.backref.refs = backs;
03586             }
03587           }
03588         }
03589         else {
03590             PUNFETCH;
03591             onig_syntax_warn(env, "invalid back reference");
03592         }
03593       }
03594       break;
03595 #endif
03596 
03597 #ifdef USE_SUBEXP_CALL
03598     case 'g':
03599       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
03600         PFETCH(c);
03601         if (c == '<' || c == '\'') {
03602           int gnum;
03603           UChar* name_end;
03604 
03605           prev = p;
03606           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
03607           if (r < 0) return r;
03608 
03609           tok->type = TK_CALL;
03610           tok->u.call.name     = prev;
03611           tok->u.call.name_end = name_end;
03612           tok->u.call.gnum     = gnum;
03613         }
03614         else {
03615             onig_syntax_warn(env, "invalid subexp call");
03616             PUNFETCH;
03617         }
03618       }
03619       break;
03620 #endif
03621 
03622     case 'Q':
03623       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
03624         tok->type = TK_QUOTE_OPEN;
03625       }
03626       break;
03627 
03628     case 'p':
03629     case 'P':
03630       if (PPEEK_IS('{') &&
03631           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03632         PINC;
03633         tok->type = TK_CHAR_PROPERTY;
03634         tok->u.prop.not = (c == 'P' ? 1 : 0);
03635 
03636         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03637           PFETCH(c);
03638           if (c == '^') {
03639             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03640           }
03641           else
03642             PUNFETCH;
03643         }
03644       }
03645       else {
03646           onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03647       }
03648       break;
03649 
03650     default:
03651       PUNFETCH;
03652       num = fetch_escaped_value(&p, end, env);
03653       if (num < 0) return num;
03654       /* set_raw: */
03655       if (tok->u.c != num) {
03656         tok->type = TK_CODE_POINT;
03657         tok->u.code = (OnigCodePoint )num;
03658       }
03659       else { /* string */
03660         p = tok->backp + enclen(enc, tok->backp, end);
03661       }
03662       break;
03663     }
03664   }
03665   else {
03666     tok->u.c = c;
03667     tok->escaped = 0;
03668 
03669 #ifdef USE_VARIABLE_META_CHARS
03670     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
03671         IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
03672       if (c == MC_ANYCHAR(syn))
03673         goto any_char;
03674       else if (c == MC_ANYTIME(syn))
03675         goto anytime;
03676       else if (c == MC_ZERO_OR_ONE_TIME(syn))
03677         goto zero_or_one_time;
03678       else if (c == MC_ONE_OR_MORE_TIME(syn))
03679         goto one_or_more_time;
03680       else if (c == MC_ANYCHAR_ANYTIME(syn)) {
03681         tok->type = TK_ANYCHAR_ANYTIME;
03682         goto out;
03683       }
03684     }
03685 #endif
03686 
03687     switch (c) {
03688     case '.':
03689       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
03690 #ifdef USE_VARIABLE_META_CHARS
03691     any_char:
03692 #endif
03693       tok->type = TK_ANYCHAR;
03694       break;
03695 
03696     case '*':
03697       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
03698 #ifdef USE_VARIABLE_META_CHARS
03699     anytime:
03700 #endif
03701       tok->type = TK_OP_REPEAT;
03702       tok->u.repeat.lower = 0;
03703       tok->u.repeat.upper = REPEAT_INFINITE;
03704       goto greedy_check;
03705       break;
03706 
03707     case '+':
03708       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
03709 #ifdef USE_VARIABLE_META_CHARS
03710     one_or_more_time:
03711 #endif
03712       tok->type = TK_OP_REPEAT;
03713       tok->u.repeat.lower = 1;
03714       tok->u.repeat.upper = REPEAT_INFINITE;
03715       goto greedy_check;
03716       break;
03717 
03718     case '?':
03719       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
03720 #ifdef USE_VARIABLE_META_CHARS
03721     zero_or_one_time:
03722 #endif
03723       tok->type = TK_OP_REPEAT;
03724       tok->u.repeat.lower = 0;
03725       tok->u.repeat.upper = 1;
03726       goto greedy_check;
03727       break;
03728 
03729     case '{':
03730       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
03731       r = fetch_range_quantifier(&p, end, tok, env);
03732       if (r < 0) return r;  /* error */
03733       if (r == 0) goto greedy_check;
03734       else if (r == 2) { /* {n} */
03735         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03736           goto possessive_check;
03737 
03738         goto greedy_check;
03739       }
03740       /* r == 1 : normal char */
03741       break;
03742 
03743     case '|':
03744       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
03745       tok->type = TK_ALT;
03746       break;
03747 
03748     case '(':
03749       if (PPEEK_IS('?') &&
03750           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
03751         PINC;
03752         if (PPEEK_IS('#')) {
03753           PFETCH(c);
03754           while (1) {
03755             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
03756             PFETCH(c);
03757             if (c == MC_ESC(syn)) {
03758               if (!PEND) PFETCH(c);
03759             }
03760             else {
03761               if (c == ')') break;
03762             }
03763           }
03764           goto start;
03765         }
03766         PUNFETCH;
03767       }
03768 
03769       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03770       tok->type = TK_SUBEXP_OPEN;
03771       break;
03772 
03773     case ')':
03774       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03775       tok->type = TK_SUBEXP_CLOSE;
03776       break;
03777 
03778     case '^':
03779       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03780       tok->type = TK_ANCHOR;
03781       tok->u.subtype = (IS_SINGLELINE(env->option)
03782                         ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
03783       break;
03784 
03785     case '$':
03786       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03787       tok->type = TK_ANCHOR;
03788       tok->u.subtype = (IS_SINGLELINE(env->option)
03789                         ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
03790       break;
03791 
03792     case '[':
03793       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
03794       tok->type = TK_CC_OPEN;
03795       break;
03796 
03797     case ']':
03798       if (*src > env->pattern)   /* /].../ is allowed. */
03799         CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
03800       break;
03801 
03802     case '#':
03803       if (IS_EXTEND(env->option)) {
03804         while (!PEND) {
03805           PFETCH(c);
03806           if (ONIGENC_IS_CODE_NEWLINE(enc, c))
03807             break;
03808         }
03809         goto start;
03810         break;
03811       }
03812       break;
03813 
03814     case ' ': case '\t': case '\n': case '\r': case '\f':
03815       if (IS_EXTEND(env->option))
03816         goto start;
03817       break;
03818 
03819     default:
03820       /* string */
03821       break;
03822     }
03823   }
03824 
03825 #ifdef USE_VARIABLE_META_CHARS
03826  out:
03827 #endif
03828   *src = p;
03829   return tok->type;
03830 }
03831 
03832 static int
03833 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
03834                          ScanEnv* env,
03835                          OnigCodePoint sb_out, const OnigCodePoint mbr[])
03836 {
03837   int i, r;
03838   OnigCodePoint j;
03839 
03840   int n = ONIGENC_CODE_RANGE_NUM(mbr);
03841 
03842   if (not == 0) {
03843     for (i = 0; i < n; i++) {
03844       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
03845            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
03846         if (j >= sb_out) {
03847           if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03848             r = add_code_range_to_buf(&(cc->mbuf), env, j,
03849                                       ONIGENC_CODE_RANGE_TO(mbr, i));
03850             if (r != 0) return r;
03851             i++;
03852           }
03853 
03854           goto sb_end;
03855         }
03856         BITSET_SET_BIT_CHKDUP(cc->bs, j);
03857       }
03858     }
03859 
03860   sb_end:
03861     for ( ; i < n; i++) {
03862       r = add_code_range_to_buf(&(cc->mbuf), env,
03863                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
03864                                 ONIGENC_CODE_RANGE_TO(mbr, i));
03865       if (r != 0) return r;
03866     }
03867   }
03868   else {
03869     OnigCodePoint prev = 0;
03870 
03871     for (i = 0; i < n; i++) {
03872       for (j = prev;
03873            j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
03874         if (j >= sb_out) {
03875           goto sb_end2;
03876         }
03877         BITSET_SET_BIT_CHKDUP(cc->bs, j);
03878       }
03879       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03880     }
03881     for (j = prev; j < sb_out; j++) {
03882       BITSET_SET_BIT_CHKDUP(cc->bs, j);
03883     }
03884 
03885   sb_end2:
03886     prev = sb_out;
03887 
03888     for (i = 0; i < n; i++) {
03889       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03890         r = add_code_range_to_buf(&(cc->mbuf), env, prev,
03891                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
03892         if (r != 0) return r;
03893       }
03894       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03895     }
03896     if (prev < 0x7fffffff) {
03897       r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
03898       if (r != 0) return r;
03899     }
03900   }
03901 
03902   return 0;
03903 }
03904 
03905 static int
03906 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
03907 {
03908   int c, r;
03909   const OnigCodePoint *ranges;
03910   OnigCodePoint sb_out;
03911   OnigEncoding enc = env->enc;
03912 
03913   switch (ctype) {
03914   case ONIGENC_CTYPE_D:
03915   case ONIGENC_CTYPE_S:
03916   case ONIGENC_CTYPE_W:
03917     ctype ^= ONIGENC_CTYPE_SPECIAL_MASK;
03918     if (not != 0) {
03919       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03920         if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03921           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03922       }
03923       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03924     }
03925     else {
03926       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03927         if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03928           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03929       }
03930     }
03931     return 0;
03932     break;
03933   }
03934 
03935   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
03936   if (r == 0) {
03937     return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
03938   }
03939   else if (r != ONIG_NO_SUPPORT_CONFIG) {
03940     return r;
03941   }
03942 
03943   r = 0;
03944   switch (ctype) {
03945   case ONIGENC_CTYPE_ALPHA:
03946   case ONIGENC_CTYPE_BLANK:
03947   case ONIGENC_CTYPE_CNTRL:
03948   case ONIGENC_CTYPE_DIGIT:
03949   case ONIGENC_CTYPE_LOWER:
03950   case ONIGENC_CTYPE_PUNCT:
03951   case ONIGENC_CTYPE_SPACE:
03952   case ONIGENC_CTYPE_UPPER:
03953   case ONIGENC_CTYPE_XDIGIT:
03954   case ONIGENC_CTYPE_ASCII:
03955   case ONIGENC_CTYPE_ALNUM:
03956     if (not != 0) {
03957       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03958         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03959           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03960       }
03961       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03962     }
03963     else {
03964       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03965         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03966           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03967       }
03968     }
03969     break;
03970 
03971   case ONIGENC_CTYPE_GRAPH:
03972   case ONIGENC_CTYPE_PRINT:
03973     if (not != 0) {
03974       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03975         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03976           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03977       }
03978     }
03979     else {
03980       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03981         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03982           BITSET_SET_BIT_CHKDUP(cc->bs, c);
03983       }
03984       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03985     }
03986     break;
03987 
03988   case ONIGENC_CTYPE_WORD:
03989     if (not == 0) {
03990       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03991         if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
03992       }
03993       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03994     }
03995     else {
03996       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03997         if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
03998             && ! ONIGENC_IS_CODE_WORD(enc, c))
03999           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04000       }
04001     }
04002     break;
04003 
04004   default:
04005     return ONIGERR_PARSER_BUG;
04006     break;
04007   }
04008 
04009   return r;
04010 }
04011 
04012 static int
04013 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
04014 {
04015 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
04016 #define POSIX_BRACKET_NAME_MIN_LEN         4
04017 
04018   static const PosixBracketEntryType PBS[] = {
04019     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
04020     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
04021     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
04022     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
04023     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
04024     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
04025     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
04026     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
04027     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
04028     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
04029     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
04030     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
04031     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
04032     { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
04033     { (UChar* )NULL,     -1, 0 }
04034   };
04035 
04036   const PosixBracketEntryType *pb;
04037   int not, i, r;
04038   OnigCodePoint c;
04039   OnigEncoding enc = env->enc;
04040   UChar *p = *src;
04041   PFETCH_READY;
04042 
04043   if (PPEEK_IS('^')) {
04044     PINC;
04045     not = 1;
04046   }
04047   else
04048     not = 0;
04049 
04050   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
04051     goto not_posix_bracket;
04052 
04053   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
04054     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
04055       p = (UChar* )onigenc_step(enc, p, end, pb->len);
04056       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
04057         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04058 
04059       r = add_ctype_to_cc(cc, pb->ctype, not, env);
04060       if (r != 0) return r;
04061 
04062       PINC; PINC;
04063       *src = p;
04064       return 0;
04065     }
04066   }
04067 
04068  not_posix_bracket:
04069   c = 0;
04070   i = 0;
04071   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
04072     PINC;
04073     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
04074   }
04075   if (c == ':' && ! PEND) {
04076     PINC;
04077     if (! PEND) {
04078       PFETCH(c);
04079       if (c == ']')
04080         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04081     }
04082   }
04083 
04084   return 1;  /* 1: is not POSIX bracket, but no error. */
04085 }
04086 
04087 static int
04088 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
04089 {
04090   int r;
04091   OnigCodePoint c;
04092   OnigEncoding enc = env->enc;
04093   UChar *prev, *start, *p = *src;
04094   PFETCH_READY;
04095 
04096   r = 0;
04097   start = prev = p;
04098 
04099   while (!PEND) {
04100     prev = p;
04101     PFETCH(c);
04102     if (c == '}') {
04103       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
04104       if (r < 0) break;
04105 
04106       *src = p;
04107       return r;
04108     }
04109     else if (c == '(' || c == ')' || c == '{' || c == '|') {
04110       r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
04111       break;
04112     }
04113   }
04114 
04115   onig_scan_env_set_error_string(env, r, *src, prev);
04116   return r;
04117 }
04118 
04119 static int
04120 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
04121                     ScanEnv* env)
04122 {
04123   int r, ctype;
04124   CClassNode* cc;
04125 
04126   ctype = fetch_char_property_to_ctype(src, end, env);
04127   if (ctype < 0) return ctype;
04128 
04129   *np = node_new_cclass();
04130   CHECK_NULL_RETURN_MEMERR(*np);
04131   cc = NCCLASS(*np);
04132   r = add_ctype_to_cc(cc, ctype, 0, env);
04133   if (r != 0) return r;
04134   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
04135 
04136   return 0;
04137 }
04138 
04139 
/* Parser state while reading the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value is held and has not been added to the class */
  CCS_RANGE    = 1,  /* the next value ends a range that starts at the held value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* initial state of parse_char_class() */
};
04146 
/* Kind of the value currently held while parsing a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value; goes into the class bitset */
  CCV_CODE_POINT = 1,  /* code point; goes into the class code-range buffer */
  CCV_CLASS      = 2   /* a class was added (set by next_state_class()) */
};
04152 
04153 static int
04154 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
04155                  enum CCSTATE* state, ScanEnv* env)
04156 {
04157   int r;
04158 
04159   if (*state == CCS_RANGE)
04160     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
04161 
04162   if (*state == CCS_VALUE && *type != CCV_CLASS) {
04163     if (*type == CCV_SB)
04164       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04165     else if (*type == CCV_CODE_POINT) {
04166       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04167       if (r < 0) return r;
04168     }
04169   }
04170 
04171   *state = CCS_VALUE;
04172   *type  = CCV_CLASS;
04173   return 0;
04174 }
04175 
04176 static int
04177 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
04178                int* vs_israw, int v_israw,
04179                enum CCVALTYPE intype, enum CCVALTYPE* type,
04180                enum CCSTATE* state, ScanEnv* env)
04181 {
04182   int r;
04183 
04184   switch (*state) {
04185   case CCS_VALUE:
04186     if (*type == CCV_SB)
04187       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04188     else if (*type == CCV_CODE_POINT) {
04189       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04190       if (r < 0) return r;
04191     }
04192     break;
04193 
04194   case CCS_RANGE:
04195     if (intype == *type) {
04196       if (intype == CCV_SB) {
04197         if (*vs > 0xff || v > 0xff)
04198           return ONIGERR_INVALID_CODE_POINT_VALUE;
04199 
04200         if (*vs > v) {
04201           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04202             goto ccs_range_end;
04203           else
04204             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04205         }
04206         bitset_set_range(env, cc->bs, (int )*vs, (int )v);
04207       }
04208       else {
04209         r = add_code_range(&(cc->mbuf), env, *vs, v);
04210         if (r < 0) return r;
04211       }
04212     }
04213     else {
04214 #if 0
04215       if (intype == CCV_CODE_POINT && *type == CCV_SB) {
04216 #endif
04217         if (*vs > v) {
04218           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04219             goto ccs_range_end;
04220           else
04221             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04222         }
04223         bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
04224         r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
04225         if (r < 0) return r;
04226 #if 0
04227       }
04228       else
04229         return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
04230 #endif
04231     }
04232   ccs_range_end:
04233     *state = CCS_COMPLETE;
04234     break;
04235 
04236   case CCS_COMPLETE:
04237   case CCS_START:
04238     *state = CCS_VALUE;
04239     break;
04240 
04241   default:
04242     break;
04243   }
04244 
04245   *vs_israw = v_israw;
04246   *vs       = v;
04247   *type     = intype;
04248   return 0;
04249 }
04250 
04251 static int
04252 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
04253                  ScanEnv* env)
04254 {
04255   int in_esc;
04256   OnigCodePoint code;
04257   OnigEncoding enc = env->enc;
04258   UChar* p = from;
04259   PFETCH_READY;
04260 
04261   in_esc = 0;
04262   while (! PEND) {
04263     if (ignore_escaped && in_esc) {
04264       in_esc = 0;
04265     }
04266     else {
04267       PFETCH(code);
04268       if (code == c) return 1;
04269       if (code == MC_ESC(env->syntax)) in_esc = 1;
04270     }
04271   }
04272   return 0;
04273 }
04274 
04275 static int
04276 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
04277                  ScanEnv* env)
04278 {
04279   int r, neg, len, fetched, and_start;
04280   OnigCodePoint v, vs;
04281   UChar *p;
04282   Node* node;
04283   CClassNode *cc, *prev_cc;
04284   CClassNode work_cc;
04285 
04286   enum CCSTATE state;
04287   enum CCVALTYPE val_type, in_type;
04288   int val_israw, in_israw;
04289 
04290   prev_cc = (CClassNode* )NULL;
04291   *np = NULL_NODE;
04292   r = fetch_token_in_cc(tok, src, end, env);
04293   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
04294     neg = 1;
04295     r = fetch_token_in_cc(tok, src, end, env);
04296   }
04297   else {
04298     neg = 0;
04299   }
04300 
04301   if (r < 0) return r;
04302   if (r == TK_CC_CLOSE) {
04303     if (! code_exist_check((OnigCodePoint )']',
04304                            *src, env->pattern_end, 1, env))
04305       return ONIGERR_EMPTY_CHAR_CLASS;
04306 
04307     CC_ESC_WARN(env, (UChar* )"]");
04308     r = tok->type = TK_CHAR;  /* allow []...] */
04309   }
04310 
04311   *np = node = node_new_cclass();
04312   CHECK_NULL_RETURN_MEMERR(node);
04313   cc = NCCLASS(node);
04314 
04315   and_start = 0;
04316   state = CCS_START;
04317   p = *src;
04318   while (r != TK_CC_CLOSE) {
04319     fetched = 0;
04320     switch (r) {
04321     case TK_CHAR:
04322       if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
04323           (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
04324         in_type = CCV_CODE_POINT;
04325       }
04326       else if (len < 0) {
04327         r = len;
04328         goto err;
04329       }
04330       else {
04331       sb_char:
04332         in_type = CCV_SB;
04333       }
04334       v = (OnigCodePoint )tok->u.c;
04335       in_israw = 0;
04336       goto val_entry2;
04337       break;
04338 
04339     case TK_RAW_BYTE:
04340       /* tok->base != 0 : octal or hexadec. */
04341       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
04342         UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04343         UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
04344         UChar* psave = p;
04345         int i, base = tok->base;
04346 
04347         buf[0] = tok->u.c;
04348         for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
04349           r = fetch_token_in_cc(tok, &p, end, env);
04350           if (r < 0) goto err;
04351           if (r != TK_RAW_BYTE || tok->base != base) {
04352             fetched = 1;
04353             break;
04354           }
04355           buf[i] = tok->u.c;
04356         }
04357 
04358         if (i < ONIGENC_MBC_MINLEN(env->enc)) {
04359           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04360           goto err;
04361         }
04362 
04363         len = enclen(env->enc, buf, buf+i);
04364         if (i < len) {
04365           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04366           goto err;
04367         }
04368         else if (i > len) { /* fetch back */
04369           p = psave;
04370           for (i = 1; i < len; i++) {
04371             r = fetch_token_in_cc(tok, &p, end, env);
04372           }
04373           fetched = 0;
04374         }
04375 
04376         if (i == 1) {
04377           v = (OnigCodePoint )buf[0];
04378           goto raw_single;
04379         }
04380         else {
04381           v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
04382           in_type = CCV_CODE_POINT;
04383         }
04384       }
04385       else {
04386         v = (OnigCodePoint )tok->u.c;
04387       raw_single:
04388         in_type = CCV_SB;
04389       }
04390       in_israw = 1;
04391       goto val_entry2;
04392       break;
04393 
04394     case TK_CODE_POINT:
04395       v = tok->u.code;
04396       in_israw = 1;
04397     val_entry:
04398       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
04399       if (len < 0) {
04400         r = len;
04401         goto err;
04402       }
04403       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
04404     val_entry2:
04405       r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
04406                          &state, env);
04407       if (r != 0) goto err;
04408       break;
04409 
04410     case TK_POSIX_BRACKET_OPEN:
04411       r = parse_posix_bracket(cc, &p, end, env);
04412       if (r < 0) goto err;
04413       if (r == 1) {  /* is not POSIX bracket */
04414         CC_ESC_WARN(env, (UChar* )"[");
04415         p = tok->backp;
04416         v = (OnigCodePoint )tok->u.c;
04417         in_israw = 0;
04418         goto val_entry;
04419       }
04420       goto next_class;
04421       break;
04422 
04423     case TK_CHAR_TYPE:
04424       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
04425       if (r != 0) return r;
04426 
04427     next_class:
04428       r = next_state_class(cc, &vs, &val_type, &state, env);
04429       if (r != 0) goto err;
04430       break;
04431 
04432     case TK_CHAR_PROPERTY:
04433       {
04434         int ctype;
04435 
04436         ctype = fetch_char_property_to_ctype(&p, end, env);
04437         if (ctype < 0) return ctype;
04438         r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
04439         if (r != 0) return r;
04440         goto next_class;
04441       }
04442       break;
04443 
04444     case TK_CC_RANGE:
04445       if (state == CCS_VALUE) {
04446         r = fetch_token_in_cc(tok, &p, end, env);
04447         if (r < 0) goto err;
04448         fetched = 1;
04449         if (r == TK_CC_CLOSE) { /* allow [x-] */
04450         range_end_val:
04451           v = (OnigCodePoint )'-';
04452           in_israw = 0;
04453           goto val_entry;
04454         }
04455         else if (r == TK_CC_AND) {
04456           CC_ESC_WARN(env, (UChar* )"-");
04457           goto range_end_val;
04458         }
04459         state = CCS_RANGE;
04460       }
04461       else if (state == CCS_START) {
04462         /* [-xa] is allowed */
04463         v = (OnigCodePoint )tok->u.c;
04464         in_israw = 0;
04465 
04466         r = fetch_token_in_cc(tok, &p, end, env);
04467         if (r < 0) goto err;
04468         fetched = 1;
04469         /* [--x] or [a&&-x] is warned. */
04470         if (r == TK_CC_RANGE || and_start != 0)
04471           CC_ESC_WARN(env, (UChar* )"-");
04472 
04473         goto val_entry;
04474       }
04475       else if (state == CCS_RANGE) {
04476         CC_ESC_WARN(env, (UChar* )"-");
04477         goto sb_char;  /* [!--x] is allowed */
04478       }
04479       else { /* CCS_COMPLETE */
04480         r = fetch_token_in_cc(tok, &p, end, env);
04481         if (r < 0) goto err;
04482         fetched = 1;
04483         if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
04484         else if (r == TK_CC_AND) {
04485           CC_ESC_WARN(env, (UChar* )"-");
04486           goto range_end_val;
04487         }
04488 
04489         if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
04490           CC_ESC_WARN(env, (UChar* )"-");
04491           goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
04492         }
04493         r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
04494         goto err;
04495       }
04496       break;
04497 
04498     case TK_CC_CC_OPEN: /* [ */
04499       {
04500         Node *anode;
04501         CClassNode* acc;
04502 
04503         r = parse_char_class(&anode, tok, &p, end, env);
04504         if (r == 0) {
04505           acc = NCCLASS(anode);
04506           r = or_cclass(cc, acc, env);
04507         }
04508         onig_node_free(anode);
04509         if (r != 0) goto err;
04510       }
04511       break;
04512 
04513     case TK_CC_AND: /* && */
04514       {
04515         if (state == CCS_VALUE) {
04516           r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04517                              &val_type, &state, env);
04518           if (r != 0) goto err;
04519         }
04520         /* initialize local variables */
04521         and_start = 1;
04522         state = CCS_START;
04523 
04524         if (IS_NOT_NULL(prev_cc)) {
04525           r = and_cclass(prev_cc, cc, env);
04526           if (r != 0) goto err;
04527           bbuf_free(cc->mbuf);
04528         }
04529         else {
04530           prev_cc = cc;
04531           cc = &work_cc;
04532         }
04533         initialize_cclass(cc);
04534       }
04535       break;
04536 
04537     case TK_EOT:
04538       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
04539       goto err;
04540       break;
04541     default:
04542       r = ONIGERR_PARSER_BUG;
04543       goto err;
04544       break;
04545     }
04546 
04547     if (fetched)
04548       r = tok->type;
04549     else {
04550       r = fetch_token_in_cc(tok, &p, end, env);
04551       if (r < 0) goto err;
04552     }
04553   }
04554 
04555   if (state == CCS_VALUE) {
04556     r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04557                        &val_type, &state, env);
04558     if (r != 0) goto err;
04559   }
04560 
04561   if (IS_NOT_NULL(prev_cc)) {
04562     r = and_cclass(prev_cc, cc, env);
04563     if (r != 0) goto err;
04564     bbuf_free(cc->mbuf);
04565     cc = prev_cc;
04566   }
04567 
04568   if (neg != 0)
04569     NCCLASS_SET_NOT(cc);
04570   else
04571     NCCLASS_CLEAR_NOT(cc);
04572   if (IS_NCCLASS_NOT(cc) &&
04573       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
04574     int is_empty;
04575 
04576     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
04577     if (is_empty != 0)
04578       BITSET_IS_EMPTY(cc->bs, is_empty);
04579 
04580     if (is_empty == 0) {
04581 #define NEWLINE_CODE    0x0a
04582 
04583       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
04584         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
04585           BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
04586         else
04587           add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
04588       }
04589     }
04590   }
04591   *src = p;
04592   return 0;
04593 
04594  err:
04595   if (cc != NCCLASS(*np))
04596     bbuf_free(cc->mbuf);
04597   return r;
04598 }
04599 
/* Forward declaration: parse_enclose() and parse_exp() below call
   parse_subexp() recursively, so its prototype must be visible here. */
04600 static int parse_subexp(Node** top, OnigToken* tok, int term,
04601                         UChar** src, UChar* end, ScanEnv* env);
04602 
04603 static int
04604 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
04605               ScanEnv* env)
04606 {
04607   int r, num;
04608   Node *target;
04609   OnigOptionType option;
04610   OnigCodePoint c;
04611   OnigEncoding enc = env->enc;
04612 
04613 #ifdef USE_NAMED_GROUP
04614   int list_capture;
04615 #endif
04616 
04617   UChar* p = *src;
04618   PFETCH_READY;
04619 
04620   *np = NULL;
04621   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
04622 
04623   option = env->option;
04624   if (PPEEK_IS('?') &&
04625       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
04626     PINC;
04627     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04628 
04629     PFETCH(c);
04630     switch (c) {
04631     case ':':   /* (?:...) grouping only */
04632     group:
04633       r = fetch_token(tok, &p, end, env);
04634       if (r < 0) return r;
04635       r = parse_subexp(np, tok, term, &p, end, env);
04636       if (r < 0) return r;
04637       *src = p;
04638       return 1; /* group */
04639       break;
04640 
04641     case '=':
04642       *np = onig_node_new_anchor(ANCHOR_PREC_READ);
04643       break;
04644     case '!':  /*         preceding read */
04645       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
04646       break;
04647     case '>':            /* (?>...) stop backtrack */
04648       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
04649       break;
04650 
04651 #ifdef USE_NAMED_GROUP
04652     case '\'':
04653       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04654         goto named_group1;
04655       }
04656       else
04657         return ONIGERR_UNDEFINED_GROUP_OPTION;
04658       break;
04659 #endif
04660 
04661     case '<':   /* look behind (?<=...), (?<!...) */
04662       PFETCH(c);
04663       if (c == '=')
04664         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
04665       else if (c == '!')
04666         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
04667 #ifdef USE_NAMED_GROUP
04668       else {
04669         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04670           UChar *name;
04671           UChar *name_end;
04672 
04673           PUNFETCH;
04674           c = '<';
04675 
04676         named_group1:
04677           list_capture = 0;
04678 
04679         named_group2:
04680           name = p;
04681           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
04682           if (r < 0) return r;
04683 
04684           num = scan_env_add_mem_entry(env);
04685           if (num < 0) return num;
04686           if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
04687             return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04688 
04689           r = name_add(env->reg, name, name_end, num, env);
04690           if (r != 0) return r;
04691           *np = node_new_enclose_memory(env->option, 1);
04692           CHECK_NULL_RETURN_MEMERR(*np);
04693           NENCLOSE(*np)->regnum = num;
04694           if (list_capture != 0)
04695             BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04696           env->num_named++;
04697         }
04698         else {
04699           return ONIGERR_UNDEFINED_GROUP_OPTION;
04700         }
04701       }
04702 #else
04703       else {
04704         return ONIGERR_UNDEFINED_GROUP_OPTION;
04705       }
04706 #endif
04707       break;
04708 
04709     case '@':
04710       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
04711 #ifdef USE_NAMED_GROUP
04712         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04713           PFETCH(c);
04714           if (c == '<' || c == '\'') {
04715             list_capture = 1;
04716             goto named_group2; /* (?@<name>...) */
04717           }
04718           PUNFETCH;
04719         }
04720 #endif
04721         *np = node_new_enclose_memory(env->option, 0);
04722         CHECK_NULL_RETURN_MEMERR(*np);
04723         num = scan_env_add_mem_entry(env);
04724         if (num < 0) {
04725           onig_node_free(*np);
04726           return num;
04727         }
04728         else if (num >= (int )BIT_STATUS_BITS_NUM) {
04729           onig_node_free(*np);
04730           return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04731         }
04732         NENCLOSE(*np)->regnum = num;
04733         BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04734       }
04735       else {
04736         return ONIGERR_UNDEFINED_GROUP_OPTION;
04737       }
04738       break;
04739 
04740 #ifdef USE_POSIXLINE_OPTION
04741     case 'p':
04742 #endif
04743     case '-': case 'i': case 'm': case 's': case 'x':
04744       {
04745         int neg = 0;
04746 
04747         while (1) {
04748           switch (c) {
04749           case ':':
04750           case ')':
04751           break;
04752 
04753           case '-':  neg = 1; break;
04754           case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
04755           case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
04756           case 's':
04757             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04758               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
04759             }
04760             else
04761               return ONIGERR_UNDEFINED_GROUP_OPTION;
04762             break;
04763 
04764           case 'm':
04765             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04766               ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
04767             }
04768             else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
04769               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
04770             }
04771             else
04772               return ONIGERR_UNDEFINED_GROUP_OPTION;
04773             break;
04774 #ifdef USE_POSIXLINE_OPTION
04775           case 'p':
04776             ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
04777             break;
04778 #endif
04779           default:
04780             return ONIGERR_UNDEFINED_GROUP_OPTION;
04781           }
04782 
04783           if (c == ')') {
04784             *np = node_new_option(option);
04785             CHECK_NULL_RETURN_MEMERR(*np);
04786             *src = p;
04787             return 2; /* option only */
04788           }
04789           else if (c == ':') {
04790             OnigOptionType prev = env->option;
04791 
04792             env->option     = option;
04793             r = fetch_token(tok, &p, end, env);
04794             if (r < 0) return r;
04795             r = parse_subexp(&target, tok, term, &p, end, env);
04796             env->option = prev;
04797             if (r < 0) return r;
04798             *np = node_new_option(option);
04799             CHECK_NULL_RETURN_MEMERR(*np);
04800             NENCLOSE(*np)->target = target;
04801             *src = p;
04802             return 0;
04803           }
04804 
04805           if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04806           PFETCH(c);
04807         }
04808       }
04809       break;
04810 
04811     default:
04812       return ONIGERR_UNDEFINED_GROUP_OPTION;
04813     }
04814   }
04815   else {
04816     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
04817       goto group;
04818 
04819     *np = node_new_enclose_memory(env->option, 0);
04820     CHECK_NULL_RETURN_MEMERR(*np);
04821     num = scan_env_add_mem_entry(env);
04822     if (num < 0) return num;
04823     NENCLOSE(*np)->regnum = num;
04824   }
04825 
04826   CHECK_NULL_RETURN_MEMERR(*np);
04827   r = fetch_token(tok, &p, end, env);
04828   if (r < 0) return r;
04829   r = parse_subexp(&target, tok, term, &p, end, env);
04830   if (r < 0) {
04831     onig_node_free(target);
04832     return r;
04833   }
04834 
04835   if (NTYPE(*np) == NT_ANCHOR)
04836     NANCHOR(*np)->target = target;
04837   else {
04838     NENCLOSE(*np)->target = target;
04839     if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
04840       /* Don't move this to previous of parse_subexp() */
04841       r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
04842       if (r != 0) return r;
04843     }
04844   }
04845 
04846   *src = p;
04847   return 0;
04848 }
04849 
/* Display text of a quantifier, indexed by the value set_quantifier()
   obtains from popular_quantifier_num(); used when building the
   nested-repeat warning message. */
04850 static const char* const PopularQStr[] = {
04851   "?", "*", "+", "??", "*?", "+?"
04852 };
04853 
/* Display text of the quantifier that replaces a nested pair, indexed by
   the ReduceTypeTable entry looked up in set_quantifier(). */
04854 static const char* const ReduceQStr[] = {
04855   "", "", "*", "*?", "??", "+ and ??", "+? and ?"
04856 };
04857 
04858 static int
04859 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
04860 {
04861   QtfrNode* qn;
04862 
04863   qn = NQTFR(qnode);
04864   if (qn->lower == 1 && qn->upper == 1) {
04865     return 1;
04866   }
04867 
04868   switch (NTYPE(target)) {
04869   case NT_STR:
04870     if (! group) {
04871       StrNode* sn = NSTR(target);
04872       if (str_node_can_be_split(sn, env->enc)) {
04873         Node* n = str_node_split_last_char(sn, env->enc);
04874         if (IS_NOT_NULL(n)) {
04875           qn->target = n;
04876           return 2;
04877         }
04878       }
04879     }
04880     break;
04881 
04882   case NT_QTFR:
04883     { /* check redundant double repeat. */
04884       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
04885       QtfrNode* qnt   = NQTFR(target);
04886       int nestq_num   = popular_quantifier_num(qn);
04887       int targetq_num = popular_quantifier_num(qnt);
04888 
04889 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
04890       if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
04891           IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
04892         UChar buf[WARN_BUFSIZE];
04893 
04894         switch(ReduceTypeTable[targetq_num][nestq_num]) {
04895         case RQ_ASIS:
04896           break;
04897 
04898         case RQ_DEL:
04899           if (onig_verb_warn != onig_null_warn) {
04900             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04901                                  env->pattern, env->pattern_end,
04902                                  (UChar* )"redundant nested repeat operator");
04903             (*onig_verb_warn)((char* )buf);
04904           }
04905           goto warn_exit;
04906           break;
04907 
04908         default:
04909           if (onig_verb_warn != onig_null_warn) {
04910             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04911                                        env->pattern, env->pattern_end,
04912             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
04913             PopularQStr[targetq_num], PopularQStr[nestq_num],
04914             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
04915             (*onig_verb_warn)((char* )buf);
04916           }
04917           goto warn_exit;
04918           break;
04919         }
04920       }
04921 
04922     warn_exit:
04923 #endif
04924       if (targetq_num >= 0) {
04925         if (nestq_num >= 0) {
04926           onig_reduce_nested_quantifier(qnode, target);
04927           goto q_exit;
04928         }
04929         else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
04930           /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
04931           if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
04932             qn->upper = (qn->lower == 0 ? 1 : qn->lower);
04933           }
04934         }
04935       }
04936     }
04937     break;
04938 
04939   default:
04940     break;
04941   }
04942 
04943   qn->target = target;
04944  q_exit:
04945   return 0;
04946 }
04947 
04948 
04949 #ifdef USE_SHARED_CCLASS_TABLE
04950 
04951 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
04952 
04953 /* for ctype node hash table */
04954 
/* Lookup key of the shared character-class table (OnigTypeCClassTable).
   parse_exp() fills it from the scan environment and the current token. */
04955 typedef struct {
04956   OnigEncoding enc;   /* set from env->enc */
04957   int not;            /* set from tok->u.prop.not */
04958   int type;           /* set from tok->u.prop.ctype */
04959 } type_cclass_key;
04960 
04961 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
04962 {
04963   if (x->type != y->type) return 1;
04964   if (x->enc  != y->enc)  return 1;
04965   if (x->not  != y->not)  return 1;
04966   return 0;
04967 }
04968 
04969 static st_index_t type_cclass_hash(type_cclass_key* key)
04970 {
04971   int i, val;
04972   UChar *p;
04973 
04974   val = 0;
04975 
04976   p = (UChar* )&(key->enc);
04977   for (i = 0; i < (int )sizeof(key->enc); i++) {
04978     val = val * 997 + (int )*p++;
04979   }
04980 
04981   p = (UChar* )(&key->type);
04982   for (i = 0; i < (int )sizeof(key->type); i++) {
04983     val = val * 997 + (int )*p++;
04984   }
04985 
04986   val += key->not;
04987   return val + (val >> 5);
04988 }
04989 
/* Compare/hash callbacks handed to onig_st_init_table_with_size() when
   parse_exp() creates OnigTypeCClassTable. */
04990 static const struct st_hash_type type_type_cclass_hash = {
04991     type_cclass_cmp,
04992     type_cclass_hash,
04993 };
04994 
/* Table of shared character-class nodes keyed by type_cclass_key.
   Created on first use in parse_exp(); released by
   onig_free_shared_cclass_table().  Accesses are bracketed by
   THREAD_ATOMIC_START / THREAD_ATOMIC_END. */
04995 static st_table* OnigTypeCClassTable;
04996 
04997 
04998 static int
04999 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
05000 {
05001   if (IS_NOT_NULL(node)) {
05002     CClassNode* cc = NCCLASS(node);
05003     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
05004     xfree(node);
05005   }
05006 
05007   if (IS_NOT_NULL(key)) xfree(key);
05008   return ST_DELETE;
05009 }
05010 
05011 extern int
05012 onig_free_shared_cclass_table(void)
05013 {
05014   THREAD_ATOMIC_START;
05015   if (IS_NOT_NULL(OnigTypeCClassTable)) {
05016     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
05017     onig_st_free_table(OnigTypeCClassTable);
05018     OnigTypeCClassTable = NULL;
05019   }
05020   THREAD_ATOMIC_END;
05021 
05022   return 0;
05023 }
05024 
05025 #endif /* USE_SHARED_CCLASS_TABLE */
05026 
05027 
05028 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05029 static int
05030 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
05031 {
05032   BBuf *tbuf;
05033   int r;
05034 
05035   if (IS_NCCLASS_NOT(cc)) {
05036     bitset_invert(cc->bs);
05037 
05038     if (! ONIGENC_IS_SINGLEBYTE(enc)) {
05039       r = not_code_range_buf(enc, cc->mbuf, &tbuf);
05040       if (r != 0) return r;
05041 
05042       bbuf_free(cc->mbuf);
05043       cc->mbuf = tbuf;
05044     }
05045 
05046     NCCLASS_CLEAR_NOT(cc);
05047   }
05048 
05049   return 0;
05050 }
05051 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
05052 
/* Context passed (as void*) to i_apply_case_fold() by parse_exp(). */
05053 typedef struct {
05054   ScanEnv*    env;       /* scan environment of the current parse */
05055   CClassNode* cc;        /* character class being extended */
05056   Node*       alt_root;  /* head of the list of alternative nodes built for multi-char folds */
05057   Node**      ptail;     /* slot where the next alternative node is linked */
05058 } IApplyCaseFoldArg;
05059 
05060 static int
05061 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
05062                   int to_len, void* arg)
05063 {
05064   IApplyCaseFoldArg* iarg;
05065   ScanEnv* env;
05066   CClassNode* cc;
05067   BitSetRef bs;
05068 
05069   iarg = (IApplyCaseFoldArg* )arg;
05070   env = iarg->env;
05071   cc  = iarg->cc;
05072   bs = cc->bs;
05073 
05074   if (to_len == 1) {
05075     int is_in = onig_is_code_in_cc(env->enc, from, cc);
05076 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05077     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
05078         (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
05079       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05080         add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05081       }
05082       else {
05083         BITSET_SET_BIT(bs, *to);
05084       }
05085     }
05086 #else
05087     if (is_in != 0) {
05088       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05089         if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
05090         add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05091       }
05092       else {
05093         if (IS_NCCLASS_NOT(cc)) {
05094           BITSET_CLEAR_BIT(bs, *to);
05095         }
05096         else
05097           BITSET_SET_BIT(bs, *to);
05098       }
05099     }
05100 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
05101   }
05102   else {
05103     int r, i, len;
05104     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05105     Node *snode = NULL_NODE;
05106 
05107     if (onig_is_code_in_cc(env->enc, from, cc)
05108 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05109         && !IS_NCCLASS_NOT(cc)
05110 #endif
05111         ) {
05112       for (i = 0; i < to_len; i++) {
05113         len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
05114         if (i == 0) {
05115           snode = onig_node_new_str(buf, buf + len);
05116           CHECK_NULL_RETURN_MEMERR(snode);
05117 
05118           /* char-class expanded multi-char only
05119              compare with string folded at match time. */
05120           NSTRING_SET_AMBIG(snode);
05121         }
05122         else {
05123           r = onig_node_str_cat(snode, buf, buf + len);
05124           if (r < 0) {
05125             onig_node_free(snode);
05126             return r;
05127           }
05128         }
05129       }
05130 
05131       *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
05132       CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
05133       iarg->ptail = &(NCDR((*(iarg->ptail))));
05134     }
05135   }
05136 
05137   return 0;
05138 }
05139 
05140 static int
05141 parse_exp(Node** np, OnigToken* tok, int term,
05142           UChar** src, UChar* end, ScanEnv* env)
05143 {
05144   int r, len, group = 0;
05145   Node* qn;
05146   Node** targetp;
05147 
05148   *np = NULL;
05149   if (tok->type == (enum TokenSyms )term)
05150     goto end_of_token;
05151 
05152   switch (tok->type) {
05153   case TK_ALT:
05154   case TK_EOT:
05155   end_of_token:
05156     *np = node_new_empty();
05157     return tok->type;
05158 
05159   case TK_SUBEXP_OPEN:
05160     r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
05161     if (r < 0) return r;
05162     if (r == 1) group = 1;
05163     else if (r == 2) { /* option only */
05164       Node* target;
05165       OnigOptionType prev = env->option;
05166 
05167       env->option = NENCLOSE(*np)->option;
05168       r = fetch_token(tok, src, end, env);
05169       if (r < 0) return r;
05170       r = parse_subexp(&target, tok, term, src, end, env);
05171       env->option = prev;
05172       if (r < 0) {
05173         onig_node_free(target);
05174         return r;
05175       }
05176       NENCLOSE(*np)->target = target;
05177       return tok->type;
05178     }
05179     break;
05180 
05181   case TK_SUBEXP_CLOSE:
05182     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
05183       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
05184 
05185     if (tok->escaped) goto tk_raw_byte;
05186     else goto tk_byte;
05187     break;
05188 
05189   case TK_STRING:
05190   tk_byte:
05191     {
05192       *np = node_new_str(tok->backp, *src);
05193       CHECK_NULL_RETURN_MEMERR(*np);
05194 
05195       while (1) {
05196         r = fetch_token(tok, src, end, env);
05197         if (r < 0) return r;
05198         if (r != TK_STRING) break;
05199 
05200         r = onig_node_str_cat(*np, tok->backp, *src);
05201         if (r < 0) return r;
05202       }
05203 
05204     string_end:
05205       targetp = np;
05206       goto repeat;
05207     }
05208     break;
05209 
05210   case TK_RAW_BYTE:
05211   tk_raw_byte:
05212     {
05213       *np = node_new_str_raw_char((UChar )tok->u.c);
05214       CHECK_NULL_RETURN_MEMERR(*np);
05215       len = 1;
05216       while (1) {
05217         if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
05218           if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
05219             r = fetch_token(tok, src, end, env);
05220             NSTRING_CLEAR_RAW(*np);
05221             goto string_end;
05222           }
05223         }
05224 
05225         r = fetch_token(tok, src, end, env);
05226         if (r < 0) return r;
05227         if (r != TK_RAW_BYTE) {
05228           /* Don't use this, it is wrong for little endian encodings. */
05229 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
05230           int rem;
05231           if (len < ONIGENC_MBC_MINLEN(env->enc)) {
05232             rem = ONIGENC_MBC_MINLEN(env->enc) - len;
05233             (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
05234             if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
05235               NSTRING_CLEAR_RAW(*np);
05236               goto string_end;
05237             }
05238           }
05239 #endif
05240           return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
05241         }
05242 
05243         r = node_str_cat_char(*np, (UChar )tok->u.c);
05244         if (r < 0) return r;
05245 
05246         len++;
05247       }
05248     }
05249     break;
05250 
05251   case TK_CODE_POINT:
05252     {
05253       UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05254       int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
05255       if (num < 0) return num;
05256 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
05257       *np = node_new_str_raw(buf, buf + num);
05258 #else
05259       *np = node_new_str(buf, buf + num);
05260 #endif
05261       CHECK_NULL_RETURN_MEMERR(*np);
05262     }
05263     break;
05264 
05265   case TK_QUOTE_OPEN:
05266     {
05267       OnigCodePoint end_op[2];
05268       UChar *qstart, *qend, *nextp;
05269 
05270       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
05271       end_op[1] = (OnigCodePoint )'E';
05272       qstart = *src;
05273       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
05274       if (IS_NULL(qend)) {
05275         nextp = qend = end;
05276       }
05277       *np = node_new_str(qstart, qend);
05278       CHECK_NULL_RETURN_MEMERR(*np);
05279       *src = nextp;
05280     }
05281     break;
05282 
05283   case TK_CHAR_TYPE:
05284     {
05285       switch (tok->u.prop.ctype) {
05286       case ONIGENC_CTYPE_D:
05287       case ONIGENC_CTYPE_S:
05288       case ONIGENC_CTYPE_W:
05289         {
05290             CClassNode* cc;
05291             *np = node_new_cclass();
05292             CHECK_NULL_RETURN_MEMERR(*np);
05293             cc = NCCLASS(*np);
05294             add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05295             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05296         }
05297         break;
05298 
05299       case ONIGENC_CTYPE_WORD:
05300         *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
05301         CHECK_NULL_RETURN_MEMERR(*np);
05302         break;
05303 
05304       case ONIGENC_CTYPE_SPACE:
05305       case ONIGENC_CTYPE_DIGIT:
05306       case ONIGENC_CTYPE_XDIGIT:
05307         {
05308           CClassNode* cc;
05309 
05310 #ifdef USE_SHARED_CCLASS_TABLE
05311           const OnigCodePoint *mbr;
05312           OnigCodePoint sb_out;
05313 
05314           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
05315                                            &sb_out, &mbr);
05316           if (r == 0 &&
05317               ONIGENC_CODE_RANGE_NUM(mbr)
05318               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
05319             type_cclass_key  key;
05320             type_cclass_key* new_key;
05321 
05322             key.enc  = env->enc;
05323             key.not  = tok->u.prop.not;
05324             key.type = tok->u.prop.ctype;
05325 
05326             THREAD_ATOMIC_START;
05327 
05328             if (IS_NULL(OnigTypeCClassTable)) {
05329               OnigTypeCClassTable
05330                 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
05331               if (IS_NULL(OnigTypeCClassTable)) {
05332                 THREAD_ATOMIC_END;
05333                 return ONIGERR_MEMORY;
05334               }
05335             }
05336             else {
05337               if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
05338                                  (st_data_t* )np)) {
05339                 THREAD_ATOMIC_END;
05340                 break;
05341               }
05342             }
05343 
05344             *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
05345                                                      sb_out, mbr);
05346             if (IS_NULL(*np)) {
05347               THREAD_ATOMIC_END;
05348               return ONIGERR_MEMORY;
05349             }
05350 
05351             cc = NCCLASS(*np);
05352             NCCLASS_SET_SHARE(cc);
05353             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
05354             xmemcpy(new_key, &key, sizeof(type_cclass_key));
05355             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
05356                                (st_data_t )*np);
05357 
05358             THREAD_ATOMIC_END;
05359           }
05360           else {
05361 #endif
05362             *np = node_new_cclass();
05363             CHECK_NULL_RETURN_MEMERR(*np);
05364             cc = NCCLASS(*np);
05365             add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05366             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05367 #ifdef USE_SHARED_CCLASS_TABLE
05368           }
05369 #endif
05370         }
05371         break;
05372 
05373       default:
05374         return ONIGERR_PARSER_BUG;
05375         break;
05376       }
05377     }
05378     break;
05379 
05380   case TK_CHAR_PROPERTY:
05381     r = parse_char_property(np, tok, src, end, env);
05382     if (r != 0) return r;
05383     break;
05384 
05385   case TK_CC_OPEN:
05386     {
05387       CClassNode* cc;
05388 
05389       r = parse_char_class(np, tok, src, end, env);
05390       if (r != 0) return r;
05391 
05392       cc = NCCLASS(*np);
05393       if (IS_IGNORECASE(env->option)) {
05394         IApplyCaseFoldArg iarg;
05395 
05396         iarg.env      = env;
05397         iarg.cc       = cc;
05398         iarg.alt_root = NULL_NODE;
05399         iarg.ptail    = &(iarg.alt_root);
05400 
05401         r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
05402                                         i_apply_case_fold, &iarg);
05403         if (r != 0) {
05404           onig_node_free(iarg.alt_root);
05405           return r;
05406         }
05407         if (IS_NOT_NULL(iarg.alt_root)) {
05408           Node* work = onig_node_new_alt(*np, iarg.alt_root);
05409           if (IS_NULL(work)) {
05410             onig_node_free(iarg.alt_root);
05411             return ONIGERR_MEMORY;
05412           }
05413           *np = work;
05414         }
05415       }
05416     }
05417     break;
05418 
05419   case TK_ANYCHAR:
05420     *np = node_new_anychar();
05421     CHECK_NULL_RETURN_MEMERR(*np);
05422     break;
05423 
05424   case TK_ANYCHAR_ANYTIME:
05425     *np = node_new_anychar();
05426     CHECK_NULL_RETURN_MEMERR(*np);
05427     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
05428     CHECK_NULL_RETURN_MEMERR(qn);
05429     NQTFR(qn)->target = *np;
05430     *np = qn;
05431     break;
05432 
05433   case TK_BACKREF:
05434     len = tok->u.backref.num;
05435     *np = node_new_backref(len,
05436                    (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
05437                            tok->u.backref.by_name,
05438 #ifdef USE_BACKREF_WITH_LEVEL
05439                            tok->u.backref.exist_level,
05440                            tok->u.backref.level,
05441 #endif
05442                            env);
05443     CHECK_NULL_RETURN_MEMERR(*np);
05444     break;
05445 
05446 #ifdef USE_SUBEXP_CALL
05447   case TK_CALL:
05448     {
05449       int gnum = tok->u.call.gnum;
05450 
05451       if (gnum < 0) {
05452         gnum = BACKREF_REL_TO_ABS(gnum, env);
05453         if (gnum <= 0)
05454           return ONIGERR_INVALID_BACKREF;
05455       }
05456       *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
05457       CHECK_NULL_RETURN_MEMERR(*np);
05458       env->num_call++;
05459     }
05460     break;
05461 #endif
05462 
05463   case TK_ANCHOR:
05464     *np = onig_node_new_anchor(tok->u.anchor);
05465     break;
05466 
05467   case TK_OP_REPEAT:
05468   case TK_INTERVAL:
05469     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
05470       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
05471         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
05472       else
05473         *np = node_new_empty();
05474     }
05475     else {
05476       goto tk_byte;
05477     }
05478     break;
05479 
05480   default:
05481     return ONIGERR_PARSER_BUG;
05482     break;
05483   }
05484 
05485   {
05486     targetp = np;
05487 
05488   re_entry:
05489     r = fetch_token(tok, src, end, env);
05490     if (r < 0) return r;
05491 
05492   repeat:
05493     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
05494       if (is_invalid_quantifier_target(*targetp))
05495         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
05496 
05497       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
05498                                (r == TK_INTERVAL ? 1 : 0));
05499       CHECK_NULL_RETURN_MEMERR(qn);
05500       NQTFR(qn)->greedy = tok->u.repeat.greedy;
05501       r = set_quantifier(qn, *targetp, group, env);
05502       if (r < 0) {
05503         onig_node_free(qn);
05504         return r;
05505       }
05506 
05507       if (tok->u.repeat.possessive != 0) {
05508         Node* en;
05509         en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
05510         if (IS_NULL(en)) {
05511           onig_node_free(qn);
05512           return ONIGERR_MEMORY;
05513         }
05514         NENCLOSE(en)->target = qn;
05515         qn = en;
05516       }
05517 
05518       if (r == 0) {
05519         *targetp = qn;
05520       }
05521       else if (r == 1) {
05522         onig_node_free(qn);
05523       }
05524       else if (r == 2) { /* split case: /abc+/ */
05525         Node *tmp;
05526 
05527         *targetp = node_new_list(*targetp, NULL);
05528         if (IS_NULL(*targetp)) {
05529           onig_node_free(qn);
05530           return ONIGERR_MEMORY;
05531         }
05532         tmp = NCDR(*targetp) = node_new_list(qn, NULL);
05533         if (IS_NULL(tmp)) {
05534           onig_node_free(qn);
05535           return ONIGERR_MEMORY;
05536         }
05537         targetp = &(NCAR(tmp));
05538       }
05539       goto re_entry;
05540     }
05541   }
05542 
05543   return r;
05544 }
05545 
05546 static int
05547 parse_branch(Node** top, OnigToken* tok, int term,
05548              UChar** src, UChar* end, ScanEnv* env)
05549 {
05550   int r;
05551   Node *node, **headp;
05552 
05553   *top = NULL;
05554   r = parse_exp(&node, tok, term, src, end, env);
05555   if (r < 0) {
05556     onig_node_free(node);
05557     return r;
05558   }
05559 
05560   if (r == TK_EOT || r == term || r == TK_ALT) {
05561     *top = node;
05562   }
05563   else {
05564     *top  = node_new_list(node, NULL);
05565     headp = &(NCDR(*top));
05566     while (r != TK_EOT && r != term && r != TK_ALT) {
05567       r = parse_exp(&node, tok, term, src, end, env);
05568       if (r < 0) {
05569         onig_node_free(node);
05570         return r;
05571       }
05572 
05573       if (NTYPE(node) == NT_LIST) {
05574         *headp = node;
05575         while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
05576         headp = &(NCDR(node));
05577       }
05578       else {
05579         *headp = node_new_list(node, NULL);
05580         headp = &(NCDR(*headp));
05581       }
05582     }
05583   }
05584 
05585   return r;
05586 }
05587 
05588 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
05589 static int
05590 parse_subexp(Node** top, OnigToken* tok, int term,
05591              UChar** src, UChar* end, ScanEnv* env)
05592 {
05593   int r;
05594   Node *node, **headp;
05595 
05596   *top = NULL;
05597   r = parse_branch(&node, tok, term, src, end, env);
05598   if (r < 0) {
05599     onig_node_free(node);
05600     return r;
05601   }
05602 
05603   if (r == term) {
05604     *top = node;
05605   }
05606   else if (r == TK_ALT) {
05607     *top  = onig_node_new_alt(node, NULL);
05608     headp = &(NCDR(*top));
05609     while (r == TK_ALT) {
05610       r = fetch_token(tok, src, end, env);
05611       if (r < 0) return r;
05612       r = parse_branch(&node, tok, term, src, end, env);
05613       if (r < 0) {
05614         onig_node_free(node);
05615         return r;
05616       }
05617 
05618       *headp = onig_node_new_alt(node, NULL);
05619       headp = &(NCDR(*headp));
05620     }
05621 
05622     if (tok->type != (enum TokenSyms )term)
05623       goto err;
05624   }
05625   else {
05626     onig_node_free(node);
05627   err:
05628     if (term == TK_SUBEXP_CLOSE)
05629       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
05630     else
05631       return ONIGERR_PARSER_BUG;
05632   }
05633 
05634   return r;
05635 }
05636 
05637 static int
05638 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
05639 {
05640   int r;
05641   OnigToken tok;
05642 
05643   r = fetch_token(&tok, src, end, env);
05644   if (r < 0) return r;
05645   r = parse_subexp(top, &tok, TK_EOT, src, end, env);
05646   if (r < 0) return r;
05647   return 0;
05648 }
05649 
05650 extern int
05651 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
05652                      regex_t* reg, ScanEnv* env)
05653 {
05654   int r;
05655   UChar* p;
05656 
05657 #ifdef USE_NAMED_GROUP
05658   names_clear(reg);
05659 #endif
05660 
05661   scan_env_clear(env);
05662   env->option         = reg->options;
05663   env->case_fold_flag = reg->case_fold_flag;
05664   env->enc            = reg->enc;
05665   env->syntax         = reg->syntax;
05666   env->pattern        = (UChar* )pattern;
05667   env->pattern_end    = (UChar* )end;
05668   env->reg            = reg;
05669 
05670   *root = NULL;
05671   p = (UChar* )pattern;
05672   r = parse_regexp(root, &p, (UChar* )end, env);
05673   reg->num_mem = env->num_mem;
05674   return r;
05675 }
05676 
05677 extern void
05678 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
05679                                 UChar* arg, UChar* arg_end)
05680 {
05681   env->error     = arg;
05682   env->error_end = arg_end;
05683 }
05684 

Generated on Wed Sep 8 2010 21:55:14 for Ruby by  doxygen 1.7.1