/* $NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $ */ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU Lesser General Public License v.2.1. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "dmlib.h" #include "parse_rx.h" struct parse_sp { /* scratch pad for the parsing process */ struct dm_pool *mem; int type; /* token type, 0 indicates a charset */ dm_bitset_t charset; /* The current charset */ const char *cursor; /* where we are in the regex */ const char *rx_end; /* 1pte for the expression being parsed */ }; static struct rx_node *_or_term(struct parse_sp *ps); static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr) { ps->type = 0; ps->cursor = ptr + 1; dm_bit_clear_all(ps->charset); dm_bit_set(ps->charset, c); } /* * Get the next token from the regular expression. * Returns: 1 success, 0 end of input, -1 error. */ static int _rx_get_token(struct parse_sp *ps) { int neg = 0, range = 0; char c, lc = 0; const char *ptr = ps->cursor; if (ptr == ps->rx_end) { /* end of input ? */ ps->type = -1; return 0; } switch (*ptr) { /* charsets and ncharsets */ case '[': ptr++; if (*ptr == '^') { dm_bit_set_all(ps->charset); /* never transition on zero */ dm_bit_clear(ps->charset, 0); neg = 1; ptr++; } else dm_bit_clear_all(ps->charset); while ((ptr < ps->rx_end) && (*ptr != ']')) { if (*ptr == '\\') { /* an escaped character */ ptr++; switch (*ptr) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; default: c = *ptr; } } else if (*ptr == '-' && lc) { /* we've got a range on our hands */ range = 1; ptr++; if (ptr == ps->rx_end) { log_error("Incomplete range" "specification"); return -1; } c = *ptr; } else c = *ptr; if (range) { /* add lc - c into the bitset */ if (lc > c) { char tmp = c; c = lc; lc = tmp; } for (; lc <= c; lc++) { if (neg) dm_bit_clear(ps->charset, lc); else dm_bit_set(ps->charset, lc); } range = 0; } else { /* add c into the bitset */ if (neg) dm_bit_clear(ps->charset, c); else dm_bit_set(ps->charset, c); } ptr++; lc = c; } if (ptr >= ps->rx_end) { ps->type = -1; return -1; } ps->type = 0; ps->cursor = ptr + 1; break; /* These characters are special, we just return their ASCII codes as the type. Sorted into ascending order to help the compiler */ case '(': case ')': case '*': case '+': case '?': case '|': ps->type = (int) *ptr; ps->cursor = ptr + 1; break; case '^': _single_char(ps, HAT_CHAR, ptr); break; case '$': _single_char(ps, DOLLAR_CHAR, ptr); break; case '.': /* The 'all but newline' character set */ ps->type = 0; ps->cursor = ptr + 1; dm_bit_set_all(ps->charset); dm_bit_clear(ps->charset, (int) '\n'); dm_bit_clear(ps->charset, (int) '\r'); dm_bit_clear(ps->charset, 0); break; case '\\': /* escaped character */ ptr++; if (ptr >= ps->rx_end) { log_error("Badly quoted character at end " "of expression"); ps->type = -1; return -1; } ps->type = 0; ps->cursor = ptr + 1; dm_bit_clear_all(ps->charset); switch (*ptr) { case 'n': dm_bit_set(ps->charset, (int) '\n'); break; case 'r': dm_bit_set(ps->charset, (int) '\r'); break; case 't': dm_bit_set(ps->charset, (int) '\t'); break; default: dm_bit_set(ps->charset, (int) *ptr); } break; default: /* add a single character to the bitset */ ps->type = 0; ps->cursor = ptr + 1; dm_bit_clear_all(ps->charset); dm_bit_set(ps->charset, (int) *ptr); break; } return 1; } static struct rx_node *_node(struct dm_pool *mem, int type, struct rx_node *l, struct rx_node *r) { struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n)); if (n) { if (!(n->charset = dm_bitset_create(mem, 256))) { dm_pool_free(mem, n); return NULL; } n->type = type; n->left = l; n->right = r; } return n; } static struct rx_node *_term(struct parse_sp *ps) { struct rx_node *n; switch (ps->type) { case 0: if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) { stack; return NULL; } dm_bit_copy(n->charset, ps->charset); _rx_get_token(ps); /* match charset */ break; case '(': _rx_get_token(ps); /* match '(' */ n = _or_term(ps); if (ps->type != ')') { log_error("missing ')' in regular expression"); return 0; } _rx_get_token(ps); /* match ')' */ break; default: n = 0; } return n; } static struct rx_node *_closure_term(struct parse_sp *ps) { struct rx_node *l, *n; if (!(l = _term(ps))) return NULL; for (;;) { switch (ps->type) { case '*': n = _node(ps->mem, STAR, l, NULL); break; case '+': n = _node(ps->mem, PLUS, l, NULL); break; case '?': n = _node(ps->mem, QUEST, l, NULL); break; default: return l; } if (!n) { stack; return NULL; } _rx_get_token(ps); l = n; } return n; } static struct rx_node *_cat_term(struct parse_sp *ps) { struct rx_node *l, *r, *n; if (!(l = _closure_term(ps))) return NULL; if (ps->type == '|') return l; if (!(r = _cat_term(ps))) return l; if (!(n = _node(ps->mem, CAT, l, r))) stack; return n; } static struct rx_node *_or_term(struct parse_sp *ps) { struct rx_node *l, *r, *n; if (!(l = _cat_term(ps))) return NULL; if (ps->type != '|') return l; _rx_get_token(ps); /* match '|' */ if (!(r = _or_term(ps))) { log_error("Badly formed 'or' expression"); return NULL; } if (!(n = _node(ps->mem, OR, l, r))) stack; return n; } struct rx_node *rx_parse_tok(struct dm_pool *mem, const char *begin, const char *end) { struct rx_node *r; struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps)); if (!ps) { stack; return NULL; } ps->mem = mem; ps->charset = dm_bitset_create(mem, 256); ps->cursor = begin; ps->rx_end = end; _rx_get_token(ps); /* load the first token */ if (!(r = _or_term(ps))) { log_error("Parse error in regex"); dm_pool_free(mem, ps); } return r; } struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str) { return rx_parse_tok(mem, str, str + strlen(str)); }