kjs Library API Documentation

regexp.cpp

00001 // -*- c-basic-offset: 2 -*- 00002 /* 00003 * This file is part of the KDE libraries 00004 * Copyright (C) 1999-2001 Harri Porten (porten@kde.org) 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 00022 #include "regexp.h" 00023 00024 #include <stdio.h> 00025 #include <stdlib.h> 00026 #include <string.h> 00027 00028 using namespace KJS; 00029 00030 RegExp::RegExp(const UString &p, int f) 00031 : pattern(p), flgs(f), m_notEmpty(false) 00032 { 00033 #ifdef HAVE_PCREPOSIX 00034 int pcreflags = 0; 00035 const char *perrormsg; 00036 int errorOffset; 00037 00038 if (flgs & IgnoreCase) 00039 pcreflags |= PCRE_CASELESS; 00040 00041 if (flgs & Multiline) 00042 pcreflags |= PCRE_MULTILINE; 00043 00044 pcregex = pcre_compile(p.ascii(), pcreflags, 00045 &perrormsg, &errorOffset, NULL); 00046 #ifndef NDEBUG 00047 if (!pcregex) 00048 fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg); 00049 #endif 00050 00051 #ifdef PCRE_INFO_CAPTURECOUNT 00052 // Get number of subpatterns that will be returned 00053 int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns); 00054 if (rc != 0) 00055 #endif 00056 nrSubPatterns = 0; // fallback. We always need the first pair of offsets. 00057 00058 #else /* HAVE_PCREPOSIX */ 00059 00060 nrSubPatterns = 0; // determined in match() with POSIX regex. 00061 int regflags = 0; 00062 #ifdef REG_EXTENDED 00063 regflags |= REG_EXTENDED; 00064 #endif 00065 #ifdef REG_ICASE 00066 if ( f & IgnoreCase ) 00067 regflags |= REG_ICASE; 00068 #endif 00069 00070 //NOTE: Multiline is not feasible with POSIX regex. 00071 //if ( f & Multiline ) 00072 // ; 00073 // Note: the Global flag is already handled by RegExpProtoFunc::execute 00074 00075 if (regcomp(&preg, p.ascii(), regflags) != 0) { 00076 /* TODO: throw JS exception */ 00077 regcomp(&preg, "", regflags); 00078 } 00079 #endif 00080 } 00081 00082 RegExp::~RegExp() 00083 { 00084 #ifdef HAVE_PCREPOSIX 00085 if (pcregex) 00086 pcre_free(pcregex); 00087 #else 00088 /* TODO: is this really okay after an error ? */ 00089 regfree(&preg); 00090 #endif 00091 } 00092 00093 UString RegExp::match(const UString &s, int i, int *pos, int **ovector) 00094 { 00095 if (i < 0) 00096 i = 0; 00097 if (ovector) 00098 *ovector = 0L; 00099 int dummyPos; 00100 if (!pos) 00101 pos = &dummyPos; 00102 *pos = -1; 00103 if (i > s.size() || s.isNull()) 00104 return UString::null; 00105 00106 #ifdef HAVE_PCREPOSIX 00107 CString buffer(s.cstring()); 00108 int bufferSize = buffer.size(); 00109 int ovecsize = (nrSubPatterns+1)*3; // see pcre docu 00110 if (ovector) *ovector = new int[ovecsize]; 00111 if (!pcregex) 00112 return UString::null; 00113 00114 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i, 00115 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0, // see man pcretest 00116 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH) 00117 { 00118 // Failed to match. 00119 if ((flgs & Global) && m_notEmpty && ovector) 00120 { 00121 // We set m_notEmpty ourselves, to look for a non-empty match 00122 // (see man pcretest or pcretest.c for details). 00123 // So we don't stop here, we want to try again at i+1. 00124 fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n"); 00125 m_notEmpty = 0; 00126 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0, 00127 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH) 00128 return UString::null; 00129 } 00130 else // done 00131 return UString::null; 00132 } 00133 00134 // Got a match, proceed with it. 00135 00136 if (!ovector) 00137 return UString::null; // don't rely on the return value if you pass ovector==0 00138 #else 00139 const uint maxMatch = 10; 00140 regmatch_t rmatch[maxMatch]; 00141 00142 char *str = strdup(s.ascii()); // TODO: why ??? 00143 if (regexec(&preg, str + i, maxMatch, rmatch, 0)) { 00144 free(str); 00145 return UString::null; 00146 } 00147 free(str); 00148 00149 if (!ovector) { 00150 *pos = rmatch[0].rm_so + i; 00151 return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so); 00152 } 00153 00154 // map rmatch array to ovector used in PCRE case 00155 nrSubPatterns = 0; 00156 for(uint j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) 00157 nrSubPatterns++; 00158 int ovecsize = (nrSubPatterns+1)*3; // see above 00159 *ovector = new int[ovecsize]; 00160 for (uint j = 0; j < nrSubPatterns + 1; j++) { 00161 if (j>maxMatch) 00162 break; 00163 (*ovector)[2*j] = rmatch[j].rm_so + i; 00164 (*ovector)[2*j+1] = rmatch[j].rm_eo + i; 00165 } 00166 #endif 00167 00168 *pos = (*ovector)[0]; 00169 #ifdef HAVE_PCREPOSIX // TODO check this stuff in non-pcre mode 00170 if ( *pos == (*ovector)[1] && (flgs & Global) ) 00171 { 00172 // empty match, next try will be with m_notEmpty=true 00173 m_notEmpty=true; 00174 } 00175 #endif 00176 return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]); 00177 } 00178 00179 #if 0 // unused 00180 bool RegExp::test(const UString &s, int) 00181 { 00182 #ifdef HAVE_PCREPOSIX 00183 int ovector[300]; 00184 CString buffer(s.cstring()); 00185 00186 if (s.isNull() || 00187 pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0, 00188 0, ovector, 300) == PCRE_ERROR_NOMATCH) 00189 return false; 00190 else 00191 return true; 00192 00193 #else 00194 00195 char *str = strdup(s.ascii()); 00196 int r = regexec(&preg, str, 0, 0, 0); 00197 free(str); 00198 00199 return r == 0; 00200 #endif 00201 } 00202 #endif
KDE Logo
This file is part of the documentation for kjs Library Version 3.3.1.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Sun Oct 17 11:28:51 2004 by doxygen 1.3.8 written by Dimitri van Heesch, © 1997-2003