regexp_object.cpp
00001 // -*- c-basic-offset: 2 -*- 00002 /* 00003 * This file is part of the KDE libraries 00004 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 00005 * Copyright (C) 2003 Apple Computer, Inc. 00006 * 00007 * This library is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2 of the License, or (at your option) any later version. 00011 * 00012 * This library is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with this library; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 * 00021 */ 00022 00023 #include <stdio.h> 00024 00025 #include "value.h" 00026 #include "object.h" 00027 #include "types.h" 00028 #include "interpreter.h" 00029 #include "operations.h" 00030 #include "internal.h" 00031 #include "regexp.h" 00032 #include "regexp_object.h" 00033 #include "error_object.h" 00034 #include "lookup.h" 00035 00036 using namespace KJS; 00037 00038 // ------------------------------ RegExpPrototypeImp --------------------------- 00039 00040 // ECMA 15.9.4 00041 00042 const ClassInfo RegExpPrototypeImp::info = {"RegExp", 0, 0, 0}; 00043 00044 RegExpPrototypeImp::RegExpPrototypeImp(ExecState *exec, 00045 ObjectPrototypeImp *objProto, 00046 FunctionPrototypeImp *funcProto) 00047 : ObjectImp(objProto) 00048 { 00049 Value protect(this); 00050 setInternalValue(String("")); 00051 00052 // The constructor will be added later in RegExpObject's constructor (?) 00053 00054 static const Identifier execPropertyName("exec"); 00055 putDirect(execPropertyName, 00056 new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Exec, 0, execPropertyName), DontEnum); 00057 static const Identifier testPropertyName("test"); 00058 putDirect(testPropertyName, 00059 new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Test, 0, testPropertyName), DontEnum); 00060 putDirect(toStringPropertyName, 00061 new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::ToString, 0, toStringPropertyName), DontEnum); 00062 static const Identifier compilePropertyName("compile"); 00063 putDirect(compilePropertyName, 00064 new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Compile, 1, compilePropertyName), DontEnum); 00065 } 00066 00067 // ------------------------------ RegExpProtoFuncImp --------------------------- 00068 00069 RegExpProtoFuncImp::RegExpProtoFuncImp(ExecState * /*exec*/, FunctionPrototypeImp *funcProto, 00070 int i, int len, const Identifier &_ident) 00071 : InternalFunctionImp(funcProto), id(i) 00072 { 00073 Value protect(this); 00074 putDirect(lengthPropertyName, len, DontDelete|ReadOnly|DontEnum); 00075 ident = _ident; 00076 } 00077 00078 bool RegExpProtoFuncImp::implementsCall() const 00079 { 00080 return true; 00081 } 00082 00083 Value RegExpProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args) 00084 { 00085 if (!thisObj.inherits(&RegExpImp::info)) { 00086 if (thisObj.inherits(&RegExpPrototypeImp::info)) { 00087 switch (id) { 00088 case ToString: return String("//"); // FireFox returns /(?:)/ 00089 } 00090 } 00091 Object err = Error::create(exec,TypeError); 00092 exec->setException(err); 00093 return err; 00094 } 00095 00096 RegExpImp *reimp = static_cast<RegExpImp*>(thisObj.imp()); 00097 RegExp *re = reimp->regExp(); 00098 String s; 00099 UString str; 00100 switch (id) { 00101 case Exec: // 15.10.6.2 00102 case Test: 00103 { 00104 s = args[0].toString(exec); 00105 int length = s.value().size(); 00106 00107 // Get values from the last time (in case of /g) 00108 Value lastIndex = thisObj.get(exec,"lastIndex"); 00109 int i = lastIndex.isValid() ? lastIndex.toInt32(exec) : 0; 00110 bool globalFlag = thisObj.get(exec,"global").toBoolean(exec); 00111 if (!globalFlag) 00112 i = 0; 00113 if (i < 0 || i > length) { 00114 thisObj.put(exec,"lastIndex", Number(0), DontDelete | DontEnum); 00115 if (id == Test) 00116 return Boolean(false); 00117 else 00118 return Null(); 00119 } 00120 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp()); 00121 int **ovector = regExpObj->registerRegexp( re, s.value() ); 00122 00123 re->prepareMatch(s.value()); 00124 str = re->match(s.value(), i, 0L, ovector); 00125 re->doneMatch(); 00126 regExpObj->setSubPatterns(re->subPatterns()); 00127 00128 if (id == Test) 00129 return Boolean(!str.isNull()); 00130 00131 if (str.isNull()) // no match 00132 { 00133 if (globalFlag) 00134 thisObj.put(exec,"lastIndex",Number(0), DontDelete | DontEnum); 00135 return Null(); 00136 } 00137 else // success 00138 { 00139 if (globalFlag) 00140 thisObj.put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum); 00141 return regExpObj->arrayOfMatches(exec,str); 00142 } 00143 } 00144 break; 00145 case ToString: 00146 s = thisObj.get(exec,"source").toString(exec); 00147 str = "/"; 00148 str += s.value(); 00149 str += "/"; 00150 if (thisObj.get(exec,"global").toBoolean(exec)) { 00151 str += "g"; 00152 } 00153 if (thisObj.get(exec,"ignoreCase").toBoolean(exec)) { 00154 str += "i"; 00155 } 00156 if (thisObj.get(exec,"multiline").toBoolean(exec)) { 00157 str += "m"; 00158 } 00159 return String(str); 00160 case Compile: { 00161 RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0].toString(exec), args[1]); 00162 if (!newEngine) 00163 return exec->exception(); 00164 reimp->setRegExp(newEngine); 00165 return Value(reimp); 00166 } 00167 } 00168 00169 00170 return Undefined(); 00171 } 00172 00173 // ------------------------------ RegExpImp ------------------------------------ 00174 00175 const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0}; 00176 00177 RegExpImp::RegExpImp(RegExpPrototypeImp *regexpProto) 00178 : ObjectImp(regexpProto), reg(0L) 00179 { 00180 } 00181 00182 RegExpImp::~RegExpImp() 00183 { 00184 delete reg; 00185 } 00186 00187 void RegExpImp::setRegExp(RegExp *r) 00188 { 00189 delete reg; 00190 reg = r; 00191 00192 Object protect(this);//Protect self from GC (we are allocating a StringImp, and may be new) 00193 putDirect("global", (r->flags() & RegExp::Global) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 00194 DontDelete | ReadOnly | DontEnum); 00195 putDirect("ignoreCase", (r->flags() & RegExp::IgnoreCase) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 00196 DontDelete | ReadOnly | DontEnum); 00197 putDirect("multiline", (r->flags() & RegExp::Multiline) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 00198 DontDelete | ReadOnly | DontEnum); 00199 00200 putDirect("source", new StringImp(r->pattern()), DontDelete | ReadOnly | DontEnum); 00201 putDirect("lastIndex", NumberImp::zero(), DontDelete | DontEnum); 00202 } 00203 00204 // ------------------------------ RegExpObjectImp ------------------------------ 00205 00206 RegExpObjectImp::RegExpObjectImp(ExecState * /*exec*/, 00207 FunctionPrototypeImp *funcProto, 00208 RegExpPrototypeImp *regProto) 00209 00210 : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0) 00211 { 00212 Value protect(this); 00213 // ECMA 15.10.5.1 RegExp.prototype 00214 putDirect(prototypePropertyName, regProto, DontEnum|DontDelete|ReadOnly); 00215 00216 // no. of arguments for constructor 00217 putDirect(lengthPropertyName, NumberImp::two(), ReadOnly|DontDelete|DontEnum); 00218 } 00219 00220 RegExpObjectImp::~RegExpObjectImp() 00221 { 00222 delete [] lastOvector; 00223 } 00224 00225 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s ) 00226 { 00227 lastString = s; 00228 delete [] lastOvector; 00229 lastOvector = 0; 00230 lastNrSubPatterns = re->subPatterns(); 00231 return &lastOvector; 00232 } 00233 00234 Object RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const 00235 { 00236 List list; 00237 // The returned array contains 'result' as first item, followed by the list of matches 00238 list.append(String(result)); 00239 if ( lastOvector ) 00240 for ( unsigned int i = 1 ; i < lastNrSubPatterns + 1 ; ++i ) 00241 { 00242 UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] ); 00243 list.append(String(substring)); 00244 } 00245 Object arr = exec->lexicalInterpreter()->builtinArray().construct(exec, list); 00246 arr.put(exec, "index", Number(lastOvector[0])); 00247 arr.put(exec, "input", String(lastString)); 00248 return arr; 00249 } 00250 00251 Value RegExpObjectImp::get(ExecState *exec, const Identifier &p) const 00252 { 00253 UString s = p.ustring(); 00254 if (s[0] == '$' && lastOvector) 00255 { 00256 bool ok; 00257 unsigned long i = s.substr(1).toULong(&ok); 00258 if (ok) 00259 { 00260 if (i < lastNrSubPatterns + 1) 00261 { 00262 UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] ); 00263 return String(substring); 00264 } 00265 return String(""); 00266 } 00267 } 00268 return InternalFunctionImp::get(exec, p); 00269 } 00270 00271 bool RegExpObjectImp::hasProperty(ExecState *exec, const Identifier &p) const 00272 { 00273 UString s = p.ustring(); 00274 if (s[0] == '$' && lastOvector) { 00275 bool ok; 00276 (void)s.substr(1).toULong(&ok); 00277 if (ok) 00278 return true; 00279 } 00280 00281 return InternalFunctionImp::hasProperty(exec, p); 00282 } 00283 00284 bool RegExpObjectImp::implementsConstruct() const 00285 { 00286 return true; 00287 } 00288 00289 RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, const Value &flagsInput) 00290 { 00291 UString flags = flagsInput.type() == UndefinedType ? UString("") : flagsInput.toString(exec); 00292 00293 // Check for validity of flags 00294 for (int pos = 0; pos < flags.size(); ++pos) { 00295 switch (flags[pos].unicode()) { 00296 case 'g': 00297 case 'i': 00298 case 'm': 00299 break; 00300 default: { 00301 Object err = Error::create(exec, SyntaxError, 00302 "Invalid regular expression flags"); 00303 exec->setException(err); 00304 return 0; 00305 } 00306 } 00307 } 00308 00309 bool global = (flags.find("g") >= 0); 00310 bool ignoreCase = (flags.find("i") >= 0); 00311 bool multiline = (flags.find("m") >= 0); 00312 00313 int reflags = RegExp::None; 00314 if (global) 00315 reflags |= RegExp::Global; 00316 if (ignoreCase) 00317 reflags |= RegExp::IgnoreCase; 00318 if (multiline) 00319 reflags |= RegExp::Multiline; 00320 00321 RegExp *re = new RegExp(p, reflags); 00322 if (!re->isValid()) { 00323 Object err = Error::create(exec, SyntaxError, 00324 "Invalid regular expression"); 00325 exec->setException(err); 00326 delete re; 00327 return 0; 00328 } 00329 return re; 00330 } 00331 00332 // ECMA 15.10.4 00333 Object RegExpObjectImp::construct(ExecState *exec, const List &args) 00334 { 00335 UString p; 00336 if (args.isEmpty()) { 00337 p = ""; 00338 } else { 00339 Value a0 = args[0]; 00340 if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info)) { 00341 // It's a regexp. Check that no flags were passed. 00342 if (args.size() > 1 && args[1].type() != UndefinedType) { 00343 Object err = Error::create(exec,TypeError); 00344 exec->setException(err); 00345 return err; 00346 } 00347 RegExpImp *rimp = static_cast<RegExpImp*>(Object::dynamicCast(a0).imp()); 00348 p = rimp->regExp()->pattern(); 00349 } else { 00350 p = a0.toString(exec); 00351 } 00352 } 00353 00354 RegExp* re = makeEngine(exec, p, args[1]); 00355 if (!re) 00356 return exec->exception().toObject(exec); 00357 00358 RegExpPrototypeImp *proto = static_cast<RegExpPrototypeImp*>(exec->lexicalInterpreter()->builtinRegExpPrototype().imp()); 00359 RegExpImp *dat = new RegExpImp(proto); 00360 Object obj(dat); // protect from GC 00361 dat->setRegExp(re); 00362 00363 return obj; 00364 } 00365 00366 bool RegExpObjectImp::implementsCall() const 00367 { 00368 return true; 00369 } 00370 00371 // ECMA 15.10.3 00372 Value RegExpObjectImp::call(ExecState *exec, Object &/*thisObj*/, 00373 const List &args) 00374 { 00375 // TODO: handle RegExp argument case (15.10.3.1) 00376 00377 return construct(exec, args); 00378 }