lexer.h
00001 // -*- c-basic-offset: 2 -*- 00002 /* 00003 * This file is part of the KDE libraries 00004 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Library General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Library General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Library General Public License 00017 * along with this library; see the file COPYING.LIB. If not, write to 00018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00019 * Boston, MA 02110-1301, USA. 00020 * 00021 */ 00022 00023 #ifndef _KJSLEXER_H_ 00024 #define _KJSLEXER_H_ 00025 00026 #include "ustring.h" 00027 00028 00029 namespace KJS { 00030 00031 class Identifier; 00032 00033 class RegExp; 00034 00035 class Lexer { 00036 public: 00037 Lexer(); 00038 ~Lexer(); 00039 static Lexer *curr(); 00040 00041 void setCode(const UChar *c, unsigned int len); 00042 int lex(); 00043 00044 int lineNo() const { return yylineno + 1; } 00045 00046 bool prevTerminator() const { return terminator; } 00047 00048 enum State { Start, 00049 IdentifierOrKeyword, 00050 Identifier, 00051 InIdentifierOrKeyword, 00052 InIdentifier, 00053 InIdentifierUnicodeEscapeStart, 00054 InIdentifierUnicodeEscape, 00055 InSingleLineComment, 00056 InMultiLineComment, 00057 InNum, 00058 InNum0, 00059 InHex, 00060 InOctal, 00061 InDecimal, 00062 InExponentIndicator, 00063 InExponent, 00064 Hex, 00065 Octal, 00066 Number, 00067 String, 00068 Eof, 00069 InString, 00070 InEscapeSequence, 00071 InHexEscape, 00072 InUnicodeEscape, 00073 Other, 00074 Bad }; 00075 00076 bool scanRegExp(); 00077 UString pattern, flags; 00078 bool hadError() const { return foundBad; } 00079 00080 static bool isWhiteSpace(unsigned short c); 00081 static bool isIdentLetter(unsigned short c); 00082 static bool isDecimalDigit(unsigned short c); 00083 static bool isHexDigit(unsigned short c); 00084 static bool isOctalDigit(unsigned short c); 00085 00086 private: 00087 int yylineno; 00088 bool done; 00089 char *buffer8; 00090 UChar *buffer16; 00091 unsigned int size8, size16; 00092 unsigned int pos8, pos16; 00093 bool terminator; 00094 bool restrKeyword; 00095 // encountered delimiter like "'" and "}" on last run 00096 bool delimited; 00097 bool skipLF; 00098 bool skipCR; 00099 bool convertNextIdentifier; 00100 int stackToken; 00101 int lastToken; 00102 bool foundBad; 00103 00104 State state; 00105 void setDone(State s); 00106 unsigned int pos; 00107 void shift(unsigned int p); 00108 void nextLine(); 00109 int lookupKeyword(const char *); 00110 00111 int matchPunctuator(unsigned short c1, unsigned short c2, 00112 unsigned short c3, unsigned short c4); 00113 unsigned short singleEscape(unsigned short c) const; 00114 unsigned short convertOctal(unsigned short c1, unsigned short c2, 00115 unsigned short c3) const; 00116 public: 00117 static unsigned char convertHex(unsigned short c1); 00118 static unsigned char convertHex(unsigned short c1, unsigned short c2); 00119 static UChar convertUnicode(unsigned short c1, unsigned short c2, 00120 unsigned short c3, unsigned short c4); 00121 00122 #ifdef KJS_DEBUG_MEM 00123 00126 static void globalClear(); 00127 #endif 00128 00129 void doneParsing(); 00130 00131 private: 00132 00133 void record8(unsigned short c); 00134 void record16(int c); 00135 void record16(UChar c); 00136 00137 KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos); 00138 UString *makeUString(UChar *buffer, unsigned int pos); 00139 00140 const UChar *code; 00141 unsigned int length; 00142 int yycolumn; 00143 #ifndef KJS_PURE_ECMA 00144 int bol; // begin of line 00145 #endif 00146 00147 // current and following unicode characters (int to allow for -1 for end-of-file marker) 00148 int current, next1, next2, next3; 00149 00150 UString **strings; 00151 unsigned int numStrings; 00152 unsigned int stringsCapacity; 00153 00154 KJS::Identifier **identifiers; 00155 unsigned int numIdentifiers; 00156 unsigned int identifiersCapacity; 00157 00158 // for future extensions 00159 class LexerPrivate; 00160 LexerPrivate *priv; 00161 }; 00162 00163 } // namespace 00164 00165 #endif