00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef REGEX_H
00017 #define REGEX_H
00018
00019
00020
00045 #include "unicode/utypes.h"
00046
00047 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00048
00049 #include "unicode/uobject.h"
00050 #include "unicode/unistr.h"
00051 #include "unicode/parseerr.h"
00052
00053 #include "unicode/uregex.h"
00054
00055 U_NAMESPACE_BEGIN
00056
00057
00058
00059
// Forward declarations of types that the class declarations in this header
// refer to only through pointers, references, or friend declarations, so the
// headers that define them do not have to be included here.
class RegexMatcher;
class RegexPattern;
class UVector;
class UVector32;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
class RuleBasedBreakIterator;
class RegexCImpl;
00069
00070
00071
00072
/**
 * Debugging hook for a compiled pattern.
 *
 * When REGEX_DEBUG is defined, RegexPatternDump is declared as a real
 * function taking the pattern to inspect. Otherwise it is a function-like
 * macro that expands to nothing, so call sites compile away and their
 * argument expression is never evaluated.
 *
 * @param pat  The pattern to dump (only used when REGEX_DEBUG is defined).
 */
#ifdef REGEX_DEBUG
U_INTERNAL void U_EXPORT2
    RegexPatternDump(const RegexPattern *pat);
#else
#define RegexPatternDump(pat)
#endif
00083
00084
00085
00097 class U_I18N_API RegexPattern: public UObject {
00098 public:
00099
00107 RegexPattern();
00108
00115 RegexPattern(const RegexPattern &source);
00116
00122 virtual ~RegexPattern();
00123
00132 UBool operator==(const RegexPattern& that) const;
00133
00142 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00143
00149 RegexPattern &operator =(const RegexPattern &source);
00150
00158 virtual RegexPattern *clone() const;
00159
00160
00185 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00186 UParseError &pe,
00187 UErrorCode &status);
00188
00213 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00214 uint32_t flags,
00215 UParseError &pe,
00216 UErrorCode &status);
00217
00218
00241 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00242 uint32_t flags,
00243 UErrorCode &status);
00244
00245
00251 virtual uint32_t flags() const;
00252
00270 virtual RegexMatcher *matcher(const UnicodeString &input,
00271 UErrorCode &status) const;
00272
00273 private:
00285 RegexMatcher *matcher(const UChar *input,
00286 UErrorCode &status) const;
00287 public:
00288
00289
00301 virtual RegexMatcher *matcher(UErrorCode &status) const;
00302
00303
00318 static UBool U_EXPORT2 matches(const UnicodeString ®ex,
00319 const UnicodeString &input,
00320 UParseError &pe,
00321 UErrorCode &status);
00322
00323
00328 virtual UnicodeString pattern() const;
00329
00330
00356 virtual int32_t split(const UnicodeString &input,
00357 UnicodeString dest[],
00358 int32_t destCapacity,
00359 UErrorCode &status) const;
00360
00361
00367 virtual UClassID getDynamicClassID() const;
00368
00374 static UClassID U_EXPORT2 getStaticClassID();
00375
00376 private:
00377
00378
00379
00380 UnicodeString fPattern;
00381 uint32_t fFlags;
00382
00383 UVector32 *fCompiledPat;
00384 UnicodeString fLiteralText;
00385
00386
00387 UVector *fSets;
00388 Regex8BitSet *fSets8;
00389
00390
00391 UErrorCode fDeferredStatus;
00392
00393
00394 int32_t fMinMatchLen;
00395
00396
00397
00398
00399 int32_t fFrameSize;
00400
00401
00402 int32_t fDataSize;
00403
00404
00405
00406 UVector32 *fGroupMap;
00407
00408
00409 int32_t fMaxCaptureDigits;
00410
00411 UnicodeSet **fStaticSets;
00412
00413
00414 Regex8BitSet *fStaticSets8;
00415
00416
00417 int32_t fStartType;
00418 int32_t fInitialStringIdx;
00419 int32_t fInitialStringLen;
00420 UnicodeSet *fInitialChars;
00421 UChar32 fInitialChar;
00422 Regex8BitSet *fInitialChars8;
00423
00424 friend class RegexCompile;
00425 friend class RegexMatcher;
00426 friend class RegexCImpl;
00427
00428
00429
00430
00431 void init();
00432 void zap();
00433 #ifdef REGEX_DEBUG
00434 void dumpOp(int32_t index) const;
00435 friend void U_EXPORT2 RegexPatternDump(const RegexPattern *);
00436 #endif
00437
00438 };
00439
00440
00441
00451 class U_I18N_API RegexMatcher: public UObject {
00452 public:
00453
00468 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00469
00491 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00492 uint32_t flags, UErrorCode &status);
00493
00494 private:
00506 RegexMatcher(const UnicodeString ®exp, const UChar *input,
00507 uint32_t flags, UErrorCode &status);
00508 public:
00509
00510
00516 virtual ~RegexMatcher();
00517
00518
00525 virtual UBool matches(UErrorCode &status);
00526
00535 virtual UBool matches(int32_t startIndex, UErrorCode &status);
00536
00537
00538
00539
00552 virtual UBool lookingAt(UErrorCode &status);
00553
00554
00568 virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
00569
00582 virtual UBool find();
00583
00584
00594 virtual UBool find(int32_t start, UErrorCode &status);
00595
00596
00606 virtual UnicodeString group(UErrorCode &status) const;
00607
00608
00621 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00622
00623
00629 virtual int32_t groupCount() const;
00630
00631
00639 virtual int32_t start(UErrorCode &status) const;
00640
00641
00655 virtual int32_t start(int32_t group, UErrorCode &status) const;
00656
00657
00667 virtual int32_t end(UErrorCode &status) const;
00668
00669
00683 virtual int32_t end(int32_t group, UErrorCode &status) const;
00684
00685
00694 virtual RegexMatcher &reset();
00695
00696
00706 virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
00707
00708
00722 virtual RegexMatcher &reset(const UnicodeString &input);
00723
00724 private:
00736 virtual RegexMatcher &reset(const UChar *input);
00737 public:
00738
00745 virtual const UnicodeString &input() const;
00746
00747
00753 virtual const RegexPattern &pattern() const;
00754
00755
00772 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
00773
00774
00795 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
00796
00824 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00825 const UnicodeString &replacement, UErrorCode &status);
00826
00827
00838 virtual UnicodeString &appendTail(UnicodeString &dest);
00839
00840
00841
00866 virtual int32_t split(const UnicodeString &input,
00867 UnicodeString dest[],
00868 int32_t destCapacity,
00869 UErrorCode &status);
00870
00871
00872
00878 void setTrace(UBool state);
00879
00880
00886 static UClassID U_EXPORT2 getStaticClassID();
00887
00893 virtual UClassID getDynamicClassID() const;
00894
00895 private:
00896
00897
00898 RegexMatcher();
00899 RegexMatcher(const RegexPattern *pat);
00900 RegexMatcher(const RegexMatcher &other);
00901 RegexMatcher &operator =(const RegexMatcher &rhs);
00902 friend class RegexPattern;
00903 friend class RegexCImpl;
00904
00905
00906
00907
00908
00909 void MatchAt(int32_t startIdx, UErrorCode &status);
00910 inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
00911 UBool isWordBoundary(int32_t pos);
00912 UBool isUWordBoundary(int32_t pos);
00913 REStackFrame *resetStack();
00914 inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
00915 int32_t frameSize, UErrorCode &status);
00916
00917
00918 const RegexPattern *fPattern;
00919 RegexPattern *fPatternOwned;
00920
00921 const UnicodeString *fInput;
00922
00923 UBool fMatch;
00924 int32_t fMatchStart;
00925 int32_t fMatchEnd;
00926 int32_t fLastMatchEnd;
00927
00928 int32_t fLastReplaceEnd;
00929
00930 UVector32 *fStack;
00931 REStackFrame *fFrame;
00932
00933
00934
00935 int32_t *fData;
00936 int32_t fSmallData[8];
00937
00938 UBool fTraceDebug;
00939
00940 UErrorCode fDeferredStatus;
00941
00942
00943 RuleBasedBreakIterator *fWordBreakItr;
00944
00945
00946 };
00947
00948 U_NAMESPACE_END
00949 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00950 #endif