00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017
00023 U_NAMESPACE_BEGIN
00024
// Forward declarations. Within this header these four types appear only as
// pointer or reference parameters/members (ParsePosition&, const SymbolTable*,
// UVector*, RuleCharacterIterator&), so their full definitions are not needed
// here.
00025 class ParsePosition;
00026 class SymbolTable;
00027 class UVector;
00028 class RuleCharacterIterator;
00029
// UnicodeSet: a UnicodeFilter subclass whose interface offers set-style
// operations (add / retain / remove / complement, contains*, range accessors)
// over UChar32 values and UnicodeString elements, plus pattern parsing and
// pattern generation entry points.
// NOTE(review): the original API doc comments are missing from this listing.
// The notes below describe only what the declarations themselves show;
// anything marked "presumably" must be confirmed against the ICU API
// reference or the implementation file.
// U_COMMON_API is presumably the import/export macro of the library that
// owns this class — TODO confirm.
00261 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00262
// Private state (members declared before the first access specifier of a
// `class` are private).
// list and buffer are raw UChar32 arrays; len, capacity and bufferCapacity
// are presumably the used length of list and the allocated sizes of list
// and buffer respectively — TODO confirm in the implementation.
00263 int32_t len;
00264 int32_t capacity;
00265 int32_t bufferCapacity;
00266 UChar32* list;
00267 UChar32* buffer;
00268
// Pointer to a UVector (only forward-declared in this header). Presumably
// the storage behind getStringCount()/getString() — TODO confirm.
00269 UVector* strings;
00270
// Presumably the pattern text associated with this set (see toPattern and
// applyPattern) — TODO confirm how and when it is kept in sync.
00280 UnicodeString pat;
00281
00282 public:
00283
// Class-scope constants for the value range: 0 and 0x10ffff, i.e. the first
// and last Unicode code points.
00284 enum {
00289 MIN_VALUE = 0,
00290
00295 MAX_VALUE = 0x10ffff
00296 };
00297
00298
00299
00300
00301
00302 public:
00303
// ---- Construction, destruction, copying --------------------------------
// Default constructor; presumably yields an empty set — TODO confirm.
00308 UnicodeSet();
00309
// Construct from a start/end pair of code points (same parameter list as
// set(start, end) below).
00318 UnicodeSet(UChar32 start, UChar32 end);
00319
// Pattern-based constructors. Their parameter lists mirror the three public
// applyPattern overloads declared further down. Each takes a UErrorCode&
// in/out status argument (ICU's error-reporting convention).
00328 UnicodeSet(const UnicodeString& pattern,
00329 UErrorCode& status);
00330
00343 UnicodeSet(const UnicodeString& pattern,
00344 uint32_t options,
00345 const SymbolTable* symbols,
00346 UErrorCode& status);
00347
// This overload additionally takes a ParsePosition by non-const reference,
// so the callee is able to update it.
00361 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00362 uint32_t options,
00363 const SymbolTable* symbols,
00364 UErrorCode& status);
00365
// Copy constructor, virtual destructor and copy assignment are all
// user-declared; presumably required because the class holds the raw
// list/buffer/strings pointers above — TODO confirm ownership.
00370 UnicodeSet(const UnicodeSet& o);
00371
00376 virtual ~UnicodeSet();
00377
00382 UnicodeSet& operator=(const UnicodeSet& o);
00383
// ---- Comparison, cloning, hashing --------------------------------------
// operator== is virtual; operator!= is defined inline after the class as
// its negation.
00395 virtual UBool operator==(const UnicodeSet& o) const;
00396
00402 UBool operator!=(const UnicodeSet& o) const;
00403
// Returns a UnicodeFunctor*; presumably a heap-allocated copy that the
// caller owns — TODO confirm.
00410 virtual UnicodeFunctor* clone() const;
00411
00419 virtual int32_t hashCode(void) const;
00420
00421
00422
00423
00424
// ---- Bulk assignment and pattern handling ------------------------------
// Returns UnicodeSet&, presumably *this to allow call chaining (the same
// return type is used by the other mutators below) — TODO confirm.
00434 UnicodeSet& set(UChar32 start, UChar32 end);
00435
// Static check on the text of `pattern` at index `pos`; the exact test is
// not visible in this header.
00441 static UBool resemblesPattern(const UnicodeString& pattern,
00442 int32_t pos);
00443
// Public applyPattern overloads (see the matching constructors above).
00455 UnicodeSet& applyPattern(const UnicodeString& pattern,
00456 UErrorCode& status);
00457
00473 UnicodeSet& applyPattern(const UnicodeString& pattern,
00474 uint32_t options,
00475 const SymbolTable* symbols,
00476 UErrorCode& status);
00477
00508 UnicodeSet& applyPattern(const UnicodeString& pattern,
00509 ParsePosition& pos,
00510 uint32_t options,
00511 const SymbolTable* symbols,
00512 UErrorCode& status);
00513
// Writes into the caller-supplied `result` and returns a UnicodeString&
// (presumably `result` itself). escapeUnprintable defaults to FALSE.
00526 virtual UnicodeString& toPattern(UnicodeString& result,
00527 UBool escapeUnprintable = FALSE) const;
00528
// Property-based population: by UProperty enum + integer value, or by
// property/value names given as strings.
00550 UnicodeSet& applyIntPropertyValue(UProperty prop,
00551 int32_t value,
00552 UErrorCode& ec);
00553
00582 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00583 const UnicodeString& value,
00584 UErrorCode& ec);
00585
// ---- Queries -----------------------------------------------------------
00594 virtual int32_t size(void) const;
00595
00602 virtual UBool isEmpty(void) const;
00603
// contains / containsAll / containsNone / containsSome are each overloaded
// for a single code point or a start/end pair, for another UnicodeSet, and
// for a UnicodeString, as declared below.
00610 virtual UBool contains(UChar32 c) const;
00611
00620 virtual UBool contains(UChar32 start, UChar32 end) const;
00621
00629 UBool contains(const UnicodeString& s) const;
00630
00638 virtual UBool containsAll(const UnicodeSet& c) const;
00639
00647 UBool containsAll(const UnicodeString& s) const;
00648
00657 UBool containsNone(UChar32 start, UChar32 end) const;
00658
00666 UBool containsNone(const UnicodeSet& c) const;
00667
00675 UBool containsNone(const UnicodeString& s) const;
00676
// The three containsSome overloads are defined inline after the class, each
// as the negation of the containsNone overload with the same parameters.
00685 inline UBool containsSome(UChar32 start, UChar32 end) const;
00686
00694 inline UBool containsSome(const UnicodeSet& s) const;
00695
00703 inline UBool containsSome(const UnicodeString& s) const;
00704
// Matching entry point: `offset` is passed by non-const reference, so the
// callee can move it. Presumably overrides a UnicodeMatcher/UnicodeFilter
// virtual — TODO confirm in the base class headers.
00709 virtual UMatchDegree matches(const Replaceable& text,
00710 int32_t& offset,
00711 int32_t limit,
00712 UBool incremental);
00713
00714 private:
// Private static helper taking a text, a start/limit pair and a string;
// presumably used by matches() — TODO confirm.
00736 static int32_t matchRest(const Replaceable& text,
00737 int32_t start, int32_t limit,
00738 const UnicodeString& s);
00739
// Private lookup helper; returns an int32_t for code point c. The meaning
// of the returned value is not visible in this header.
00749 int32_t findCodePoint(UChar32 c) const;
00750
00751 public:
00752
// Takes the destination set by non-const reference (toUnionTo).
00760 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00761
// Conversions between a code point and an int32_t index, in both
// directions; behaviour for absent/out-of-range arguments is not visible
// here — TODO confirm.
00770 int32_t indexOf(UChar32 c) const;
00771
00781 UChar32 charAt(int32_t index) const;
00782
// ---- Element-level mutators (all return UnicodeSet&) -------------------
00796 virtual UnicodeSet& add(UChar32 start, UChar32 end);
00797
00804 UnicodeSet& add(UChar32 c);
00805
00816 UnicodeSet& add(const UnicodeString& s);
00817
00818 private:
// Private helpers for the UnicodeString overloads; getSingleCP's return
// convention is not visible in this header — TODO confirm.
00824 static int32_t getSingleCP(const UnicodeString& s);
00825
00826 void _add(const UnicodeString& s);
00827
00828 public:
// *All(const UnicodeString&) variants: same four operations as the
// UnicodeSet-taking versions below, but taking a string argument.
00836 UnicodeSet& addAll(const UnicodeString& s);
00837
00845 UnicodeSet& retainAll(const UnicodeString& s);
00846
00854 UnicodeSet& complementAll(const UnicodeString& s);
00855
00863 UnicodeSet& removeAll(const UnicodeString& s);
00864
// Static factories returning a raw UnicodeSet*; presumably the caller owns
// and must delete the result — TODO confirm.
00873 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
00874
00875
00883 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
00884
00897 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00898
00899
00904 UnicodeSet& retain(UChar32 c);
00905
00918 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00919
00926 UnicodeSet& remove(UChar32 c);
00927
00936 UnicodeSet& remove(const UnicodeString& s);
00937
// complement() with no arguments applies to the whole set; the overloads
// take a range, a single code point, or a string.
00944 virtual UnicodeSet& complement(void);
00945
00959 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00960
00967 UnicodeSet& complement(UChar32 c);
00968
00978 UnicodeSet& complement(const UnicodeString& s);
00979
// ---- Set-with-set operations -------------------------------------------
00991 virtual UnicodeSet& addAll(const UnicodeSet& c);
00992
01003 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01004
01015 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01016
01026 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01027
01033 virtual UnicodeSet& clear(void);
01034
// `attribute` is an int32_t selector; its permitted values are not declared
// in this header (presumably the USET_* constants from unicode/uset.h —
// TODO confirm).
01058 UnicodeSet& closeOver(int32_t attribute);
01059
// ---- Range-level access ------------------------------------------------
// Range count plus start/end accessors addressed by an int32_t index.
01067 virtual int32_t getRangeCount(void) const;
01068
01076 virtual UChar32 getRangeStart(int32_t index) const;
01077
01085 virtual UChar32 getRangeEnd(int32_t index) const;
01086
// Writes into the caller-provided uint16_t buffer `dest` of capacity
// `destCapacity`; presumably returns the length written or required —
// TODO confirm the overflow contract.
01135 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01136
01142 virtual UnicodeSet& compact();
01143
// Static and dynamic class-ID accessors (presumably ICU's RTTI-substitute
// convention — TODO confirm).
01155 static UClassID U_EXPORT2 getStaticClassID(void);
01156
01165 virtual UClassID getDynamicClassID(void) const;
01166
01167 private:
01168
01169
01170
// USetAccess is granted friendship; the two private accessors that follow
// are presumably what it needs — TODO confirm.
01171 friend class USetAccess;
01172
01173 int32_t getStringCount() const;
01174
01175 const UnicodeString* getString(int32_t index) const;
01176
01177
01178
01179
01180
01181 private:
01182
// Private virtual; presumably overrides a UnicodeMatcher hook — TODO
// confirm in the base class.
01188 virtual UBool matchesIndexValue(uint8_t v) const;
01189
01190 private:
01191
01192
01193
01194
01195
// ---- Private implementation helpers ------------------------------------
// Private applyPattern overload working on a RuleCharacterIterator and
// writing a rebuilt pattern into `rebuiltPat`; returns void, unlike the
// public overloads.
01196 void applyPattern(RuleCharacterIterator& chars,
01197 const SymbolTable* symbols,
01198 UnicodeString& rebuiltPat,
01199 uint32_t options,
01200 UErrorCode& ec);
01201
01202
01203
01204
01205
// Storage management for the list/buffer members (names suggest growth and
// swapping of the two arrays — TODO confirm in the implementation).
01206 void ensureCapacity(int32_t newLen);
01207
01208 void ensureBufferCapacity(int32_t newLen);
01209
01210 void swapBuffers(void);
01211
// Returns UBool; presumably reports whether `strings` could be allocated —
// TODO confirm.
01212 UBool allocateStrings();
01213
// Pattern-generation helpers; unlike the public toPattern, the
// escapeUnprintable argument has no default here.
01214 UnicodeString& _toPattern(UnicodeString& result,
01215 UBool escapeUnprintable) const;
01216
01217 UnicodeString& _generatePattern(UnicodeString& result,
01218 UBool escapeUnprintable) const;
01219
// Static helpers that append either a string or a single code point to
// `buf`.
01220 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01221
01222 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01223
01224
01225
01226
01227
// Low-level set operations on a raw UChar32 array of `otherLen` entries.
// The meaning of `polarity` is not visible in this header — TODO confirm.
01228 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01229
01230 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01231
01232 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01233
// Property-pattern detection, for string + position input and for
// RuleCharacterIterator input.
01239 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01240 int32_t pos);
01241
01242 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01243 int32_t iterOpts);
01244
// Property-pattern application, again in string/ParsePosition and
// RuleCharacterIterator flavours (the latter returns void).
01283 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01284 ParsePosition& ppos,
01285 UErrorCode &ec);
01286
01287 void applyPropertyPattern(RuleCharacterIterator& chars,
01288 UnicodeString& rebuiltPat,
01289 UErrorCode& ec);
01290
// Predicate callback type: receives a code point and an opaque context
// pointer and returns a UBool.
01295 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01296
// Applies a Filter callback with its context; `src` is the same kind of
// int32_t selector that getInclusions() takes (presumably identifying a
// property data source — TODO confirm).
01306 void applyFilter(Filter filter,
01307 void* context,
01308 int32_t src,
01309 UErrorCode &status);
01310
// Returns a pointer to a const UnicodeSet for the given `src`; ownership of
// the returned set is not visible here (presumably shared/cached — TODO
// confirm).
01314 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
01315
// UnicodeSetIterator is granted access to the private members.
01316 friend class UnicodeSetIterator;
01317 };
01318
// Inequality is defined purely in terms of operator==: the result is true
// exactly when operator==(o) reports false, so the two operators can never
// disagree.
01319 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01320 return (this->operator==(o)) == FALSE;
01321 }
01322
// Range overload: true exactly when containsNone(start, end) reports false.
01323 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01324 return containsNone(start, end) == FALSE;
01325 }
01326
// Set overload: true exactly when containsNone(s) reports false for the
// given set.
01327 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01328 return containsNone(s) == FALSE;
01329 }
01330
// String overload: true exactly when containsNone(s) reports false for the
// given string.
01331 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01332 return containsNone(s) == FALSE;
01333 }
01334
01335 U_NAMESPACE_END
01336
01337 #endif