uniset.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 * Copyright (C) 1999-2006, International Business Machines Corporation
00004 * and others. All Rights Reserved.
00005 ***************************************************************************
00006 *   Date        Name        Description
00007 *   10/20/99    alan        Creation.
00008 ***************************************************************************
00009 */
00010 
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013 
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017 
00023 U_NAMESPACE_BEGIN
00024 
00025 class ParsePosition;          // forward declaration: used in this header only as ParsePosition&
00026 class SymbolTable;            // forward declaration: used in this header only as const SymbolTable*
00027 class UVector;                // forward declaration: used in this header only as UVector* (member `strings`)
00028 class RuleCharacterIterator;  // forward declaration: used in this header only as RuleCharacterIterator&
00029 
00261 class U_COMMON_API UnicodeSet : public UnicodeFilter {
      // UnicodeSet is a public subclass of UnicodeFilter, exported through the
      // U_COMMON_API macro. Per the members below, its state is an array of
      // UChar32 values (`list`), a scratch array (`buffer`), a UVector of
      // strings, and a UnicodeString `pat`.
      //
      // NOTE(review): this listing is doxygen output from which the original
      // API doc comments were stripped (hence the gaps in the gutter numbers).
      // The comments added here describe only what the declarations show;
      // confirm behavioural details against the ICU API reference.
00262 
00263     int32_t len; // length of list used; 0 <= len <= capacity
00264     int32_t capacity; // capacity of list
00265     int32_t bufferCapacity; // capacity of buffer
00266     UChar32* list; // MUST be terminated with HIGH
00267     UChar32* buffer; // internal buffer, may be NULL
00268 
00269     UVector* strings; // maintained in sorted order
00270 
          // presumably the pattern text associated with this set -- TODO confirm
00280     UnicodeString pat;
00281 
00282 public:
00283 
          // Class-scope constants: MIN_VALUE is 0 and MAX_VALUE is 0x10ffff
          // (the highest Unicode code point).
00284     enum {
00289         MIN_VALUE = 0,
00290 
00295         MAX_VALUE = 0x10ffff
00296     };
00297 
00298     //----------------------------------------------------------------
00299     // Constructors &c
00300     //----------------------------------------------------------------
00301 
00302 public:
00303 
          // Default constructor.
00308     UnicodeSet();
00309 
          // Constructs from a start/end pair of UChar32 values.
          // NOTE(review): whether `end` is inclusive is not visible here -- confirm.
00318     UnicodeSet(UChar32 start, UChar32 end);
00319 
          // The next three constructors take a pattern string, with
          // progressively more parsing inputs (options, a SymbolTable, a
          // ParsePosition). Each takes a UErrorCode& `status`; presumably this
          // follows the usual ICU error-reporting convention -- confirm.
00328     UnicodeSet(const UnicodeString& pattern,
00329                UErrorCode& status);
00330 
00343     UnicodeSet(const UnicodeString& pattern,
00344                uint32_t options,
00345                const SymbolTable* symbols,
00346                UErrorCode& status);
00347 
00361     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00362                uint32_t options,
00363                const SymbolTable* symbols,
00364                UErrorCode& status);
00365 
          // Copy constructor.
00370     UnicodeSet(const UnicodeSet& o);
00371 
          // Virtual destructor.
00376     virtual ~UnicodeSet();
00377 
          // Copy assignment; returns a UnicodeSet reference.
00382     UnicodeSet& operator=(const UnicodeSet& o);
00383 
          // Equality comparison; virtual.
00395     virtual UBool operator==(const UnicodeSet& o) const;
00396 
          // Non-virtual; defined inline after the class as !operator==(o).
00402     UBool operator!=(const UnicodeSet& o) const;
00403 
          // Returns a UnicodeFunctor*.
          // NOTE(review): presumably a heap copy owned by the caller -- confirm.
00410     virtual UnicodeFunctor* clone() const;
00411 
00419     virtual int32_t hashCode(void) const;
00420 
00421     //----------------------------------------------------------------
00422     // Public API
00423     //----------------------------------------------------------------
00424 
          // Many mutators in this section return UnicodeSet&.
          // NOTE(review): presumably *this, to allow call chaining -- confirm.
00434     UnicodeSet& set(UChar32 start, UChar32 end);
00435 
          // Static helper taking a pattern string and a position within it.
00441     static UBool resemblesPattern(const UnicodeString& pattern,
00442                                   int32_t pos);
00443 
          // applyPattern overloads mirror the three pattern constructors above
          // (same parameter lists), but return UnicodeSet&.
00455     UnicodeSet& applyPattern(const UnicodeString& pattern,
00456                              UErrorCode& status);
00457 
00473     UnicodeSet& applyPattern(const UnicodeString& pattern,
00474                              uint32_t options,
00475                              const SymbolTable* symbols,
00476                              UErrorCode& status);
00477 
00508     UnicodeSet& applyPattern(const UnicodeString& pattern,
00509                              ParsePosition& pos,
00510                              uint32_t options,
00511                              const SymbolTable* symbols,
00512                              UErrorCode& status);
00513 
          // Writes into the caller-supplied `result` and returns a
          // UnicodeString&; escapeUnprintable defaults to FALSE.
00526     virtual UnicodeString& toPattern(UnicodeString& result,
00527                              UBool escapeUnprintable = FALSE) const;
00528 
          // Property-based setters: one takes a UProperty enum plus an integer
          // value, the other takes property and value as strings.
00550     UnicodeSet& applyIntPropertyValue(UProperty prop,
00551                                       int32_t value,
00552                                       UErrorCode& ec);
00553 
00582     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00583                                    const UnicodeString& value,
00584                                    UErrorCode& ec);
00585 
00594     virtual int32_t size(void) const;
00595 
00602     virtual UBool isEmpty(void) const;
00603 
          // Containment queries. All are const; overloads accept a single
          // UChar32, a start/end pair, a UnicodeString, or another UnicodeSet.
00610     virtual UBool contains(UChar32 c) const;
00611 
00620     virtual UBool contains(UChar32 start, UChar32 end) const;
00621 
00629     UBool contains(const UnicodeString& s) const;
00630 
00638     virtual UBool containsAll(const UnicodeSet& c) const;
00639 
00647     UBool containsAll(const UnicodeString& s) const;
00648 
00657     UBool containsNone(UChar32 start, UChar32 end) const;
00658 
00666     UBool containsNone(const UnicodeSet& c) const;
00667 
00675     UBool containsNone(const UnicodeString& s) const;
00676 
          // Each containsSome overload is defined inline after the class as the
          // logical negation of the containsNone overload with the same arguments.
00685     inline UBool containsSome(UChar32 start, UChar32 end) const;
00686 
00694     inline UBool containsSome(const UnicodeSet& s) const;
00695 
00703     inline UBool containsSome(const UnicodeString& s) const;
00704 
          // Note: unlike the contains* queries above, this is not declared
          // const, and `offset` is passed by non-const reference.
00709     virtual UMatchDegree matches(const Replaceable& text,
00710                          int32_t& offset,
00711                          int32_t limit,
00712                          UBool incremental);
00713 
00714 private:
          // Private static helper.
          // NOTE(review): presumably used by matches() -- confirm in the .cpp.
00736     static int32_t matchRest(const Replaceable& text,
00737                              int32_t start, int32_t limit,
00738                              const UnicodeString& s);
00739 
          // Private lookup helper taking a code point and returning an int32_t.
00749     int32_t findCodePoint(UChar32 c) const;
00750 
00751 public:
00752 
00760     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00761 
          // indexOf maps a UChar32 to an int32_t and charAt maps an int32_t
          // index to a UChar32.
          // NOTE(review): presumably inverse operations -- confirm.
00770     int32_t indexOf(UChar32 c) const;
00771 
00781     UChar32 charAt(int32_t index) const;
00782 
          // add overloads: start/end pair (virtual), single code point, string.
00796     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00797 
00804     UnicodeSet& add(UChar32 c);
00805 
00816     UnicodeSet& add(const UnicodeString& s);
00817 
00818  private:
          // Private helpers operating on a UnicodeString.
00824     static int32_t getSingleCP(const UnicodeString& s);
00825 
00826     void _add(const UnicodeString& s);
00827 
00828  public:
          // String-argument bulk operations; UnicodeSet-argument counterparts
          // with the same names are declared further down.
00836     UnicodeSet& addAll(const UnicodeString& s);
00837 
00845     UnicodeSet& retainAll(const UnicodeString& s);
00846 
00854     UnicodeSet& complementAll(const UnicodeString& s);
00855 
00863     UnicodeSet& removeAll(const UnicodeString& s);
00864 
          // Static factories returning a raw UnicodeSet*.
          // NOTE(review): presumably the caller owns the returned object -- confirm.
00873     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
00874 
00875 
00883     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
00884 
          // retain / remove / complement: each has a virtual start/end overload
          // and a non-virtual single-code-point overload; remove and complement
          // also accept a string, and complement has a no-argument form.
00897     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00898 
00899 
00904     UnicodeSet& retain(UChar32 c);
00905 
00918     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00919 
00926     UnicodeSet& remove(UChar32 c);
00927 
00936     UnicodeSet& remove(const UnicodeString& s);
00937 
00944     virtual UnicodeSet& complement(void);
00945 
00959     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00960 
00967     UnicodeSet& complement(UChar32 c);
00968 
00978     UnicodeSet& complement(const UnicodeString& s);
00979 
          // Set-argument bulk operations (all virtual).
00991     virtual UnicodeSet& addAll(const UnicodeSet& c);
00992 
01003     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01004 
01015     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01016 
01026     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01027 
01033     virtual UnicodeSet& clear(void);
01034 
          // NOTE(review): `attribute` is a plain int32_t; the accepted values
          // are not visible in this header -- see the ICU USet documentation.
01058     UnicodeSet& closeOver(int32_t attribute);
01059 
          // Range accessors: a count plus start/end lookups by int32_t index.
01067     virtual int32_t getRangeCount(void) const;
01068 
01076     virtual UChar32 getRangeStart(int32_t index) const;
01077 
01085     virtual UChar32 getRangeEnd(int32_t index) const;
01086 
          // Takes a caller-supplied uint16_t buffer and its capacity; returns an
          // int32_t and reports errors through `ec`.
          // NOTE(review): the return value is presumably the serialized length -- confirm.
01135     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01136 
01142     virtual UnicodeSet& compact();
01143 
          // Static/dynamic class ID pair.
          // NOTE(review): presumably ICU's own RTTI mechanism -- confirm.
01155     static UClassID U_EXPORT2 getStaticClassID(void);
01156 
01165     virtual UClassID getDynamicClassID(void) const;
01166 
01167 private:
01168 
01169     // Private API for the USet API
01170 
          // USetAccess is a friend, so it can call the two private accessors below.
01171     friend class USetAccess;
01172 
01173     int32_t getStringCount() const;
01174 
01175     const UnicodeString* getString(int32_t index) const;
01176 
01177     //----------------------------------------------------------------
01178     // RuleBasedTransliterator support
01179     //----------------------------------------------------------------
01180 
01181 private:
01182 
01188     virtual UBool matchesIndexValue(uint8_t v) const;
01189 
01190 private:
01191 
01192     //----------------------------------------------------------------
01193     // Implementation: Pattern parsing
01194     //----------------------------------------------------------------
01195 
          // Private overload of applyPattern: reads from a RuleCharacterIterator
          // and takes an extra UnicodeString& `rebuiltPat`; returns void.
01196     void applyPattern(RuleCharacterIterator& chars,
01197                       const SymbolTable* symbols,
01198                       UnicodeString& rebuiltPat,
01199                       uint32_t options,
01200                       UErrorCode& ec);
01201 
01202     //----------------------------------------------------------------
01203     // Implementation: Utility methods
01204     //----------------------------------------------------------------
01205 
          // Storage management helpers.
          // NOTE(review): presumably these act on `list`/`capacity` and
          // `buffer`/`bufferCapacity` respectively -- confirm in the .cpp.
01206     void ensureCapacity(int32_t newLen);
01207 
01208     void ensureBufferCapacity(int32_t newLen);
01209 
01210     void swapBuffers(void);
01211 
01212     UBool allocateStrings();
01213 
          // Private pattern-generation helpers; unlike the public toPattern,
          // escapeUnprintable has no default here.
01214     UnicodeString& _toPattern(UnicodeString& result,
01215                               UBool escapeUnprintable) const;
01216 
01217     UnicodeString& _generatePattern(UnicodeString& result,
01218                                     UBool escapeUnprintable) const;
01219 
01220     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01221 
01222     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01223 
01224     //----------------------------------------------------------------
01225     // Implementation: Fundamental operators
01226     //----------------------------------------------------------------
01227 
          // Each takes a raw UChar32 array, its length, and an int8_t `polarity`.
          // NOTE(review): the meaning of `polarity` is not visible here -- confirm.
01228     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01229 
01230     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01231 
01232     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01233 
          // Property-pattern detection and parsing, in string+position and
          // RuleCharacterIterator flavours.
01239     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01240                                           int32_t pos);
01241 
01242     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01243                                           int32_t iterOpts);
01244 
01283     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01284                                      ParsePosition& ppos,
01285                                      UErrorCode &ec);
01286 
01287     void applyPropertyPattern(RuleCharacterIterator& chars,
01288                               UnicodeString& rebuiltPat,
01289                               UErrorCode& ec);
01290 
          // Callback type: a function pointer that receives a code point and an
          // opaque context pointer and returns a UBool.
01295     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01296 
          // Takes a Filter callback and the context pointer to hand to it.
          // NOTE(review): `src` presumably selects a source of code points
          // (compare getInclusions below) -- confirm.
01306     void applyFilter(Filter filter,
01307                      void* context,
01308                      int32_t src,
01309                      UErrorCode &status);
01310 
          // Returns a pointer to a const UnicodeSet.
          // NOTE(review): ownership/lifetime of the result is not visible here -- confirm.
01314     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
01315 
          // UnicodeSetIterator is a friend and so has access to the private members.
01316     friend class UnicodeSetIterator;
01317 };
01318 
01319 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Inequality is the logical negation of operator== on the same operand.
          const UBool same = this->operator==(o);
01320     return !same;
01321 }
01322 
01323 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // "Some" is the logical negation of "none" for the same start/end pair.
          const UBool none = this->containsNone(start, end);
01324     return !none;
01325 }
01326 
01327 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // "Some" is the logical negation of "none" for the same set argument.
          const UBool none = this->containsNone(s);
01328     return !none;
01329 }
01330 
01331 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // "Some" is the logical negation of "none" for the same string argument.
          const UBool none = this->containsNone(s);
01332     return !none;
01333 }
01334 
01335 U_NAMESPACE_END
01336 
01337 #endif

Generated on Mon Aug 13 07:17:24 2007 for ICU 3.6 by  doxygen 1.5.2