00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011
00012 #include "unicode/utypes.h"
00013
00019 #if !UCONFIG_NO_NORMALIZATION
00020
00021 #include "unicode/uobject.h"
00022 #include "unicode/unistr.h"
00023 #include "unicode/chariter.h"
00024 #include "unicode/unorm.h"
00025
00026
/*
 * Forward declaration of the C iterator struct. Within this header only a
 * pointer to it is used (the private Normalizer member `text`), so an
 * incomplete type is sufficient here.
 */
00027 struct UCharIterator;
00028 typedef struct UCharIterator UCharIterator;
00030 U_NAMESPACE_BEGIN
/**
 * Normalizer: exported (U_COMMON_API) class derived from UObject.
 *
 * The public interface visible here falls into three groups:
 *  - static functions that take whole UnicodeStrings
 *    (normalize, compose, decompose, quickCheck, isNormalized,
 *    concatenate, compare);
 *  - member functions that return a UChar32 or an index and appear to form
 *    an iteration interface (current, first, last, next, previous,
 *    setIndexOnly, reset, getIndex, startIndex, endIndex);
 *  - member functions that configure the object
 *    (setMode, setOption, setText and the matching getters).
 *
 * NOTE(review): the original API comments are not present in this listing.
 * Remarks below that go beyond what the signatures show are marked as
 * assumptions and should be confirmed against the ICU API reference.
 */
00123 class U_COMMON_API Normalizer : public UObject {
00124 public:
/**
 * Class-scope constant DONE (0xffff).
 * Presumably the sentinel returned by the UChar32-returning iteration
 * functions when there is no further character -- TODO confirm.
 */
00130 enum {
00131 DONE=0xffff
00132 };
00133
00134
00135
// ---- Constructors and destructor --------------------------------------
// Each public constructor takes a text source in one of three forms
// (UnicodeString, UChar pointer plus length, CharacterIterator) together
// with a UNormalizationMode.

/** Construct from a UnicodeString and a normalization mode. */
00146 Normalizer(const UnicodeString& str, UNormalizationMode mode);
00147
/**
 * Construct from a UChar buffer and a normalization mode.
 * `length` presumably is the number of UChars at `str`; whether a negative
 * value is accepted for NUL-terminated input is not visible here -- confirm.
 */
00159 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00160
/** Construct from a CharacterIterator (taken by const reference) and a mode. */
00171 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00172
/** Copy constructor. */
00178 Normalizer(const Normalizer& copy);
00179
/** Virtual destructor. */
00184 virtual ~Normalizer();
00185
00186
00187
00188
00189
00190
// ---- Static whole-string functions ------------------------------------

/**
 * Inputs: source, mode, options. Output parameter: result.
 * Errors are reported through status.
 * Presumably writes the form of `source` selected by `mode` into `result`
 * -- confirm.
 */
00205 static void U_EXPORT2 normalize(const UnicodeString& source,
00206 UNormalizationMode mode, int32_t options,
00207 UnicodeString& result,
00208 UErrorCode &status);
00209
/**
 * Inputs: source, compat, options. Output parameter: result.
 * Errors are reported through status.
 * `compat` presumably selects compatibility rather than canonical
 * composition -- confirm.
 */
00227 static void U_EXPORT2 compose(const UnicodeString& source,
00228 UBool compat, int32_t options,
00229 UnicodeString& result,
00230 UErrorCode &status);
00231
/**
 * Inputs: source, compat, options. Output parameter: result.
 * Errors are reported through status.
 * `compat` presumably selects compatibility rather than canonical
 * decomposition -- confirm.
 */
00249 static void U_EXPORT2 decompose(const UnicodeString& source,
00250 UBool compat, int32_t options,
00251 UnicodeString& result,
00252 UErrorCode &status);
00253
/**
 * Defined inline later in this header: returns UNORM_MAYBE if `status`
 * already indicates failure, otherwise the result of unorm_quickCheck()
 * applied to the buffer and length of `source`.
 */
00274 static inline UNormalizationCheckResult
00275 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00276
/**
 * Same as the overload above but with an `options` argument; the inline
 * definition forwards to unorm_quickCheckWithOptions().
 */
00290 static inline UNormalizationCheckResult
00291 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00292
/**
 * Defined inline later in this header: returns FALSE if `errorCode`
 * already indicates failure, otherwise the result of unorm_isNormalized()
 * applied to the buffer and length of `src`.
 */
00313 static inline UBool
00314 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00315
/**
 * Same as the overload above but with an `options` argument; the inline
 * definition forwards to unorm_isNormalizedWithOptions().
 */
00331 static inline UBool
00332 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00333
/**
 * Takes two UnicodeStrings by non-const reference (left, right), an output
 * string, a mode, options and an error code; returns a UnicodeString
 * reference.
 * Presumably the returned reference is `result` -- confirm.
 */
00363 static UnicodeString &
00364 U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
00365 UnicodeString &result,
00366 UNormalizationMode mode, int32_t options,
00367 UErrorCode &errorCode);
00368
/**
 * Defined inline later in this header as a direct call to unorm_compare()
 * with the buffers and lengths of both strings; the inline wrapper itself
 * does not inspect `errorCode` before the call.
 */
00433 static inline int32_t
00434 compare(const UnicodeString &s1, const UnicodeString &s2,
00435 uint32_t options,
00436 UErrorCode &errorCode);
00437
00438
00439
00440
00441
// ---- Iteration-style members -------------------------------------------
// The five functions below are non-const and return a UChar32.
// NOTE(review): presumably they return DONE when no character is
// available -- confirm.

00450 UChar32 current(void);
00451
00460 UChar32 first(void);
00461
00470 UChar32 last(void);
00471
00486 UChar32 next(void);
00487
00502 UChar32 previous(void);
00503
/** Takes an index; returns nothing. Presumably repositions without producing a character -- confirm. */
00513 void setIndexOnly(int32_t index);
00514
/** Non-const, no arguments, no return value. */
00520 void reset(void);
00521
// Const index accessors, each returning an int32_t.
00536 int32_t getIndex(void) const;
00537
00546 int32_t startIndex(void) const;
00547
00558 int32_t endIndex(void) const;
00559
/** Equality comparison with another Normalizer. */
00568 UBool operator==(const Normalizer& that) const;
00569
/** Defined inline later in this header as the negation of operator==. */
00578 inline UBool operator!=(const Normalizer& that) const;
00579
/**
 * Returns a pointer to a Normalizer.
 * NOTE(review): ownership of the returned pointer presumably passes to the
 * caller -- confirm.
 */
00586 Normalizer* clone(void) const;
00587
/** Const; returns an int32_t hash value. */
00594 int32_t hashCode(void) const;
00595
00596
00597
00598
00599
// ---- Configuration -----------------------------------------------------

/** Sets the normalization mode. */
00615 void setMode(UNormalizationMode newMode);
00616
/** Const getter returning a UNormalizationMode. */
00627 UNormalizationMode getUMode(void) const;
00628
/** Takes an option identifier and a UBool value. Presumably turns that option on or off -- confirm. */
00645 void setOption(int32_t option,
00646 UBool value);
00647
/** Const getter returning a UBool for the given option identifier. */
00658 UBool getOption(int32_t option) const;
00659
// setText overloads mirror the three public constructors' text sources and
// additionally report errors through a UErrorCode.
00668 void setText(const UnicodeString& newText,
00669 UErrorCode &status);
00670
00679 void setText(const CharacterIterator& newText,
00680 UErrorCode &status);
00681
00691 void setText(const UChar* newText,
00692 int32_t length,
00693 UErrorCode &status);
/** Non-const; `result` is a non-const reference and presumably receives the current text -- confirm. */
00700 void getText(UnicodeString& result);
00701
/** Static function returning a UClassID. */
00707 static UClassID U_EXPORT2 getStaticClassID();
00708
/** Virtual const function returning a UClassID. */
00714 virtual UClassID getDynamicClassID() const;
00715
00716 private:
00717
00718
00719
00720
// Default constructor and copy assignment are declared private, so they are
// not accessible to users of the class. No definition is visible in this
// header.
00721 Normalizer();
00722 Normalizer &operator=(const Normalizer &that);
00723
00724
00725
// Private helpers returning a UBool.
// NOTE(review): presumably they produce the next / previous normalized
// chunk into `buffer` -- confirm against the implementation file.
00726 UBool nextNormalize();
00727 UBool previousNormalize();
00728
// Private helpers; implementations are not visible in this header.
00729 void init(CharacterIterator *iter);
00730 void clearBuffer(void);
00731
00732
00733
00734
00735
// Normalization mode and option value held by this object.
00736 UNormalizationMode fUMode;
00737 int32_t fOptions;
00738
00739
// Pointer to a UCharIterator (only forward-declared in this header).
// NOTE(review): ownership and lifetime are not visible here -- confirm.
00740 UCharIterator *text;
00741
00742
00743
// Two int32_t indices; presumably positions in the input text -- confirm.
00744 int32_t currentIndex, nextIndex;
00745
00746
// A UnicodeString buffer and an int32_t position, presumably into that
// buffer -- confirm.
00747 UnicodeString buffer;
00748 int32_t bufferPos;
00749
00750 };
00751
00752
00753
00754
00755
/**
 * Inequality operator: the logical negation of this object's operator==
 * applied to the same argument.
 */
00756 inline UBool
00757 Normalizer::operator!=(const Normalizer& that) const
00758 { return !this->operator==(that); }
00759
/**
 * Inline wrapper around unorm_quickCheck().
 *
 * @param source string whose buffer and length are handed to the C function
 * @param mode   normalization mode, forwarded unchanged
 * @param status in/out error code; if it already indicates failure on entry,
 *               the C function is not called
 * @return UNORM_MAYBE when status indicates failure on entry, otherwise
 *         whatever unorm_quickCheck() returns
 */
00760 inline UNormalizationCheckResult
00761 Normalizer::quickCheck(const UnicodeString& source,
00762                        UNormalizationMode mode,
00763                        UErrorCode &status) {
00764     return U_FAILURE(status)
00765         ? UNORM_MAYBE
00766         : unorm_quickCheck(source.getBuffer(),
00767                            source.length(),
00768                            mode,
00769                            &status);
00770 }
00771
/**
 * Inline wrapper around unorm_quickCheckWithOptions().
 *
 * @param source  string whose buffer and length are handed to the C function
 * @param mode    normalization mode, forwarded unchanged
 * @param options option value, forwarded unchanged
 * @param status  in/out error code; if it already indicates failure on entry,
 *                the C function is not called
 * @return UNORM_MAYBE when status indicates failure on entry, otherwise
 *         whatever unorm_quickCheckWithOptions() returns
 */
00772 inline UNormalizationCheckResult
00773 Normalizer::quickCheck(const UnicodeString& source,
00774                        UNormalizationMode mode, int32_t options,
00775                        UErrorCode &status) {
00776     return U_FAILURE(status)
00777         ? UNORM_MAYBE
00778         : unorm_quickCheckWithOptions(source.getBuffer(),
00779                                       source.length(),
00780                                       mode, options,
00781                                       &status);
00782 }
00783
/**
 * Inline wrapper around unorm_isNormalized().
 * Parameter names follow the in-class declaration (src, errorCode).
 *
 * @param src       string whose buffer and length are handed to the C function
 * @param mode      normalization mode, forwarded unchanged
 * @param errorCode in/out error code; if it already indicates failure on
 *                  entry, the C function is not called
 * @return FALSE when errorCode indicates failure on entry, otherwise
 *         whatever unorm_isNormalized() returns
 */
00784 inline UBool
00785 Normalizer::isNormalized(const UnicodeString& src,
00786                          UNormalizationMode mode,
00787                          UErrorCode &errorCode) {
00788     if(!U_FAILURE(errorCode)) {
00789         return unorm_isNormalized(src.getBuffer(), src.length(),
00790                                   mode, &errorCode);
00791     }
00792
    // Reached only when errorCode already held a failure on entry.
00793     return FALSE;
00794 }
00795
/**
 * Inline wrapper around unorm_isNormalizedWithOptions().
 * Parameter names follow the in-class declaration (src, errorCode).
 *
 * @param src       string whose buffer and length are handed to the C function
 * @param mode      normalization mode, forwarded unchanged
 * @param options   option value, forwarded unchanged
 * @param errorCode in/out error code; if it already indicates failure on
 *                  entry, the C function is not called
 * @return FALSE when errorCode indicates failure on entry, otherwise
 *         whatever unorm_isNormalizedWithOptions() returns
 */
00796 inline UBool
00797 Normalizer::isNormalized(const UnicodeString& src,
00798                          UNormalizationMode mode, int32_t options,
00799                          UErrorCode &errorCode) {
00800     if(!U_FAILURE(errorCode)) {
00801         return unorm_isNormalizedWithOptions(src.getBuffer(), src.length(),
00802                                              mode, options, &errorCode);
00803     }
00804
    // Reached only when errorCode already held a failure on entry.
00805     return FALSE;
00806 }
00807
/**
 * Inline pass-through to unorm_compare().
 *
 * Unlike the quickCheck/isNormalized wrappers in this header, this function
 * does not test errorCode itself; the code is handed to unorm_compare()
 * as-is together with the buffers and lengths of both strings.
 *
 * @param s1        first string
 * @param s2        second string
 * @param options   option bits, forwarded unchanged
 * @param errorCode in/out error code, forwarded by address
 * @return the int32_t returned by unorm_compare()
 */
00808 inline int32_t
00809 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00810                     uint32_t options,
00811                     UErrorCode &errorCode) {
00812     return unorm_compare(
00813         s1.getBuffer(), s1.length(),
00814         s2.getBuffer(), s2.length(),
00815         options,
00816         &errorCode);
00817 }
00818
00819 U_NAMESPACE_END
00820
00821 #endif /* #if !UCONFIG_NO_NORMALIZATION */
00822
00823 #endif // NORMLZR_H