normlzr.h

Go to the documentation of this file.
00001 /*
00002  ********************************************************************
00003  * COPYRIGHT:
00004  * Copyright (c) 1996-2006, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 
00019 #if !UCONFIG_NO_NORMALIZATION
00020 
00021 #include "unicode/uobject.h"
00022 #include "unicode/unistr.h"
00023 #include "unicode/chariter.h"
00024 #include "unicode/unorm.h"
00025 
00026 
// Forward declaration of struct UCharIterator plus a typedef of the same name.
// Within this header the type is only used through a pointer (the private
// Normalizer::text member), so an incomplete type is sufficient here.
// NOTE(review): the full definition presumably lives in another ICU header -- confirm.
00027 struct UCharIterator;
00028 typedef struct UCharIterator UCharIterator; 
00030 U_NAMESPACE_BEGIN
// Normalizer: a UObject subclass, exported with U_COMMON_API, that is only
// declared when UCONFIG_NO_NORMALIZATION is false (see the enclosing #if).
//
// The declaration is organised into the sections named by the comments below:
// constructors, static utility methods, the iteration API, property access
// methods, and private functions/data.
//
// NOTE(review): this is a source listing; the gaps in the listing line numbers
// presumably correspond to API doc comments that were stripped from the
// listing. The added comments below describe only what the declarations and
// the inline definitions in this file show -- consult the generated class
// documentation for the authoritative contracts.
00123 class U_COMMON_API Normalizer : public UObject {
00124 public:
        // Anonymous enum providing the class-scope constant Normalizer::DONE (0xffff).
        // NOTE(review): presumably the "no more characters" sentinel returned by the
        // iteration methods -- confirm against the API documentation.
00130   enum {
00131       DONE=0xffff
00132   };
00133 
00134   // Constructors
00135 
        // Three constructors taking the input text in different forms (a
        // UnicodeString, a UChar pointer with an explicit length, or a
        // CharacterIterator), each together with a UNormalizationMode.
00146   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00147 
00159   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00160 
00171   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00172 
        // Copy constructor.
00178   Normalizer(const Normalizer& copy);
00179 
        // Virtual destructor.
00184   virtual ~Normalizer();
00185 
00186 
00187   //-------------------------------------------------------------------------
00188   // Static utility methods
00189   //-------------------------------------------------------------------------
00190 
        // normalize/compose/decompose: static, declared here only (no definition in
        // this header). Each takes a source string and options, writes into the
        // caller-supplied `result`, and receives a UErrorCode by reference.
00205   static void U_EXPORT2 normalize(const UnicodeString& source,
00206                         UNormalizationMode mode, int32_t options,
00207                         UnicodeString& result,
00208                         UErrorCode &status);
00209 
00227   static void U_EXPORT2 compose(const UnicodeString& source,
00228                       UBool compat, int32_t options,
00229                       UnicodeString& result,
00230                       UErrorCode &status);
00231 
00249   static void U_EXPORT2 decompose(const UnicodeString& source,
00250                         UBool compat, int32_t options,
00251                         UnicodeString& result,
00252                         UErrorCode &status);
00253 
        // quickCheck (two overloads): inline, defined after the class in this file.
        // They forward to unorm_quickCheck() / unorm_quickCheckWithOptions() and
        // return UNORM_MAYBE without calling them when `status` is already a failure.
00274   static inline UNormalizationCheckResult
00275   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00276 
00290   static inline UNormalizationCheckResult
00291   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00292 
        // isNormalized (two overloads): inline, defined after the class in this file.
        // They forward to unorm_isNormalized() / unorm_isNormalizedWithOptions() and
        // return FALSE without calling them when the error code is already a failure.
        // The parameter names used here (src, errorCode) differ from those in the
        // inline definitions (source, status); this has no effect on callers.
00313   static inline UBool
00314   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00315 
00331   static inline UBool
00332   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00333 
        // concatenate: static, declared here only. Returns a UnicodeString reference;
        // note that `left` and `right` are taken by non-const reference.
        // NOTE(review): presumably the returned reference is `result` -- confirm.
00363   static UnicodeString &
00364   U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
00365               UnicodeString &result,
00366               UNormalizationMode mode, int32_t options,
00367               UErrorCode &errorCode);
00368 
        // compare: inline, defined after the class in this file; forwards both
        // strings' buffers and lengths, `options` and the error code to unorm_compare().
00433   static inline int32_t
00434   compare(const UnicodeString &s1, const UnicodeString &s2,
00435           uint32_t options,
00436           UErrorCode &errorCode);
00437 
00438   //-------------------------------------------------------------------------
00439   // Iteration API
00440   //-------------------------------------------------------------------------
00441 
        // Non-const member functions that each return a UChar32. Their definitions
        // are not part of this header.
        // NOTE(review): exact positioning/stepping semantics are not visible here.
00450   UChar32              current(void);
00451 
00460   UChar32              first(void);
00461 
00470   UChar32              last(void);
00471 
00486   UChar32              next(void);
00487 
00502   UChar32              previous(void);
00503 
        // Index manipulation and queries; the three getters are const.
00513   void                 setIndexOnly(int32_t index);
00514 
00520   void                reset(void);
00521 
00536   int32_t            getIndex(void) const;
00537 
00546   int32_t            startIndex(void) const;
00547 
00558   int32_t            endIndex(void) const;
00559 
        // Equality. operator!= is inline and defined after the class as the logical
        // negation of operator==.
00568   UBool        operator==(const Normalizer& that) const;
00569 
00578   inline UBool        operator!=(const Normalizer& that) const;
00579 
        // Returns a pointer to a Normalizer.
        // NOTE(review): presumably a heap-allocated copy owned by the caller -- confirm.
00586   Normalizer*        clone(void) const;
00587 
00594   int32_t                hashCode(void) const;
00595 
00596   //-------------------------------------------------------------------------
00597   // Property access methods
00598   //-------------------------------------------------------------------------
00599 
        // Mode accessors: the setter takes a UNormalizationMode, the getter
        // (getUMode) returns one.
00615   void setMode(UNormalizationMode newMode);
00616 
00627   UNormalizationMode getUMode(void) const;
00628 
        // Option accessors: an option is identified by an int32_t and set to / read
        // as a UBool.
00645   void setOption(int32_t option,
00646          UBool value);
00647 
00658   UBool getOption(int32_t option) const;
00659 
        // setText overloads mirror the three input forms accepted by the
        // constructors; unlike the constructors, each also takes a UErrorCode.
00668   void setText(const UnicodeString& newText,
00669            UErrorCode &status);
00670 
00679   void setText(const CharacterIterator& newText,
00680            UErrorCode &status);
00681 
00691   void setText(const UChar* newText,
00692                     int32_t length,
00693             UErrorCode &status);
        // Output parameter `result` is a non-const reference; this member is not const.
00700   void            getText(UnicodeString&  result);
00701 
        // Class-ID accessors: a static one for the class and a virtual one for the
        // dynamic type of an instance.
00707   static UClassID U_EXPORT2 getStaticClassID();
00708 
00714   virtual UClassID getDynamicClassID() const;
00715 
00716 private:
00717   //-------------------------------------------------------------------------
00718   // Private functions
00719   //-------------------------------------------------------------------------
00720 
        // Declared private and, per the comments on each line, left unimplemented, so
        // a Normalizer can be neither default-constructed nor assigned by client code.
00721   Normalizer(); // default constructor not implemented
00722   Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
00723 
00724   // Private utility methods for iteration
00725   // For documentation, see the source code
00726   UBool nextNormalize();
00727   UBool previousNormalize();
00728 
00729   void    init(CharacterIterator *iter);
00730   void    clearBuffer(void);
00731 
00732   //-------------------------------------------------------------------------
00733   // Private data
00734   //-------------------------------------------------------------------------
00735 
        // Current normalization mode and option bits.
00736   UNormalizationMode  fUMode;
00737   int32_t             fOptions;
00738 
00739   // The input text and our position in it
00740   UCharIterator       *text;
00741 
00742   // The normalization buffer is the result of normalization
00743   // of the source in [currentIndex..nextIndex[ .
00744   int32_t         currentIndex, nextIndex;
00745 
00746   // A buffer for holding intermediate results
00747   UnicodeString       buffer;
00748   int32_t         bufferPos;
00749 
00750 };
00751 
00752 //-------------------------------------------------------------------------
00753 // Inline implementations
00754 //-------------------------------------------------------------------------
00755 
// Inequality operator: returns the logical negation of operator==(other).
// The parameter is named `other` here and `that` in the in-class declaration;
// the difference has no effect on callers.
00756 inline UBool
00757 Normalizer::operator!= (const Normalizer& other) const
00758 { return ! operator==(other); }
00759 
// quickCheck without options: thin inline wrapper around the C function
// unorm_quickCheck().
//
// source - string to check; its buffer and length are passed to the C API.
// mode   - normalization mode, forwarded unchanged.
// status - in/out error code. If it already indicates failure on entry, the
//          C function is not called and UNORM_MAYBE is returned. Otherwise its
//          address is handed to unorm_quickCheck().
// Returns the result of unorm_quickCheck(), or UNORM_MAYBE on the early exit.
00760 inline UNormalizationCheckResult
00761 Normalizer::quickCheck(const UnicodeString& source,
00762                        UNormalizationMode mode,
00763                        UErrorCode &status) {
          // Do nothing if an earlier call has already failed.
00764     if(U_FAILURE(status)) {
00765         return UNORM_MAYBE;
00766     }
00767 
00768     return unorm_quickCheck(source.getBuffer(), source.length(),
00769                             mode, &status);
00770 }
00771 
// quickCheck with options: thin inline wrapper around the C function
// unorm_quickCheckWithOptions().
//
// source  - string to check; its buffer and length are passed to the C API.
// mode    - normalization mode, forwarded unchanged.
// options - option bits, forwarded unchanged.
// status  - in/out error code. If it already indicates failure on entry, the
//           C function is not called and UNORM_MAYBE is returned. Otherwise
//           its address is handed to unorm_quickCheckWithOptions().
// Returns the result of unorm_quickCheckWithOptions(), or UNORM_MAYBE on the
// early exit.
00772 inline UNormalizationCheckResult
00773 Normalizer::quickCheck(const UnicodeString& source,
00774                        UNormalizationMode mode, int32_t options,
00775                        UErrorCode &status) {
          // Do nothing if an earlier call has already failed.
00776     if(U_FAILURE(status)) {
00777         return UNORM_MAYBE;
00778     }
00779 
00780     return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
00781                                        mode, options, &status);
00782 }
00783 
// isNormalized without options: thin inline wrapper around the C function
// unorm_isNormalized().
//
// source - string to test; its buffer and length are passed to the C API.
//          (The in-class declaration names this parameter `src`.)
// mode   - normalization mode, forwarded unchanged.
// status - in/out error code (named `errorCode` in the declaration). If it
//          already indicates failure on entry, the C function is not called
//          and FALSE is returned. Otherwise its address is handed to
//          unorm_isNormalized().
// Returns the result of unorm_isNormalized(), or FALSE on the early exit.
00784 inline UBool
00785 Normalizer::isNormalized(const UnicodeString& source,
00786                          UNormalizationMode mode,
00787                          UErrorCode &status) {
          // Do nothing if an earlier call has already failed.
00788     if(U_FAILURE(status)) {
00789         return FALSE;
00790     }
00791 
00792     return unorm_isNormalized(source.getBuffer(), source.length(),
00793                               mode, &status);
00794 }
00795 
// isNormalized with options: thin inline wrapper around the C function
// unorm_isNormalizedWithOptions().
//
// source  - string to test; its buffer and length are passed to the C API.
//           (The in-class declaration names this parameter `src`.)
// mode    - normalization mode, forwarded unchanged.
// options - option bits, forwarded unchanged.
// status  - in/out error code (named `errorCode` in the declaration). If it
//           already indicates failure on entry, the C function is not called
//           and FALSE is returned. Otherwise its address is handed to
//           unorm_isNormalizedWithOptions().
// Returns the result of unorm_isNormalizedWithOptions(), or FALSE on the
// early exit.
00796 inline UBool
00797 Normalizer::isNormalized(const UnicodeString& source,
00798                          UNormalizationMode mode, int32_t options,
00799                          UErrorCode &status) {
          // Do nothing if an earlier call has already failed.
00800     if(U_FAILURE(status)) {
00801         return FALSE;
00802     }
00803 
00804     return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
00805                                          mode, options, &status);
00806 }
00807 
// compare: thin inline wrapper around the C function unorm_compare().
//
// s1, s2    - strings to compare; each one's buffer and length are passed to
//             the C API.
// options   - option bits, forwarded unchanged.
// errorCode - in/out error code; its address is handed to unorm_compare().
// Returns whatever unorm_compare() returns.
//
// Unlike the quickCheck/isNormalized wrappers in this file, there is no
// U_FAILURE early exit here: as the comment in the body states, argument
// checking is left to unorm_compare().
00808 inline int32_t
00809 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00810                     uint32_t options,
00811                     UErrorCode &errorCode) {
00812   // all argument checking is done in unorm_compare
00813   return unorm_compare(s1.getBuffer(), s1.length(),
00814                        s2.getBuffer(), s2.length(),
00815                        options,
00816                        &errorCode);
00817 }
00818 
00819 U_NAMESPACE_END
00820 
00821 #endif /* #if !UCONFIG_NO_NORMALIZATION */
00822 
00823 #endif // NORMLZR_H

Generated on Mon Aug 13 07:17:24 2007 for ICU 3.6 by  doxygen 1.5.2