rbbi.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 *   Copyright (C) 1999-2006 International Business Machines Corporation   *
00004 *   and others. All rights reserved.                                      *
00005 ***************************************************************************
00006 
00007 **********************************************************************
00008 *   Date        Name        Description
00009 *   10/22/99    alan        Creation.
00010 *   11/11/99    rgillam     Complete port from Java.
00011 **********************************************************************
00012 */
00013 
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016 
00017 #include "unicode/utypes.h"
00018 
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025 
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031 
00032 
// Forward declaration at global scope (it precedes U_NAMESPACE_BEGIN).
// UTrie is not referenced anywhere else in the visible part of this header.
00033 struct UTrie;
00034 
00035 U_NAMESPACE_BEGIN
00036 
// Forward declarations of types named by the RuleBasedBreakIterator class
// below.  In the visible part of this header RBBIDataHeader, RBBIDataWrapper,
// UStack, LanguageBreakEngine, UnhandledEngine and RBBIStateTable are used
// only through pointers, so no full definition is needed here.
// RuleBasedBreakIteratorTables is declared but not otherwise referenced in
// the visible part of this header.
// NOTE(review): BreakIterator is also used as the base class below, which
// needs its full definition -- presumably supplied by "unicode/brkiter.h";
// confirm.
00038 struct RBBIDataHeader;
00039 class  RuleBasedBreakIteratorTables;
00040 class  BreakIterator;
00041 class  RBBIDataWrapper;
00042 class  UStack;
00043 class  LanguageBreakEngine;
00044 class  UnhandledEngine;
00045 struct RBBIStateTable;
00046 
00047 
00048 
00049 
// RuleBasedBreakIterator: a class publicly derived from BreakIterator and
// exported with U_COMMON_API.  This header only declares the class; apart
// from operator!= (defined inline after the class) no member function bodies
// appear in this header.
// NOTE(review): the numeric prefix on each line is the source-listing line
// number.  The per-member notes below are inferred from the declarations
// alone and are hedged wherever the behaviour is not visible in this header.
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066 
00067 protected:
          // Text-access state: one UText pointer and three pointers from the
          // CharacterIterator family.
          // NOTE(review): presumably fText is the text being iterated over and
          // the iterator pointers back getText()/adoptText()/setText() --
          // confirm ownership and usage in the implementation.
00072     UText  *fText;
00073 
00079     CharacterIterator  *fCharIter;
00080 
00086     StringCharacterIterator *fSCharIter;
00087 
00093     UCharCharacterIterator *fDCharIter;
00094 
          // Pointer to an RBBIDataWrapper (only forward-declared in this header).
          // NOTE(review): presumably the compiled break-rule data -- confirm.
00099     RBBIDataWrapper    *fData;
00100 
          // Rule-status bookkeeping: an index and a boolean flag.
          // NOTE(review): presumably the flag says whether the index is up to
          // date (see makeRuleStatusValid()) -- confirm.
00104     int32_t             fLastRuleStatusIndex;
00105 
00112     UBool               fLastStatusIndexValid;
00113 
          // Unsigned 32-bit counter.
          // NOTE(review): the name suggests a count of dictionary characters
          // encountered; exact meaning is not visible here -- confirm.
00119     uint32_t            fDictionaryCharCount;
00120 
          // Break-position cache: a pointer to int32_t values, a count and a
          // position.
          // NOTE(review): presumably an array of fNumCachedBreakPositions
          // entries with fPositionInCache as the current index; allocation and
          // ownership are not visible here -- confirm.
00128     int32_t*            fCachedBreakPositions;
00129 
00134     int32_t             fNumCachedBreakPositions;
00135 
00141     int32_t             fPositionInCache;
00142     
          // Language break engine state: a UStack pointer and an
          // UnhandledEngine pointer.
          // NOTE(review): presumably consulted by getLanguageBreakEngine() and
          // checkDictionary() -- confirm.
00150     UStack              *fLanguageBreakEngines;
00151     
00159     UnhandledEngine     *fUnhandledBreakEngine;
00160     
          // Integer break type.
          // NOTE(review): presumably written by setBreakType(); the meaning of
          // the values is not visible in this header.
00166     int32_t             fBreakType;
00167     
          // Access is already protected at this point; this label repeats it.
00168 protected:
00169     //=======================================================================
00170     // constructors
00171     //=======================================================================
00172 
          // Protected constructor taking a pointer to an RBBIDataHeader and a
          // UErrorCode reference.
          // NOTE(review): ownership of `data` is not visible here -- confirm.
00183     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00184 
00185 
          // Friends: RBBIRuleBuilder and BreakIterator may access the protected
          // and private members of this class.
00186     friend class RBBIRuleBuilder;
00188     friend class BreakIterator;
00189 
00190 
00191 
00192 public:
00193 
          // Default constructor.
00198     RuleBasedBreakIterator();
00199 
          // Copy constructor.
00206     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00207 
          // Constructor from a rule string.  parseError and status are
          // non-const references, so the constructor can write to them.
          // NOTE(review): presumably parseError reports where rule parsing
          // failed and status reports the overall outcome -- confirm.
00216     RuleBasedBreakIterator( const UnicodeString    &rules,
00217                              UParseError           &parseError,
00218                              UErrorCode            &status);
00219 
00220 
          // Constructor from a UDataMemory pointer plus a UErrorCode reference.
          // NOTE(review): presumably `image` holds precompiled rule data;
          // whether ownership is transferred is not visible here -- confirm.
00233     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00234 
          // Virtual destructor.
00239     virtual ~RuleBasedBreakIterator();
00240 
          // Copy assignment; returns a reference to a RuleBasedBreakIterator.
00248     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00249 
          // Virtual equality comparison against any BreakIterator.
00258     virtual UBool operator==(const BreakIterator& that) const;
00259 
          // Non-virtual inequality; the inline definition after the class
          // returns the negation of operator==.
00267     UBool operator!=(const BreakIterator& that) const;
00268 
          // Returns a BreakIterator pointer.
          // NOTE(review): presumably a heap-allocated copy owned by the caller
          // -- confirm.
00279     virtual BreakIterator* clone() const;
00280 
          // Returns a 32-bit hash value for this object.
00286     virtual int32_t hashCode(void) const;
00287 
          // Returns a const reference to a UnicodeString.
          // NOTE(review): presumably the rule source text -- confirm.
00293     virtual const UnicodeString& getRules(void) const;
00294 
00295     //=======================================================================
00296     // BreakIterator overrides
00297     //=======================================================================
00298 
          // Text access.  getText() returns a CharacterIterator reference and
          // getUText() returns a UText pointer; adoptText() and the two
          // setText() overloads accept new text.
          // NOTE(review): presumably adoptText() takes ownership of newText
          // and getUText() may reuse the caller-supplied fillIn -- confirm.
00324     virtual  CharacterIterator& getText(void) const;
00325 
00326 
00341      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00342 
00350     virtual void adoptText(CharacterIterator* newText);
00351 
00358     virtual void setText(const UnicodeString& newText);
00359 
00373     virtual void  setText(UText *text, UErrorCode &status);
00374 
          // Iteration.  Each function below returns an int32_t, except
          // isBoundary() which returns a UBool.
          // NOTE(review): presumably the int32_t results are boundary offsets
          // into the text and `offset` arguments are text offsets -- confirm
          // against the BreakIterator base class documentation.
00380     virtual int32_t first(void);
00381 
00387     virtual int32_t last(void);
00388 
00399     virtual int32_t next(int32_t n);
00400 
00406     virtual int32_t next(void);
00407 
00413     virtual int32_t previous(void);
00414 
00422     virtual int32_t following(int32_t offset);
00423 
00431     virtual int32_t preceding(int32_t offset);
00432 
00441     virtual UBool isBoundary(int32_t offset);
00442 
00448     virtual int32_t current(void) const;
00449 
00450 
          // Rule status queries.  getRuleStatus() returns a single int32_t;
          // getRuleStatusVec() takes a caller-supplied int32_t buffer, its
          // capacity and a UErrorCode reference, and returns an int32_t.
          // NOTE(review): presumably the return value of getRuleStatusVec() is
          // the number of status values available -- confirm.
00483     virtual int32_t getRuleStatus() const;
00484 
00508     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00509 
          // Class identification: a virtual per-object accessor and a static
          // per-class accessor, both returning UClassID.
00521     virtual UClassID getDynamicClassID(void) const;
00522 
00534     static UClassID U_EXPORT2 getStaticClassID(void);
00535 
00536     /*
00537      * Create a clone (copy) of this break iterator in memory provided
00538      *  by the caller.  The idea is to increase performance by avoiding
00539      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
00540      *  Performance gains are minimal, and correct buffer management is
00541      *  tricky.  Use clone() instead.
00542      *
00543      * @param stackBuffer  The pointer to the memory into which the cloned object
00544      *                     should be placed.  If NULL,  allocate heap memory
00545      *                     for the cloned object.
00546      * @param BufferSize   The size of the buffer.  If zero, return the required
00547      *                     buffer size, but do not clone the object.  If the
00548      *                     size was too small (but not zero), allocate heap
00549      *                     storage for the cloned object.
00550      *
00551      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
00552      *                     returned if the provided buffer was too small, and
00553      *                     the clone was therefore put on the heap.
00554      *
00555      * @return  Pointer to the clone object.  This may differ from the stackBuffer
00556      *          address if the byte alignment of the stack buffer was not suitable
00557      *          or if the stackBuffer was too small to hold the clone.
00558      * @stable ICU 2.0
00559      */
00560     virtual BreakIterator *  createBufferClone(void *stackBuffer,
00561                                                int32_t &BufferSize,
00562                                                UErrorCode &status);
00563 
00564 
          // Returns a pointer to const bytes; `length` is a non-const
          // reference, so it can be written by the call.
          // NOTE(review): presumably the compiled form of the rules, with its
          // size stored in `length`; lifetime of the returned memory is not
          // visible here -- confirm.
00582     virtual const uint8_t *getBinaryRules(uint32_t &length);
00583 
00584 
00585 protected:
00586     //=======================================================================
00587     // implementation
00588     //=======================================================================
          // NOTE(review): presumably returns the iterator to its initial state;
          // exact effect is not visible in this header.
00594     virtual void reset(void);
00595 
          // The two declarations between #if 0 and #endif are excluded from
          // compilation.
00596 #if 0
00597 
00605     virtual UBool isDictionaryChar(UChar32);
00606 
00611     virtual int32_t getBreakType() const;
00612 #endif
00613 
          // Takes an integer break type.
          // NOTE(review): presumably stores it in fBreakType -- confirm.
00618     virtual void setBreakType(int32_t type);
00619 
          // Non-virtual initialisation helper.
          // NOTE(review): presumably shared by the constructors to set the
          // data members to default values -- confirm.
00625     void init();
00626 
00627 private:
00628 
          // Private helpers.  handlePrevious() and handleNext() each take a
          // pointer to a const RBBIStateTable and return an int32_t.
          // NOTE(review): presumably they run the given state table backwards /
          // forwards over the text and return the boundary found -- confirm.
00638     int32_t handlePrevious(const RBBIStateTable *statetable);
00639 
00649     int32_t handleNext(const RBBIStateTable *statetable);
00650 
          // Takes a start position, an end position and a direction flag;
          // returns an int32_t.
          // NOTE(review): presumably applies dictionary-based breaking to the
          // given range -- confirm.
00665     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00666 
          // Returns a pointer to a const LanguageBreakEngine for the code
          // point c.
          // NOTE(review): what is returned when no engine matches is not
          // visible here -- confirm.
00673     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00674 
          // NOTE(review): presumably brings fLastRuleStatusIndex up to date and
          // sets fLastStatusIndexValid -- confirm.
00678     void makeRuleStatusValid();
00679 
00680 };
00681 
00682 //------------------------------------------------------------------------------
00683 //
00684 //   Inline Function Definitions ...
00685 //
00686 //------------------------------------------------------------------------------
00687 
// Inequality: the logical negation of this object's operator==() member,
// called with the same argument.
00688 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00689     return !(this->operator==(that));
00690 }
00691 
00692 U_NAMESPACE_END
00693 
00694 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00695 
00696 #endif

Generated on Mon Aug 13 07:17:24 2007 for ICU 3.6 by  doxygen 1.5.2