00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016
00017 #include "unicode/utypes.h"
00018
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031
00032
// Forward declaration placed before U_NAMESPACE_BEGIN, i.e. outside the ICU
// namespace. UTrie is not referenced anywhere else in the visible part of
// this header.
00033 struct UTrie;
00034
00035 U_NAMESPACE_BEGIN
00036
// Forward declarations of types that this header only uses through pointers
// or references (constructor parameters, data members, private helper
// signatures), so their full definitions are not needed here.
00038 struct RBBIDataHeader;
// NOTE(review): RuleBasedBreakIteratorTables is not referenced by any
// declaration visible in this header -- confirm whether it is still needed.
00039 class RuleBasedBreakIteratorTables;
// NOTE(review): BreakIterator is also used as the base class below, which
// needs the complete type; presumably that comes from "unicode/brkiter.h".
00040 class BreakIterator;
00041 class RBBIDataWrapper;
00042 class UStack;
00043 class LanguageBreakEngine;
00044 class UnhandledEngine;
00045 struct RBBIStateTable;
00046
00047
00048
00049
/**
 * RuleBasedBreakIterator: a BreakIterator subclass exported with U_COMMON_API.
 *
 * This header only declares the class; apart from the inline operator!=
 * defined after the class body, no behavior is visible here. Comments below
 * state what the declarations themselves show; anything inferred from a
 * member's name is marked as an assumption to be confirmed against the
 * implementation.
 */
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066
// ---- Protected data members (accessible to subclasses and to the friend
// ---- classes declared further down).
00067 protected:
/** UText pointer. Presumably the text being analyzed (see setText(UText*, ...)
 *  and getUText()) -- confirm in the implementation. */
00072 UText *fText;
00073
/** CharacterIterator pointer. Presumably the iterator handed out by getText()
 *  and supplied through adoptText() -- ownership is not visible here. */
00079 CharacterIterator *fCharIter;
00080
/** StringCharacterIterator pointer. NOTE(review): purpose and ownership are
 *  not shown in this header. */
00086 StringCharacterIterator *fSCharIter;
00087
/** UCharCharacterIterator pointer. NOTE(review): purpose and ownership are
 *  not shown in this header. */
00093 UCharCharacterIterator *fDCharIter;
00094
/** Pointer to an RBBIDataWrapper (only forward-declared in this header).
 *  Presumably wraps the compiled rule data -- confirm. */
00099 RBBIDataWrapper *fData;
00100
/** Signed 32-bit index. By its name, it identifies the rule status of the most
 *  recent break -- confirm against getRuleStatus()/getRuleStatusVec(). */
00104 int32_t fLastRuleStatusIndex;
00105
/** Boolean flag. By its name, tells whether fLastRuleStatusIndex is currently
 *  valid; presumably maintained by makeRuleStatusValid() -- confirm. */
00112 UBool fLastStatusIndexValid;
00113
/** Unsigned 32-bit counter. By its name, a count of dictionary characters
 *  seen -- what resets or increments it is not visible here. */
00119 uint32_t fDictionaryCharCount;
00120
/** Pointer to int32_t values. By its name, cached break positions; allocation
 *  and ownership are not visible in this header. */
00128 int32_t* fCachedBreakPositions;
00129
/** Signed 32-bit count. Presumably the number of entries reachable through
 *  fCachedBreakPositions -- confirm. */
00134 int32_t fNumCachedBreakPositions;
00135
/** Signed 32-bit position. Presumably the current index into
 *  fCachedBreakPositions -- confirm. */
00141 int32_t fPositionInCache;
00142
/** Pointer to a UStack (forward-declared). By its name, holds language break
 *  engines; see getLanguageBreakEngine() -- element type not visible here. */
00150 UStack *fLanguageBreakEngines;
00151
/** Pointer to an UnhandledEngine (forward-declared). NOTE(review): role is not
 *  shown in this header. */
00159 UnhandledEngine *fUnhandledBreakEngine;
00160
/** Signed 32-bit break type. Presumably assigned through setBreakType(); the
 *  meaning of individual values is not visible here. */
00166 int32_t fBreakType;
00167
// ---- Protected constructor and friend declarations.
00168 protected:
00169
00170
00171
00172
/**
 * Protected constructor taking a pointer to an RBBIDataHeader.
 * @param data    Pointer to rule data; ownership semantics are not visible
 *                here -- confirm before changing callers.
 * @param status  UErrorCode passed by reference (in/out by ICU convention --
 *                TODO confirm how failures are reported).
 */
00183 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00184
00185
// Friends get access to the protected/private members above and below,
// including the protected constructor.
00186 friend class RBBIRuleBuilder;
00188 friend class BreakIterator;
00189
00190
00191
// ---- Public API.
00192 public:
00193
/** Default constructor. */
00198 RuleBasedBreakIterator();
00199
/** Copy constructor.
 *  @param that  Iterator to copy from. */
00206 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00207
/**
 * Constructs an iterator from a rule string.
 * @param rules       Rule source text.
 * @param parseError  Receives parse error details (non-const reference);
 *                    presumably filled in only on failure -- confirm.
 * @param status      UErrorCode passed by reference.
 */
00216 RuleBasedBreakIterator( const UnicodeString &rules,
00217 UParseError &parseError,
00218 UErrorCode &status);
00219
00220
/**
 * Constructs an iterator from a UDataMemory image.
 * @param image   Pointer to the data image; NOTE(review): whether the
 *                iterator takes ownership is not visible here.
 * @param status  UErrorCode passed by reference.
 */
00233 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00234
/** Virtual destructor. */
00239 virtual ~RuleBasedBreakIterator();
00240
/** Copy assignment.
 *  @param that  Iterator to assign from.
 *  @return Reference to a RuleBasedBreakIterator (presumably *this). */
00248 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00249
/** Virtual equality comparison against any BreakIterator.
 *  @param that  Iterator to compare with.
 *  @return UBool result of the comparison; the criteria are defined in the
 *          implementation, not in this header. */
00258 virtual UBool operator==(const BreakIterator& that) const;
00259
/** Non-virtual inequality; defined inline after the class body as the
 *  negation of operator==(that). */
00267 UBool operator!=(const BreakIterator& that) const;
00268
/** Virtual clone.
 *  @return Pointer to a BreakIterator; presumably a newly allocated copy
 *          owned by the caller -- confirm. */
00279 virtual BreakIterator* clone() const;
00280
/** @return A signed 32-bit hash code for this object. */
00286 virtual int32_t hashCode(void) const;
00287
/** @return Const reference to a UnicodeString; by the method name, the rule
 *          source for this iterator. */
00293 virtual const UnicodeString& getRules(void) const;
00294
00295
00296
00297
00298
/** @return Reference to a CharacterIterator. NOTE(review): lifetime of the
 *          referenced object is not visible here. */
00324 virtual CharacterIterator& getText(void) const;
00325
00326
/**
 * @param fillIn  Caller-supplied UText pointer; presumably may be reused for
 *                the result -- confirm (including whether NULL is allowed).
 * @param status  UErrorCode passed by reference.
 * @return A UText pointer.
 */
00341 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00342
/** @param newText  CharacterIterator pointer; the "adopt" name suggests
 *                  ownership transfers to this object -- confirm. */
00350 virtual void adoptText(CharacterIterator* newText);
00351
/** @param newText  String to analyze, passed by const reference. */
00358 virtual void setText(const UnicodeString& newText);
00359
/** @param text    UText to analyze.
 *  @param status  UErrorCode passed by reference. */
00373 virtual void setText(UText *text, UErrorCode &status);
00374
// Iteration primitives. Each returns an int32_t; presumably a boundary
// offset in the text (or a "done" sentinel defined by BreakIterator) --
// confirm against the BreakIterator documentation.
00380 virtual int32_t first(void);
00381
00387 virtual int32_t last(void);
00388
/** @param n  Signed step count; presumably negative values move backwards --
 *            confirm. */
00399 virtual int32_t next(int32_t n);
00400
00406 virtual int32_t next(void);
00407
00413 virtual int32_t previous(void);
00414
/** @param offset  Text offset to search from. */
00422 virtual int32_t following(int32_t offset);
00423
/** @param offset  Text offset to search from. */
00431 virtual int32_t preceding(int32_t offset);
00432
/** @param offset  Text offset to test.
 *  @return UBool; by the method name, whether offset is a boundary. */
00441 virtual UBool isBoundary(int32_t offset);
00442
/** Const accessor returning an int32_t; presumably the current position. */
00448 virtual int32_t current(void) const;
00449
00450
/** @return A signed 32-bit status value; the meaning of individual values is
 *          not defined in this header. */
00483 virtual int32_t getRuleStatus() const;
00484
/**
 * Note: unlike getRuleStatus(), this member is not const.
 * @param fillInVec  Caller-supplied int32_t buffer.
 * @param capacity   Presumably the number of int32_t slots available in
 *                   fillInVec -- confirm.
 * @param status     UErrorCode passed by reference.
 * @return int32_t; presumably the number of status values -- confirm.
 */
00508 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00509
/** @return Class ID of this object's dynamic type (virtual). */
00521 virtual UClassID getDynamicClassID(void) const;
00522
/** @return Class ID for this class (static; declared with U_EXPORT2). */
00534 static UClassID U_EXPORT2 getStaticClassID(void);
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
/**
 * @param stackBuffer  Caller-supplied raw buffer; presumably used to hold the
 *                     clone when it is large enough -- confirm.
 * @param BufferSize   Buffer size passed by non-const reference, so the
 *                     callee can write to it; exact protocol not visible here.
 * @param status       UErrorCode passed by reference.
 * @return Pointer to a BreakIterator.
 */
00560 virtual BreakIterator * createBufferClone(void *stackBuffer,
00561 int32_t &BufferSize,
00562 UErrorCode &status);
00563
00564
/**
 * @param length  Output parameter (non-const reference); presumably receives
 *                the byte length of the returned data -- confirm.
 * @return Pointer to const bytes. NOTE(review): lifetime of the returned
 *         memory is not visible here.
 */
00582 virtual const uint8_t *getBinaryRules(uint32_t &length);
00583
00584
// ---- Protected helpers for subclasses/friends.
00585 protected:
00586
00587
00588
/** Virtual reset hook; what state it resets is defined in the implementation. */
00594 virtual void reset(void);
00595
// The two declarations inside this "#if 0" block are compiled out, so
// isDictionaryChar() and getBreakType() are NOT members of the class.
00596 #if 0
00597
00605 virtual UBool isDictionaryChar(UChar32);
00606
00611 virtual int32_t getBreakType() const;
00612 #endif
00613
/** @param type  Break type value; presumably stored in fBreakType -- confirm. */
00618 virtual void setBreakType(int32_t type);
00619
/** Non-virtual initializer; presumably shared by the constructors -- confirm. */
00625 void init();
00626
// ---- Private implementation helpers (not available to subclasses).
00627 private:
00628
/** @param statetable  State table to drive the backward step.
 *  @return int32_t; presumably the resulting text position -- confirm. */
00638 int32_t handlePrevious(const RBBIStateTable *statetable);
00639
/** @param statetable  State table to drive the forward step.
 *  @return int32_t; presumably the resulting text position -- confirm. */
00649 int32_t handleNext(const RBBIStateTable *statetable);
00650
/**
 * @param startPos  Start of the range to examine.
 * @param endPos    End of the range to examine.
 * @param reverse   Direction flag; TODO confirm exact semantics.
 * @return int32_t; meaning is defined in the implementation.
 */
00665 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00666
/** @param c  Code point to look up.
 *  @return Pointer to a const LanguageBreakEngine; NOTE(review): whether NULL
 *          can be returned is not visible here. */
00673 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00674
/** By its name, brings the cached rule-status fields up to date -- confirm. */
00678 void makeRuleStatusValid();
00679
00680 };
00681
00682
00683
00684
00685
00686
00687
/**
 * Inequality operator, expressed purely in terms of operator==.
 *
 * @param that  The BreakIterator to compare this object against.
 * @return      The logical negation of operator==(that): non-zero exactly
 *              when operator== yields zero.
 */
00688 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00689 return !(this->operator==(that));
00690 }
00691
00692 U_NAMESPACE_END
00693
00694 #endif
00695
00696 #endif