utf_old.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002-2005, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00146 #ifndef __UTF_OLD_H__
00147 #define __UTF_OLD_H__
00148 
00149 #ifndef U_HIDE_DEPRECATED_API
00150 
00151 /* utf.h must be included first. */
00152 #ifndef __UTF_H__
00153 #   include "unicode/utf.h"
00154 #endif
00155 
00156 /* Formerly utf.h, part 1 --------------------------------------------------- */
00157 
/**
 * Alias for int32_t, declared only when U_USE_UTF_DEPRECATES is defined.
 * NOTE(review): the name suggests a text index/offset type - confirm with callers.
 */
00158 #ifdef U_USE_UTF_DEPRECATES
00159 
00166 typedef int32_t UTextOffset;
00167 #endif
00168 
/**
 * Fixed at 16.
 * NOTE(review): presumably the code-unit width in bits, in line with the
 * UTF-16 based default macros at the end of this file - confirm.
 */
00170 #define UTF_SIZE 16
00171 
/**
 * Variant switches: UTF_SAFE is defined (with an empty replacement list),
 * while UTF_UNSAFE and UTF_STRICT are explicitly undefined.
 * None of the three names is tested by an #if/#ifdef in the visible part of
 * this header.
 */
00178 #define UTF_SAFE
00179 
00180 #undef UTF_UNSAFE
00181 
00182 #undef UTF_STRICT
00183 
/**
 * Error value stored into c by UTF8_NEXT_CHAR_SAFE and UTF8_PREV_CHAR_SAFE
 * when the byte they read is >=0x80 but not of the kind they expect.
 * Also recognized by UTF_IS_ERROR and rejected by UTF_IS_VALID.
 */
00196 #define UTF8_ERROR_VALUE_1 0x15
00197 
/**
 * Second UTF-8 error value. Within this header it is only compared against
 * (UTF_IS_ERROR, UTF_IS_VALID); no macro here assigns it.
 */
00203 #define UTF8_ERROR_VALUE_2 0x9f
00204 
/**
 * Error value stored by the UTF16_*_SAFE and UTF32_*_SAFE macros of this
 * header when they detect a problem.
 */
00211 #define UTF_ERROR_VALUE 0xffff
00212 
/**
 * True when c looks like one of the error values produced by the macros in
 * this header: (c & 0xfffe) equals 0xfffe, or c equals UTF8_ERROR_VALUE_1 or
 * UTF8_ERROR_VALUE_2.
 * The mask only inspects bits 1..15, so bits above bit 15 do not influence
 * the first test. c may be evaluated up to three times.
 */
00219 #define UTF_IS_ERROR(c) \
00220     (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
00221 
/**
 * True when c satisfies UTF_IS_UNICODE_CHAR and is neither
 * UTF8_ERROR_VALUE_1 nor UTF8_ERROR_VALUE_2.
 * c is evaluated several times.
 */
00227 #define UTF_IS_VALID(c) \
00228     (UTF_IS_UNICODE_CHAR(c) && \
00229      (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
00230 
/**
 * True when uchar lies in 0xd800..0xdfff: all bits from bit 11 up to bit 31
 * must match 0xd800.
 */
00235 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
00236 
/**
 * True when c is at most 0x10ffff and either lies in 0xfdd0..0xfdef or has
 * (c & 0xfffe) == 0xfffe (low 16 bits are 0xfffe or 0xffff).
 * The first comparison (c >= 0xfdd0) is done in c's own type; the upper-bound
 * comparisons are done on (uint32_t)c. c is evaluated several times.
 */
00242 #define UTF_IS_UNICODE_NONCHAR(c) \
00243     ((c)>=0xfdd0 && \
00244      ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
00245      (uint32_t)(c)<=0x10ffff)
00246 
/**
 * True when (uint32_t)c is below 0xd800, or is above 0xdfff, at most 0x10ffff
 * and not matched by UTF_IS_UNICODE_NONCHAR.
 * In other words: rejects 0xd800..0xdfff, values above 0x10ffff and the
 * values UTF_IS_UNICODE_NONCHAR accepts. c is evaluated several times.
 */
00262 #define UTF_IS_UNICODE_CHAR(c) \
00263     ((uint32_t)(c)<0xd800 || \
00264         ((uint32_t)(c)>0xdfff && \
00265          (uint32_t)(c)<=0x10ffff && \
00266          !UTF_IS_UNICODE_NONCHAR(c)))
00267 
00268 /* Formerly utf8.h ---------------------------------------------------------- */
00269 
/**
 * Look up the trail-byte count for a UTF-8 lead byte.
 *
 * The whole argument expression is narrowed to uint8_t and used as an index
 * into utf8_countTrailBytes. That table is not declared in this file; it must
 * be visible wherever the macro is expanded.
 *
 * The argument is parenthesized before the cast so that a compound expression
 * such as (a + b) is narrowed as a whole; otherwise only its first operand
 * would be cast and the index could exceed 255.
 *
 * @param leadByte expression yielding the lead byte; evaluated once
 * @return the table entry for (uint8_t)(leadByte)
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00275 
/**
 * Clear the length-marker bits of a lead byte in place:
 * leadByte &= (1 << (6 - countTrailBytes)) - 1, i.e. only the low
 * (6 - countTrailBytes) bits are kept.
 * leadByte must be a modifiable lvalue; the expression yields the new value.
 */
00280 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
00281 
/** True when bit 7 of uchar is clear. */
00283 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
00284 
/**
 * True when (uchar - 0xc0), reduced to 8 bits, is below 0x3e; for byte values
 * that means 0xc0..0xfd.
 */
00285 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
00286 
/** True when bits 7..6 of uchar are binary 10 (byte values 0x80..0xbf). */
00287 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
00288 
/** True when c, viewed as uint32_t, is above 0x7f. */
00290 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
00291 
/**
 * Number of UTF-8 bytes computed for code point c.
 *
 * Active variant (#if 1): 1 for c<=0x7f, 2 for c<=0x7ff, 4 for
 * 0x10000..0x10ffff, and 3 for everything else. "Everything else" includes
 * values above 0x10ffff, because (uint32_t)(c-0x10000) > 0xfffff also holds
 * for them.
 *
 * The #else variant is never compiled. It would return 1..6 for values up to
 * 0x7fffffff and 3 for anything larger.
 *
 * c is evaluated more than once.
 */
00305 #if 1
00306 #   define UTF8_CHAR_LENGTH(c) \
00307         ((uint32_t)(c)<=0x7f ? 1 : \
00308             ((uint32_t)(c)<=0x7ff ? 2 : \
00309                 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
00310             ) \
00311         )
00312 #else
00313 #   define UTF8_CHAR_LENGTH(c) \
00314         ((uint32_t)(c)<=0x7f ? 1 : \
00315             ((uint32_t)(c)<=0x7ff ? 2 : \
00316                 ((uint32_t)(c)<=0xffff ? 3 : \
00317                     ((uint32_t)(c)<=0x10ffff ? 4 : \
00318                         ((uint32_t)(c)<=0x3ffffff ? 5 : \
00319                             ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
00320                         ) \
00321                     ) \
00322                 ) \
00323             ) \
00324         )
00325 #endif
00326 
/**
 * Constant 4; matches the largest value the active UTF8_CHAR_LENGTH variant
 * can return.
 */
00328 #define UTF8_MAX_CHAR_LENGTH 4
00329 
/**
 * Yields (5*size)/2 using integer arithmetic.
 * NOTE(review): presumably an estimate of the UTF-8 byte capacity needed for
 * `size` UTF-16 code units - confirm with callers.
 */
00331 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
00332 
/**
 * Read into c the code point whose byte sequence contains index i, without
 * changing i.
 * i is copied into a local int32_t; the copy is moved back over trail bytes
 * with UTF8_SET_CHAR_START_UNSAFE and the code point is then decoded from
 * there with UTF8_NEXT_CHAR_UNSAFE. No bounds or validity checks.
 */
00334 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
00335     int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
00336     UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
00337     UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
00338 }
00339 
/**
 * Checked counterpart of UTF8_GET_CHAR_UNSAFE: read into c the code point
 * around index i without changing i.
 * Works on a local int32_t copy of i, which is first adjusted with
 * UTF8_SET_CHAR_START_SAFE(s, start, ...) and then decoded with
 * UTF8_NEXT_CHAR_SAFE(s, ..., length, c, strict).
 */
00341 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00342     int32_t _utf8_get_char_safe_index=(int32_t)(i); \
00343     UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
00344     UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
00345 }
00346 
/**
 * Decode the code point starting at s[i] into c and advance i past it,
 * without any bounds or validity checks.
 *
 * c first receives s[i++]. If (uint8_t)(c-0xc0) < 0x35 (byte values
 * 0xc0..0xf4) the trail-byte count is looked up with UTF8_COUNT_TRAIL_BYTES,
 * the marker bits are removed with UTF8_MASK_LEAD_BYTE, and the switch then
 * deliberately falls through to append 6 bits from each of up to three
 * following bytes. Any other first byte is returned as read.
 *
 * c and i must be modifiable lvalues. A counter named __count is declared
 * inside the expansion.
 */
00348 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
00349     (c)=(s)[(i)++]; \
00350     if((uint8_t)((c)-0xc0)<0x35) { \
00351         uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
00352         UTF8_MASK_LEAD_BYTE(c, __count); \
00353         switch(__count) { \
00354         /* each following branch falls through to the next one */ \
00355         case 3: \
00356             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00357         case 2: \
00358             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00359         case 1: \
00360             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
00361         /* no other branches to optimize switch() */ \
00362             break; \
00363         } \
00364     } \
00365 }
00366 
/**
 * Write code point c to s starting at index i as 1 to 4 bytes and advance i
 * past them. There is no capacity check and no validation of c.
 *
 * c<=0x7f: one byte. c<=0x7ff: lead 0xc0|bits plus one trail byte.
 * c<=0xffff: lead 0xe0|bits plus two trail bytes. Anything larger: lead
 * 0xf0|bits plus three trail bytes. The nested else blocks share the trailing
 * stores, so every multi-byte form ends with the same final trail-byte write.
 */
00368 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
00369     if((uint32_t)(c)<=0x7f) { \
00370         (s)[(i)++]=(uint8_t)(c); \
00371     } else { \
00372         if((uint32_t)(c)<=0x7ff) { \
00373             (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
00374         } else { \
00375             if((uint32_t)(c)<=0xffff) { \
00376                 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
00377             } else { \
00378                 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
00379                 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
00380             } \
00381             (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
00382         } \
00383         (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
00384     } \
00385 }
00386 
/**
 * Advance i by one code point: adds 1 plus the trail-byte count that
 * UTF8_COUNT_TRAIL_BYTES reports for s[i]. No bounds check.
 */
00388 #define UTF8_FWD_1_UNSAFE(s, i) { \
00389     (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
00390 }
00391 
/**
 * Apply UTF8_FWD_1_UNSAFE n times. n is evaluated once into a local int32_t
 * named __N; a value <=0 leaves i unchanged.
 */
00393 #define UTF8_FWD_N_UNSAFE(s, i, n) { \
00394     int32_t __N=(n); \
00395     while(__N>0) { \
00396         UTF8_FWD_1_UNSAFE(s, i); \
00397         --__N; \
00398     } \
00399 }
00400 
/**
 * Move i backwards while s[i] is a trail byte (UTF8_IS_TRAIL).
 * There is no lower bound: the loop only stops at a non-trail byte.
 */
00402 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
00403     while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
00404 }
00405 
/**
 * Read the code point starting at s[i] into c and advance i.
 *
 * c first receives s[i++]. Values below 0x80 are returned as is. For a value
 * >=0x80 that satisfies UTF8_IS_LEAD, decoding is delegated to
 * utf8_nextCharSafeBody(s, &i, length, c, strict), which is not defined in
 * this file. Any other value >=0x80 yields UTF8_ERROR_VALUE_1.
 *
 * NOTE(review): the (c)>=0x80 test assumes the value read from s is
 * non-negative, i.e. s holds unsigned bytes - confirm with callers.
 */
00407 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00408     (c)=(s)[(i)++]; \
00409     if((c)>=0x80) { \
00410         if(UTF8_IS_LEAD(c)) { \
00411             (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
00412         } else { \
00413             (c)=UTF8_ERROR_VALUE_1; \
00414         } \
00415     } \
00416 }
00417 
/**
 * Append code point c to s at index i.
 *
 * c<=0x7f is stored directly as one byte and i is incremented; length is not
 * consulted on this path. Every other value is handed to
 * utf8_appendCharSafeBody(s, i, length, c, NULL), which is not defined in
 * this file, and i is set to its return value.
 */
00419 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
00420     if((uint32_t)(c)<=0x7f) { \
00421         (s)[(i)++]=(uint8_t)(c); \
00422     } else { \
00423         (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
00424     } \
00425 }
00426 
/** Alias: expands to U8_FWD_1 with the same arguments (U8_FWD_1 is defined outside this file). */
00428 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
00429 
/** Alias: expands to U8_FWD_N with the same arguments (defined outside this file). */
00431 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
00432 
/** Alias: expands to U8_SET_CP_START with the same arguments (defined outside this file). */
00434 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
00435 
/**
 * Step i back over one code point and store that code point in c, without
 * bounds or validity checks.
 *
 * c first receives s[--i]. If that is a trail byte, its low 6 bits are kept
 * and the loop keeps reading backwards: each further byte below 0xc0
 * contributes 6 more bits at the next higher position, and the first byte
 * >=0xc0 is treated as the lead byte, masked with UTF8_MASK_LEAD_BYTE and
 * OR-ed in as the top bits. The loop only terminates at such a byte.
 *
 * Locals named __b, __count and __shift are declared inside the expansion.
 */
00437 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
00438     (c)=(s)[--(i)]; \
00439     if(UTF8_IS_TRAIL(c)) { \
00440         uint8_t __b, __count=1, __shift=6; \
00441 \
00442         /* c is a trail byte */ \
00443         (c)&=0x3f; \
00444         for(;;) { \
00445             __b=(s)[--(i)]; \
00446             if(__b>=0xc0) { \
00447                 UTF8_MASK_LEAD_BYTE(__b, __count); \
00448                 (c)|=(UChar32)__b<<__shift; \
00449                 break; \
00450             } else { \
00451                 (c)|=(UChar32)(__b&0x3f)<<__shift; \
00452                 ++__count; \
00453                 __shift+=6; \
00454             } \
00455         } \
00456     } \
00457 }
00458 
/**
 * Move i back by one code point: pre-decrement i and repeat while the byte
 * now at s[i] is a trail byte. No lower bound is checked.
 */
00460 #define UTF8_BACK_1_UNSAFE(s, i) { \
00461     while(UTF8_IS_TRAIL((s)[--(i)])) {} \
00462 }
00463 
/**
 * Apply UTF8_BACK_1_UNSAFE n times. n is evaluated once into a local int32_t
 * named __N; a value <=0 leaves i unchanged.
 */
00465 #define UTF8_BACK_N_UNSAFE(s, i, n) { \
00466     int32_t __N=(n); \
00467     while(__N>0) { \
00468         UTF8_BACK_1_UNSAFE(s, i); \
00469         --__N; \
00470     } \
00471 }
00472 
/**
 * Adjust i to the position after a code point: first step back one code point
 * with UTF8_BACK_1_UNSAFE, then forward one with UTF8_FWD_1_UNSAFE.
 * No bounds checks.
 */
00474 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00475     UTF8_BACK_1_UNSAFE(s, i); \
00476     UTF8_FWD_1_UNSAFE(s, i); \
00477 }
00478 
/**
 * Step i back and read the preceding code point into c.
 *
 * c first receives s[--i]. Values below 0x80 are returned as is. A value in
 * 0x80..0xbf is handed to utf8_prevCharSafeBody(s, start, &i, c, strict),
 * which is not defined in this file. Any value above 0xbf yields
 * UTF8_ERROR_VALUE_1.
 *
 * NOTE(review): the range tests assume the value read from s is non-negative
 * (unsigned bytes) - confirm with callers.
 */
00480 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00481     (c)=(s)[--(i)]; \
00482     if((c)>=0x80) { \
00483         if((c)<=0xbf) { \
00484             (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
00485         } else { \
00486             (c)=UTF8_ERROR_VALUE_1; \
00487         } \
00488     } \
00489 }
00490 
/** Alias: expands to U8_BACK_1 with the same arguments (defined outside this file). */
00492 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
00493 
/** Alias: expands to U8_BACK_N with the same arguments (defined outside this file). */
00495 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
00496 
/** Alias: expands to U8_SET_CP_LIMIT with the same arguments (defined outside this file). */
00498 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
00499 
00500 /* Formerly utf16.h --------------------------------------------------------- */
00501 
/** True when uchar lies in 0xd800..0xdbff (all bits from bit 10 upwards match 0xd800). */
00503 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
00504 
/** True when uchar lies in 0xdc00..0xdfff (all bits from bit 10 upwards match 0xdc00). */
00506 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
00507 
/**
 * Tests only bit 10 of c (true when it is clear). The macros in this header
 * use it only after UTF_IS_SURROGATE(c) has succeeded, to tell a first
 * surrogate from a second one.
 */
00509 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
00510 
/**
 * Constant subtracted by UTF16_GET_PAIR_VALUE:
 * (0xd800<<10) + 0xdc00 - 0x10000, so that the pair (0xd800, 0xdc00)
 * combines to 0x10000.
 */
00512 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
00513 
/**
 * Combine two code units into one value:
 * (first<<10) + second - UTF_SURROGATE_OFFSET.
 * The arguments are not checked to be surrogates.
 */
00515 #define UTF16_GET_PAIR_VALUE(first, second) \
00516     (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
00517 
/**
 * Computes (supplementary>>10) + 0xd7c0 and casts to UChar; 0x10000 maps to
 * 0xd800. The argument range is not checked.
 */
00519 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
00520 
/** Computes (supplementary & 0x3ff) | 0xdc00 and casts to UChar. */
00522 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
00523 
/** Alias for UTF_FIRST_SURROGATE. */
00525 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
00526 
/** Alias for UTF_SECOND_SURROGATE. */
00528 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
00529 
/**
 * True when uchar is not a surrogate code unit, i.e. when
 * UTF_IS_SURROGATE(uchar) is false.
 *
 * The whole expansion is wrapped in parentheses so that it behaves as a
 * single primary expression wherever the macro is used.
 *
 * @param uchar code unit to test
 * @return non-zero if uchar is outside 0xd800..0xdfff
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))
00532 
/** Alias for UTF_IS_FIRST_SURROGATE. */
00534 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
00535 
/** Alias for UTF_IS_SECOND_SURROGATE. */
00537 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00538 
/** True when c, viewed as uint32_t, is above 0xffff. */
00540 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
00541 
/** 1 when (uint32_t)c <= 0xffff, otherwise 2. No upper-range check. */
00543 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
00544 
/** Constant 2; the largest value UTF16_CHAR_LENGTH can return. */
00546 #define UTF16_MAX_CHAR_LENGTH 2
00547 
/** Identity: yields its argument unchanged. */
00549 #define UTF16_ARRAY_SIZE(size) (size)
00550 
/**
 * Read into c the code point that includes the code unit at s[i]; i is not
 * modified.
 * If s[i] is a surrogate, it is combined with s[i+1] (when bit 10 is clear,
 * i.e. a first surrogate) or with s[i-1] (otherwise) via
 * UTF16_GET_PAIR_VALUE. The neighbour is neither bounds-checked nor verified
 * to be a matching surrogate.
 */
00562 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
00563     (c)=(s)[i]; \
00564     if(UTF_IS_SURROGATE(c)) { \
00565         if(UTF_IS_SURROGATE_FIRST(c)) { \
00566             (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
00567         } else { \
00568             (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
00569         } \
00570     } \
00571 }
00572 
/**
 * Checked read of the code point that includes the code unit at s[i]; i is
 * not modified.
 *
 * - s[i] is a first surrogate: combined with s[i+1] only if i+1<length and
 *   s[i+1] is a second surrogate.
 * - s[i] is a second surrogate: combined with s[i-1] only if i-1>=start and
 *   s[i-1] is a first surrogate.
 * - An unmatched surrogate becomes UTF_ERROR_VALUE when strict is non-zero;
 *   otherwise c keeps the lone surrogate value.
 * - A non-surrogate that fails UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE
 *   when strict is non-zero.
 *
 * A local uint16_t named __c2 is declared inside the expansion and assigned
 * within the conditions.
 */
00574 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00575     (c)=(s)[i]; \
00576     if(UTF_IS_SURROGATE(c)) { \
00577         uint16_t __c2; \
00578         if(UTF_IS_SURROGATE_FIRST(c)) { \
00579             if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
00580                 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00581                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00582             } else if(strict) {\
00583                 /* unmatched first surrogate */ \
00584                 (c)=UTF_ERROR_VALUE; \
00585             } \
00586         } else { \
00587             if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00588                 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00589                 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00590             } else if(strict) {\
00591                 /* unmatched second surrogate */ \
00592                 (c)=UTF_ERROR_VALUE; \
00593             } \
00594         } \
00595     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00596         (c)=UTF_ERROR_VALUE; \
00597     } \
00598 }
00599 
/**
 * Read the code point at s[i] into c and advance i past it.
 * If the unit read is a first surrogate, the next unit is consumed as well
 * and both are combined with UTF16_GET_PAIR_VALUE; that next unit is neither
 * bounds-checked nor verified to be a second surrogate.
 */
00601 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
00602     (c)=(s)[(i)++]; \
00603     if(UTF_IS_FIRST_SURROGATE(c)) { \
00604         (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
00605     } \
00606 }
00607 
/**
 * Write code point c to s at index i and advance i: one unit when
 * (uint32_t)c <= 0xffff, otherwise the two units (c>>10)+0xd7c0 and
 * (c&0x3ff)|0xdc00. No capacity check and no validation of c.
 */
00609 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
00610     if((uint32_t)(c)<=0xffff) { \
00611         (s)[(i)++]=(uint16_t)(c); \
00612     } else { \
00613         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00614         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00615     } \
00616 }
00617 
/**
 * Advance i by one code point: always by one unit, and by a second unit when
 * the unit just passed was a first surrogate. No bounds check.
 */
00619 #define UTF16_FWD_1_UNSAFE(s, i) { \
00620     if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
00621         ++(i); \
00622     } \
00623 }
00624 
/**
 * Apply UTF16_FWD_1_UNSAFE n times. n is evaluated once into a local int32_t
 * named __N; a value <=0 leaves i unchanged.
 */
00626 #define UTF16_FWD_N_UNSAFE(s, i, n) { \
00627     int32_t __N=(n); \
00628     while(__N>0) { \
00629         UTF16_FWD_1_UNSAFE(s, i); \
00630         --__N; \
00631     } \
00632 }
00633 
/**
 * If s[i] is a second surrogate, decrement i by one; otherwise leave it.
 * The preceding unit is not inspected and no lower bound is checked.
 */
00635 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
00636     if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
00637         --(i); \
00638     } \
00639 }
00640 
/**
 * Checked forward read: store the code point at s[i] in c and advance i.
 *
 * The unit at s[i] is always consumed. If it is a first surrogate and the
 * next unit exists (i<length) and is a second surrogate, that unit is
 * consumed too and the pair is combined. An unmatched first surrogate becomes
 * UTF_ERROR_VALUE only when strict is non-zero. Any other unit that fails
 * UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE when strict is non-zero.
 *
 * A local uint16_t named __c2 is declared inside the expansion.
 */
00642 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00643     (c)=(s)[(i)++]; \
00644     if(UTF_IS_FIRST_SURROGATE(c)) { \
00645         uint16_t __c2; \
00646         if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
00647             ++(i); \
00648             (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
00649             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00650         } else if(strict) {\
00651             /* unmatched first surrogate */ \
00652             (c)=UTF_ERROR_VALUE; \
00653         } \
00654     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00655         /* unmatched second surrogate or other non-character */ \
00656         (c)=UTF_ERROR_VALUE; \
00657     } \
00658 }
00659 
/**
 * Append code point c to s at index i and advance i.
 *
 * - c<=0xffff: stored as a single unit.
 * - c<=0x10ffff: stored as two units if i+1<length, otherwise a single
 *   UTF_ERROR_VALUE is stored.
 * - c>0x10ffff: a single UTF_ERROR_VALUE is stored.
 *
 * NOTE(review): length is consulted only for the two-unit case; every
 * single-unit store (including both error stores) writes s[i] without
 * checking i<length - confirm callers guarantee room for one unit.
 */
00661 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
00662     if((uint32_t)(c)<=0xffff) { \
00663         (s)[(i)++]=(uint16_t)(c); \
00664     } else if((uint32_t)(c)<=0x10ffff) { \
00665         if((i)+1<(length)) { \
00666             (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
00667             (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
00668         } else /* not enough space */ { \
00669             (s)[(i)++]=UTF_ERROR_VALUE; \
00670         } \
00671     } else /* c>0x10ffff, write error value */ { \
00672         (s)[(i)++]=UTF_ERROR_VALUE; \
00673     } \
00674 }
00675 
/** Alias: expands to U16_FWD_1 with the same arguments (defined outside this file). */
00677 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
00678 
/** Alias: expands to U16_FWD_N with the same arguments (defined outside this file). */
00680 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
00681 
/** Alias: expands to U16_SET_CP_START with the same arguments (defined outside this file). */
00683 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
00684 
/**
 * Step i back and read the preceding code point into c.
 * If the unit read is a second surrogate, i is decremented once more and the
 * unit found there is combined with it via UTF16_GET_PAIR_VALUE, without a
 * bounds check or a check that it is a first surrogate.
 */
00686 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
00687     (c)=(s)[--(i)]; \
00688     if(UTF_IS_SECOND_SURROGATE(c)) { \
00689         (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
00690     } \
00691 }
00692 
/**
 * Move i back by one code point: always by one unit, and by a second unit
 * when the unit now at s[i] is a second surrogate. No lower bound is checked.
 */
00694 #define UTF16_BACK_1_UNSAFE(s, i) { \
00695     if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
00696         --(i); \
00697     } \
00698 }
00699 
/**
 * Apply UTF16_BACK_1_UNSAFE n times. n is evaluated once into a local int32_t
 * named __N; a value <=0 leaves i unchanged.
 */
00701 #define UTF16_BACK_N_UNSAFE(s, i, n) { \
00702     int32_t __N=(n); \
00703     while(__N>0) { \
00704         UTF16_BACK_1_UNSAFE(s, i); \
00705         --__N; \
00706     } \
00707 }
00708 
/**
 * If the unit just before i (s[i-1]) is a first surrogate, increment i by
 * one; otherwise leave it. s[i] itself is not inspected and no bounds are
 * checked.
 */
00710 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00711     if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
00712         ++(i); \
00713     } \
00714 }
00715 
/**
 * Checked backward read: step i back and store the preceding code point in c.
 *
 * The unit at s[--i] is always consumed. If it is a second surrogate and a
 * previous unit exists (i>start) and is a first surrogate, that unit is
 * consumed too and the pair is combined. An unmatched second surrogate
 * becomes UTF_ERROR_VALUE only when strict is non-zero. Any other unit that
 * fails UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE when strict is non-zero.
 *
 * A local uint16_t named __c2 is declared inside the expansion.
 */
00717 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00718     (c)=(s)[--(i)]; \
00719     if(UTF_IS_SECOND_SURROGATE(c)) { \
00720         uint16_t __c2; \
00721         if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
00722             --(i); \
00723             (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
00724             /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
00725         } else if(strict) {\
00726             /* unmatched second surrogate */ \
00727             (c)=UTF_ERROR_VALUE; \
00728         } \
00729     } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
00730         /* unmatched first surrogate or other non-character */ \
00731         (c)=UTF_ERROR_VALUE; \
00732     } \
00733 }
00734 
/** Alias: expands to U16_BACK_1 with the same arguments (defined outside this file). */
00736 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
00737 
/** Alias: expands to U16_BACK_N with the same arguments (defined outside this file). */
00739 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
00740 
/** Alias: expands to U16_SET_CP_LIMIT with the same arguments (defined outside this file). */
00742 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
00743 
00744 /* Formerly utf32.h --------------------------------------------------------- */
00745 
00746 /*
00747 * Old documentation:
00748 *
00749 *   <p>This file defines macros to deal with UTF-32 code units and code points.
00750 *   Signatures and semantics are the same as for the similarly named macros
00751 *   in utf16.h.
00752 *   utf32.h is included by utf.h after unicode/umachine.h
00753 *   and some common definitions.</p>
00754 *   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
00755 *                  Compound statements (curly braces {}) must be used  for if-else-while...
00756 *                  bodies and all macro statements should be terminated with semicolon.</p>
00757 */
00758 
00759 /* internal definitions ----------------------------------------------------- */
00760 
/**
 * Acceptance test used by the UTF32_*_SAFE macros.
 * With strict equal to zero: true when (uint32_t)c <= 0x10ffff.
 * With strict non-zero: true when UTF_IS_UNICODE_CHAR(c) holds.
 */
00762 #define UTF32_IS_SAFE(c, strict) \
00763     (!(strict) ? \
00764         (uint32_t)(c)<=0x10ffff : \
00765         UTF_IS_UNICODE_CHAR(c))
00766 
00767 /*
00768  * For the semantics of all of these macros, see utf16.h.
00769  * The UTF-32 versions are trivial because any code point is
00770  * encoded using exactly one code unit.
00771  */
00772 
00773 /* single-code point definitions -------------------------------------------- */
00774 
00775 /* classes of code unit values */
00776 
/** Constant 1; the argument is ignored. */
00778 #define UTF32_IS_SINGLE(uchar) 1
00779 
/** Constant 0; the argument is ignored. */
00780 #define UTF32_IS_LEAD(uchar) 0
00781 
/** Constant 0; the argument is ignored. */
00782 #define UTF32_IS_TRAIL(uchar) 0
00783 
00784 /* number of code units per code point */
00785 
/** Constant 0; the argument is ignored. */
00787 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0
00788 
/** Constant 1; the argument is ignored. */
00789 #define UTF32_CHAR_LENGTH(c) 1
00790 
/** Constant 1. */
00791 #define UTF32_MAX_CHAR_LENGTH 1
00792 
00793 /* average number of code units compared to UTF-16 */
00794 
/** Identity: yields its argument unchanged. */
00796 #define UTF32_ARRAY_SIZE(size) (size)
00797 
/** Copy s[i] into c; i is not modified and nothing is checked. */
00799 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
00800     (c)=(s)[i]; \
00801 }
00802 
/**
 * Copy s[i] into c, then replace c with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start and length are accepted but not used.
 */
00804 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
00805     (c)=(s)[i]; \
00806     if(!UTF32_IS_SAFE(c, strict)) { \
00807         (c)=UTF_ERROR_VALUE; \
00808     } \
00809 }
00810 
00811 /* definitions with forward iteration --------------------------------------- */
00812 
/** Copy s[i] into c and increment i. */
00814 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
00815     (c)=(s)[(i)++]; \
00816 }
00817 
/** Store c at s[i] and increment i; c is not validated. */
00819 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
00820     (s)[(i)++]=(c); \
00821 }
00822 
/** Increment i; s is not accessed. */
00824 #define UTF32_FWD_1_UNSAFE(s, i) { \
00825     ++(i); \
00826 }
00827 
/** Add n to i; s is not accessed. */
00829 #define UTF32_FWD_N_UNSAFE(s, i, n) { \
00830     (i)+=(n); \
00831 }
00832 
/** Expands to an empty block: i is left unchanged. */
00834 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
00835 }
00836 
/**
 * Copy s[i] into c and increment i, then replace c with UTF_ERROR_VALUE if it
 * fails UTF32_IS_SAFE(c, strict). length is accepted but not used.
 */
00838 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
00839     (c)=(s)[(i)++]; \
00840     if(!UTF32_IS_SAFE(c, strict)) { \
00841         (c)=UTF_ERROR_VALUE; \
00842     } \
00843 }
00844 
/**
 * Store c at s[i] when (uint32_t)c <= 0x10ffff, otherwise store 0xfffd;
 * i is incremented either way. length is accepted but not used.
 */
00846 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
00847     if((uint32_t)(c)<=0x10ffff) { \
00848         (s)[(i)++]=(c); \
00849     } else /* c>0x10ffff, write 0xfffd */ { \
00850         (s)[(i)++]=0xfffd; \
00851     } \
00852 }
00853 
/** Increment i. length is accepted but not compared against. */
00855 #define UTF32_FWD_1_SAFE(s, i, length) { \
00856     ++(i); \
00857 }
00858 
/** Add n to i and clamp the result to length if it ends up greater. */
00860 #define UTF32_FWD_N_SAFE(s, i, length, n) { \
00861     if(((i)+=(n))>(length)) { \
00862         (i)=(length); \
00863     } \
00864 }
00865 
/** Expands to an empty block: i is left unchanged. */
00867 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
00868 }
00869 
00870 /* definitions with backward iteration -------------------------------------- */
00871 
/** Decrement i and copy s[i] into c. */
00873 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
00874     (c)=(s)[--(i)]; \
00875 }
00876 
/** Decrement i; s is not accessed. */
00878 #define UTF32_BACK_1_UNSAFE(s, i) { \
00879     --(i); \
00880 }
00881 
/** Subtract n from i; s is not accessed. */
00883 #define UTF32_BACK_N_UNSAFE(s, i, n) { \
00884     (i)-=(n); \
00885 }
00886 
/** Expands to an empty block: i is left unchanged. */
00888 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
00889 }
00890 
/**
 * Decrement i and copy s[i] into c, then replace c with UTF_ERROR_VALUE if it
 * fails UTF32_IS_SAFE(c, strict). start is accepted but not used.
 */
00892 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
00893     (c)=(s)[--(i)]; \
00894     if(!UTF32_IS_SAFE(c, strict)) { \
00895         (c)=UTF_ERROR_VALUE; \
00896     } \
00897 }
00898 
/** Decrement i. start is accepted but not compared against. */
00900 #define UTF32_BACK_1_SAFE(s, start, i) { \
00901     --(i); \
00902 }
00903 
/** Subtract n from i and clamp the result to start if it ends up smaller. */
00905 #define UTF32_BACK_N_SAFE(s, start, i, n) { \
00906     (i)-=(n); \
00907     if((i)<(start)) { \
00908         (i)=(start); \
00909     } \
00910 }
00911 
/**
 * Expands to an empty block: i is left unchanged.
 * NOTE(review): this macro takes (s, i, length), whereas
 * UTF8_SET_CHAR_LIMIT_SAFE and UTF16_SET_CHAR_LIMIT_SAFE take
 * (s, start, i, length) - confirm the difference is intended before relying
 * on the three being interchangeable.
 */
00913 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
00914 }
00915 
00916 /* Formerly utf.h, part 2 --------------------------------------------------- */
00917 
/*
 * Encoding-neutral spellings of the explicit _UNSAFE/_SAFE macros.
 * Every UTF_xxx macro in this group expands to the UTF16_xxx macro of the
 * same name, passing its arguments through unchanged.
 */
00923 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
00924 
00926 #define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
00927 
00929 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
00930 
00931 
00933 #define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
00934 
00936 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
00937 
00938 
00940 #define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
00941 
00943 #define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
00944 
00945 
00947 #define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
00948 
00950 #define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
00951 
00952 
00954 #define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
00955 
00957 #define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
00958 
00959 
00961 #define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
00962 
00964 #define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
00965 
00966 
00968 #define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
00969 
00971 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
00972 
00973 
00975 #define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
00976 
00978 #define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
00979 
00980 
00982 #define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
00983 
00985 #define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
00986 
00987 
00989 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
00990 
00992 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00993 
00994 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00995 
/*
 * Default (unsuffixed) macros. Each one forwards its arguments unchanged.
 * All of them expand to U16_* macros, which are defined outside this file,
 * except UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR, which expand to
 * UTF16_NEED_MULTIPLE_UCHAR and UTF16_APPEND_CHAR_SAFE from this file.
 */
01001 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
01002 
01008 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
01009 
01015 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
01016 
01022 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
01023 
01029 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
01030 
01036 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
01037 
01047 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
01048 
01060 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
01061 
01073 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
01074 
01084 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
01085 
01095 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
01096 
01111 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
01112 
01124 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
01125 
01137 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
01138 
01150 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
01151 
01166 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
01167 
01168 #endif /* U_HIDE_DEPRECATED_API */
01169 
01170 #endif
01171 

Generated on Mon Aug 13 07:17:25 2007 for ICU 3.6 by  doxygen 1.5.2