ICU 4.4.2 4.4.2
unistr.h
Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2010, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 
00035 struct UConverter;          // unicode/ucnv.h
00036 class  StringThreadTest;
00037 
// U_COMPARE_CODE_POINT_ORDER is the constant 0x8000. It is defined here only
// when no earlier header has already defined it.
// NOTE(review): presumably an option bit for the compare functions of the
// headers named below; confirm there before changing the value.
00038 #ifndef U_COMPARE_CODE_POINT_ORDER
00039 /* see also ustring.h and unorm.h */
00045 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00046 #endif
00047 
// Prototype for u_strlen(), declared only while USTRING_H is undefined.
// NOTE(review): USTRING_H is presumably the include guard of
// unicode/ustring.h, making this a stand-in declaration for code that has not
// included that header; confirm.
00048 #ifndef USTRING_H
00049 
00052 U_STABLE int32_t U_EXPORT2
00053 u_strlen(const UChar *s);
00054 #endif
00055 
00056 U_NAMESPACE_BEGIN
00057 
00058 class Locale;               // unicode/locid.h
00059 class StringCharacterIterator;
00060 class BreakIterator;        // unicode/brkiter.h
00061 
00062 /* The <iostream> include has been moved to unicode/ustream.h */
00063 
// Shorthand for the namespace-qualified UnicodeString::kInvariant enumerator,
// the only value of UnicodeString::EInvariant. The fallback definition of
// UNICODE_STRING passes it as the third constructor argument.
00074 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
00075 
// UNICODE_STRING(cs, _length): build a UnicodeString from the literal `cs`
// with explicit length `_length`. Exactly one of four expansions is selected
// at preprocessing time:
//   1. U_DECLARE_UTF16 is defined: `cs` is passed through U_DECLARE_UTF16(),
//      cast to const UChar *, and handed to the
//      UnicodeString(TRUE, text, length) constructor.
//   2. wchar_t and UChar have the same size, and either the charset family is
//      ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: `cs` is
//      token-pasted onto an `L` prefix (so it must be a plain string literal
//      token), cast to const UChar *, and passed to the same constructor.
//   3. UChar is 1 byte and the charset family is ASCII: `cs` is cast to
//      const UChar * directly and passed to the same constructor.
//   4. Otherwise: falls back to UnicodeString(cs, _length, US_INV), i.e. the
//      (const char *, int32_t, EInvariant) constructor.
// NOTE(review): expansions 1-3 presumably make the string refer to the
// literal's storage instead of copying it; confirm against the documentation
// of the (UBool, const UChar *, int32_t) constructor.
00093 #if defined(U_DECLARE_UTF16)
00094 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00095 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00096 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00097 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00098 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
00099 #else
00100 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
00101 #endif
00102 
// UNICODE_STRING_SIMPLE(cs): UNICODE_STRING with -1 supplied as the length.
// NOTE(review): -1 presumably asks the constructor to determine the length
// from the string's terminator; confirm against the constructor documentation.
00116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00117 
00187 class U_COMMON_API UnicodeString : public Replaceable
00188 {
00189 public:
00190 
// Tag enum with a single enumerator. In this class it is the last parameter
// of extract(int32_t, int32_t, char *, int32_t, enum EInvariant) and of
// UnicodeString(const char *, int32_t, enum EInvariant), so passing
// kInvariant is what selects those overloads. The US_INV macro expands to it.
00199   enum EInvariant {
00204     kInvariant
00205   };
00206 
00207   //========================================
00208   // Read-only operations
00209   //========================================
00210 
00211   /* Comparison - bitwise only - for international comparison use collation */
00212 
00220   inline UBool operator== (const UnicodeString& text) const;
00221 
00229   inline UBool operator!= (const UnicodeString& text) const;
00230 
00238   inline UBool operator> (const UnicodeString& text) const;
00239 
00247   inline UBool operator< (const UnicodeString& text) const;
00248 
00256   inline UBool operator>= (const UnicodeString& text) const;
00257 
00265   inline UBool operator<= (const UnicodeString& text) const;
00266 
00278   inline int8_t compare(const UnicodeString& text) const;
00279 
00294   inline int8_t compare(int32_t start,
00295          int32_t length,
00296          const UnicodeString& text) const;
00297 
00315    inline int8_t compare(int32_t start,
00316          int32_t length,
00317          const UnicodeString& srcText,
00318          int32_t srcStart,
00319          int32_t srcLength) const;
00320 
00333   inline int8_t compare(const UChar *srcChars,
00334          int32_t srcLength) const;
00335 
00350   inline int8_t compare(int32_t start,
00351          int32_t length,
00352          const UChar *srcChars) const;
00353 
00371   inline int8_t compare(int32_t start,
00372          int32_t length,
00373          const UChar *srcChars,
00374          int32_t srcStart,
00375          int32_t srcLength) const;
00376 
00394   inline int8_t compareBetween(int32_t start,
00395             int32_t limit,
00396             const UnicodeString& srcText,
00397             int32_t srcStart,
00398             int32_t srcLimit) const;
00399 
00417   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00418 
00438   inline int8_t compareCodePointOrder(int32_t start,
00439                                       int32_t length,
00440                                       const UnicodeString& srcText) const;
00441 
00463    inline int8_t compareCodePointOrder(int32_t start,
00464                                        int32_t length,
00465                                        const UnicodeString& srcText,
00466                                        int32_t srcStart,
00467                                        int32_t srcLength) const;
00468 
00487   inline int8_t compareCodePointOrder(const UChar *srcChars,
00488                                       int32_t srcLength) const;
00489 
00509   inline int8_t compareCodePointOrder(int32_t start,
00510                                       int32_t length,
00511                                       const UChar *srcChars) const;
00512 
00534   inline int8_t compareCodePointOrder(int32_t start,
00535                                       int32_t length,
00536                                       const UChar *srcChars,
00537                                       int32_t srcStart,
00538                                       int32_t srcLength) const;
00539 
00561   inline int8_t compareCodePointOrderBetween(int32_t start,
00562                                              int32_t limit,
00563                                              const UnicodeString& srcText,
00564                                              int32_t srcStart,
00565                                              int32_t srcLimit) const;
00566 
00585   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00586 
00607   inline int8_t caseCompare(int32_t start,
00608          int32_t length,
00609          const UnicodeString& srcText,
00610          uint32_t options) const;
00611 
00634   inline int8_t caseCompare(int32_t start,
00635          int32_t length,
00636          const UnicodeString& srcText,
00637          int32_t srcStart,
00638          int32_t srcLength,
00639          uint32_t options) const;
00640 
00660   inline int8_t caseCompare(const UChar *srcChars,
00661          int32_t srcLength,
00662          uint32_t options) const;
00663 
00684   inline int8_t caseCompare(int32_t start,
00685          int32_t length,
00686          const UChar *srcChars,
00687          uint32_t options) const;
00688 
00711   inline int8_t caseCompare(int32_t start,
00712          int32_t length,
00713          const UChar *srcChars,
00714          int32_t srcStart,
00715          int32_t srcLength,
00716          uint32_t options) const;
00717 
00740   inline int8_t caseCompareBetween(int32_t start,
00741             int32_t limit,
00742             const UnicodeString& srcText,
00743             int32_t srcStart,
00744             int32_t srcLimit,
00745             uint32_t options) const;
00746 
00754   inline UBool startsWith(const UnicodeString& text) const;
00755 
00766   inline UBool startsWith(const UnicodeString& srcText,
00767             int32_t srcStart,
00768             int32_t srcLength) const;
00769 
00778   inline UBool startsWith(const UChar *srcChars,
00779             int32_t srcLength) const;
00780 
00790   inline UBool startsWith(const UChar *srcChars,
00791             int32_t srcStart,
00792             int32_t srcLength) const;
00793 
00801   inline UBool endsWith(const UnicodeString& text) const;
00802 
00813   inline UBool endsWith(const UnicodeString& srcText,
00814           int32_t srcStart,
00815           int32_t srcLength) const;
00816 
00825   inline UBool endsWith(const UChar *srcChars,
00826           int32_t srcLength) const;
00827 
00838   inline UBool endsWith(const UChar *srcChars,
00839           int32_t srcStart,
00840           int32_t srcLength) const;
00841 
00842 
00843   /* Searching - bitwise only */
00844 
00853   inline int32_t indexOf(const UnicodeString& text) const;
00854 
00864   inline int32_t indexOf(const UnicodeString& text,
00865               int32_t start) const;
00866 
00878   inline int32_t indexOf(const UnicodeString& text,
00879               int32_t start,
00880               int32_t length) const;
00881 
00898   inline int32_t indexOf(const UnicodeString& srcText,
00899               int32_t srcStart,
00900               int32_t srcLength,
00901               int32_t start,
00902               int32_t length) const;
00903 
00915   inline int32_t indexOf(const UChar *srcChars,
00916               int32_t srcLength,
00917               int32_t start) const;
00918 
00931   inline int32_t indexOf(const UChar *srcChars,
00932               int32_t srcLength,
00933               int32_t start,
00934               int32_t length) const;
00935 
00952   int32_t indexOf(const UChar *srcChars,
00953               int32_t srcStart,
00954               int32_t srcLength,
00955               int32_t start,
00956               int32_t length) const;
00957 
00965   inline int32_t indexOf(UChar c) const;
00966 
00975   inline int32_t indexOf(UChar32 c) const;
00976 
00985   inline int32_t indexOf(UChar c,
00986               int32_t start) const;
00987 
00997   inline int32_t indexOf(UChar32 c,
00998               int32_t start) const;
00999 
01010   inline int32_t indexOf(UChar c,
01011               int32_t start,
01012               int32_t length) const;
01013 
01025   inline int32_t indexOf(UChar32 c,
01026               int32_t start,
01027               int32_t length) const;
01028 
01037   inline int32_t lastIndexOf(const UnicodeString& text) const;
01038 
01048   inline int32_t lastIndexOf(const UnicodeString& text,
01049               int32_t start) const;
01050 
01062   inline int32_t lastIndexOf(const UnicodeString& text,
01063               int32_t start,
01064               int32_t length) const;
01065 
01082   inline int32_t lastIndexOf(const UnicodeString& srcText,
01083               int32_t srcStart,
01084               int32_t srcLength,
01085               int32_t start,
01086               int32_t length) const;
01087 
01098   inline int32_t lastIndexOf(const UChar *srcChars,
01099               int32_t srcLength,
01100               int32_t start) const;
01101 
01114   inline int32_t lastIndexOf(const UChar *srcChars,
01115               int32_t srcLength,
01116               int32_t start,
01117               int32_t length) const;
01118 
01135   int32_t lastIndexOf(const UChar *srcChars,
01136               int32_t srcStart,
01137               int32_t srcLength,
01138               int32_t start,
01139               int32_t length) const;
01140 
01148   inline int32_t lastIndexOf(UChar c) const;
01149 
01158   inline int32_t lastIndexOf(UChar32 c) const;
01159 
01168   inline int32_t lastIndexOf(UChar c,
01169               int32_t start) const;
01170 
01180   inline int32_t lastIndexOf(UChar32 c,
01181               int32_t start) const;
01182 
01193   inline int32_t lastIndexOf(UChar c,
01194               int32_t start,
01195               int32_t length) const;
01196 
01208   inline int32_t lastIndexOf(UChar32 c,
01209               int32_t start,
01210               int32_t length) const;
01211 
01212 
01213   /* Character access */
01214 
01223   inline UChar charAt(int32_t offset) const;
01224 
01232   inline UChar operator[] (int32_t offset) const;
01233 
01245   inline UChar32 char32At(int32_t offset) const;
01246 
01262   inline int32_t getChar32Start(int32_t offset) const;
01263 
01280   inline int32_t getChar32Limit(int32_t offset) const;
01281 
01332   int32_t moveIndex32(int32_t index, int32_t delta) const;
01333 
01334   /* Substring extraction */
01335 
01351   inline void extract(int32_t start,
01352            int32_t length,
01353            UChar *dst,
01354            int32_t dstStart = 0) const;
01355 
01377   int32_t
01378   extract(UChar *dest, int32_t destCapacity,
01379           UErrorCode &errorCode) const;
01380 
01391   inline void extract(int32_t start,
01392            int32_t length,
01393            UnicodeString& target) const;
01394 
01406   inline void extractBetween(int32_t start,
01407               int32_t limit,
01408               UChar *dst,
01409               int32_t dstStart = 0) const;
01410 
01420   virtual void extractBetween(int32_t start,
01421               int32_t limit,
01422               UnicodeString& target) const;
01423 
01445   int32_t extract(int32_t start,
01446            int32_t startLength,
01447            char *target,
01448            int32_t targetCapacity,
01449            enum EInvariant inv) const;
01450 
01451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01452 
01472   int32_t extract(int32_t start,
01473            int32_t startLength,
01474            char *target,
01475            uint32_t targetLength) const;
01476 
01477 #endif
01478 
01479 #if !UCONFIG_NO_CONVERSION
01480 
01506   inline int32_t extract(int32_t start,
01507                  int32_t startLength,
01508                  char *target,
01509                  const char *codepage = 0) const;
01510 
01540   int32_t extract(int32_t start,
01541            int32_t startLength,
01542            char *target,
01543            uint32_t targetLength,
01544            const char *codepage) const;
01545 
01563   int32_t extract(char *dest, int32_t destCapacity,
01564                   UConverter *cnv,
01565                   UErrorCode &errorCode) const;
01566 
01567 #endif
01568 
01582   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01583 
01594   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01595 
01606   void toUTF8(ByteSink &sink) const;
01607 
01608 #if U_HAVE_STD_STRING
01609 
// Member template: wraps `result` in a StringByteSink<StringClass>, passes
// that sink to toUTF8(ByteSink &), and returns the same `result` reference so
// the call can be used in an expression.
// StringClass must be a type that StringByteSink<> can be instantiated with.
// NOTE(review): whether existing contents of `result` are kept (appended to)
// or replaced is decided by StringByteSink, which is not visible here; confirm
// in unicode/bytestream.h.
01622   template<typename StringClass>
01623   StringClass &toUTF8String(StringClass &result) const {
01624     StringByteSink<StringClass> sbs(&result);  // sink writes into the caller's object
01625     toUTF8(sbs);                               // the public ByteSink overload, not the private char* one
01626     return result;
01627   }
01628 
01629 #endif
01630 
01646   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01647 
01648   /* Length operations */
01649 
01658   inline int32_t length(void) const;
01659 
01673   int32_t
01674   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01675 
01699   UBool
01700   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01701 
01707   inline UBool isEmpty(void) const;
01708 
01718   inline int32_t getCapacity(void) const;
01719 
01720   /* Other operations */
01721 
01727   inline int32_t hashCode(void) const;
01728 
01741   inline UBool isBogus(void) const;
01742 
01743 
01744   //========================================
01745   // Write operations
01746   //========================================
01747 
01748   /* Assignment operations */
01749 
01757   UnicodeString &operator=(const UnicodeString &srcText);
01758 
01779   UnicodeString &fastCopyFrom(const UnicodeString &src);
01780 
01788   inline UnicodeString& operator= (UChar ch);
01789 
01797   inline UnicodeString& operator= (UChar32 ch);
01798 
01810   inline UnicodeString& setTo(const UnicodeString& srcText,
01811                int32_t srcStart);
01812 
01826   inline UnicodeString& setTo(const UnicodeString& srcText,
01827                int32_t srcStart,
01828                int32_t srcLength);
01829 
01838   inline UnicodeString& setTo(const UnicodeString& srcText);
01839 
01848   inline UnicodeString& setTo(const UChar *srcChars,
01849                int32_t srcLength);
01850 
01859   UnicodeString& setTo(UChar srcChar);
01860 
01869   UnicodeString& setTo(UChar32 srcChar);
01870 
01891   UnicodeString &setTo(UBool isTerminated,
01892                        const UChar *text,
01893                        int32_t textLength);
01894 
01914   UnicodeString &setTo(UChar *buffer,
01915                        int32_t buffLength,
01916                        int32_t buffCapacity);
01917 
01958   void setToBogus();
01959 
01967   UnicodeString& setCharAt(int32_t offset,
01968                UChar ch);
01969 
01970 
01971   /* Append operations */
01972 
01980  inline  UnicodeString& operator+= (UChar ch);
01981 
01989  inline  UnicodeString& operator+= (UChar32 ch);
01990 
01999   inline UnicodeString& operator+= (const UnicodeString& srcText);
02000 
02015   inline UnicodeString& append(const UnicodeString& srcText,
02016             int32_t srcStart,
02017             int32_t srcLength);
02018 
02026   inline UnicodeString& append(const UnicodeString& srcText);
02027 
02041   inline UnicodeString& append(const UChar *srcChars,
02042             int32_t srcStart,
02043             int32_t srcLength);
02044 
02053   inline UnicodeString& append(const UChar *srcChars,
02054             int32_t srcLength);
02055 
02062   inline UnicodeString& append(UChar srcChar);
02063 
02070   inline UnicodeString& append(UChar32 srcChar);
02071 
02072 
02073   /* Insert operations */
02074 
02088   inline UnicodeString& insert(int32_t start,
02089             const UnicodeString& srcText,
02090             int32_t srcStart,
02091             int32_t srcLength);
02092 
02101   inline UnicodeString& insert(int32_t start,
02102             const UnicodeString& srcText);
02103 
02117   inline UnicodeString& insert(int32_t start,
02118             const UChar *srcChars,
02119             int32_t srcStart,
02120             int32_t srcLength);
02121 
02131   inline UnicodeString& insert(int32_t start,
02132             const UChar *srcChars,
02133             int32_t srcLength);
02134 
02143   inline UnicodeString& insert(int32_t start,
02144             UChar srcChar);
02145 
02154   inline UnicodeString& insert(int32_t start,
02155             UChar32 srcChar);
02156 
02157 
02158   /* Replace operations */
02159 
02177   UnicodeString& replace(int32_t start,
02178              int32_t length,
02179              const UnicodeString& srcText,
02180              int32_t srcStart,
02181              int32_t srcLength);
02182 
02195   UnicodeString& replace(int32_t start,
02196              int32_t length,
02197              const UnicodeString& srcText);
02198 
02216   UnicodeString& replace(int32_t start,
02217              int32_t length,
02218              const UChar *srcChars,
02219              int32_t srcStart,
02220              int32_t srcLength);
02221 
02234   inline UnicodeString& replace(int32_t start,
02235              int32_t length,
02236              const UChar *srcChars,
02237              int32_t srcLength);
02238 
02250   inline UnicodeString& replace(int32_t start,
02251              int32_t length,
02252              UChar srcChar);
02253 
02265   inline UnicodeString& replace(int32_t start,
02266              int32_t length,
02267              UChar32 srcChar);
02268 
02278   inline UnicodeString& replaceBetween(int32_t start,
02279                 int32_t limit,
02280                 const UnicodeString& srcText);
02281 
02296   inline UnicodeString& replaceBetween(int32_t start,
02297                 int32_t limit,
02298                 const UnicodeString& srcText,
02299                 int32_t srcStart,
02300                 int32_t srcLimit);
02301 
02312   virtual void handleReplaceBetween(int32_t start,
02313                                     int32_t limit,
02314                                     const UnicodeString& text);
02315 
02321   virtual UBool hasMetaData() const;
02322 
02338   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02339 
02340   /* Search and replace operations */
02341 
02350   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02351                 const UnicodeString& newText);
02352 
02364   inline UnicodeString& findAndReplace(int32_t start,
02365                 int32_t length,
02366                 const UnicodeString& oldText,
02367                 const UnicodeString& newText);
02368 
02386   UnicodeString& findAndReplace(int32_t start,
02387                 int32_t length,
02388                 const UnicodeString& oldText,
02389                 int32_t oldStart,
02390                 int32_t oldLength,
02391                 const UnicodeString& newText,
02392                 int32_t newStart,
02393                 int32_t newLength);
02394 
02395 
02396   /* Remove operations */
02397 
02403   inline UnicodeString& remove(void);
02404 
02413   inline UnicodeString& remove(int32_t start,
02414                                int32_t length = (int32_t)INT32_MAX);
02415 
02424   inline UnicodeString& removeBetween(int32_t start,
02425                                       int32_t limit = (int32_t)INT32_MAX);
02426 
02436   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02437 
02438   /* Length operations */
02439 
02451   UBool padLeading(int32_t targetLength,
02452                     UChar padChar = 0x0020);
02453 
02465   UBool padTrailing(int32_t targetLength,
02466                      UChar padChar = 0x0020);
02467 
02474   inline UBool truncate(int32_t targetLength);
02475 
02481   UnicodeString& trim(void);
02482 
02483 
02484   /* Miscellaneous operations */
02485 
02491   inline UnicodeString& reverse(void);
02492 
02501   inline UnicodeString& reverse(int32_t start,
02502              int32_t length);
02503 
02510   UnicodeString& toUpper(void);
02511 
02519   UnicodeString& toUpper(const Locale& locale);
02520 
02527   UnicodeString& toLower(void);
02528 
02536   UnicodeString& toLower(const Locale& locale);
02537 
02538 #if !UCONFIG_NO_BREAK_ITERATION
02539 
02566   UnicodeString &toTitle(BreakIterator *titleIter);
02567 
02595   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02596 
02628   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02629 
02630 #endif
02631 
02643   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02644 
02645   //========================================
02646   // Access to the internal buffer
02647   //========================================
02648 
02692   UChar *getBuffer(int32_t minCapacity);
02693 
02714   void releaseBuffer(int32_t newLength=-1);
02715 
02746   inline const UChar *getBuffer() const;
02747 
02781   inline const UChar *getTerminatedBuffer();
02782 
02783   //========================================
02784   // Constructors
02785   //========================================
02786 
02790   UnicodeString();
02791 
02803   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02804 
02810   UnicodeString(UChar ch);
02811 
02817   UnicodeString(UChar32 ch);
02818 
02825   UnicodeString(const UChar *text);
02826 
02834   UnicodeString(const UChar *text,
02835         int32_t textLength);
02836 
02856   UnicodeString(UBool isTerminated,
02857                 const UChar *text,
02858                 int32_t textLength);
02859 
02878   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02879 
02880 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02881 
02888   UnicodeString(const char *codepageData);
02889 
02896   UnicodeString(const char *codepageData, int32_t dataLength);
02897 
02898 #endif
02899 
02900 #if !UCONFIG_NO_CONVERSION
02901 
02919   UnicodeString(const char *codepageData, const char *codepage);
02920 
02938   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
02939 
02961   UnicodeString(
02962         const char *src, int32_t srcLength,
02963         UConverter *cnv,
02964         UErrorCode &errorCode);
02965 
02966 #endif
02967 
02992   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
02993 
02994 
03000   UnicodeString(const UnicodeString& that);
03001 
03008   UnicodeString(const UnicodeString& src, int32_t srcStart);
03009 
03017   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03018 
03035   virtual Replaceable *clone() const;
03036 
03040   virtual ~UnicodeString();
03041 
03055   static UnicodeString fromUTF8(const StringPiece &utf8);
03056 
03068   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03069 
03070   /* Miscellaneous operations */
03071 
03106   UnicodeString unescape() const;
03107 
03127   UChar32 unescapeAt(int32_t &offset) const;
03128 
03134   static UClassID U_EXPORT2 getStaticClassID();
03135 
03141   virtual UClassID getDynamicClassID() const;
03142 
03143   //========================================
03144   // Implementation methods
03145   //========================================
03146 
03147 protected:
03152   virtual int32_t getLength() const;
03153 
03159   virtual UChar getCharAt(int32_t offset) const;
03160 
03166   virtual UChar32 getChar32At(int32_t offset) const;
03167 
03168 private:
03169   // For char* constructors. Could be made public.
03170   UnicodeString &setToUTF8(const StringPiece &utf8);
03171   // For extract(char*).
03172   // We could make a toUTF8(target, capacity, errorCode) public but not
03173   // this version: New API will be cleaner if we make callers create substrings
03174   // rather than having start+length on every method,
03175   // and it should take a UErrorCode&.
03176   int32_t
03177   toUTF8(int32_t start, int32_t len,
03178          char *target, int32_t capacity) const;
03179 
03180 
03181   inline int8_t
03182   doCompare(int32_t start,
03183            int32_t length,
03184            const UnicodeString& srcText,
03185            int32_t srcStart,
03186            int32_t srcLength) const;
03187 
03188   int8_t doCompare(int32_t start,
03189            int32_t length,
03190            const UChar *srcChars,
03191            int32_t srcStart,
03192            int32_t srcLength) const;
03193 
03194   inline int8_t
03195   doCompareCodePointOrder(int32_t start,
03196                           int32_t length,
03197                           const UnicodeString& srcText,
03198                           int32_t srcStart,
03199                           int32_t srcLength) const;
03200 
03201   int8_t doCompareCodePointOrder(int32_t start,
03202                                  int32_t length,
03203                                  const UChar *srcChars,
03204                                  int32_t srcStart,
03205                                  int32_t srcLength) const;
03206 
03207   inline int8_t
03208   doCaseCompare(int32_t start,
03209                 int32_t length,
03210                 const UnicodeString &srcText,
03211                 int32_t srcStart,
03212                 int32_t srcLength,
03213                 uint32_t options) const;
03214 
03215   int8_t
03216   doCaseCompare(int32_t start,
03217                 int32_t length,
03218                 const UChar *srcChars,
03219                 int32_t srcStart,
03220                 int32_t srcLength,
03221                 uint32_t options) const;
03222 
03223   int32_t doIndexOf(UChar c,
03224             int32_t start,
03225             int32_t length) const;
03226 
03227   int32_t doIndexOf(UChar32 c,
03228                         int32_t start,
03229                         int32_t length) const;
03230 
03231   int32_t doLastIndexOf(UChar c,
03232                 int32_t start,
03233                 int32_t length) const;
03234 
03235   int32_t doLastIndexOf(UChar32 c,
03236                             int32_t start,
03237                             int32_t length) const;
03238 
03239   void doExtract(int32_t start,
03240          int32_t length,
03241          UChar *dst,
03242          int32_t dstStart) const;
03243 
03244   inline void doExtract(int32_t start,
03245          int32_t length,
03246          UnicodeString& target) const;
03247 
03248   inline UChar doCharAt(int32_t offset)  const;
03249 
03250   UnicodeString& doReplace(int32_t start,
03251                int32_t length,
03252                const UnicodeString& srcText,
03253                int32_t srcStart,
03254                int32_t srcLength);
03255 
03256   UnicodeString& doReplace(int32_t start,
03257                int32_t length,
03258                const UChar *srcChars,
03259                int32_t srcStart,
03260                int32_t srcLength);
03261 
03262   UnicodeString& doReverse(int32_t start,
03263                int32_t length);
03264 
03265   // calculate hash code
03266   int32_t doHashCode(void) const;
03267 
03268   // get pointer to start of array
03269   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03270   inline UChar* getArrayStart(void);
03271   inline const UChar* getArrayStart(void) const;
03272 
03273   // A UnicodeString object (not necessarily its current buffer)
03274   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03275   inline UBool isWritable() const;
03276 
03277   // Is the current buffer writable?
03278   inline UBool isBufferWritable() const;
03279 
03280   // None of the following does releaseArray().
03281   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03282   inline void setToEmpty();                  // sets fFlags=kShortString
03283   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03284 
03285   // allocate the array; result may be fStackBuffer
03286   // sets refCount to 1 if appropriate
03287   // sets fArray, fCapacity, and fFlags
03288   // returns boolean for success or failure
03289   UBool allocate(int32_t capacity);
03290 
03291   // release the array if owned
03292   void releaseArray(void);
03293 
03294   // turn a bogus string into an empty one
03295   void unBogus();
03296 
01297   // implements assignment operator, copy constructor, and fastCopyFrom()
03298   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03299 
03300   // Pin start and limit to acceptable values.
03301   inline void pinIndex(int32_t& start) const;
03302   inline void pinIndices(int32_t& start,
03303                          int32_t& length) const;
03304 
03305 #if !UCONFIG_NO_CONVERSION
03306 
03307   /* Internal extract() using UConverter. */
03308   int32_t doExtract(int32_t start, int32_t length,
03309                     char *dest, int32_t destCapacity,
03310                     UConverter *cnv,
03311                     UErrorCode &errorCode) const;
03312 
03313   /*
03314    * Real constructor for converting from codepage data.
03315    * It assumes that it is called with !fRefCounted.
03316    *
03317    * If <code>codepage==0</code>, then the default converter
03318    * is used for the platform encoding.
03319    * If <code>codepage</code> is an empty string (<code>""</code>),
03320    * then a simple conversion is performed on the codepage-invariant
03321    * subset ("invariant characters") of the platform encoding. See utypes.h.
03322    */
03323   void doCodepageCreate(const char *codepageData,
03324                         int32_t dataLength,
03325                         const char *codepage);
03326 
03327   /*
03328    * Worker function for creating a UnicodeString from
03329    * a codepage string using a UConverter.
03330    */
03331   void
03332   doCodepageCreate(const char *codepageData,
03333                    int32_t dataLength,
03334                    UConverter *converter,
03335                    UErrorCode &status);
03336 
03337 #endif
03338 
03339   /*
03340    * This function is called when write access to the array
03341    * is necessary.
03342    *
03343    * We need to make a copy of the array if
03344    * the buffer is read-only, or
03345    * the buffer is refCounted (shared), and refCount>1, or
03346    * the buffer is too small.
03347    *
03348    * Return FALSE if memory could not be allocated.
03349    */
03350   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03351                             int32_t growCapacity = -1,
03352                             UBool doCopyArray = TRUE,
03353                             int32_t **pBufferToDelete = 0,
03354                             UBool forceClone = FALSE);
03355 
03356   // common function for case mappings
03357   UnicodeString &
03358   caseMap(BreakIterator *titleIter,
03359           const char *locale,
03360           uint32_t options,
03361           int32_t toWhichCase);
03362 
03363   // ref counting
03364   void addRef(void);
03365   int32_t removeRef(void);
03366   int32_t refCount(void) const;
03367 
03368   // constants
03369   enum {
03370     // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
03371     // 32-bit pointers: 4+1+1+13*2 = 32 bytes  (vtable pointer + fShortLength + fFlags + stack buffer)
03372     // 64-bit pointers: 8+1+1+15*2 = 40 bytes
03373     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
03374     kInvalidUChar=0xffff, // value returned by doCharAt()/char32At() for an invalid (out-of-range) index
03375     kGrowSize=128, // grow size for this buffer
03376     kInvalidHashCode=0, // invalid hash code
03377     kEmptyHashCode=1, // hash code for empty string
03378 
03379     // bit flag values for fFlags
03380     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03381     kUsingStackBuffer=2,// fArray==fStackBuffer
03382     kRefCounted=4,      // there is a refCount field before the characters in fArray
03383     kBufferIsReadonly=8,// do not write to this buffer
03384     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03385                         // and releaseBuffer(newLength) must be called
03386 
03387     // combined values for convenience: each names the complete fFlags value of one storage mode
03388     kShortString=kUsingStackBuffer,
03389     kLongString=kRefCounted,
03390     kReadonlyAlias=kBufferIsReadonly,
03391     kWritableAlias=0
03392   };
03393 
03394   friend class StringThreadTest;
03395 
03396   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03397   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03398 
03399   /*
03400    * The following are all the class fields that are stored
03401    * in each UnicodeString object.
03402    * Note that UnicodeString has virtual functions,
03403    * therefore there is an implicit vtable pointer
03404    * as the first real field.
03405    * The fields should be aligned such that no padding is
03406    * necessary, mostly by having larger types first.
03407    * On 32-bit machines, the size should be 32 bytes,
03408    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03409    */
03410   // (implicit) *vtable;
03411   int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength (see length()/setLength())
03412   uint8_t   fFlags;         // bit flags: see constants above
03413   union StackBufferOrFields {
03414     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03415     // else fFields is used
03416     UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
03417     struct {
03418       uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
03419       int32_t   fLength;    // number of characters in fArray if >127; else undefined
03420       UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
03421       int32_t   fCapacity;  // capacity of fArray, as reported by getCapacity()
03422     } fFields;
03423   } fUnion;
03424 };
03425 
03434 U_COMMON_API UnicodeString U_EXPORT2
03435 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03436 
03437 //========================================
03438 // Inline members
03439 //========================================
03440 
03441 //========================================
03442 // Privates
03443 //========================================
03444 
03445 inline void
03446 UnicodeString::pinIndex(int32_t& start) const
03447 {
03448   // Clamp start into 0..length(); a value already in range is left alone.
03449   if(start < 0) {
03450     start = 0;
03451     return;
03452   }
03453   if(start > length()) { start = length(); }
03454 }
03455 
03456 inline void
03457 UnicodeString::pinIndices(int32_t& start,
03458                           int32_t& _length) const
03459 {
03460   // Clamp start to 0..length(), then _length to 0..(length() - start).
03461   const int32_t total = length();
03462   if(start < 0) {
03463     start = 0;
03464   } else if(start > total) {
03465     start = total;
03466   }
03467   const int32_t room = total - start; // what is left after the pinned start
03468   if(_length < 0) {
03469     _length = 0;
03470   } else if(_length > room) { _length = room; }
03471 }
03473 
03474 inline UChar*
03475 UnicodeString::getArrayStart()
03476 {
        // The characters are in the in-object buffer when kUsingStackBuffer is set,
        // otherwise in fFields.fArray.
        if(fFlags&kUsingStackBuffer) {
          return fUnion.fStackBuffer;
        }
        return fUnion.fFields.fArray;
      }
03477 
03478 inline const UChar*
03479 UnicodeString::getArrayStart() const
03480 {
        // Same selection as the non-const overload, but yields a read-only pointer.
        if(fFlags&kUsingStackBuffer) {
          return fUnion.fStackBuffer;
        }
        return fUnion.fFields.fArray;
      }
03481 
03482 //========================================
03483 // Read-only implementation methods
03484 //========================================
03485 inline int32_t
03486 UnicodeString::length() const
03487 {
        // A negative fShortLength signals that the real length is kept in fFields.fLength.
        if(fShortLength < 0) {
          return fUnion.fFields.fLength;
        }
        return fShortLength;
      }
03488 
03489 inline int32_t
03490 UnicodeString::getCapacity() const
03491 {
        // The in-object buffer has a fixed size; otherwise the capacity is stored in fFields.
        if(fFlags&kUsingStackBuffer) {
          return US_STACKBUF_SIZE;
        }
        return fUnion.fFields.fCapacity;
      }
03492 
03493 inline int32_t
03494 UnicodeString::hashCode() const
03495 { return doHashCode(); } // the hash computation itself lives in doHashCode()
03496 
03497 inline UBool
03498 UnicodeString::isBogus() const
03499 { return (UBool)(fFlags & kIsBogus); } // kIsBogus is 1, so the masked value is already 0 or 1
03500 
03501 inline UBool
03502 UnicodeString::isWritable() const
03503 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } // TRUE unless bogus or a getBuffer(minCapacity) is open
03504 
03505 inline UBool
03506 UnicodeString::isBufferWritable() const
03507 {
03508   if(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) { return FALSE; } // any of these forbids writing
03509   if(!(fFlags&kRefCounted)) { return TRUE; }                               // not ref-counted: no sharing to check
03510   return (UBool)(refCount()==1);                                           // ref-counted: only if unshared
03511 }
03512 
03513 inline const UChar *
03514 UnicodeString::getBuffer() const {
03515   // No buffer is handed out for a bogus string or while getBuffer(minCapacity) is open.
03516   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03517     return 0;
03518   }
03519   return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray;
03520 }
03523 
03524 //========================================
03525 // Read-only alias methods
03526 //========================================
03527 inline int8_t
03528 UnicodeString::doCompare(int32_t start,
03529               int32_t thisLength,
03530               const UnicodeString& srcText,
03531               int32_t srcStart,
03532               int32_t srcLength) const
03533 {
03534   if(srcText.isBogus()) {
03535     // a bogus source only matches a bogus target: 0 if both are bogus, 1 otherwise
03536     return (int8_t)!isBogus();
03537   }
03538   srcText.pinIndices(srcStart, srcLength); // clamp the source range, then use the UChar* overload
03539   return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03540 }
03541 
03542 inline UBool
03543 UnicodeString::operator== (const UnicodeString& text) const
03544 {
03545   // Two bogus strings are equal; a bogus string never equals a valid one.
03546   if(isBogus()) {
03547     return text.isBogus();
03548   }
03549   if(text.isBogus()) {
03550     return FALSE;
03551   }
03552   const int32_t len = length();
03553   return len == text.length() && doCompare(0, len, text, 0, len) == 0; // contents compared only for equal lengths
03554 }
03555 
03556 inline UBool
03557 UnicodeString::operator!= (const UnicodeString& text) const
03558 { return (! operator==(text)); } // plain negation of operator==
03559 
03560 inline UBool
03561 UnicodeString::operator> (const UnicodeString& text) const
03562 { return doCompare(0, length(), text, 0, text.length()) == 1; } // TRUE only for a doCompare() result of 1
03563 
03564 inline UBool
03565 UnicodeString::operator< (const UnicodeString& text) const
03566 { return doCompare(0, length(), text, 0, text.length()) == -1; } // TRUE only for a doCompare() result of -1
03567 
03568 inline UBool
03569 UnicodeString::operator>= (const UnicodeString& text) const
03570 { return doCompare(0, length(), text, 0, text.length()) != -1; } // TRUE for any result other than -1
03571 
03572 inline UBool
03573 UnicodeString::operator<= (const UnicodeString& text) const
03574 { return doCompare(0, length(), text, 0, text.length()) != 1; } // TRUE for any result other than 1
03575 
03576 inline int8_t
03577 UnicodeString::compare(const UnicodeString& text) const
03578 { return doCompare(0, length(), text, 0, text.length()); } // all of this string against all of text
03579 
03580 inline int8_t
03581 UnicodeString::compare(int32_t start,
03582                int32_t _length,
03583                const UnicodeString& srcText) const
03584 { return doCompare(start, _length, srcText, 0, srcText.length()); } // a range of this string against all of srcText
03585 
03586 inline int8_t
03587 UnicodeString::compare(const UChar *srcChars,
03588                int32_t srcLength) const
03589 { return doCompare(0, length(), srcChars, 0, srcLength); } // all of this string against srcChars from offset 0
03590 
03591 inline int8_t
03592 UnicodeString::compare(int32_t start,
03593                int32_t _length,
03594                const UnicodeString& srcText,
03595                int32_t srcStart,
03596                int32_t srcLength) const
03597 { return doCompare(start, _length, srcText, srcStart, srcLength); } // arguments are passed through unchanged
03598 
03599 inline int8_t
03600 UnicodeString::compare(int32_t start,
03601                int32_t _length,
03602                const UChar *srcChars) const
03603 { return doCompare(start, _length, srcChars, 0, _length); } // note: _length is used as the length of both ranges
03604 
03605 inline int8_t
03606 UnicodeString::compare(int32_t start,
03607                int32_t _length,
03608                const UChar *srcChars,
03609                int32_t srcStart,
03610                int32_t srcLength) const
03611 { return doCompare(start, _length, srcChars, srcStart, srcLength); } // arguments are passed through unchanged
03612 
03613 inline int8_t
03614 UnicodeString::compareBetween(int32_t start,
03615                   int32_t limit,
03616                   const UnicodeString& srcText,
03617                   int32_t srcStart,
03618                   int32_t srcLimit) const
03619 { return doCompare(start, limit - start,
03620            srcText, srcStart, srcLimit - srcStart); } // each (start, limit) pair is turned into (start, length)
03621 
03622 inline int8_t
03623 UnicodeString::doCompareCodePointOrder(int32_t start,
03624                                        int32_t thisLength,
03625                                        const UnicodeString& srcText,
03626                                        int32_t srcStart,
03627                                        int32_t srcLength) const
03628 {
03629   if(srcText.isBogus()) {
03630     // a bogus source only matches a bogus target: 0 if both are bogus, 1 otherwise
03631     return (int8_t)!isBogus();
03632   }
03633   srcText.pinIndices(srcStart, srcLength); // clamp the source range, then use the UChar* overload
03634   return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03635 }
03636 
03637 inline int8_t
03638 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03639 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } // all of this string against all of text
03640 
03641 inline int8_t
03642 UnicodeString::compareCodePointOrder(int32_t start,
03643                                      int32_t _length,
03644                                      const UnicodeString& srcText) const
03645 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } // a range of this string against all of srcText
03646 
03647 inline int8_t
03648 UnicodeString::compareCodePointOrder(const UChar *srcChars,
03649                                      int32_t srcLength) const
03650 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } // all of this string against srcChars from offset 0
03651 
03652 inline int8_t
03653 UnicodeString::compareCodePointOrder(int32_t start,
03654                                      int32_t _length,
03655                                      const UnicodeString& srcText,
03656                                      int32_t srcStart,
03657                                      int32_t srcLength) const
03658 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } // arguments are passed through unchanged
03659 
03660 inline int8_t
03661 UnicodeString::compareCodePointOrder(int32_t start,
03662                                      int32_t _length,
03663                                      const UChar *srcChars) const
03664 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } // note: _length is used as the length of both ranges
03665 
03666 inline int8_t
03667 UnicodeString::compareCodePointOrder(int32_t start,
03668                                      int32_t _length,
03669                                      const UChar *srcChars,
03670                                      int32_t srcStart,
03671                                      int32_t srcLength) const
03672 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } // arguments are passed through unchanged
03673 
03674 inline int8_t
03675 UnicodeString::compareCodePointOrderBetween(int32_t start,
03676                                             int32_t limit,
03677                                             const UnicodeString& srcText,
03678                                             int32_t srcStart,
03679                                             int32_t srcLimit) const
03680 { return doCompareCodePointOrder(start, limit - start,
03681            srcText, srcStart, srcLimit - srcStart); } // each (start, limit) pair is turned into (start, length)
03682 
03683 inline int8_t
03684 UnicodeString::doCaseCompare(int32_t start,
03685                              int32_t thisLength,
03686                              const UnicodeString &srcText,
03687                              int32_t srcStart,
03688                              int32_t srcLength,
03689                              uint32_t options) const
03690 {
03691   if(srcText.isBogus()) {
03692     // a bogus source only matches a bogus target: 0 if both are bogus, 1 otherwise
03693     return (int8_t)!isBogus();
03694   }
03695   srcText.pinIndices(srcStart, srcLength); // clamp the source range, then use the UChar* overload
03696   return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
03697 }
03698 
03699 inline int8_t
03700 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
03701   return doCaseCompare(0, length(), text, 0, text.length(), options); // all of this string against all of text
03702 }
03703 
03704 inline int8_t
03705 UnicodeString::caseCompare(int32_t start,
03706                            int32_t _length,
03707                            const UnicodeString &srcText,
03708                            uint32_t options) const {
03709   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); // a range of this string against all of srcText
03710 }
03711 
03712 inline int8_t
03713 UnicodeString::caseCompare(const UChar *srcChars,
03714                            int32_t srcLength,
03715                            uint32_t options) const {
03716   return doCaseCompare(0, length(), srcChars, 0, srcLength, options); // all of this string against srcChars from offset 0
03717 }
03718 
03719 inline int8_t
03720 UnicodeString::caseCompare(int32_t start,
03721                            int32_t _length,
03722                            const UnicodeString &srcText,
03723                            int32_t srcStart,
03724                            int32_t srcLength,
03725                            uint32_t options) const {
03726   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); // arguments are passed through unchanged
03727 }
03728 
03729 inline int8_t
03730 UnicodeString::caseCompare(int32_t start,
03731                            int32_t _length,
03732                            const UChar *srcChars,
03733                            uint32_t options) const {
03734   return doCaseCompare(start, _length, srcChars, 0, _length, options); // note: _length is used as the length of both ranges
03735 }
03736 
03737 inline int8_t
03738 UnicodeString::caseCompare(int32_t start,
03739                            int32_t _length,
03740                            const UChar *srcChars,
03741                            int32_t srcStart,
03742                            int32_t srcLength,
03743                            uint32_t options) const {
03744   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); // arguments are passed through unchanged
03745 }
03746 
03747 inline int8_t
03748 UnicodeString::caseCompareBetween(int32_t start,
03749                                   int32_t limit,
03750                                   const UnicodeString &srcText,
03751                                   int32_t srcStart,
03752                                   int32_t srcLimit,
03753                                   uint32_t options) const {
03754   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); // (start, limit) pairs become (start, length)
03755 }
03756 
03757 inline int32_t
03758 UnicodeString::indexOf(const UnicodeString& srcText,
03759                int32_t srcStart,
03760                int32_t srcLength,
03761                int32_t start,
03762                int32_t _length) const
03763 {
03764   if(srcText.isBogus()) {
03765     return -1; // a bogus pattern is never found
03766   }
03767   srcText.pinIndices(srcStart, srcLength);
03768   if(srcLength <= 0) {
03769     return -1; // an empty pattern range is reported as not found
03770   }
03771   return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
      }
03772 
03773 inline int32_t
03774 UnicodeString::indexOf(const UnicodeString& text) const
03775 { return indexOf(text, 0, text.length(), 0, length()); } // all of text, searched for in all of this string
03776 
03777 inline int32_t
03778 UnicodeString::indexOf(const UnicodeString& text,
03779                int32_t start) const {
03780   pinIndex(start); // pin first so that length() - start cannot be negative
03781   return indexOf(text, 0, text.length(), start, length() - start);
03782 }
03783 
03784 inline int32_t
03785 UnicodeString::indexOf(const UnicodeString& text,
03786                int32_t start,
03787                int32_t _length) const
03788 { return indexOf(text, 0, text.length(), start, _length); } // all of text, searched for in the given range
03789 
03790 inline int32_t
03791 UnicodeString::indexOf(const UChar *srcChars,
03792                int32_t srcLength,
03793                int32_t start) const {
03794   pinIndex(start); // pin first so that length() - start cannot be negative
03795   return indexOf(srcChars, 0, srcLength, start, length() - start);
03796 }
03797 
03798 inline int32_t
03799 UnicodeString::indexOf(const UChar *srcChars,
03800                int32_t srcLength,
03801                int32_t start,
03802                int32_t _length) const
03803 { return indexOf(srcChars, 0, srcLength, start, _length); } // srcChars is used from offset 0
03804 
03805 inline int32_t
03806 UnicodeString::indexOf(UChar c,
03807                int32_t start,
03808                int32_t _length) const
03809 { return doIndexOf(c, start, _length); } // arguments are passed through unchanged
03810 
03811 inline int32_t
03812 UnicodeString::indexOf(UChar32 c,
03813                int32_t start,
03814                int32_t _length) const
03815 { return doIndexOf(c, start, _length); } // arguments are passed through unchanged
03816 
03817 inline int32_t
03818 UnicodeString::indexOf(UChar c) const
03819 { return doIndexOf(c, 0, length()); } // search the whole string
03820 
03821 inline int32_t
03822 UnicodeString::indexOf(UChar32 c) const
03823 { return indexOf(c, 0, length()); } // search the whole string via the three-argument overload
03824 
03825 inline int32_t
03826 UnicodeString::indexOf(UChar c,
03827                int32_t start) const {
03828   pinIndex(start); // pin first so that length() - start cannot be negative
03829   return doIndexOf(c, start, length() - start);
03830 }
03831 
03832 inline int32_t
03833 UnicodeString::indexOf(UChar32 c,
03834                int32_t start) const {
03835   pinIndex(start); // pin first so that length() - start cannot be negative
03836   return indexOf(c, start, length() - start);
03837 }
03838 
03839 inline int32_t
03840 UnicodeString::lastIndexOf(const UChar *srcChars,
03841                int32_t srcLength,
03842                int32_t start,
03843                int32_t _length) const
03844 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } // srcChars is used from offset 0
03845 
03846 inline int32_t
03847 UnicodeString::lastIndexOf(const UChar *srcChars,
03848                int32_t srcLength,
03849                int32_t start) const {
03850   pinIndex(start); // pin first so that length() - start cannot be negative
03851   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03852 }
03853 
03854 inline int32_t
03855 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03856                int32_t srcStart,
03857                int32_t srcLength,
03858                int32_t start,
03859                int32_t _length) const
03860 {
03861   if(srcText.isBogus()) {
03862     return -1; // a bogus pattern is never found
03863   }
03864   srcText.pinIndices(srcStart, srcLength);
03865   if(srcLength <= 0) {
03866     return -1; // an empty pattern range is reported as not found
03867   }
03868   return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
      }
03869 
03870 inline int32_t
03871 UnicodeString::lastIndexOf(const UnicodeString& text,
03872                int32_t start,
03873                int32_t _length) const
03874 { return lastIndexOf(text, 0, text.length(), start, _length); } // all of text, searched for in the given range
03875 
03876 inline int32_t
03877 UnicodeString::lastIndexOf(const UnicodeString& text,
03878                int32_t start) const {
03879   pinIndex(start); // pin first so that length() - start cannot be negative
03880   return lastIndexOf(text, 0, text.length(), start, length() - start);
03881 }
03882 
03883 inline int32_t
03884 UnicodeString::lastIndexOf(const UnicodeString& text) const
03885 { return lastIndexOf(text, 0, text.length(), 0, length()); } // all of text, searched for in all of this string
03886 
03887 inline int32_t
03888 UnicodeString::lastIndexOf(UChar c,
03889                int32_t start,
03890                int32_t _length) const
03891 { return doLastIndexOf(c, start, _length); } // arguments are passed through unchanged
03892 
03893 inline int32_t
03894 UnicodeString::lastIndexOf(UChar32 c,
03895                int32_t start,
03896                int32_t _length) const {
03897   return doLastIndexOf(c, start, _length); // arguments are passed through unchanged
03898 }
03899 
03900 inline int32_t
03901 UnicodeString::lastIndexOf(UChar c) const
03902 { return doLastIndexOf(c, 0, length()); } // search the whole string
03903 
03904 inline int32_t
03905 UnicodeString::lastIndexOf(UChar32 c) const {
03906   return lastIndexOf(c, 0, length()); // search the whole string via the three-argument overload
03907 }
03908 
03909 inline int32_t
03910 UnicodeString::lastIndexOf(UChar c,
03911                int32_t start) const {
03912   pinIndex(start); // pin first so that length() - start cannot be negative
03913   return doLastIndexOf(c, start, length() - start);
03914 }
03915 
03916 inline int32_t
03917 UnicodeString::lastIndexOf(UChar32 c,
03918                int32_t start) const {
03919   pinIndex(start); // pin first so that length() - start cannot be negative
03920   return lastIndexOf(c, start, length() - start);
03921 }
03922 
03923 inline UBool
03924 UnicodeString::startsWith(const UnicodeString& text) const
03925 { return compare(0, text.length(), text, 0, text.length()) == 0; } // the first text.length() units of this string against all of text
03926 
03927 inline UBool
03928 UnicodeString::startsWith(const UnicodeString& srcText,
03929               int32_t srcStart,
03930               int32_t srcLength) const
03931 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } // NOTE(review): srcLength is used unpinned as the prefix length, unlike endsWith() which pins first
03932 
03933 inline UBool
03934 UnicodeString::startsWith(const UChar *srcChars,
03935               int32_t srcLength) const
03936 {
        // A negative srcLength means that srcChars is NUL-terminated (the UChar*
        // overloads of endsWith() treat it the same way). Resolve it here because
        // srcLength is also passed on as the length of this string's prefix.
        // A NULL srcChars is handed on to doCompare() untouched.
        if(srcLength < 0 && srcChars != 0) {
          srcLength = u_strlen(srcChars);
        }
        return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
      }
03937 
03938 inline UBool
03939 UnicodeString::startsWith(const UChar *srcChars,
03940               int32_t srcStart,
03941               int32_t srcLength) const
03942 {
        // Same as the two-argument overload, except that the NUL-terminated
        // text is measured from srcChars + srcStart.
        if(srcLength < 0 && srcChars != 0) {
          srcLength = u_strlen(srcChars + srcStart);
        }
        return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
      }
03943 
03944 inline UBool
03945 UnicodeString::endsWith(const UnicodeString& text) const
03946 {
03947   const int32_t tailLength = text.length(); // compare the last tailLength units of this string
        return doCompare(length() - tailLength, tailLength, text, 0, tailLength) == 0;
      }
03948 
03949 inline UBool
03950 UnicodeString::endsWith(const UnicodeString& srcText,
03951             int32_t srcStart,
03952             int32_t srcLength) const {
03953   // Pin the source range first: srcLength is then the number of units to match.
03954   srcText.pinIndices(srcStart, srcLength);
03955   const int32_t tailStart = length() - srcLength;
03956   return doCompare(tailStart, srcLength, srcText, srcStart, srcLength) == 0;
      }
03957 
03958 inline UBool
03959 UnicodeString::endsWith(const UChar *srcChars,
03960             int32_t srcLength) const {
03961   // A negative srcLength means that srcChars is NUL-terminated: measure it.
03962   const int32_t tailLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
03963   return doCompare(length() - tailLength, tailLength,
03964                    srcChars, 0, tailLength) == 0;
03965 }
03966 
03967 
03968 inline UBool
03969 UnicodeString::endsWith(const UChar *srcChars,
03970             int32_t srcStart,
03971             int32_t srcLength) const {
03972   // A negative srcLength means NUL-terminated text, measured from srcChars + srcStart.
03973   const int32_t tailLength = (srcLength < 0) ? u_strlen(srcChars + srcStart) : srcLength;
03974   return doCompare(length() - tailLength, tailLength,
03975                    srcChars, srcStart, tailLength) == 0;
03976 }
03978 
03979 //========================================
03980 // replace
03981 //========================================
03982 inline UnicodeString&
03983 UnicodeString::replace(int32_t start,
03984                int32_t _length,
03985                const UnicodeString& srcText)
03986 { return doReplace(start, _length, srcText, 0, srcText.length()); } // the replacement is all of srcText
03987 
03988 inline UnicodeString&
03989 UnicodeString::replace(int32_t start,
03990                int32_t _length,
03991                const UnicodeString& srcText,
03992                int32_t srcStart,
03993                int32_t srcLength)
03994 { return doReplace(start, _length, srcText, srcStart, srcLength); } // arguments are passed through unchanged
03995 
03996 inline UnicodeString&
03997 UnicodeString::replace(int32_t start,
03998                int32_t _length,
03999                const UChar *srcChars,
04000                int32_t srcLength)
04001 { return doReplace(start, _length, srcChars, 0, srcLength); } // srcChars is used from offset 0
04002 
04003 inline UnicodeString&
04004 UnicodeString::replace(int32_t start,
04005                int32_t _length,
04006                const UChar *srcChars,
04007                int32_t srcStart,
04008                int32_t srcLength)
04009 { return doReplace(start, _length, srcChars, srcStart, srcLength); } // arguments are passed through unchanged
04010 
04011 inline UnicodeString&
04012 UnicodeString::replace(int32_t start,
04013                int32_t _length,
04014                UChar srcChar)
04015 { return doReplace(start, _length, &srcChar, 0, 1); } // the by-value parameter serves as a one-unit source array
04016 
04017 inline UnicodeString&
04018 UnicodeString::replace(int32_t start,
04019                int32_t _length,
04020                UChar32 srcChar) {
04021   UChar buffer[U16_MAX_LENGTH];
04022   int32_t count = 0; // number of units written to buffer
04023   UBool isError = FALSE;
04024   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
04025   // isError is not inspected; NOTE(review): presumably count stays 0 on error,
        // in which case the range is removed without a replacement - confirm against U16_APPEND
        return doReplace(start, _length, buffer, 0, count);
04026 }
04027 
04028 inline UnicodeString&
04029 UnicodeString::replaceBetween(int32_t start,
04030                   int32_t limit,
04031                   const UnicodeString& srcText)
04032 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } // (start, limit) becomes (start, length); all of srcText is used
04033 
04034 inline UnicodeString&
04035 UnicodeString::replaceBetween(int32_t start,
04036                   int32_t limit,
04037                   const UnicodeString& srcText,
04038                   int32_t srcStart,
04039                   int32_t srcLimit)
04040 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } // both (start, limit) pairs become (start, length)
04041 
04042 inline UnicodeString&
04043 UnicodeString::findAndReplace(const UnicodeString& oldText,
04044                   const UnicodeString& newText)
04045 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
04046             newText, 0, newText.length()); } // whole string, whole oldText, whole newText
04047 
04048 inline UnicodeString&
04049 UnicodeString::findAndReplace(int32_t start,
04050                   int32_t _length,
04051                   const UnicodeString& oldText,
04052                   const UnicodeString& newText)
04053 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
04054             newText, 0, newText.length()); } // given range of this string, whole oldText, whole newText
04055 
04056 // ============================
04057 // extract
04058 // ============================
04059 inline void
04060 UnicodeString::doExtract(int32_t start,
04061              int32_t _length,
04062              UnicodeString& target) const
04063 { target.replace(0, target.length(), *this, start, _length); } // overwrites all of target with the requested range
04064 
04065 inline void
04066 UnicodeString::extract(int32_t start,
04067                int32_t _length,
04068                UChar *target,
04069                int32_t targetStart) const
04070 { doExtract(start, _length, target, targetStart); } // arguments are passed through unchanged
04071 
04072 inline void
04073 UnicodeString::extract(int32_t start,
04074                int32_t _length,
04075                UnicodeString& target) const
04076 { doExtract(start, _length, target); } // arguments are passed through unchanged
04077 
04078 #if !UCONFIG_NO_CONVERSION
04079 
04080 inline int32_t
04081 UnicodeString::extract(int32_t start,
04082                int32_t _length,
04083                char *dst,
04084                const char *codepage) const
04085 
04086 {
04087   // The caller gives no capacity here, so the five-argument overload is called with
        // the largest dstSize (or 0 for a NULL dst). This dstSize value will be checked explicitly
04088 #if defined(__GNUC__)
04089   // Ticket #7039: Clip length to the maximum valid length to the end of addressable memory given the starting address
04090   // This is only an issue when using GCC and certain optimizations are turned on.
04091   return extract(start, _length, dst, dst!=0 ? ((dst >= (char*)((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);
04092 #else
04093   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
04094 #endif
04095 }
04096 
04097 #endif
04098 
04099 inline void
04100 UnicodeString::extractBetween(int32_t start,
04101                   int32_t limit,
04102                   UChar *dst,
04103                   int32_t dstStart) const {
04104   pinIndex(start); // both ends are clamped to 0..length() before the length is computed
04105   pinIndex(limit);
04106   doExtract(start, limit - start, dst, dstStart);
04107 }
04108 
04109 inline UnicodeString
04110 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
04111     return tempSubString(start, limit - start); // (start, limit) becomes (start, length); no pinning is done here
04112 }
04113 
04114 inline UChar
04115 UnicodeString::doCharAt(int32_t offset) const
04116 {
04117   // The unsigned comparison rejects negative offsets as well as offsets >= length().
04118   if((uint32_t)offset >= (uint32_t)length()) {
04119     return kInvalidUChar;
04120   }
04121   return getArrayStart()[offset];
04122 }
04123 
04124 inline UChar
04125 UnicodeString::charAt(int32_t offset) const
04126 { return doCharAt(offset); } // doCharAt() yields kInvalidUChar for an out-of-range offset
04127 
04128 inline UChar
04129 UnicodeString::operator[] (int32_t offset) const
04130 { return doCharAt(offset); } // same as charAt(): read-only, range-checked access
04131 
04132 inline UChar32
04133 UnicodeString::char32At(int32_t offset) const
04134 {
04135   int32_t len = length();
04136   if((uint32_t)offset >= (uint32_t)len) {
04137     // the unsigned comparison also rejects negative offsets
04138     return kInvalidUChar;
04139   }
04140   const UChar *array = getArrayStart();
04141   UChar32 c;
04142   U16_GET(array, 0, offset, len, c); // the macro stores the code point at offset into c
04143   return c;
04144 }
04145 
04146 inline int32_t
04147 UnicodeString::getChar32Start(int32_t offset) const {
04148   if((uint32_t)offset >= (uint32_t)length()) {
04149     return 0; // every out-of-range offset (negative ones included) maps to 0
04150   }
04151   const UChar *array = getArrayStart();
04152   U16_SET_CP_START(array, 0, offset); // the macro may adjust offset in place
04153   return offset;
04154 }
04156 
04157 inline int32_t
04158 UnicodeString::getChar32Limit(int32_t offset) const {
04159   int32_t len = length();
04160   if((uint32_t)offset >= (uint32_t)len) {
04161     return len; // every out-of-range offset (negative ones included) maps to length()
04162   }
04163   const UChar *array = getArrayStart();
04164   U16_SET_CP_LIMIT(array, 0, offset, len); // the macro may adjust offset in place
04165   return offset;
04166 }
04168 
04169 inline UBool
04170 UnicodeString::isEmpty() const {
04171   return fShortLength == 0; // setLength() keeps every length up to 127, including 0, in fShortLength
04172 }
04173 
04174 //========================================
04175 // Write implementation methods
04176 //========================================
04177 inline void
04178 UnicodeString::setLength(int32_t len) {
04179   if(len > 127) {
04180     // too long for the 8-bit field: flag it with -1 and keep the real length in fFields
04181     fUnion.fFields.fLength = len;
04182     fShortLength = (int8_t)-1;
04183     return;
04184   }
04185   fShortLength = (int8_t)len;
      }
04186 
04187 inline void
04188 UnicodeString::setToEmpty() {
04189   fShortLength = 0;
04190   fFlags = kShortString; // all other flags are dropped; nothing is released here
04191 }
04192 
04193 inline void
04194 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04195   setLength(len);
04196   fUnion.fFields.fArray = array; // only the fields are stored: fFlags is left untouched
04197   fUnion.fFields.fCapacity = capacity;
04198 }
04199 
04200 inline const UChar *
04201 UnicodeString::getTerminatedBuffer() {
04202   // Returns the contents followed by a NUL, or 0 if the string is not
04203   // writable (see isWritable()) or the buffer could not be reallocated.
04204   if(!isWritable()) {
04205     return 0;
04206   }
04207   UChar *array = getArrayStart();
04208   int32_t len = length();
04209   if(len < getCapacity()) {
04210     if(fFlags&kBufferIsReadonly) {
04211       /*
04212        * We must not write to a readonly buffer, and len<capacity alone does not
04213        * prove that array[len] is a NUL (presumably not after the string was
04214        * shortened). Return the alias only if the NUL is really there; otherwise
04215        * fall through to cloneArrayIfNeeded(), which copies a read-only buffer.
04216        */
04217       if(array[len] == 0) {
04218         return array;
04219       }
04220     } else if((fFlags&kRefCounted) == 0 || refCount() == 1) {
04221       /*
04222        * kRefCounted: Do not write the NUL if the buffer is shared.
04223        * That is mostly safe, except when the length of one copy was modified
04224        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04225        * Then the NUL would be written into the middle of another copy's string.
04226        * Here the buffer is writable and not shared, so the NUL is written
04227        * unconditionally. (An earlier version tested for an existing NUL first,
04228        * which generated lots of warnings from tools like valgrind and Purify.)
04229        */
04230       array[len] = 0;
04231       return array;
04232     }
04233   }
04234   // No room, a shared buffer, or an unterminated readonly alias: get a private copy.
04235   if(cloneArrayIfNeeded(len+1)) {
04236     array = getArrayStart();
04237     array[len] = 0;
04238     return array;
04239   }
04240   return 0;
      }
04241 
04242 inline UnicodeString&
04243 UnicodeString::operator= (UChar ch)
04244 { return doReplace(0, length(), &ch, 0, 1); }
04245 
04246 inline UnicodeString&
04247 UnicodeString::operator= (UChar32 ch)
04248 { return replace(0, length(), ch); }
04249 
04250 inline UnicodeString&
04251 UnicodeString::setTo(const UnicodeString& srcText,
04252              int32_t srcStart,
04253              int32_t srcLength)
04254 {
04255   unBogus();
04256   return doReplace(0, length(), srcText, srcStart, srcLength);
04257 }
04258 
04259 inline UnicodeString&
04260 UnicodeString::setTo(const UnicodeString& srcText,
04261              int32_t srcStart)
04262 {
04263   unBogus();
04264   srcText.pinIndex(srcStart);
04265   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04266 }
04267 
04268 inline UnicodeString&
04269 UnicodeString::setTo(const UnicodeString& srcText)
04270 {
04271   unBogus();
04272   return doReplace(0, length(), srcText, 0, srcText.length());
04273 }
04274 
04275 inline UnicodeString&
04276 UnicodeString::setTo(const UChar *srcChars,
04277              int32_t srcLength)
04278 {
04279   unBogus();
04280   return doReplace(0, length(), srcChars, 0, srcLength);
04281 }
04282 
04283 inline UnicodeString&
04284 UnicodeString::setTo(UChar srcChar)
04285 {
04286   unBogus();
04287   return doReplace(0, length(), &srcChar, 0, 1);
04288 }
04289 
04290 inline UnicodeString&
04291 UnicodeString::setTo(UChar32 srcChar)
04292 {
04293   unBogus();
04294   return replace(0, length(), srcChar);
04295 }
04296 
04297 inline UnicodeString&
04298 UnicodeString::append(const UnicodeString& srcText,
04299               int32_t srcStart,
04300               int32_t srcLength)
04301 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04302 
04303 inline UnicodeString&
04304 UnicodeString::append(const UnicodeString& srcText)
04305 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04306 
04307 inline UnicodeString&
04308 UnicodeString::append(const UChar *srcChars,
04309               int32_t srcStart,
04310               int32_t srcLength)
04311 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04312 
04313 inline UnicodeString&
04314 UnicodeString::append(const UChar *srcChars,
04315               int32_t srcLength)
04316 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04317 
04318 inline UnicodeString&
04319 UnicodeString::append(UChar srcChar)
04320 { return doReplace(length(), 0, &srcChar, 0, 1); }
04321 
04322 inline UnicodeString&
04323 UnicodeString::append(UChar32 srcChar) {
04324   UChar buffer[U16_MAX_LENGTH];
04325   int32_t _length = 0;
04326   UBool isError = FALSE;
04327   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
04328   return doReplace(length(), 0, buffer, 0, _length);
04329 }
04330 
04331 inline UnicodeString&
04332 UnicodeString::operator+= (UChar ch)
04333 { return doReplace(length(), 0, &ch, 0, 1); }
04334 
04335 inline UnicodeString&
04336 UnicodeString::operator+= (UChar32 ch) {
04337   return append(ch);
04338 }
04339 
04340 inline UnicodeString&
04341 UnicodeString::operator+= (const UnicodeString& srcText)
04342 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04343 
04344 inline UnicodeString&
04345 UnicodeString::insert(int32_t start,
04346               const UnicodeString& srcText,
04347               int32_t srcStart,
04348               int32_t srcLength)
04349 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04350 
04351 inline UnicodeString&
04352 UnicodeString::insert(int32_t start,
04353               const UnicodeString& srcText)
04354 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04355 
04356 inline UnicodeString&
04357 UnicodeString::insert(int32_t start,
04358               const UChar *srcChars,
04359               int32_t srcStart,
04360               int32_t srcLength)
04361 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04362 
04363 inline UnicodeString&
04364 UnicodeString::insert(int32_t start,
04365               const UChar *srcChars,
04366               int32_t srcLength)
04367 { return doReplace(start, 0, srcChars, 0, srcLength); }
04368 
04369 inline UnicodeString&
04370 UnicodeString::insert(int32_t start,
04371               UChar srcChar)
04372 { return doReplace(start, 0, &srcChar, 0, 1); }
04373 
04374 inline UnicodeString&
04375 UnicodeString::insert(int32_t start,
04376               UChar32 srcChar)
04377 { return replace(start, 0, srcChar); }
04378 
04379 
04380 inline UnicodeString&
04381 UnicodeString::remove()
04382 {
04383   // remove() of a bogus string makes the string empty and non-bogus
04384   // we also un-alias a read-only alias to deal with NUL-termination
04385   // issues with getTerminatedBuffer()
04386   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
04387     setToEmpty();
04388   } else {
04389     fShortLength = 0;
04390   }
04391   return *this;
04392 }
04393 
04394 inline UnicodeString&
04395 UnicodeString::remove(int32_t start,
04396              int32_t _length)
04397 {
04398     if(start <= 0 && _length == INT32_MAX) {
04399         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04400         return remove();
04401     }
04402     return doReplace(start, _length, NULL, 0, 0);
04403 }
04404 
04405 inline UnicodeString&
04406 UnicodeString::removeBetween(int32_t start,
04407                 int32_t limit)
04408 { return doReplace(start, limit - start, NULL, 0, 0); }
04409 
04410 inline UnicodeString &
04411 UnicodeString::retainBetween(int32_t start, int32_t limit) {
04412   truncate(limit);
04413   return doReplace(0, start, NULL, 0, 0);
04414 }
04415 
04416 inline UBool
04417 UnicodeString::truncate(int32_t targetLength)
04418 {
04419   if(isBogus() && targetLength == 0) {
04420     // truncate(0) of a bogus string makes the string empty and non-bogus
04421     unBogus();
04422     return FALSE;
04423   } else if((uint32_t)targetLength < (uint32_t)length()) {
04424     setLength(targetLength);
04425     if(fFlags&kBufferIsReadonly) {
04426       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04427     }
04428     return TRUE;
04429   } else {
04430     return FALSE;
04431   }
04432 }
04433 
04434 inline UnicodeString&
04435 UnicodeString::reverse()
04436 { return doReverse(0, length()); }
04437 
04438 inline UnicodeString&
04439 UnicodeString::reverse(int32_t start,
04440                int32_t _length)
04441 { return doReverse(start, _length); }
04442 
04443 U_NAMESPACE_END
04444 
04445 #endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines