// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_FXCRT_FX_STRING_H_
      8 #define CORE_FXCRT_FX_STRING_H_
      9 
     10 #include <stdint.h>  // For intptr_t.
     11 
     12 #include <algorithm>
     13 #include <functional>
     14 
     15 #include "core/fxcrt/cfx_retain_ptr.h"
     16 #include "core/fxcrt/cfx_string_c_template.h"
     17 #include "core/fxcrt/cfx_string_data_template.h"
     18 #include "core/fxcrt/fx_memory.h"
     19 #include "core/fxcrt/fx_system.h"
     20 
// Forward declarations: each string class mentions the other in its
// conversion helpers before both are fully defined.
class CFX_ByteString;
class CFX_WideString;

// Byte and wide instantiations of CFX_StringCTemplate. NOTE(review): these
// look like non-owning views (AsStringC() results are documented as being
// invalidated by later modification of the source string) -- confirm against
// cfx_string_c_template.h.
using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>;
using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>;
     26 
     27 #define FXBSTR_ID(c1, c2, c3, c4)                                      \
     28   (((uint32_t)c1 << 24) | ((uint32_t)c2 << 16) | ((uint32_t)c3 << 8) | \
     29    ((uint32_t)c4))
     30 
     31 // A mutable string with shared buffers using copy-on-write semantics that
     32 // avoids the cost of std::string's iterator stability guarantees.
     33 class CFX_ByteString {
     34  public:
     35   using CharType = FX_CHAR;
     36 
     37   CFX_ByteString();
     38   CFX_ByteString(const CFX_ByteString& other);
     39   CFX_ByteString(CFX_ByteString&& other);
     40 
     41   // Deliberately implicit to avoid calling on every string literal.
     42   // NOLINTNEXTLINE(runtime/explicit)
     43   CFX_ByteString(char ch);
     44   // NOLINTNEXTLINE(runtime/explicit)
     45   CFX_ByteString(const FX_CHAR* ptr);
     46 
     47   CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len);
     48   CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len);
     49 
     50   explicit CFX_ByteString(const CFX_ByteStringC& bstrc);
     51   CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2);
     52 
     53   ~CFX_ByteString();
     54 
     55   void clear() { m_pData.Reset(); }
     56 
     57   static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1);
     58   static CFX_ByteString FromUnicode(const CFX_WideString& str);
     59 
     60   // Explicit conversion to C-style string.
     61   // Note: Any subsequent modification of |this| will invalidate the result.
     62   const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; }
     63 
     64   // Explicit conversion to uint8_t*.
     65   // Note: Any subsequent modification of |this| will invalidate the result.
     66   const uint8_t* raw_str() const {
     67     return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String)
     68                    : nullptr;
     69   }
     70 
     71   // Explicit conversion to CFX_ByteStringC.
     72   // Note: Any subsequent modification of |this| will invalidate the result.
     73   CFX_ByteStringC AsStringC() const {
     74     return CFX_ByteStringC(raw_str(), GetLength());
     75   }
     76 
     77   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
     78   bool IsEmpty() const { return !GetLength(); }
     79 
     80   int Compare(const CFX_ByteStringC& str) const;
     81   bool EqualNoCase(const CFX_ByteStringC& str) const;
     82 
     83   bool operator==(const char* ptr) const;
     84   bool operator==(const CFX_ByteStringC& str) const;
     85   bool operator==(const CFX_ByteString& other) const;
     86 
     87   bool operator!=(const char* ptr) const { return !(*this == ptr); }
     88   bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); }
     89   bool operator!=(const CFX_ByteString& other) const {
     90     return !(*this == other);
     91   }
     92 
     93   bool operator<(const CFX_ByteString& str) const;
     94 
     95   const CFX_ByteString& operator=(const FX_CHAR* str);
     96   const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc);
     97   const CFX_ByteString& operator=(const CFX_ByteString& stringSrc);
     98 
     99   const CFX_ByteString& operator+=(FX_CHAR ch);
    100   const CFX_ByteString& operator+=(const FX_CHAR* str);
    101   const CFX_ByteString& operator+=(const CFX_ByteString& str);
    102   const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
    103 
    104   uint8_t GetAt(FX_STRSIZE nIndex) const {
    105     return m_pData ? m_pData->m_String[nIndex] : 0;
    106   }
    107 
    108   uint8_t operator[](FX_STRSIZE nIndex) const {
    109     return m_pData ? m_pData->m_String[nIndex] : 0;
    110   }
    111 
    112   void SetAt(FX_STRSIZE nIndex, FX_CHAR ch);
    113   FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch);
    114   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
    115 
    116   void Format(const FX_CHAR* lpszFormat, ...);
    117   void FormatV(const FX_CHAR* lpszFormat, va_list argList);
    118 
    119   void Reserve(FX_STRSIZE len);
    120   FX_CHAR* GetBuffer(FX_STRSIZE len);
    121   void ReleaseBuffer(FX_STRSIZE len = -1);
    122 
    123   CFX_ByteString Mid(FX_STRSIZE first) const;
    124   CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
    125   CFX_ByteString Left(FX_STRSIZE count) const;
    126   CFX_ByteString Right(FX_STRSIZE count) const;
    127 
    128   FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const;
    129   FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const;
    130   FX_STRSIZE ReverseFind(FX_CHAR ch) const;
    131 
    132   void MakeLower();
    133   void MakeUpper();
    134 
    135   void TrimRight();
    136   void TrimRight(FX_CHAR chTarget);
    137   void TrimRight(const CFX_ByteStringC& lpszTargets);
    138 
    139   void TrimLeft();
    140   void TrimLeft(FX_CHAR chTarget);
    141   void TrimLeft(const CFX_ByteStringC& lpszTargets);
    142 
    143   FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld,
    144                      const CFX_ByteStringC& lpszNew);
    145 
    146   FX_STRSIZE Remove(FX_CHAR ch);
    147 
    148   CFX_WideString UTF8Decode() const;
    149 
    150   uint32_t GetID(FX_STRSIZE start_pos = 0) const;
    151 
    152 #define FXFORMAT_SIGNED 1
    153 #define FXFORMAT_HEX 2
    154 #define FXFORMAT_CAPITAL 4
    155 
    156   static CFX_ByteString FormatInteger(int i, uint32_t flags = 0);
    157   static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0);
    158 
    159  protected:
    160   using StringData = CFX_StringDataTemplate<FX_CHAR>;
    161 
    162   void ReallocBeforeWrite(FX_STRSIZE nNewLen);
    163   void AllocBeforeWrite(FX_STRSIZE nNewLen);
    164   void AllocCopy(CFX_ByteString& dest,
    165                  FX_STRSIZE nCopyLen,
    166                  FX_STRSIZE nCopyIndex) const;
    167   void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen);
    168   void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
    169 
    170   CFX_RetainPtr<StringData> m_pData;
    171 
    172   friend class fxcrt_ByteStringConcat_Test;
    173   friend class fxcrt_ByteStringPool_Test;
    174 };
    175 
    176 inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
    177   return rhs == lhs;
    178 }
    179 inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
    180   return rhs == lhs;
    181 }
    182 inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) {
    183   return rhs != lhs;
    184 }
    185 inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
    186   return rhs != lhs;
    187 }
    188 
    189 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
    190                                 const CFX_ByteStringC& str2) {
    191   return CFX_ByteString(str1, str2);
    192 }
    193 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
    194                                 const FX_CHAR* str2) {
    195   return CFX_ByteString(str1, str2);
    196 }
    197 inline CFX_ByteString operator+(const FX_CHAR* str1,
    198                                 const CFX_ByteStringC& str2) {
    199   return CFX_ByteString(str1, str2);
    200 }
    201 inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) {
    202   return CFX_ByteString(str1, CFX_ByteStringC(ch));
    203 }
    204 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) {
    205   return CFX_ByteString(ch, str2);
    206 }
    207 inline CFX_ByteString operator+(const CFX_ByteString& str1,
    208                                 const CFX_ByteString& str2) {
    209   return CFX_ByteString(str1.AsStringC(), str2.AsStringC());
    210 }
    211 inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) {
    212   return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch));
    213 }
    214 inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) {
    215   return CFX_ByteString(ch, str2.AsStringC());
    216 }
    217 inline CFX_ByteString operator+(const CFX_ByteString& str1,
    218                                 const FX_CHAR* str2) {
    219   return CFX_ByteString(str1.AsStringC(), str2);
    220 }
    221 inline CFX_ByteString operator+(const FX_CHAR* str1,
    222                                 const CFX_ByteString& str2) {
    223   return CFX_ByteString(str1, str2.AsStringC());
    224 }
    225 inline CFX_ByteString operator+(const CFX_ByteString& str1,
    226                                 const CFX_ByteStringC& str2) {
    227   return CFX_ByteString(str1.AsStringC(), str2);
    228 }
    229 inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
    230                                 const CFX_ByteString& str2) {
    231   return CFX_ByteString(str1, str2.AsStringC());
    232 }
    233 
    234 // A mutable string with shared buffers using copy-on-write semantics that
    235 // avoids the cost of std::string's iterator stability guarantees.
    236 class CFX_WideString {
    237  public:
    238   using CharType = FX_WCHAR;
    239 
    240   CFX_WideString();
    241   CFX_WideString(const CFX_WideString& other);
    242   CFX_WideString(CFX_WideString&& other);
    243 
    244   // Deliberately implicit to avoid calling on every string literal.
    245   // NOLINTNEXTLINE(runtime/explicit)
    246   CFX_WideString(FX_WCHAR ch);
    247   // NOLINTNEXTLINE(runtime/explicit)
    248   CFX_WideString(const FX_WCHAR* ptr);
    249 
    250   CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len);
    251 
    252   explicit CFX_WideString(const CFX_WideStringC& str);
    253   CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2);
    254 
    255   ~CFX_WideString();
    256 
    257   static CFX_WideString FromLocal(const CFX_ByteStringC& str);
    258   static CFX_WideString FromCodePage(const CFX_ByteStringC& str,
    259                                      uint16_t codepage);
    260 
    261   static CFX_WideString FromUTF8(const CFX_ByteStringC& str);
    262   static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len);
    263 
    264   static FX_STRSIZE WStringLength(const unsigned short* str);
    265 
    266   // Explicit conversion to C-style wide string.
    267   // Note: Any subsequent modification of |this| will invalidate the result.
    268   const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; }
    269 
    270   // Explicit conversion to CFX_WideStringC.
    271   // Note: Any subsequent modification of |this| will invalidate the result.
    272   CFX_WideStringC AsStringC() const {
    273     return CFX_WideStringC(c_str(), GetLength());
    274   }
    275 
    276   void clear() { m_pData.Reset(); }
    277 
    278   FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
    279   bool IsEmpty() const { return !GetLength(); }
    280 
    281   const CFX_WideString& operator=(const FX_WCHAR* str);
    282   const CFX_WideString& operator=(const CFX_WideString& stringSrc);
    283   const CFX_WideString& operator=(const CFX_WideStringC& stringSrc);
    284 
    285   const CFX_WideString& operator+=(const FX_WCHAR* str);
    286   const CFX_WideString& operator+=(FX_WCHAR ch);
    287   const CFX_WideString& operator+=(const CFX_WideString& str);
    288   const CFX_WideString& operator+=(const CFX_WideStringC& str);
    289 
    290   bool operator==(const wchar_t* ptr) const;
    291   bool operator==(const CFX_WideStringC& str) const;
    292   bool operator==(const CFX_WideString& other) const;
    293 
    294   bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
    295   bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); }
    296   bool operator!=(const CFX_WideString& other) const {
    297     return !(*this == other);
    298   }
    299 
    300   bool operator<(const CFX_WideString& str) const;
    301 
    302   FX_WCHAR GetAt(FX_STRSIZE nIndex) const {
    303     return m_pData ? m_pData->m_String[nIndex] : 0;
    304   }
    305 
    306   FX_WCHAR operator[](FX_STRSIZE nIndex) const {
    307     return m_pData ? m_pData->m_String[nIndex] : 0;
    308   }
    309 
    310   void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch);
    311 
    312   int Compare(const FX_WCHAR* str) const;
    313   int Compare(const CFX_WideString& str) const;
    314   int CompareNoCase(const FX_WCHAR* str) const;
    315 
    316   CFX_WideString Mid(FX_STRSIZE first) const;
    317   CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
    318   CFX_WideString Left(FX_STRSIZE count) const;
    319   CFX_WideString Right(FX_STRSIZE count) const;
    320 
    321   FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch);
    322   FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
    323 
    324   void Format(const FX_WCHAR* lpszFormat, ...);
    325   void FormatV(const FX_WCHAR* lpszFormat, va_list argList);
    326 
    327   void MakeLower();
    328   void MakeUpper();
    329 
    330   void TrimRight();
    331   void TrimRight(FX_WCHAR chTarget);
    332   void TrimRight(const CFX_WideStringC& pTargets);
    333 
    334   void TrimLeft();
    335   void TrimLeft(FX_WCHAR chTarget);
    336   void TrimLeft(const CFX_WideStringC& pTargets);
    337 
    338   void Reserve(FX_STRSIZE len);
    339   FX_WCHAR* GetBuffer(FX_STRSIZE len);
    340   void ReleaseBuffer(FX_STRSIZE len = -1);
    341 
    342   int GetInteger() const;
    343   FX_FLOAT GetFloat() const;
    344 
    345   FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const;
    346   FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const;
    347   FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew);
    348   FX_STRSIZE Remove(FX_WCHAR ch);
    349 
    350   CFX_ByteString UTF8Encode() const;
    351   CFX_ByteString UTF16LE_Encode() const;
    352 
    353  protected:
    354   using StringData = CFX_StringDataTemplate<FX_WCHAR>;
    355 
    356   void ReallocBeforeWrite(FX_STRSIZE nLen);
    357   void AllocBeforeWrite(FX_STRSIZE nLen);
    358   void AllocCopy(CFX_WideString& dest,
    359                  FX_STRSIZE nCopyLen,
    360                  FX_STRSIZE nCopyIndex) const;
    361   void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen);
    362   void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
    363 
    364   CFX_RetainPtr<StringData> m_pData;
    365 
    366   friend class fxcrt_WideStringConcatInPlace_Test;
    367   friend class fxcrt_WideStringPool_Test;
    368 };
    369 
    370 inline CFX_WideString operator+(const CFX_WideStringC& str1,
    371                                 const CFX_WideStringC& str2) {
    372   return CFX_WideString(str1, str2);
    373 }
    374 inline CFX_WideString operator+(const CFX_WideStringC& str1,
    375                                 const FX_WCHAR* str2) {
    376   return CFX_WideString(str1, str2);
    377 }
    378 inline CFX_WideString operator+(const FX_WCHAR* str1,
    379                                 const CFX_WideStringC& str2) {
    380   return CFX_WideString(str1, str2);
    381 }
    382 inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) {
    383   return CFX_WideString(str1, CFX_WideStringC(ch));
    384 }
    385 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) {
    386   return CFX_WideString(ch, str2);
    387 }
    388 inline CFX_WideString operator+(const CFX_WideString& str1,
    389                                 const CFX_WideString& str2) {
    390   return CFX_WideString(str1.AsStringC(), str2.AsStringC());
    391 }
    392 inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) {
    393   return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch));
    394 }
    395 inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) {
    396   return CFX_WideString(ch, str2.AsStringC());
    397 }
    398 inline CFX_WideString operator+(const CFX_WideString& str1,
    399                                 const FX_WCHAR* str2) {
    400   return CFX_WideString(str1.AsStringC(), str2);
    401 }
    402 inline CFX_WideString operator+(const FX_WCHAR* str1,
    403                                 const CFX_WideString& str2) {
    404   return CFX_WideString(str1, str2.AsStringC());
    405 }
    406 inline CFX_WideString operator+(const CFX_WideString& str1,
    407                                 const CFX_WideStringC& str2) {
    408   return CFX_WideString(str1.AsStringC(), str2);
    409 }
    410 inline CFX_WideString operator+(const CFX_WideStringC& str1,
    411                                 const CFX_WideString& str2) {
    412   return CFX_WideString(str1, str2.AsStringC());
    413 }
    414 inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) {
    415   return rhs == lhs;
    416 }
    417 inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
    418   return rhs == lhs;
    419 }
    420 inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) {
    421   return rhs != lhs;
    422 }
    423 inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
    424   return rhs != lhs;
    425 }
    426 
// Produces a CFX_ByteString from the wide string |wsStr|. NOTE(review): the
// name says UTF-8 encoding; the definition is not in this header -- confirm.
CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr);
// Produces an FX_FLOAT from the byte string |str|. NOTE(review): presumably
// atof-style text parsing; accepted syntax and error behaviour are defined by
// the implementation -- confirm.
FX_FLOAT FX_atof(const CFX_ByteStringC& str);
    429 inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
    430   return FX_atof(FX_UTF8Encode(wsStr).c_str());
    431 }
// Converts |str| to a number stored through |pData|. NOTE(review): the type
// written to |pData| and the meaning of the bool result are decided by the
// implementation, which is not visible here -- confirm before use.
bool FX_atonum(const CFX_ByteStringC& str, void* pData);
// Writes a textual form of |f| into |buf| and returns an FX_STRSIZE.
// NOTE(review): presumably the number of characters written; the required
// size of |buf| is not stated in this header -- confirm.
FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
    434 
// 32-bit hash codes for byte (A) and wide (W) strings. |bIgnoreCase| selects
// case handling; the std::hash specializations in this header pass false.
// NOTE(review): the hash algorithm itself is defined elsewhere.
uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase);
    437 
    438 namespace std {
    439 
    440 template <>
    441 struct hash<CFX_ByteString> {
    442   std::size_t operator()(const CFX_ByteString& str) const {
    443     return FX_HashCode_GetA(str.AsStringC(), false);
    444   }
    445 };
    446 
    447 template <>
    448 struct hash<CFX_WideString> {
    449   std::size_t operator()(const CFX_WideString& str) const {
    450     return FX_HashCode_GetW(str.AsStringC(), false);
    451   }
    452 };
    453 
    454 }  // namespace std
    455 
// Explicit instantiation declarations for std::hash over the two string
// classes. NOTE(review): both are already explicitly specialized in this
// header, so these declarations may be redundant -- confirm intent.
extern template struct std::hash<CFX_ByteString>;
extern template struct std::hash<CFX_WideString>;
    458 
    459 #endif  // CORE_FXCRT_FX_STRING_H_
    460