Home | History | Annotate | Download | only in fxcrt
      1 // Copyright 2017 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fxcrt/cfx_seekablestreamproxy.h"
      8 
      9 #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
     10 #include <io.h>
     11 #endif  // _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
     12 
     13 #include <algorithm>
     14 #include <limits>
     15 #include <memory>
     16 #include <utility>
     17 #include <vector>
     18 
     19 #include "core/fxcrt/cfx_memorystream.h"
     20 #include "core/fxcrt/fx_codepage.h"
     21 #include "core/fxcrt/fx_extension.h"
     22 #include "third_party/base/ptr_util.h"
     23 #include "third_party/base/stl_util.h"
     24 
     25 namespace {
     26 
     27 // Returns {src bytes consumed, dst bytes produced}.
     28 std::pair<size_t, size_t> UTF8Decode(const char* pSrc,
     29                                      size_t srcLen,
     30                                      wchar_t* pDst,
     31                                      size_t dstLen) {
     32   ASSERT(pDst && dstLen > 0);
     33 
     34   if (srcLen < 1)
     35     return {0, 0};
     36 
     37   uint32_t dwCode = 0;
     38   int32_t iPending = 0;
     39   size_t iSrcNum = 0;
     40   size_t iDstNum = 0;
     41   size_t iIndex = 0;
     42   int32_t k = 1;
     43   while (iIndex < srcLen) {
     44     uint8_t byte = static_cast<uint8_t>(*(pSrc + iIndex));
     45     if (byte < 0x80) {
     46       iPending = 0;
     47       k = 1;
     48       iDstNum++;
     49       iSrcNum += k;
     50       *pDst++ = byte;
     51       if (iDstNum >= dstLen)
     52         break;
     53     } else if (byte < 0xc0) {
     54       if (iPending < 1)
     55         break;
     56 
     57       iPending--;
     58       dwCode |= (byte & 0x3f) << (iPending * 6);
     59       if (iPending == 0) {
     60         iDstNum++;
     61         iSrcNum += k;
     62         *pDst++ = dwCode;
     63         if (iDstNum >= dstLen)
     64           break;
     65       }
     66     } else if (byte < 0xe0) {
     67       iPending = 1;
     68       k = 2;
     69       dwCode = (byte & 0x1f) << 6;
     70     } else if (byte < 0xf0) {
     71       iPending = 2;
     72       k = 3;
     73       dwCode = (byte & 0x0f) << 12;
     74     } else if (byte < 0xf8) {
     75       iPending = 3;
     76       k = 4;
     77       dwCode = (byte & 0x07) << 18;
     78     } else if (byte < 0xfc) {
     79       iPending = 4;
     80       k = 5;
     81       dwCode = (byte & 0x03) << 24;
     82     } else if (byte < 0xfe) {
     83       iPending = 5;
     84       k = 6;
     85       dwCode = (byte & 0x01) << 30;
     86     } else {
     87       break;
     88     }
     89     iIndex++;
     90   }
     91   return {iSrcNum, iDstNum};
     92 }
     93 
     94 void UTF16ToWChar(void* pBuffer, size_t iLength) {
     95   ASSERT(pBuffer);
     96   ASSERT(iLength > 0);
     97   ASSERT(sizeof(wchar_t) > 2);
     98 
     99   uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
    100   wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
    101   for (size_t i = 0; i < iLength; i++)
    102     pDst[i] = static_cast<wchar_t>(pSrc[i]);
    103 }
    104 
    105 void SwapByteOrder(wchar_t* pStr, size_t iLength) {
    106   ASSERT(pStr);
    107 
    108   uint16_t wch;
    109   if (sizeof(wchar_t) > 2) {
    110     while (iLength-- > 0) {
    111       wch = static_cast<uint16_t>(*pStr);
    112       wch = (wch >> 8) | (wch << 8);
    113       wch &= 0x00FF;
    114       *pStr = wch;
    115       ++pStr;
    116     }
    117     return;
    118   }
    119 
    120   while (iLength-- > 0) {
    121     wch = static_cast<uint16_t>(*pStr);
    122     wch = (wch >> 8) | (wch << 8);
    123     *pStr = wch;
    124     ++pStr;
    125   }
    126 }
    127 
    128 }  // namespace
    129 
// Byte-order-mark constants. The constructor reads the first three bytes of
// the stream into the low-address bytes of a zero-initialized uint32_t and
// compares the result against these values, so they are written as the
// stream bytes appear when loaded on a little-endian host.

// Keeps the three bytes that can hold a UTF-8 BOM.
#define BOM_MASK 0x00FFFFFF
// Stream bytes EF BB BF.
#define BOM_UTF8 0x00BFBBEF
// Keeps the two bytes that can hold a UTF-16 BOM.
#define BOM_UTF16_MASK 0x0000FFFF
// Stream bytes FE FF.
#define BOM_UTF16_BE 0x0000FFFE
// Stream bytes FF FE.
#define BOM_UTF16_LE 0x0000FEFF
    135 
    136 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
    137     const RetainPtr<IFX_SeekableStream>& stream,
    138     bool isWriteStream)
    139     : m_IsWriteStream(isWriteStream),
    140       m_wCodePage(FX_CODEPAGE_DefANSI),
    141       m_wBOMLength(0),
    142       m_iPosition(0),
    143       m_pStream(stream) {
    144   ASSERT(m_pStream);
    145 
    146   if (isWriteStream) {
    147     m_iPosition = m_pStream->GetSize();
    148     return;
    149   }
    150 
    151   Seek(From::Begin, 0);
    152 
    153   uint32_t bom = 0;
    154   ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
    155 
    156   bom &= BOM_MASK;
    157   if (bom == BOM_UTF8) {
    158     m_wBOMLength = 3;
    159     m_wCodePage = FX_CODEPAGE_UTF8;
    160   } else {
    161     bom &= BOM_UTF16_MASK;
    162     if (bom == BOM_UTF16_BE) {
    163       m_wBOMLength = 2;
    164       m_wCodePage = FX_CODEPAGE_UTF16BE;
    165     } else if (bom == BOM_UTF16_LE) {
    166       m_wBOMLength = 2;
    167       m_wCodePage = FX_CODEPAGE_UTF16LE;
    168     } else {
    169       m_wBOMLength = 0;
    170       m_wCodePage = FXSYS_GetACP();
    171     }
    172   }
    173 
    174   Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
    175 }
    176 
// Convenience constructor: wraps the |size| bytes at |data| in a
// CFX_MemoryStream and delegates to the stream constructor with
// isWriteStream == false, i.e. the proxy is opened for reading.
// NOTE(review): the third CFX_MemoryStream argument (false) presumably means
// the memory stream does not take ownership of |data| -- confirm against
// cfx_memorystream.h.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, size_t size)
    : CFX_SeekableStreamProxy(
          pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
          false) {}
    181 
    182 CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {}
    183 
    184 void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
    185   switch (eSeek) {
    186     case From::Begin:
    187       m_iPosition = iOffset;
    188       break;
    189     case From::Current: {
    190       pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
    191       new_pos += iOffset;
    192       m_iPosition =
    193           new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
    194     } break;
    195   }
    196   m_iPosition =
    197       pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
    198 }
    199 
    200 void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
    201   if (m_wBOMLength > 0)
    202     return;
    203   m_wCodePage = wCodePage;
    204 }
    205 
    206 size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
    207   ASSERT(pBuffer && iBufferSize > 0);
    208 
    209   if (m_IsWriteStream)
    210     return 0;
    211 
    212   iBufferSize =
    213       std::min(iBufferSize, static_cast<size_t>(GetLength() - m_iPosition));
    214   if (iBufferSize <= 0)
    215     return 0;
    216 
    217   if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
    218     return 0;
    219 
    220   pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
    221   new_pos += iBufferSize;
    222   m_iPosition = new_pos.ValueOrDefault(m_iPosition);
    223   return new_pos.IsValid() ? iBufferSize : 0;
    224 }
    225 
    226 size_t CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
    227                                            size_t iMaxLength,
    228                                            bool* bEOS) {
    229   if (!pStr || iMaxLength == 0)
    230     return 0;
    231 
    232   if (m_IsWriteStream)
    233     return 0;
    234 
    235   if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
    236       m_wCodePage == FX_CODEPAGE_UTF16BE) {
    237     size_t iBytes = iMaxLength * 2;
    238     size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
    239     iMaxLength = iLen / 2;
    240     if (sizeof(wchar_t) > 2 && iMaxLength > 0)
    241       UTF16ToWChar(pStr, iMaxLength);
    242 
    243     if (m_wCodePage == FX_CODEPAGE_UTF16BE)
    244       SwapByteOrder(pStr, iMaxLength);
    245 
    246   } else {
    247     FX_FILESIZE pos = GetPosition();
    248     size_t iBytes =
    249         std::min(iMaxLength, static_cast<size_t>(GetLength() - pos));
    250 
    251     if (iBytes > 0) {
    252       std::vector<uint8_t> buf(iBytes);
    253 
    254       size_t iLen = ReadData(buf.data(), iBytes);
    255       if (m_wCodePage != FX_CODEPAGE_UTF8)
    256         return 0;
    257 
    258       size_t iSrc = 0;
    259       std::tie(iSrc, iMaxLength) = UTF8Decode(
    260           reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
    261       Seek(From::Current, iSrc - iLen);
    262     } else {
    263       iMaxLength = 0;
    264     }
    265   }
    266 
    267   *bEOS = IsEOF();
    268   return iMaxLength;
    269 }
    270 
    271 void CFX_SeekableStreamProxy::WriteString(const WideStringView& str) {
    272   if (!m_IsWriteStream || str.GetLength() == 0 ||
    273       m_wCodePage != FX_CODEPAGE_UTF8) {
    274     return;
    275   }
    276   if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
    277                              str.GetLength() * sizeof(wchar_t))) {
    278     return;
    279   }
    280 
    281   pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
    282   new_pos += str.GetLength() * sizeof(wchar_t);
    283   m_iPosition = new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
    284   m_iPosition =
    285       pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
    286 }
    287