1 // Copyright 2017 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fxcrt/cfx_seekablestreamproxy.h" 8 9 #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ 10 #include <io.h> 11 #endif // _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ 12 13 #include <algorithm> 14 #include <limits> 15 #include <memory> 16 #include <utility> 17 #include <vector> 18 19 #include "core/fxcrt/cfx_memorystream.h" 20 #include "core/fxcrt/fx_codepage.h" 21 #include "core/fxcrt/fx_extension.h" 22 #include "third_party/base/ptr_util.h" 23 #include "third_party/base/stl_util.h" 24 25 namespace { 26 27 // Returns {src bytes consumed, dst bytes produced}. 28 std::pair<size_t, size_t> UTF8Decode(const char* pSrc, 29 size_t srcLen, 30 wchar_t* pDst, 31 size_t dstLen) { 32 ASSERT(pDst && dstLen > 0); 33 34 if (srcLen < 1) 35 return {0, 0}; 36 37 uint32_t dwCode = 0; 38 int32_t iPending = 0; 39 size_t iSrcNum = 0; 40 size_t iDstNum = 0; 41 size_t iIndex = 0; 42 int32_t k = 1; 43 while (iIndex < srcLen) { 44 uint8_t byte = static_cast<uint8_t>(*(pSrc + iIndex)); 45 if (byte < 0x80) { 46 iPending = 0; 47 k = 1; 48 iDstNum++; 49 iSrcNum += k; 50 *pDst++ = byte; 51 if (iDstNum >= dstLen) 52 break; 53 } else if (byte < 0xc0) { 54 if (iPending < 1) 55 break; 56 57 iPending--; 58 dwCode |= (byte & 0x3f) << (iPending * 6); 59 if (iPending == 0) { 60 iDstNum++; 61 iSrcNum += k; 62 *pDst++ = dwCode; 63 if (iDstNum >= dstLen) 64 break; 65 } 66 } else if (byte < 0xe0) { 67 iPending = 1; 68 k = 2; 69 dwCode = (byte & 0x1f) << 6; 70 } else if (byte < 0xf0) { 71 iPending = 2; 72 k = 3; 73 dwCode = (byte & 0x0f) << 12; 74 } else if (byte < 0xf8) { 75 iPending = 3; 76 k = 4; 77 dwCode = (byte & 0x07) << 18; 78 } else if (byte < 0xfc) { 79 iPending = 4; 80 k = 5; 81 dwCode = (byte & 0x03) << 24; 82 } else if (byte < 0xfe) { 83 iPending = 5; 84 k = 6; 85 dwCode = (byte & 0x01) << 30; 86 } else { 87 break; 88 } 89 iIndex++; 90 } 91 return {iSrcNum, iDstNum}; 92 } 93 94 void UTF16ToWChar(void* pBuffer, size_t iLength) { 95 ASSERT(pBuffer); 96 ASSERT(iLength > 0); 97 ASSERT(sizeof(wchar_t) > 2); 98 99 uint16_t* pSrc = static_cast<uint16_t*>(pBuffer); 100 wchar_t* pDst = static_cast<wchar_t*>(pBuffer); 101 for (size_t i = 0; i < iLength; i++) 102 pDst[i] = static_cast<wchar_t>(pSrc[i]); 103 } 104 105 void SwapByteOrder(wchar_t* pStr, size_t iLength) { 106 ASSERT(pStr); 107 108 uint16_t wch; 109 if (sizeof(wchar_t) > 2) { 110 while (iLength-- > 0) { 111 wch = static_cast<uint16_t>(*pStr); 112 wch = (wch >> 8) | (wch << 8); 113 wch &= 0x00FF; 114 *pStr = wch; 115 ++pStr; 116 } 117 return; 118 } 119 120 while (iLength-- > 0) { 121 wch = static_cast<uint16_t>(*pStr); 122 wch = (wch >> 8) | (wch << 8); 123 *pStr = wch; 124 ++pStr; 125 } 126 } 127 128 } // namespace 129 130 #define BOM_MASK 0x00FFFFFF 131 #define BOM_UTF8 0x00BFBBEF 132 #define BOM_UTF16_MASK 0x0000FFFF 133 #define BOM_UTF16_BE 0x0000FFFE 134 #define BOM_UTF16_LE 0x0000FEFF 135 136 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy( 137 const RetainPtr<IFX_SeekableStream>& stream, 138 bool isWriteStream) 139 : m_IsWriteStream(isWriteStream), 140 m_wCodePage(FX_CODEPAGE_DefANSI), 141 m_wBOMLength(0), 142 m_iPosition(0), 143 m_pStream(stream) { 144 ASSERT(m_pStream); 145 146 if (isWriteStream) { 147 m_iPosition = m_pStream->GetSize(); 148 return; 149 } 150 151 Seek(From::Begin, 0); 152 153 uint32_t bom = 0; 154 ReadData(reinterpret_cast<uint8_t*>(&bom), 3); 155 156 bom &= BOM_MASK; 157 if (bom == BOM_UTF8) { 158 m_wBOMLength = 3; 159 m_wCodePage = FX_CODEPAGE_UTF8; 160 } else { 161 bom &= BOM_UTF16_MASK; 162 if (bom == BOM_UTF16_BE) { 163 m_wBOMLength = 2; 164 m_wCodePage = FX_CODEPAGE_UTF16BE; 165 } else if (bom == BOM_UTF16_LE) { 166 m_wBOMLength = 2; 167 m_wCodePage = FX_CODEPAGE_UTF16LE; 168 } else { 169 m_wBOMLength = 0; 170 m_wCodePage = FXSYS_GetACP(); 171 } 172 } 173 174 Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength)); 175 } 176 177 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, size_t size) 178 : CFX_SeekableStreamProxy( 179 pdfium::MakeRetain<CFX_MemoryStream>(data, size, false), 180 false) {} 181 182 CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {} 183 184 void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) { 185 switch (eSeek) { 186 case From::Begin: 187 m_iPosition = iOffset; 188 break; 189 case From::Current: { 190 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition; 191 new_pos += iOffset; 192 m_iPosition = 193 new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max()); 194 } break; 195 } 196 m_iPosition = 197 pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength()); 198 } 199 200 void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) { 201 if (m_wBOMLength > 0) 202 return; 203 m_wCodePage = wCodePage; 204 } 205 206 size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) { 207 ASSERT(pBuffer && iBufferSize > 0); 208 209 if (m_IsWriteStream) 210 return 0; 211 212 iBufferSize = 213 std::min(iBufferSize, static_cast<size_t>(GetLength() - m_iPosition)); 214 if (iBufferSize <= 0) 215 return 0; 216 217 if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize)) 218 return 0; 219 220 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition; 221 new_pos += iBufferSize; 222 m_iPosition = new_pos.ValueOrDefault(m_iPosition); 223 return new_pos.IsValid() ? iBufferSize : 0; 224 } 225 226 size_t CFX_SeekableStreamProxy::ReadString(wchar_t* pStr, 227 size_t iMaxLength, 228 bool* bEOS) { 229 if (!pStr || iMaxLength == 0) 230 return 0; 231 232 if (m_IsWriteStream) 233 return 0; 234 235 if (m_wCodePage == FX_CODEPAGE_UTF16LE || 236 m_wCodePage == FX_CODEPAGE_UTF16BE) { 237 size_t iBytes = iMaxLength * 2; 238 size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes); 239 iMaxLength = iLen / 2; 240 if (sizeof(wchar_t) > 2 && iMaxLength > 0) 241 UTF16ToWChar(pStr, iMaxLength); 242 243 if (m_wCodePage == FX_CODEPAGE_UTF16BE) 244 SwapByteOrder(pStr, iMaxLength); 245 246 } else { 247 FX_FILESIZE pos = GetPosition(); 248 size_t iBytes = 249 std::min(iMaxLength, static_cast<size_t>(GetLength() - pos)); 250 251 if (iBytes > 0) { 252 std::vector<uint8_t> buf(iBytes); 253 254 size_t iLen = ReadData(buf.data(), iBytes); 255 if (m_wCodePage != FX_CODEPAGE_UTF8) 256 return 0; 257 258 size_t iSrc = 0; 259 std::tie(iSrc, iMaxLength) = UTF8Decode( 260 reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength); 261 Seek(From::Current, iSrc - iLen); 262 } else { 263 iMaxLength = 0; 264 } 265 } 266 267 *bEOS = IsEOF(); 268 return iMaxLength; 269 } 270 271 void CFX_SeekableStreamProxy::WriteString(const WideStringView& str) { 272 if (!m_IsWriteStream || str.GetLength() == 0 || 273 m_wCodePage != FX_CODEPAGE_UTF8) { 274 return; 275 } 276 if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition, 277 str.GetLength() * sizeof(wchar_t))) { 278 return; 279 } 280 281 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition; 282 new_pos += str.GetLength() * sizeof(wchar_t); 283 m_iPosition = new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max()); 284 m_iPosition = 285 pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength()); 286 } 287