1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "../../../include/fpdfapi/fpdf_parser.h" 8 extern const FX_LPCSTR _PDF_CharType = 9 "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR" 10 "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR" 11 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" 12 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" 13 "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" 14 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" 15 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" 16 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW"; 17 #ifndef MAX_PATH 18 #define MAX_PATH 4096 19 #endif 20 CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize) 21 { 22 m_pData = pData; 23 m_dwSize = dwSize; 24 m_dwCurPos = 0; 25 } 26 CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str) 27 { 28 m_pData = str; 29 m_dwSize = str.GetLength(); 30 m_dwCurPos = 0; 31 } 32 void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type) 33 { 34 pStart = NULL; 35 dwSize = 0; 36 type = PDFWORD_EOF; 37 FX_BYTE ch; 38 char chartype; 39 while (1) { 40 if (m_dwSize <= m_dwCurPos) { 41 return; 42 } 43 ch = m_pData[m_dwCurPos++]; 44 chartype = _PDF_CharType[ch]; 45 while (chartype == 'W') { 46 if (m_dwSize <= m_dwCurPos) { 47 return; 48 } 49 ch = m_pData[m_dwCurPos++]; 50 chartype = _PDF_CharType[ch]; 51 } 52 if (ch != '%') { 53 break; 54 } 55 while (1) { 56 if (m_dwSize <= m_dwCurPos) { 57 return; 58 } 59 ch = m_pData[m_dwCurPos++]; 60 if (ch == '\r' || ch == '\n') { 61 break; 62 } 63 } 64 chartype = _PDF_CharType[ch]; 65 } 66 FX_DWORD start_pos = m_dwCurPos - 1; 67 pStart = m_pData + start_pos; 68 if (chartype == 'D') { 69 if (ch == '/') { 70 while (1) { 71 if (m_dwSize <= m_dwCurPos) { 72 return; 73 } 74 ch = m_pData[m_dwCurPos++]; 75 chartype = _PDF_CharType[ch]; 76 if (chartype != 'R' && chartype != 'N') { 77 m_dwCurPos --; 78 dwSize = m_dwCurPos - start_pos; 79 type = PDFWORD_NAME; 80 return; 81 } 82 } 83 } else { 84 type = PDFWORD_DELIMITER; 85 dwSize = 1; 86 if (ch == '<') { 87 if (m_dwSize <= m_dwCurPos) { 88 return; 89 } 90 ch = m_pData[m_dwCurPos++]; 91 if (ch == '<') { 92 dwSize = 2; 93 } else { 94 m_dwCurPos --; 95 } 96 } else if (ch == '>') { 97 if (m_dwSize <= m_dwCurPos) { 98 return; 99 } 100 ch = m_pData[m_dwCurPos++]; 101 if (ch == '>') { 102 dwSize = 2; 103 } else { 104 m_dwCurPos --; 105 } 106 } 107 } 108 return; 109 } 110 type = PDFWORD_NUMBER; 111 dwSize = 1; 112 while (1) { 113 if (chartype != 'N') { 114 type = PDFWORD_TEXT; 115 } 116 if (m_dwSize <= m_dwCurPos) { 117 return; 118 } 119 ch = m_pData[m_dwCurPos++]; 120 chartype = _PDF_CharType[ch]; 121 if (chartype == 'D' || chartype == 'W') { 122 m_dwCurPos --; 123 break; 124 } 125 dwSize ++; 126 } 127 } 128 CFX_ByteStringC CPDF_SimpleParser::GetWord() 129 { 130 FX_LPCBYTE pStart; 131 FX_DWORD dwSize; 132 int type; 133 ParseWord(pStart, dwSize, type); 134 if (dwSize == 1 && pStart[0] == '<') { 135 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { 136 m_dwCurPos ++; 137 } 138 if (m_dwCurPos < m_dwSize) { 139 m_dwCurPos ++; 140 } 141 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); 142 } else if (dwSize == 1 && pStart[0] == '(') { 143 int level = 1; 144 while (m_dwCurPos < m_dwSize) { 145 if (m_pData[m_dwCurPos] == ')') { 146 level --; 147 if (level == 0) { 148 break; 149 } 150 } 151 if (m_pData[m_dwCurPos] == '\\') { 152 if (m_dwSize <= m_dwCurPos) { 153 break; 154 } 155 m_dwCurPos ++; 156 } else if (m_pData[m_dwCurPos] == '(') { 157 level ++; 158 } 159 if (m_dwSize <= m_dwCurPos) { 160 break; 161 } 162 m_dwCurPos ++; 163 } 164 if (m_dwCurPos < m_dwSize) { 165 m_dwCurPos ++; 166 } 167 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); 168 } 169 return CFX_ByteStringC(pStart, dwSize); 170 } 171 FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token) 172 { 173 int token_len = token.GetLength(); 174 while (m_dwCurPos < m_dwSize - token_len) { 175 if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) { 176 break; 177 } 178 m_dwCurPos ++; 179 } 180 if (m_dwCurPos == m_dwSize - token_len) { 181 return FALSE; 182 } 183 m_dwCurPos += token_len; 184 return TRUE; 185 } 186 FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token) 187 { 188 while (1) { 189 CFX_ByteStringC word = GetWord(); 190 if (word.IsEmpty()) { 191 return FALSE; 192 } 193 if (word == token) { 194 return TRUE; 195 } 196 } 197 return FALSE; 198 } 199 FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token, 200 FX_DWORD& start_pos, FX_DWORD& end_pos) 201 { 202 if (!start_token.IsEmpty()) { 203 if (!SkipWord(start_token)) { 204 return FALSE; 205 } 206 start_pos = m_dwCurPos; 207 } 208 while (1) { 209 end_pos = m_dwCurPos; 210 CFX_ByteStringC word = GetWord(); 211 if (word.IsEmpty()) { 212 return FALSE; 213 } 214 if (word == end_token) { 215 return TRUE; 216 } 217 } 218 return FALSE; 219 } 220 FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams) 221 { 222 nParams ++; 223 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); 224 int buf_index = 0; 225 int buf_count = 0; 226 while (1) { 227 pBuf[buf_index++] = m_dwCurPos; 228 if (buf_index == nParams) { 229 buf_index = 0; 230 } 231 buf_count ++; 232 if (buf_count > nParams) { 233 buf_count = nParams; 234 } 235 CFX_ByteStringC word = GetWord(); 236 if (word.IsEmpty()) { 237 FX_Free(pBuf); 238 return FALSE; 239 } 240 if (word == token) { 241 if (buf_count < nParams) { 242 continue; 243 } 244 m_dwCurPos = pBuf[buf_index]; 245 FX_Free(pBuf); 246 return TRUE; 247 } 248 } 249 return FALSE; 250 } 251 static int _hex2dec(char ch) 252 { 253 if (ch >= '0' && ch <= '9') { 254 return ch - '0'; 255 } 256 if (ch >= 'a' && ch <= 'f') { 257 return ch - 'a' + 10; 258 } 259 if (ch >= 'A' && ch <= 'F') { 260 return ch - 'A' + 10; 261 } 262 return 0; 263 } 264 CFX_ByteString PDF_NameDecode(FX_BSTR bstr) 265 { 266 int size = bstr.GetLength(); 267 FX_LPCSTR pSrc = bstr.GetCStr(); 268 if (FXSYS_memchr(pSrc, '#', size) == NULL) { 269 return bstr; 270 } 271 CFX_ByteString result; 272 FX_LPSTR pDestStart = result.GetBuffer(size); 273 FX_LPSTR pDest = pDestStart; 274 for (int i = 0; i < size; i ++) { 275 if (pSrc[i] == '#' && i < size - 2) { 276 *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]); 277 i += 2; 278 } else { 279 *pDest ++ = pSrc[i]; 280 } 281 } 282 result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); 283 return result; 284 } 285 CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) 286 { 287 if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) == NULL) { 288 return orig; 289 } 290 return PDF_NameDecode(CFX_ByteStringC(orig)); 291 } 292 CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) 293 { 294 FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig; 295 int src_len = orig.GetLength(); 296 int dest_len = 0; 297 int i; 298 for (i = 0; i < src_len; i ++) { 299 FX_BYTE ch = src_buf[i]; 300 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || 301 _PDF_CharType[ch] == 'D') { 302 dest_len += 3; 303 } else { 304 dest_len ++; 305 } 306 } 307 if (dest_len == src_len) { 308 return orig; 309 } 310 CFX_ByteString res; 311 FX_LPSTR dest_buf = res.GetBuffer(dest_len); 312 dest_len = 0; 313 for (i = 0; i < src_len; i ++) { 314 FX_BYTE ch = src_buf[i]; 315 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || 316 _PDF_CharType[ch] == 'D') { 317 dest_buf[dest_len++] = '#'; 318 dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; 319 dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; 320 } else { 321 dest_buf[dest_len++] = ch; 322 } 323 } 324 dest_buf[dest_len] = 0; 325 res.ReleaseBuffer(); 326 return res; 327 } 328 CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj) 329 { 330 if (pObj == NULL) { 331 buf << FX_BSTRC(" null"); 332 return buf; 333 } 334 switch (pObj->GetType()) { 335 case PDFOBJ_NULL: 336 buf << FX_BSTRC(" null"); 337 break; 338 case PDFOBJ_BOOLEAN: 339 case PDFOBJ_NUMBER: 340 buf << " " << pObj->GetString(); 341 break; 342 case PDFOBJ_STRING: { 343 CFX_ByteString str = pObj->GetString(); 344 FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex(); 345 buf << PDF_EncodeString(str, bHex); 346 break; 347 } 348 case PDFOBJ_NAME: { 349 CFX_ByteString str = pObj->GetString(); 350 buf << FX_BSTRC("/") << PDF_NameEncode(str); 351 break; 352 } 353 case PDFOBJ_REFERENCE: { 354 CPDF_Reference* p = (CPDF_Reference*)pObj; 355 buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R "); 356 break; 357 } 358 case PDFOBJ_ARRAY: { 359 CPDF_Array* p = (CPDF_Array*)pObj; 360 buf << FX_BSTRC("["); 361 for (FX_DWORD i = 0; i < p->GetCount(); i ++) { 362 CPDF_Object* pElement = p->GetElement(i); 363 if (pElement->GetObjNum()) { 364 buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R"); 365 } else { 366 buf << pElement; 367 } 368 } 369 buf << FX_BSTRC("]"); 370 break; 371 } 372 case PDFOBJ_DICTIONARY: { 373 CPDF_Dictionary* p = (CPDF_Dictionary*)pObj; 374 buf << FX_BSTRC("<<"); 375 FX_POSITION pos = p->GetStartPos(); 376 while (pos) { 377 CFX_ByteString key; 378 CPDF_Object* pValue = p->GetNextElement(pos, key); 379 buf << FX_BSTRC("/") << PDF_NameEncode(key); 380 if (pValue->GetObjNum()) { 381 buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R "); 382 } else { 383 buf << pValue; 384 } 385 } 386 buf << FX_BSTRC(">>"); 387 break; 388 } 389 case PDFOBJ_STREAM: { 390 CPDF_Stream* p = (CPDF_Stream*)pObj; 391 buf << p->GetDict() << FX_BSTRC("stream\r\n"); 392 CPDF_StreamAcc acc; 393 acc.LoadAllData(p, TRUE); 394 buf.AppendBlock(acc.GetData(), acc.GetSize()); 395 buf << FX_BSTRC("\r\nendstream"); 396 break; 397 } 398 default: 399 ASSERT(FALSE); 400 break; 401 } 402 return buf; 403 } 404 FX_FLOAT PDF_ClipFloat(FX_FLOAT f) 405 { 406 if (f < 0) { 407 return 0; 408 } 409 if (f > 1.0f) { 410 return 1.0f; 411 } 412 return f; 413 } 414 static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num) 415 { 416 CPDF_Array* pLimits = pNode->GetArray("Limits"); 417 if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) { 418 return NULL; 419 } 420 CPDF_Array* pNumbers = pNode->GetArray("Nums"); 421 if (pNumbers) { 422 FX_DWORD dwCount = pNumbers->GetCount() / 2; 423 for (FX_DWORD i = 0; i < dwCount; i ++) { 424 int index = pNumbers->GetInteger(i * 2); 425 if (num == index) { 426 return pNumbers->GetElementValue(i * 2 + 1); 427 } 428 if (index > num) { 429 break; 430 } 431 } 432 return NULL; 433 } 434 CPDF_Array* pKids = pNode->GetArray("Kids"); 435 if (pKids == NULL) { 436 return NULL; 437 } 438 for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) { 439 CPDF_Dictionary* pKid = pKids->GetDict(i); 440 if (pKid == NULL) { 441 continue; 442 } 443 CPDF_Object* pFound = SearchNumberNode(pKid, num); 444 if (pFound) { 445 return pFound; 446 } 447 } 448 return NULL; 449 } 450 CPDF_Object* CPDF_NumberTree::LookupValue(int num) 451 { 452 return SearchNumberNode(m_pRoot, num); 453 } 454