Home | History | Annotate | Download | only in parser
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
      8 
      9 #include "core/fpdfapi/parser/cpdf_array.h"
     10 #include "core/fpdfapi/parser/cpdf_boolean.h"
     11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     12 #include "core/fpdfapi/parser/cpdf_number.h"
     13 #include "core/fpdfapi/parser/cpdf_reference.h"
     14 #include "core/fpdfapi/parser/cpdf_stream.h"
     15 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     16 #include "core/fpdfapi/parser/cpdf_string.h"
     17 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     18 #include "core/fxcrt/fx_ext.h"
     19 
     20 // Indexed by 8-bit character code, contains either:
     21 //   'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
     22 //   'N' - for numeric: 0123456789+-.
     23 //   'D' - for delimiter: %()/<>[]{}
     24 //   'R' - otherwise.
     25 const char PDF_CharType[256] = {
     26     // NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL  BS   HT   LF   VT   FF   CR   SO
     27     // SI
     28     'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R',
     29     'R',
     30 
     31     // DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB  CAN  EM   SUB  ESC  FS   GS   RS
     32     // US
     33     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     34     'R',
     35 
     36     // SP    !    "    #    $    %    &        (    )    *    +    ,    -    .
     37     // /
     38     'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N',
     39     'D',
     40 
     41     // 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    > ?
     42     'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D',
     43     'R',
     44 
     45     // @    A    B    C    D    E    F    G    H    I    J    K    L    M    N O
     46     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     47     'R',
     48 
     49     // P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^ _
     50     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R',
     51     'R',
     52 
     53     // `    a    b    c    d    e    f    g    h    i    j    k    l    m    n o
     54     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     55     'R',
     56 
     57     // p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
     58     // DEL
     59     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R',
     60     'R',
     61 
     62     'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     63     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     64     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     65     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     66     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     67     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     68     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     69     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
     70     'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
     71 
     72 int32_t GetHeaderOffset(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile) {
     73   const size_t kBufSize = 4;
     74   uint8_t buf[kBufSize];
     75   for (int32_t offset = 0; offset <= 1024; ++offset) {
     76     if (!pFile->ReadBlock(buf, offset, kBufSize))
     77       return -1;
     78 
     79     if (memcmp(buf, "%PDF", 4) == 0)
     80       return offset;
     81   }
     82   return -1;
     83 }
     84 
     85 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteString& key) {
     86   CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key));
     87   return pObj ? pObj->GetInteger() : 0;
     88 }
     89 
     90 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
     91   if (bstr.Find('#') == -1)
     92     return CFX_ByteString(bstr);
     93 
     94   int size = bstr.GetLength();
     95   CFX_ByteString result;
     96   FX_CHAR* pDestStart = result.GetBuffer(size);
     97   FX_CHAR* pDest = pDestStart;
     98   for (int i = 0; i < size; i++) {
     99     if (bstr[i] == '#' && i < size - 2) {
    100       *pDest++ =
    101           FXSYS_toHexDigit(bstr[i + 1]) * 16 + FXSYS_toHexDigit(bstr[i + 2]);
    102       i += 2;
    103     } else {
    104       *pDest++ = bstr[i];
    105     }
    106   }
    107   result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
    108   return result;
    109 }
    110 
    111 CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) {
    112   if (orig.Find('#') == -1)
    113     return orig;
    114   return PDF_NameDecode(orig.AsStringC());
    115 }
    116 
    117 CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
    118   uint8_t* src_buf = (uint8_t*)orig.c_str();
    119   int src_len = orig.GetLength();
    120   int dest_len = 0;
    121   int i;
    122   for (i = 0; i < src_len; i++) {
    123     uint8_t ch = src_buf[i];
    124     if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
    125         PDFCharIsDelimiter(ch)) {
    126       dest_len += 3;
    127     } else {
    128       dest_len++;
    129     }
    130   }
    131   if (dest_len == src_len)
    132     return orig;
    133 
    134   CFX_ByteString res;
    135   FX_CHAR* dest_buf = res.GetBuffer(dest_len);
    136   dest_len = 0;
    137   for (i = 0; i < src_len; i++) {
    138     uint8_t ch = src_buf[i];
    139     if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
    140         PDFCharIsDelimiter(ch)) {
    141       dest_buf[dest_len++] = '#';
    142       dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
    143       dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
    144     } else {
    145       dest_buf[dest_len++] = ch;
    146     }
    147   }
    148   dest_buf[dest_len] = 0;
    149   res.ReleaseBuffer();
    150   return res;
    151 }
    152 
    153 CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) {
    154   if (!pObj) {
    155     buf << " null";
    156     return buf;
    157   }
    158   switch (pObj->GetType()) {
    159     case CPDF_Object::NULLOBJ:
    160       buf << " null";
    161       break;
    162     case CPDF_Object::BOOLEAN:
    163     case CPDF_Object::NUMBER:
    164       buf << " " << pObj->GetString();
    165       break;
    166     case CPDF_Object::STRING:
    167       buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
    168       break;
    169     case CPDF_Object::NAME: {
    170       CFX_ByteString str = pObj->GetString();
    171       buf << "/" << PDF_NameEncode(str);
    172       break;
    173     }
    174     case CPDF_Object::REFERENCE: {
    175       buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
    176       break;
    177     }
    178     case CPDF_Object::ARRAY: {
    179       const CPDF_Array* p = pObj->AsArray();
    180       buf << "[";
    181       for (size_t i = 0; i < p->GetCount(); i++) {
    182         CPDF_Object* pElement = p->GetObjectAt(i);
    183         if (pElement && !pElement->IsInline()) {
    184           buf << " " << pElement->GetObjNum() << " 0 R";
    185         } else {
    186           buf << pElement;
    187         }
    188       }
    189       buf << "]";
    190       break;
    191     }
    192     case CPDF_Object::DICTIONARY: {
    193       const CPDF_Dictionary* p = pObj->AsDictionary();
    194       buf << "<<";
    195       for (const auto& it : *p) {
    196         const CFX_ByteString& key = it.first;
    197         CPDF_Object* pValue = it.second.get();
    198         buf << "/" << PDF_NameEncode(key);
    199         if (pValue && !pValue->IsInline()) {
    200           buf << " " << pValue->GetObjNum() << " 0 R ";
    201         } else {
    202           buf << pValue;
    203         }
    204       }
    205       buf << ">>";
    206       break;
    207     }
    208     case CPDF_Object::STREAM: {
    209       const CPDF_Stream* p = pObj->AsStream();
    210       buf << p->GetDict() << "stream\r\n";
    211       CPDF_StreamAcc acc;
    212       acc.LoadAllData(p, true);
    213       buf.AppendBlock(acc.GetData(), acc.GetSize());
    214       buf << "\r\nendstream";
    215       break;
    216     }
    217     default:
    218       ASSERT(false);
    219       break;
    220   }
    221   return buf;
    222 }
    223