Home | History | Annotate | Download | only in parser
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
      8 
      9 #include "core/fpdfapi/parser/cpdf_array.h"
     10 #include "core/fpdfapi/parser/cpdf_boolean.h"
     11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     12 #include "core/fpdfapi/parser/cpdf_number.h"
     13 #include "core/fpdfapi/parser/cpdf_reference.h"
     14 #include "core/fpdfapi/parser/cpdf_stream.h"
     15 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     16 #include "core/fpdfapi/parser/cpdf_string.h"
     17 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     18 #include "core/fxcrt/fx_extension.h"
     19 #include "core/fxcrt/fx_stream.h"
     20 #include "third_party/base/logging.h"
     21 
     // Character-class lookup table, indexed by the unsigned 8-bit value of a
     // byte. Every entry holds exactly one of:
     //   'W' - whitespace: NUL, TAB, LF, FF, CR, SPACE, 0x80, 0xff
     //   'N' - numeric:    0123456789+-.
     //   'D' - delimiter:  %()/<>[]{}
     //   'R' - regular:    every other byte value.
     // Rows below hold eight entries each; the leading comment names the bytes.
     const char PDF_CharType[256] = {
         // 0x00-0x07: NUL SOH STX ETX EOT ENQ ACK BEL
         'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x08-0x0f: BS HT LF VT FF CR SO SI
         'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
         // 0x10-0x17: DLE DC1 DC2 DC3 DC4 NAK SYN ETB
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x18-0x1f: CAN EM SUB ESC FS GS RS US
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x20-0x27: SP ! " # $ % & '
         'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
         // 0x28-0x2f: ( ) * + , - . /
         'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
         // 0x30-0x37: 0 1 2 3 4 5 6 7
         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
         // 0x38-0x3f: 8 9 : ; < = > ?
         'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
         // 0x40-0x47: @ A B C D E F G
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x48-0x4f: H I J K L M N O
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x50-0x57: P Q R S T U V W
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x58-0x5f: X Y Z [ \ ] ^ _
         'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
         // 0x60-0x67: ` a b c d e f g
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x68-0x6f: h i j k l m n o
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x70-0x77: p q r s t u v w
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x78-0x7f: x y z { | } ~ DEL
         'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
         // 0x80-0x87 (0x80 is classified as whitespace)
         'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x88-0x8f
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x90-0x97
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0x98-0x9f
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xa0-0xa7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xa8-0xaf
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xb0-0xb7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xb8-0xbf
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xc0-0xc7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xc8-0xcf
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xd0-0xd7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xd8-0xdf
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xe0-0xe7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xe8-0xef
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xf0-0xf7
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
         // 0xf8-0xff (0xff is classified as whitespace)
         'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
     73 
     74 int32_t GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream>& pFile) {
     75   const size_t kBufSize = 4;
     76   uint8_t buf[kBufSize];
     77   for (int32_t offset = 0; offset <= 1024; ++offset) {
     78     if (!pFile->ReadBlock(buf, offset, kBufSize))
     79       return kInvalidHeaderOffset;
     80 
     81     if (memcmp(buf, "%PDF", 4) == 0)
     82       return offset;
     83   }
     84   return kInvalidHeaderOffset;
     85 }
     86 
     87 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const ByteString& key) {
     88   CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key));
     89   return pObj ? pObj->GetInteger() : 0;
     90 }
     91 
     92 ByteString PDF_NameDecode(const ByteStringView& bstr) {
     93   if (!bstr.Contains('#'))
     94     return ByteString(bstr);
     95 
     96   int size = bstr.GetLength();
     97   ByteString result;
     98   char* pDestStart = result.GetBuffer(size);
     99   char* pDest = pDestStart;
    100   for (int i = 0; i < size; i++) {
    101     if (bstr[i] == '#' && i < size - 2) {
    102       *pDest++ = FXSYS_HexCharToInt(bstr[i + 1]) * 16 +
    103                  FXSYS_HexCharToInt(bstr[i + 2]);
    104       i += 2;
    105     } else {
    106       *pDest++ = bstr[i];
    107     }
    108   }
    109   result.ReleaseBuffer(static_cast<size_t>(pDest - pDestStart));
    110   return result;
    111 }
    112 
    113 ByteString PDF_NameDecode(const ByteString& orig) {
    114   return orig.Contains("#") ? PDF_NameDecode(orig.AsStringView()) : orig;
    115 }
    116 
    117 ByteString PDF_NameEncode(const ByteString& orig) {
    118   uint8_t* src_buf = (uint8_t*)orig.c_str();
    119   int src_len = orig.GetLength();
    120   int dest_len = 0;
    121   int i;
    122   for (i = 0; i < src_len; i++) {
    123     uint8_t ch = src_buf[i];
    124     if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
    125         PDFCharIsDelimiter(ch)) {
    126       dest_len += 3;
    127     } else {
    128       dest_len++;
    129     }
    130   }
    131   if (dest_len == src_len)
    132     return orig;
    133 
    134   ByteString res;
    135   char* dest_buf = res.GetBuffer(dest_len);
    136   dest_len = 0;
    137   for (i = 0; i < src_len; i++) {
    138     uint8_t ch = src_buf[i];
    139     if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
    140         PDFCharIsDelimiter(ch)) {
    141       dest_buf[dest_len++] = '#';
    142       FXSYS_IntToTwoHexChars(ch, dest_buf + dest_len);
    143       dest_len += 2;
    144     } else {
    145       dest_buf[dest_len++] = ch;
    146     }
    147   }
    148   dest_buf[dest_len] = 0;
    149   res.ReleaseBuffer(res.GetStringLength());
    150   return res;
    151 }
    152 
    153 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
    154   if (!pObj) {
    155     buf << " null";
    156     return buf;
    157   }
    158   switch (pObj->GetType()) {
    159     case CPDF_Object::NULLOBJ:
    160       buf << " null";
    161       break;
    162     case CPDF_Object::BOOLEAN:
    163     case CPDF_Object::NUMBER:
    164       buf << " " << pObj->GetString();
    165       break;
    166     case CPDF_Object::STRING:
    167       buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
    168       break;
    169     case CPDF_Object::NAME: {
    170       ByteString str = pObj->GetString();
    171       buf << "/" << PDF_NameEncode(str);
    172       break;
    173     }
    174     case CPDF_Object::REFERENCE: {
    175       buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
    176       break;
    177     }
    178     case CPDF_Object::ARRAY: {
    179       const CPDF_Array* p = pObj->AsArray();
    180       buf << "[";
    181       for (size_t i = 0; i < p->GetCount(); i++) {
    182         CPDF_Object* pElement = p->GetObjectAt(i);
    183         if (pElement && !pElement->IsInline()) {
    184           buf << " " << pElement->GetObjNum() << " 0 R";
    185         } else {
    186           buf << pElement;
    187         }
    188       }
    189       buf << "]";
    190       break;
    191     }
    192     case CPDF_Object::DICTIONARY: {
    193       const CPDF_Dictionary* p = pObj->AsDictionary();
    194       buf << "<<";
    195       for (const auto& it : *p) {
    196         const ByteString& key = it.first;
    197         CPDF_Object* pValue = it.second.get();
    198         buf << "/" << PDF_NameEncode(key);
    199         if (pValue && !pValue->IsInline()) {
    200           buf << " " << pValue->GetObjNum() << " 0 R ";
    201         } else {
    202           buf << pValue;
    203         }
    204       }
    205       buf << ">>";
    206       break;
    207     }
    208     case CPDF_Object::STREAM: {
    209       const CPDF_Stream* p = pObj->AsStream();
    210       buf << p->GetDict() << "stream\r\n";
    211       auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(p);
    212       pAcc->LoadAllDataRaw();
    213       buf.write(reinterpret_cast<const char*>(pAcc->GetData()),
    214                 pAcc->GetSize());
    215       buf << "\r\nendstream";
    216       break;
    217     }
    218     default:
    219       NOTREACHED();
    220       break;
    221   }
    222   return buf;
    223 }
    224