Home | History | Annotate | Download | only in native
      1 /*
      2  * Copyright 2013 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkPdfNativeTokenizer_DEFINED
      9 #define SkPdfNativeTokenizer_DEFINED
     10 
     11 #include <math.h>
     12 #include <string.h>
     13 
     14 #include "SkPdfConfig.h"
     15 #include "SkTDArray.h"
     16 #include "SkTDict.h"
     17 
     18 // All these constants are defined by the PDF 1.4 Spec.
     19 
     20 class SkPdfDictionary;
     21 class SkPdfImageDictionary;
     22 class SkPdfNativeDoc;
     23 class SkPdfNativeObject;
     24 
     25 
     26 // White Spaces
     27 #define kNUL_PdfWhiteSpace '\x00'
     28 #define kHT_PdfWhiteSpace  '\x09'
     29 #define kLF_PdfWhiteSpace  '\x0A'
     30 #define kFF_PdfWhiteSpace  '\x0C'
     31 #define kCR_PdfWhiteSpace  '\x0D'
     32 #define kSP_PdfWhiteSpace  '\x20'
     33 
     34 // PdfDelimiters
     35 #define kOpenedRoundBracket_PdfDelimiter        '('
     36 #define kClosedRoundBracket_PdfDelimiter        ')'
     37 #define kOpenedInequityBracket_PdfDelimiter     '<'
     38 #define kClosedInequityBracket_PdfDelimiter     '>'
     39 #define kOpenedSquareBracket_PdfDelimiter       '['
     40 #define kClosedSquareBracket_PdfDelimiter       ']'
     41 #define kOpenedCurlyBracket_PdfDelimiter        '{'
     42 #define kClosedCurlyBracket_PdfDelimiter        '}'
     43 #define kNamed_PdfDelimiter                     '/'
     44 #define kComment_PdfDelimiter                   '%'
     45 
     46 #define kEscape_PdfSpecial                      '\\'
     47 #define kBackspace_PdfSpecial                   '\x08'
     48 
     49 // TODO(edisonn): what is the faster way for compiler/machine type to evaluate this expressions?
     50 // we should evaluate all options. might be even different from one machine to another
     51 // 1) expand expression, let compiler optimize it
     52 // 2) binary search
     53 // 3) linear search in array
     54 // 4) vector (e.f. T type[256] .. return type[ch] ...
     55 // 5) manually build the expression with least number of operators, e.g. for consecutive
     56 // chars, we can use an binary equal ignoring last bit
     57 #define isPdfWhiteSpace(ch) (((ch)==kNUL_PdfWhiteSpace)|| \
     58                              ((ch)==kHT_PdfWhiteSpace)|| \
     59                              ((ch)==kLF_PdfWhiteSpace)|| \
     60                              ((ch)==kFF_PdfWhiteSpace)|| \
     61                              ((ch)==kCR_PdfWhiteSpace)|| \
     62                              ((ch)==kSP_PdfWhiteSpace))
     63 
     64 #define isPdfEOL(ch) (((ch)==kLF_PdfWhiteSpace)||((ch)==kCR_PdfWhiteSpace))
     65 
     66 
     67 #define isPdfDelimiter(ch) (((ch)==kOpenedRoundBracket_PdfDelimiter)||\
     68                             ((ch)==kClosedRoundBracket_PdfDelimiter)||\
     69                             ((ch)==kOpenedInequityBracket_PdfDelimiter)||\
     70                             ((ch)==kClosedInequityBracket_PdfDelimiter)||\
     71                             ((ch)==kOpenedSquareBracket_PdfDelimiter)||\
     72                             ((ch)==kClosedSquareBracket_PdfDelimiter)||\
     73                             ((ch)==kOpenedCurlyBracket_PdfDelimiter)||\
     74                             ((ch)==kClosedCurlyBracket_PdfDelimiter)||\
     75                             ((ch)==kNamed_PdfDelimiter)||\
     76                             ((ch)==kComment_PdfDelimiter))
     77 
     78 #define isPdfWhiteSpaceOrPdfDelimiter(ch) (isPdfWhiteSpace(ch)||isPdfDelimiter(ch))
     79 
     80 #define isPdfDigit(ch) ((ch)>='0'&&(ch)<='9')
     81 #define isPdfNumeric(ch) (isPdfDigit(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
     82 
     83 const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer, const unsigned char* end);
     84 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end);
     85 
     86 #define BUFFER_SIZE 1024
     87 
     88 /** \class SkPdfAllocator
     89  *
     90  *   An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
     91  *   This strategy would allow us not to do any garbage collection while we parse and/or render
     92  *   a pdf.
     93  *
     94  */
     95 class SkPdfAllocator {
     96 public:
     97     SkPdfAllocator() {
     98         fSizeInBytes = sizeof(*this);
     99         fCurrent = allocBlock();
    100         fCurrentUsed = 0;
    101     }
    102 
    103     ~SkPdfAllocator();
    104 
    105     // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called.
    106     SkPdfNativeObject* allocObject();
    107 
    108     // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called.
    109     void* alloc(size_t bytes) {
    110         void* data = malloc(bytes);
    111         fHandles.push(data);
    112         fSizeInBytes += bytes;
    113         return data;
    114     }
    115 
    116     // Returns the number of bytes used in this allocator.
    117     size_t bytesUsed() const {
    118         return fSizeInBytes;
    119     }
    120 
    121 private:
    122     SkTDArray<SkPdfNativeObject*> fHistory;
    123     SkTDArray<void*> fHandles;
    124     SkPdfNativeObject* fCurrent;
    125     int fCurrentUsed;
    126 
    127     SkPdfNativeObject* allocBlock();
    128     size_t fSizeInBytes;
    129 };
    130 
    131 // Type of a parsed token.
    132 enum SkPdfTokenType {
    133     kKeyword_TokenType,
    134     kObject_TokenType,
    135 };
    136 
    137 
    138 /** \struct PdfToken
    139  *
    140  *   Stores the result of the parsing - a keyword or an object.
    141  *
    142  */
    143 struct PdfToken {
    144     const char*             fKeyword;
    145     size_t                  fKeywordLength;
    146     SkPdfNativeObject*      fObject;
    147     SkPdfTokenType          fType;
    148 
    149     PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
    150 };
    151 
    152 /** \class SkPdfNativeTokenizer
    153  *
    154  *   Responsible to tokenize a stream in small tokens, eityh a keyword or an object.
    155  *   A renderer can feed on the tokens and render a pdf.
    156  *
    157  */
    158 class SkPdfNativeTokenizer {
    159 public:
    160     SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
    161                          SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
    162     SkPdfNativeTokenizer(const unsigned char* buffer, int len,
    163                          SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
    164 
    165     virtual ~SkPdfNativeTokenizer();
    166 
    167     // Reads one token. Returns false if there are no more tokens.
    168     // If writeDiff is true, and a token was read, create a PNG highlighting
    169     // the difference caused by this command in /tmp/log_step_by_step.
    170     // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing.
    171     bool readToken(PdfToken* token, bool writeDiff = false);
    172 
    173     // Put back a token to be read in the nextToken read. Only one token is allowed to be put
    174     // back. Must not necesaarely be the last token read.
    175     void PutBack(PdfToken token);
    176 
    177     // Reads the inline image that is present in the stream. At this point we just consumed the ID
    178     // token already.
    179     SkPdfImageDictionary* readInlineImage();
    180 
    181 private:
    182     bool readTokenCore(PdfToken* token);
    183 
    184     SkPdfNativeDoc* fDoc;
    185     SkPdfAllocator* fAllocator;
    186 
    187     const unsigned char* fUncompressedStreamStart;
    188     const unsigned char* fUncompressedStream;
    189     const unsigned char* fUncompressedStreamEnd;
    190 
    191     bool fEmpty;
    192     bool fHasPutBack;
    193     PdfToken fPutBack;
    194 };
    195 
    196 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
    197                                 SkPdfNativeObject* token,
    198                                 SkPdfAllocator* allocator,
    199                                 SkPdfNativeDoc* doc);
    200 
    201 #endif  // SkPdfNativeTokenizer_DEFINED
    202