Home | History | Annotate | Download | only in native
      1 /*
      2  * Copyright 2013 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkPdfConfig.h"
      9 #include "SkPdfDiffEncoder.h"
     10 #include "SkPdfNativeObject.h"
     11 #include "SkPdfNativeTokenizer.h"
     12 #include "SkPdfUtils.h"
     13 
     14 // TODO(edisonn): mac builder does not find the header ... but from headers is ok
     15 //#include "SkPdfStreamCommonDictionary_autogen.h"
     16 //#include "SkPdfImageDictionary_autogen.h"
     17 #include "SkPdfHeaders_autogen.h"
     18 
     19 
     20 // TODO(edisonn): Perf, Make this function run faster.
     21 // There could be 0s between start and end.
     22 // needle will not contain 0s.
     23 static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) {
     24     int needleLen = strlen(needle);
     25     if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) &&
     26             strncmp(hayStart, needle, needleLen) == 0) {
     27         return hayStart;
     28     }
     29 
     30     hayStart++;
     31 
     32     while (hayStart < hayEnd) {
     33         if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) &&
     34                 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) ||
     35                       (hayStart+needleLen == hayEnd)) &&
     36                 strncmp(hayStart, needle, needleLen) == 0) {
     37             return hayStart;
     38         }
     39         hayStart++;
     40     }
     41     return NULL;
     42 }
     43 
     44 const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) {
     45     while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) {
     46         TRACE_COMMENT(*start);
     47         if (*start == kComment_PdfDelimiter) {
     48             // skip the comment until end of line
     49             while (start < end && !isPdfEOL(*start)) {
     50                 start++;
     51                 TRACE_COMMENT(*start);
     52             }
     53         } else {
     54             start++;
     55         }
     56     }
     57     return start;
     58 }
     59 
     60 const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) {
     61     SkASSERT(!isPdfWhiteSpace(*start));
     62 
     63     if (start < end && isPdfDelimiter(*start)) {
     64         TRACE_TK(*start);
     65         start++;
     66         return start;
     67     }
     68 
     69     while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) {
     70         TRACE_TK(*start);
     71         start++;
     72     }
     73     return start;
     74 }
     75 
     76 // The parsing should end with a ].
     77 static const unsigned char* readArray(const unsigned char* start, const unsigned char* end,
     78                                       SkPdfNativeObject* array,
     79                                       SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
     80     SkPdfNativeObject::makeEmptyArray(array);
     81     // PUT_TRACK_STREAM(array, start, start)
     82 
     83     if (allocator == NULL) {
     84         // TODO(edisonn): report/warning error/assert
     85         return end;
     86     }
     87 
     88     while (start < end) {
     89         // skip white spaces
     90         start = skipPdfWhiteSpaces(start, end);
     91 
     92         const unsigned char* endOfToken = endOfPdfToken(start, end);
     93 
     94         if (endOfToken == start) {
     95             // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray
     96             return start;
     97         }
     98 
     99         if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) {
    100             return endOfToken;
    101         }
    102 
    103         SkPdfNativeObject* newObj = allocator->allocObject();
    104         start = nextObject(start, end, newObj, allocator, doc);
    105         // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array
    106         // only when we are sure they are not references!
    107         if (newObj->isKeywordReference() && array->size() >= 2 &&
    108                 array->objAtAIndex(array->size() - 1)->isInteger() &&
    109                 array->objAtAIndex(array->size() - 2)->isInteger()) {
    110             SkPdfNativeObject* gen = array->removeLastInArray();
    111             SkPdfNativeObject* id = array->removeLastInArray();
    112 
    113             SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(),
    114                                                      (unsigned int)gen->intValue(), newObj);
    115             // newObj  PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now
    116         }
    117         array->appendInArray(newObj);
    118     }
    119     // TODO(edisonn): report not reached, we should never get here
    120     // TODO(edisonn): there might be a bug here, enable an assert and run it on files
    121     // or it might be that the files were actually corrupted
    122     return start;
    123 }
    124 
    125 static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
    126                                        unsigned char* out) {
    127     const unsigned char* in = start;
    128     bool hasOut = (out != NULL);
    129 
    130     int openRoundBrackets = 1;
    131     while (in < end) {
    132         openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter);
    133         openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter);
    134         if (openRoundBrackets == 0) {
    135             in++;   // consumed )
    136             break;
    137         }
    138 
    139         if (*in == kEscape_PdfSpecial) {
    140             if (in + 1 < end) {
    141                 switch (in[1]) {
    142                     case 'n':
    143                         if (hasOut) { *out = kLF_PdfWhiteSpace; }
    144                         out++;
    145                         in += 2;
    146                         break;
    147 
    148                     case 'r':
    149                         if (hasOut) { *out = kCR_PdfWhiteSpace; }
    150                         out++;
    151                         in += 2;
    152                         break;
    153 
    154                     case 't':
    155                         if (hasOut) { *out = kHT_PdfWhiteSpace; }
    156                         out++;
    157                         in += 2;
    158                         break;
    159 
    160                     case 'b':
    161                         // TODO(edisonn): any special meaning to backspace?
    162                         if (hasOut) { *out = kBackspace_PdfSpecial; }
    163                         out++;
    164                         in += 2;
    165                         break;
    166 
    167                     case 'f':
    168                         if (hasOut) { *out = kFF_PdfWhiteSpace; }
    169                         out++;
    170                         in += 2;
    171                         break;
    172 
    173                     case kOpenedRoundBracket_PdfDelimiter:
    174                         if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; }
    175                         out++;
    176                         in += 2;
    177                         break;
    178 
    179                     case kClosedRoundBracket_PdfDelimiter:
    180                         if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; }
    181                         out++;
    182                         in += 2;
    183                         break;
    184 
    185                     case kEscape_PdfSpecial:
    186                         if (hasOut) { *out = kEscape_PdfSpecial; }
    187                         out++;
    188                         in += 2;
    189                         break;
    190 
    191                     case '0':
    192                     case '1':
    193                     case '2':
    194                     case '3':
    195                     case '4':
    196                     case '5':
    197                     case '6':
    198                     case '7': {
    199                             //read octals
    200                             in++;   // consume backslash
    201 
    202                             int code = 0;
    203                             int i = 0;
    204                             while (in < end && *in >= '0' && *in < '8') {
    205                                 code = (code << 3) + ((*in) - '0');  // code * 8 + d
    206                                 i++;
    207                                 in++;
    208                                 if (i == 3) {
    209                                     if (hasOut) { *out = code & 0xff; }
    210                                     out++;
    211                                     i = 0;
    212                                 }
    213                             }
    214                             if (i > 0) {
    215                                 if (hasOut) { *out = code & 0xff; }
    216                                 out++;
    217                             }
    218                         }
    219                         break;
    220 
    221                     default:
    222                         // Per spec, backslash is ignored if escaped ch is unknown
    223                         in++;
    224                         break;
    225                 }
    226             } else {
    227                 in++;
    228             }
    229         } else {
    230             if (hasOut) { *out = *in; }
    231             in++;
    232             out++;
    233         }
    234     }
    235 
    236     if (hasOut) {
    237         return in;  // consumed already ) at the end of the string
    238     } else {
    239         // return where the string would end if we reuse the string
    240         return start + (out - (const unsigned char*)NULL);
    241     }
    242 }
    243 
    244 static int readStringLength(const unsigned char* start, const unsigned char* end) {
    245     return readString(start, end, NULL) - start;
    246 }
    247 
    248 static const unsigned char* readString(const unsigned char* start, const unsigned char* end,
    249                                        SkPdfNativeObject* str, SkPdfAllocator* allocator) {
    250     if (!allocator) {
    251         // TODO(edisonn): report error/warn/assert
    252         return end;
    253     }
    254 
    255     int outLength = readStringLength(start, end);
    256     unsigned char* out = (unsigned char*)allocator->alloc(outLength);
    257     const unsigned char* now = readString(start, end, out);
    258     SkPdfNativeObject::makeString(out, out + outLength, str);
    259     //  PUT_TRACK_STREAM(str, start, now)
    260     TRACE_STRING(out, out + outLength);
    261     return now;  // consumed already ) at the end of the string
    262 }
    263 
    264 static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end,
    265                                           unsigned char* out) {
    266     bool hasOut = (out != NULL);
    267     const unsigned char* in = start;
    268 
    269     unsigned char code = 0;
    270 
    271     while (in < end) {
    272         while (in < end && isPdfWhiteSpace(*in)) {
    273             in++;
    274         }
    275 
    276         if (*in == kClosedInequityBracket_PdfDelimiter) {
    277             in++;  // consume >
    278             // normal exit
    279             break;
    280         }
    281 
    282         if (in >= end) {
    283             // end too soon
    284             break;
    285         }
    286 
    287         switch (*in) {
    288             case '0':
    289             case '1':
    290             case '2':
    291             case '3':
    292             case '4':
    293             case '5':
    294             case '6':
    295             case '7':
    296             case '8':
    297             case '9':
    298                 code = (*in - '0') << 4;
    299                 break;
    300 
    301             case 'a':
    302             case 'b':
    303             case 'c':
    304             case 'd':
    305             case 'e':
    306             case 'f':
    307                 code = (*in - 'a' + 10) << 4;
    308                 break;
    309 
    310             case 'A':
    311             case 'B':
    312             case 'C':
    313             case 'D':
    314             case 'E':
    315             case 'F':
    316                 code = (*in - 'A' + 10) << 4;
    317                 break;
    318 
    319             // TODO(edisonn): spec does not say how to handle this error
    320             default:
    321                 break;
    322         }
    323 
    324         in++;  // advance
    325 
    326         while (in < end && isPdfWhiteSpace(*in)) {
    327             in++;
    328         }
    329 
    330         // TODO(edisonn): report error
    331         if (in >= end) {
    332             if (hasOut) { *out = code; }
    333             out++;
    334             break;
    335         }
    336 
    337         if (*in == kClosedInequityBracket_PdfDelimiter) {
    338             if (hasOut) { *out = code; }
    339             out++;
    340             in++;
    341             break;
    342         }
    343 
    344         switch (*in) {
    345             case '0':
    346             case '1':
    347             case '2':
    348             case '3':
    349             case '4':
    350             case '5':
    351             case '6':
    352             case '7':
    353             case '8':
    354             case '9':
    355                 code += (*in - '0');
    356                 break;
    357 
    358             case 'a':
    359             case 'b':
    360             case 'c':
    361             case 'd':
    362             case 'e':
    363             case 'f':
    364                 code += (*in - 'a' + 10);
    365                 break;
    366 
    367             case 'A':
    368             case 'B':
    369             case 'C':
    370             case 'D':
    371             case 'E':
    372             case 'F':
    373                 code += (*in - 'A' + 10);
    374                 break;
    375 
    376             // TODO(edisonn): spec does not say how to handle this error
    377             default:
    378                 break;
    379         }
    380 
    381         if (hasOut) { *out = code; }
    382         out++;
    383         in++;
    384     }
    385 
    386     if (hasOut) {
    387         return in;  // consumed already ) at the end of the string
    388     } else {
    389         // return where the string would end if we reuse the string
    390         return start + (out - (const unsigned char*)NULL);
    391     }
    392 }
    393 
    394 static int readHexStringLength(const unsigned char* start, const unsigned char* end) {
    395     return readHexString(start, end, NULL) - start;
    396 }
    397 
    398 static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) {
    399     if (!allocator) {
    400         // TODO(edisonn): report error/warn/assert
    401         return end;
    402     }
    403     int outLength = readHexStringLength(start, end);
    404     unsigned char* out = (unsigned char*)allocator->alloc(outLength);
    405     const unsigned char* now = readHexString(start, end, out);
    406     SkPdfNativeObject::makeHexString(out, out + outLength, str);
    407     // str PUT_TRACK_STREAM(start, now)
    408     TRACE_HEXSTRING(out, out + outLength);
    409     return now;  // consumed already > at the end of the string
    410 }
    411 
    412 // TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters.
    413 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
    414                                      unsigned char* out) {
    415     bool hasOut = (out != NULL);
    416     const unsigned char* in = start;
    417 
    418     unsigned char code = 0;
    419 
    420     while (in < end) {
    421         if (isPdfWhiteSpaceOrPdfDelimiter(*in)) {
    422             break;
    423         }
    424 
    425         if (*in == '#' && in + 2 < end) {
    426             in++;
    427             switch (*in) {
    428                 case '0':
    429                 case '1':
    430                 case '2':
    431                 case '3':
    432                 case '4':
    433                 case '5':
    434                 case '6':
    435                 case '7':
    436                 case '8':
    437                 case '9':
    438                     code = (*in - '0') << 4;
    439                     break;
    440 
    441                 case 'a':
    442                 case 'b':
    443                 case 'c':
    444                 case 'd':
    445                 case 'e':
    446                 case 'f':
    447                     code = (*in - 'a' + 10) << 4;
    448                     break;
    449 
    450                 case 'A':
    451                 case 'B':
    452                 case 'C':
    453                 case 'D':
    454                 case 'E':
    455                 case 'F':
    456                     code = (*in - 'A' + 10) << 4;
    457                     break;
    458 
    459                 // TODO(edisonn): spec does not say how to handle this error
    460                 default:
    461                     break;
    462             }
    463 
    464             in++;  // advance
    465 
    466             switch (*in) {
    467                 case '0':
    468                 case '1':
    469                 case '2':
    470                 case '3':
    471                 case '4':
    472                 case '5':
    473                 case '6':
    474                 case '7':
    475                 case '8':
    476                 case '9':
    477                     code += (*in - '0');
    478                     break;
    479 
    480                 case 'a':
    481                 case 'b':
    482                 case 'c':
    483                 case 'd':
    484                 case 'e':
    485                 case 'f':
    486                     code += (*in - 'a' + 10);
    487                     break;
    488 
    489                 case 'A':
    490                 case 'B':
    491                 case 'C':
    492                 case 'D':
    493                 case 'E':
    494                 case 'F':
    495                     code += (*in - 'A' + 10);
    496                     break;
    497 
    498                 // TODO(edisonn): spec does not say how to handle this error
    499                 default:
    500                     break;
    501             }
    502 
    503             if (hasOut) { *out = code; }
    504             out++;
    505             in++;
    506         } else {
    507             if (hasOut) { *out = *in; }
    508             out++;
    509             in++;
    510         }
    511     }
    512 
    513     if (hasOut) {
    514         return in;  // consumed already ) at the end of the string
    515     } else {
    516         // return where the string would end if we reuse the string
    517         return start + (out - (const unsigned char*)NULL);
    518     }
    519 }
    520 
    521 static int readNameLength(const unsigned char* start, const unsigned char* end) {
    522     return readName(start, end, NULL) - start;
    523 }
    524 
    525 static const unsigned char* readName(const unsigned char* start, const unsigned char* end,
    526                                      SkPdfNativeObject* name, SkPdfAllocator* allocator) {
    527     if (!allocator) {
    528         // TODO(edisonn): report error/warn/assert
    529         return end;
    530     }
    531     int outLength = readNameLength(start, end);
    532     unsigned char* out = (unsigned char*)allocator->alloc(outLength);
    533     const unsigned char* now = readName(start, end, out);
    534     SkPdfNativeObject::makeName(out, out + outLength, name);
    535     //PUT_TRACK_STREAM(start, now)
    536     TRACE_NAME(out, out + outLength);
    537     return now;
    538 }
    539 
// TODO(edisonn): the PDF spec allows Length to be an indirect object defined after the stream.
// That makes for an interesting scenario, where the stream data itself contains "endstream"
// together with an object that looks like the referenced length, while the real length object
// is somewhere else in the file. This could confuse the parser.
    544 /*example:
    545 
    546 7 0 obj
    547 << /length 8 0 R>>
    548 stream
    549 ...............
    550 endstream
    551 8 0 obj #we are in stream actually, not a real object
    552 << 10 >> #we are in stream actually, not a real object
    553 endobj
    554 endstream
    555 8 0 obj #real obj
    556 << 100 >> #real obj
    557 endobj
    558 and it could get worse, with multiple object like this
    559 */
    560 
    561 // right now implement the silly algorithm that assumes endstream is finishing the stream
    562 
    563 static const unsigned char* readStream(const unsigned char* start, const unsigned char* end,
    564                                        SkPdfNativeObject* dict, SkPdfNativeDoc* doc) {
    565     start = skipPdfWhiteSpaces(start, end);
    566     if (!(  start[0] == 's' &&
    567             start[1] == 't' &&
    568             start[2] == 'r' &&
    569             start[3] == 'e' &&
    570             start[4] == 'a' &&
    571             start[5] == 'm')) {
    572         // no stream. return.
    573         return start;
    574     }
    575 
    576     start += 6; // strlen("stream")
    577     if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
    578         start += 2;
    579     } else if (start[0] == kLF_PdfWhiteSpace) {
    580         start += 1;
    581     } else if (isPdfWhiteSpace(start[0])) {
    582         start += 1;
    583     } else {
    584         // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ?
    585     }
    586 
    587     SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict;
    588     // TODO(edisonn): load Length
    589     int64_t length = -1;
    590 
    591     // TODO(edisonn): very basic implementation
    592     if (stream->has_Length() && stream->Length(doc) > 0) {
    593         length = stream->Length(doc);
    594     }
    595 
    596     // TODO(edisonn): load external streams
    597     // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing
    598     // issues. The last filter can have special rules to terminate a stream, which we could
    599     // use to determine end of stream.
    600 
    601     if (length >= 0) {
    602         const unsigned char* endstream = start + length;
    603 
    604         if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
    605             endstream += 2;
    606         } else if (endstream[0] == kLF_PdfWhiteSpace) {
    607             endstream += 1;
    608         }
    609 
    610         if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) {
    611             length = -1;
    612         }
    613     }
    614 
    615     if (length < 0) {
    616         // scan the buffer, until we find first endstream
    617         // TODO(edisonn): all buffers must have a 0 at the end now,
    618         const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end,
    619                                                                         "endstream");
    620 
    621         if (endstream) {
    622             length = endstream - start;
    623             if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
    624             if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
    625         }
    626     }
    627     if (length >= 0) {
    628         const unsigned char* endstream = start + length;
    629 
    630         if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) {
    631             endstream += 2;
    632         } else if (endstream[0] == kLF_PdfWhiteSpace) {
    633             endstream += 1;
    634         }
    635 
    636         // TODO(edisonn): verify the next bytes are "endstream"
    637 
    638         endstream += strlen("endstream");
    639         // TODO(edisonn): Assert? report error/warning?
    640         dict->addStream(start, (size_t)length);
    641         return endstream;
    642     }
    643     return start;
    644 }
    645 
    646 static const unsigned char* readInlineImageStream(const unsigned char* start,
    647                                                   const unsigned char* end,
    648                                                   SkPdfImageDictionary* inlineImage,
    649                                                   SkPdfNativeDoc* doc) {
    650     // We already processed ID keyword, and we should be positioned immediately after it
    651 
    652     // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end,
    653     // instead of this if.
    654     //if (end - start <= 2) {
    655     //    // TODO(edisonn): warning?
    656     //    return end; // but can we have a pixel image encoded in 1-2 bytes?
    657     //}
    658 
    659     if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) {
    660         start += 2;
    661     } else if (start[0] == kLF_PdfWhiteSpace) {
    662         start += 1;
    663     } else if (isPdfWhiteSpace(start[0])) {
    664         start += 1;
    665     } else {
    666         SkASSERT(isPdfDelimiter(start[0]));
    667         // TODO(edisonn): warning?
    668     }
    669 
    670     const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI");
    671     const unsigned char* endEI = endstream ? endstream + 2 : NULL;  // 2 == strlen("EI")
    672 
    673     if (endstream) {
    674         int length = endstream - start;
    675         if (*(endstream-1) == kLF_PdfWhiteSpace) length--;
    676         if (*(endstream-2) == kCR_PdfWhiteSpace) length--;
    677         inlineImage->addStream(start, (size_t)length);
    678     } else {
    679         // TODO(edisonn): report error in inline image stream (ID-EI) section
    680         // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly
    681         return end;
    682     }
    683     return endEI;
    684 }
    685 
    686 static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end,
    687                                            SkPdfNativeObject* dict,
    688                                            SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
    689     if (allocator == NULL) {
    690         // TODO(edisonn): report/warning error
    691         return end;
    692     }
    693     SkPdfNativeObject::makeEmptyDictionary(dict);
    694     // PUT_TRACK_STREAM(dict, start, start)
    695 
    696     start = skipPdfWhiteSpaces(start, end);
    697     SkPdfAllocator tmpStorage;  // keys will be stored in dict, we can free them after set.
    698 
    699     while (start < end && *start == kNamed_PdfDelimiter) {
    700         SkPdfNativeObject key;
    701         //*start = '\0';
    702         start++;
    703         start = readName(start, end, &key, &tmpStorage);
    704         start = skipPdfWhiteSpaces(start, end);
    705 
    706         if (start < end) {
    707             SkPdfNativeObject* value = allocator->allocObject();
    708             start = nextObject(start, end, value, allocator, doc);
    709 
    710             start = skipPdfWhiteSpaces(start, end);
    711 
    712             if (start < end) {
    713                 // We should have an indirect reference
    714                 if (isPdfDigit(*start)) {
    715                     SkPdfNativeObject generation;
    716                     start = nextObject(start, end, &generation, allocator, doc);
    717 
    718                     SkPdfNativeObject keywordR;
    719                     start = nextObject(start, end, &keywordR, allocator, doc);
    720 
    721                     if (value->isInteger() && generation.isInteger() &&
    722                             keywordR.isKeywordReference()) {
    723                         int64_t id = value->intValue();
    724                         SkPdfNativeObject::resetAndMakeReference(
    725                                 (unsigned int)id,
    726                                 (unsigned int)generation.intValue(),
    727                                 value);
    728                         //  PUT_TRACK_PARAMETERS_OBJ2(value, &generation)
    729                         dict->set(&key, value);
    730                     } else {
    731                         // TODO(edisonn) error?, ignore it for now.
    732                         dict->set(&key, value);
    733                     }
    734                 } else {
    735                     // next elem is not a digit, but it might not be / either!
    736                     dict->set(&key, value);
    737                 }
    738             } else {
    739                 // /key >>
    740                 dict->set(&key, value);
    741                 return end;
    742             }
    743             start = skipPdfWhiteSpaces(start, end);
    744         } else {
    745             dict->set(&key, &SkPdfNativeObject::kNull);
    746             return end;
    747         }
    748     }
    749 
    750     // now we should expect >>
    751     start = skipPdfWhiteSpaces(start, end);
    752     if (*start != kClosedInequityBracket_PdfDelimiter) {
    753         // TODO(edisonn): report/warning
    754     }
    755 
    756     start++;  // skip >
    757     if (*start != kClosedInequityBracket_PdfDelimiter) {
    758         // TODO(edisonn): report/warning
    759     }
    760 
    761     start++;  // skip >
    762 
    763     //STORE_TRACK_PARAMETER_OFFSET_END(dict,start);
    764 
    765     start = readStream(start, end, dict, doc);
    766 
    767     return start;
    768 }
    769 
    770 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
    771                                 SkPdfNativeObject* token,
    772                                 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) {
    773     const unsigned char* current;
    774 
    775     // skip white spaces
    776     start = skipPdfWhiteSpaces(start, end);
    777 
    778     if (start >= end) {
    779         return end;
    780     }
    781 
    782     current = endOfPdfToken(start, end);
    783 
    784     // no token, len would be 0
    785     if (current == start || current == end) {
    786         return end;
    787     }
    788 
    789     int tokenLen = current - start;
    790 
    791     if (tokenLen == 1) {
    792         // start array
    793         switch (*start) {
    794             case kOpenedSquareBracket_PdfDelimiter:
    795                 return readArray(current, end, token, allocator, doc);
    796 
    797             case kOpenedRoundBracket_PdfDelimiter:
    798                 return readString(start + 1, end, token, allocator);
    799 
    800             case kOpenedInequityBracket_PdfDelimiter:
    801                 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) {
    802                     // TODO(edisonn): pass here the length somehow?
    803                     return readDictionary(start + 2, end, token, allocator, doc);  // skip <<
    804                 } else {
    805                     return readHexString(start + 1, end, token, allocator);  // skip <
    806                 }
    807 
    808             case kNamed_PdfDelimiter:
    809                 return readName(start + 1, end, token, allocator);
    810 
    811             // TODO(edisonn): what to do curly brackets?
    812             case kOpenedCurlyBracket_PdfDelimiter:
    813             default:
    814                 break;
    815         }
    816 
    817         SkASSERT(!isPdfWhiteSpace(*start));
    818         if (isPdfDelimiter(*start)) {
    819             // TODO(edisonn): how unexpected stream ] } > ) will be handled?
    820             // for now ignore, and it will become a keyword to be ignored
    821         }
    822     }
    823 
    824     if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') {
    825         SkPdfNativeObject::makeNull(token);
    826         // PUT_TRACK_STREAM(start, start + 4)
    827         return current;
    828     }
    829 
    830     if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') {
    831         SkPdfNativeObject::makeBoolean(true, token);
    832         // PUT_TRACK_STREAM(start, start + 4)
    833         return current;
    834     }
    835 
    836     // TODO(edisonn): again, make all buffers have 5 extra bytes
    837     if (tokenLen == 5 && start[0] == 'f' &&
    838                          start[1] == 'a' &&
    839                          start[2] == 'l' &&
    840                          start[3] == 's' &&
    841                          start[4] == 'e') {
    842         SkPdfNativeObject::makeBoolean(false, token);
    843         // PUT_TRACK_STREAM(start, start + 5)
    844         return current;
    845     }
    846 
    847     if (isPdfNumeric(*start)) {
    848         SkPdfNativeObject::makeNumeric(start, current, token);
    849         //  PUT_TRACK_STREAM(start, current)
    850     } else {
    851         SkPdfNativeObject::makeKeyword(start, current, token);
    852         // PUT_TRACK_STREAM(start, current)
    853     }
    854     return current;
    855 }
    856 
    857 SkPdfNativeObject* SkPdfAllocator::allocBlock() {
    858     fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject);
    859     return new SkPdfNativeObject[BUFFER_SIZE];
    860 }
    861 
    862 SkPdfAllocator::~SkPdfAllocator() {
    863     for (int i = 0 ; i < fHandles.count(); i++) {
    864         free(fHandles[i]);
    865     }
    866     for (int i = 0 ; i < fHistory.count(); i++) {
    867         for (int j = 0 ; j < BUFFER_SIZE; j++) {
    868             fHistory[i][j].reset();
    869         }
    870         delete[] fHistory[i];
    871     }
    872     for (int j = 0 ; j < BUFFER_SIZE; j++) {
    873         fCurrent[j].reset();
    874     }
    875     delete[] fCurrent;
    876 }
    877 
    878 SkPdfNativeObject* SkPdfAllocator::allocObject() {
    879     if (fCurrentUsed >= BUFFER_SIZE) {
    880         fHistory.push(fCurrent);
    881         fCurrent = allocBlock();
    882         fCurrentUsed = 0;
    883         fSizeInBytes += sizeof(SkPdfNativeObject*);
    884     }
    885     fCurrentUsed++;
    886     return &fCurrent[fCurrentUsed - 1];
    887 }
    888 
    889 // TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result,
    890 // so there is no need of a second pass
    891 SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
    892                                            SkPdfAllocator* allocator,
    893                                            SkPdfNativeDoc* doc)
    894             : fDoc(doc)
    895             , fAllocator(allocator)
    896             , fUncompressedStream(NULL)
    897             , fUncompressedStreamEnd(NULL)
    898             , fEmpty(false)
    899             , fHasPutBack(false) {
    900     const unsigned char* buffer = NULL;
    901     size_t len = 0;
    902     objWithStream->GetFilteredStreamRef(&buffer, &len);
    903     // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
    904     // we need to do now for perf, and our generated pdfs do not have comments,
    905     // but we need to remove this hack for pdfs in the wild
    906     char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
    907     if (endobj) {
    908         len = endobj - (char*)buffer + strlen("endobj");
    909     }
    910     fUncompressedStreamStart = fUncompressedStream = buffer;
    911     fUncompressedStreamEnd = fUncompressedStream + len;
    912 }
    913 
    914 SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len,
    915                                            SkPdfAllocator* allocator,
    916                                            SkPdfNativeDoc* doc) : fDoc(doc)
    917                                                                 , fAllocator(allocator)
    918                                                                 , fEmpty(false)
    919                                                                 , fHasPutBack(false) {
    920     // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!)
    921     // we need to do now for perf, and our generated pdfs do not have comments,
    922     // but we need to remove this hack for pdfs in the wild
    923     char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj");
    924     if (endobj) {
    925         len = endobj - (char*)buffer + strlen("endobj");
    926     }
    927     fUncompressedStreamStart = fUncompressedStream = buffer;
    928     fUncompressedStreamEnd = fUncompressedStream + len;
    929 }
    930 
    931 SkPdfNativeTokenizer::~SkPdfNativeTokenizer() {
    932 }
    933 
    934 bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) {
    935 #ifdef PDF_TRACE_READ_TOKEN
    936     static int read_op = 0;
    937 #endif
    938 
    939     token->fKeyword = NULL;
    940     token->fObject = NULL;
    941 
    942     fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
    943     if (fUncompressedStream >= fUncompressedStreamEnd) {
    944         fEmpty = true;
    945         return false;
    946     }
    947 
    948     SkPdfNativeObject obj;
    949     fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc);
    950     //  PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)
    951 
    952     // If it is a keyword, we will only get the pointer of the string.
    953     if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) {
    954         token->fKeyword = obj.c_str();
    955         token->fKeywordLength = obj.lenstr();
    956         token->fType = kKeyword_TokenType;
    957     } else {
    958         SkPdfNativeObject* pobj = fAllocator->allocObject();
    959         *pobj = obj;
    960         token->fObject = pobj;
    961         token->fType = kObject_TokenType;
    962     }
    963 
    964 #ifdef PDF_TRACE_READ_TOKEN
    965     read_op++;
    966 #if 0
    967     if (548 == read_op) {
    968         printf("break;\n");
    969     }
    970 #endif
    971     printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object",
    972            token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
    973                              token->fObject->toString().c_str());
    974 #endif
    975 
    976     return true;
    977 }
    978 
    979 void SkPdfNativeTokenizer::PutBack(PdfToken token) {
    980     SkASSERT(!fHasPutBack);
    981     fHasPutBack = true;
    982     fPutBack = token;
    983 #ifdef PDF_TRACE_READ_TOKEN
    984     printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object",
    985            token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() :
    986                             token.fObject->toString().c_str());
    987 #endif
    988 }
    989 
    990 bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) {
    991     if (fHasPutBack) {
    992         *token = fPutBack;
    993         fHasPutBack = false;
    994 #ifdef PDF_TRACE_READ_TOKEN
    995         printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object",
    996                token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() :
    997                                  token->fObject->toString().c_str());
    998 #endif
    999         if (writeDiff) {
   1000             SkPdfDiffEncoder::WriteToFile(token);
   1001         }
   1002         return true;
   1003     }
   1004 
   1005     if (fEmpty) {
   1006 #ifdef PDF_TRACE_READ_TOKEN
   1007         printf("EMPTY TOKENIZER\n");
   1008 #endif
   1009         return false;
   1010     }
   1011 
   1012     const bool result = readTokenCore(token);
   1013     if (result && writeDiff) {
   1014         SkPdfDiffEncoder::WriteToFile(token);
   1015     }
   1016     return result;
   1017 }
   1018 
   1019 #define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName)
   1020 
   1021 // keys
   1022 DECLARE_PDF_NAME(BitsPerComponent);
   1023 DECLARE_PDF_NAME(ColorSpace);
   1024 DECLARE_PDF_NAME(Decode);
   1025 DECLARE_PDF_NAME(DecodeParms);
   1026 DECLARE_PDF_NAME(Filter);
   1027 DECLARE_PDF_NAME(Height);
   1028 DECLARE_PDF_NAME(ImageMask);
   1029 DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations?
   1030 DECLARE_PDF_NAME(Interpolate);
   1031 DECLARE_PDF_NAME(Width);
   1032 
   1033 // values
   1034 DECLARE_PDF_NAME(DeviceGray);
   1035 DECLARE_PDF_NAME(DeviceRGB);
   1036 DECLARE_PDF_NAME(DeviceCMYK);
   1037 DECLARE_PDF_NAME(Indexed);
   1038 DECLARE_PDF_NAME(ASCIIHexDecode);
   1039 DECLARE_PDF_NAME(ASCII85Decode);
   1040 DECLARE_PDF_NAME(LZWDecode);
   1041 DECLARE_PDF_NAME(FlateDecode);  // PDF 1.2
   1042 DECLARE_PDF_NAME(RunLengthDecode);
   1043 DECLARE_PDF_NAME(CCITTFaxDecode);
   1044 DECLARE_PDF_NAME(DCTDecode);
   1045 
   1046 #define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName;
   1047 
   1048 
   1049 static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) {
   1050     if (!key || !key->isName()) {
   1051         return key;
   1052     }
   1053 
   1054     // TODO(edisonn): use autogenerated code!
   1055     HANDLE_NAME_ABBR(key, BitsPerComponent, BPC);
   1056     HANDLE_NAME_ABBR(key, ColorSpace, CS);
   1057     HANDLE_NAME_ABBR(key, Decode, D);
   1058     HANDLE_NAME_ABBR(key, DecodeParms, DP);
   1059     HANDLE_NAME_ABBR(key, Filter, F);
   1060     HANDLE_NAME_ABBR(key, Height, H);
   1061     HANDLE_NAME_ABBR(key, ImageMask, IM);
   1062 //    HANDLE_NAME_ABBR(key, Intent, );
   1063     HANDLE_NAME_ABBR(key, Interpolate, I);
   1064     HANDLE_NAME_ABBR(key, Width, W);
   1065 
   1066     return key;
   1067 }
   1068 
   1069 static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) {
   1070     if (!value || !value->isName()) {
   1071         return value;
   1072     }
   1073 
   1074     // TODO(edisonn): use autogenerated code!
   1075     HANDLE_NAME_ABBR(value, DeviceGray, G);
   1076     HANDLE_NAME_ABBR(value, DeviceRGB, RGB);
   1077     HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK);
   1078     HANDLE_NAME_ABBR(value, Indexed, I);
   1079     HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx);
   1080     HANDLE_NAME_ABBR(value, ASCII85Decode, A85);
   1081     HANDLE_NAME_ABBR(value, LZWDecode, LZW);
   1082     HANDLE_NAME_ABBR(value, FlateDecode, Fl);  // (PDF 1.2)
   1083     HANDLE_NAME_ABBR(value, RunLengthDecode, RL);
   1084     HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF);
   1085     HANDLE_NAME_ABBR(value, DCTDecode, DCT);
   1086 
   1087     return value;
   1088 }
   1089 
   1090 SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() {
   1091     // BI already processed
   1092     fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd);
   1093     if (fUncompressedStream >= fUncompressedStreamEnd) {
   1094         return NULL;
   1095     }
   1096 
   1097     SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject();
   1098     SkPdfNativeObject::makeEmptyDictionary(inlineImage);
   1099     //  PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart,
   1100     //                             fUncompressedStream - fUncompressedStreamStart)
   1101 
   1102     while (fUncompressedStream < fUncompressedStreamEnd) {
   1103         SkPdfNativeObject* key = fAllocator->allocObject();
   1104         fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key,
   1105                                          fAllocator, fDoc);
   1106         // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
   1107 
   1108         if (key->isKeyword() && key->lenstr() == 2 &&
   1109                     key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID
   1110             fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd,
   1111                                                         inlineImage, fDoc);
   1112             return inlineImage;
   1113         } else {
   1114             SkPdfNativeObject* obj = fAllocator->allocObject();
   1115             fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj,
   1116                                              fAllocator, fDoc);
   1117             //  PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s
   1118             // TODO(edisonn): perf maybe we should not expand abBreviation like this
   1119             inlineImage->set(inlineImageKeyAbbreviationExpand(key),
   1120                              inlineImageValueAbbreviationExpand(obj));
   1121         }
   1122     }
   1123     // TODO(edisonn): report end of data with inline image without an EI
   1124     return inlineImage;
   1125 }
   1126