Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
#include "core/fpdfapi/parser/cpdf_simple_parser.h"

#include <vector>

#include "core/fpdfapi/parser/fpdf_parser_utility.h"
     10 
     11 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize)
     12     : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {}
     13 
     14 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str)
     15     : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
     16 
     17 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, uint32_t& dwSize) {
     18   pStart = nullptr;
     19   dwSize = 0;
     20   uint8_t ch;
     21   while (1) {
     22     if (m_dwSize <= m_dwCurPos)
     23       return;
     24     ch = m_pData[m_dwCurPos++];
     25     while (PDFCharIsWhitespace(ch)) {
     26       if (m_dwSize <= m_dwCurPos)
     27         return;
     28       ch = m_pData[m_dwCurPos++];
     29     }
     30 
     31     if (ch != '%')
     32       break;
     33 
     34     while (1) {
     35       if (m_dwSize <= m_dwCurPos)
     36         return;
     37       ch = m_pData[m_dwCurPos++];
     38       if (PDFCharIsLineEnding(ch))
     39         break;
     40     }
     41   }
     42 
     43   uint32_t start_pos = m_dwCurPos - 1;
     44   pStart = m_pData + start_pos;
     45   if (PDFCharIsDelimiter(ch)) {
     46     if (ch == '/') {
     47       while (1) {
     48         if (m_dwSize <= m_dwCurPos)
     49           return;
     50         ch = m_pData[m_dwCurPos++];
     51         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
     52           m_dwCurPos--;
     53           dwSize = m_dwCurPos - start_pos;
     54           return;
     55         }
     56       }
     57     } else {
     58       dwSize = 1;
     59       if (ch == '<') {
     60         if (m_dwSize <= m_dwCurPos)
     61           return;
     62         ch = m_pData[m_dwCurPos++];
     63         if (ch == '<')
     64           dwSize = 2;
     65         else
     66           m_dwCurPos--;
     67       } else if (ch == '>') {
     68         if (m_dwSize <= m_dwCurPos)
     69           return;
     70         ch = m_pData[m_dwCurPos++];
     71         if (ch == '>')
     72           dwSize = 2;
     73         else
     74           m_dwCurPos--;
     75       }
     76     }
     77     return;
     78   }
     79 
     80   dwSize = 1;
     81   while (1) {
     82     if (m_dwSize <= m_dwCurPos)
     83       return;
     84     ch = m_pData[m_dwCurPos++];
     85 
     86     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
     87       m_dwCurPos--;
     88       break;
     89     }
     90     dwSize++;
     91   }
     92 }
     93 
     94 CFX_ByteStringC CPDF_SimpleParser::GetWord() {
     95   const uint8_t* pStart;
     96   uint32_t dwSize;
     97   ParseWord(pStart, dwSize);
     98   if (dwSize == 1 && pStart[0] == '<') {
     99     while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
    100       m_dwCurPos++;
    101     }
    102     if (m_dwCurPos < m_dwSize) {
    103       m_dwCurPos++;
    104     }
    105     return CFX_ByteStringC(pStart,
    106                            (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
    107   }
    108   if (dwSize == 1 && pStart[0] == '(') {
    109     int level = 1;
    110     while (m_dwCurPos < m_dwSize) {
    111       if (m_pData[m_dwCurPos] == ')') {
    112         level--;
    113         if (level == 0) {
    114           break;
    115         }
    116       }
    117       if (m_pData[m_dwCurPos] == '\\') {
    118         if (m_dwSize <= m_dwCurPos) {
    119           break;
    120         }
    121         m_dwCurPos++;
    122       } else if (m_pData[m_dwCurPos] == '(') {
    123         level++;
    124       }
    125       if (m_dwSize <= m_dwCurPos) {
    126         break;
    127       }
    128       m_dwCurPos++;
    129     }
    130     if (m_dwCurPos < m_dwSize) {
    131       m_dwCurPos++;
    132     }
    133     return CFX_ByteStringC(pStart,
    134                            (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
    135   }
    136   return CFX_ByteStringC(pStart, dwSize);
    137 }
    138 
    139 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
    140                                               int nParams) {
    141   nParams++;
    142   uint32_t* pBuf = FX_Alloc(uint32_t, nParams);
    143   int buf_index = 0;
    144   int buf_count = 0;
    145   m_dwCurPos = 0;
    146   while (1) {
    147     pBuf[buf_index++] = m_dwCurPos;
    148     if (buf_index == nParams) {
    149       buf_index = 0;
    150     }
    151     buf_count++;
    152     if (buf_count > nParams) {
    153       buf_count = nParams;
    154     }
    155     CFX_ByteStringC word = GetWord();
    156     if (word.IsEmpty()) {
    157       FX_Free(pBuf);
    158       return false;
    159     }
    160     if (word == token) {
    161       if (buf_count < nParams) {
    162         continue;
    163       }
    164       m_dwCurPos = pBuf[buf_index];
    165       FX_Free(pBuf);
    166       return true;
    167     }
    168   }
    169   return false;
    170 }
    171