Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
#include "core/fpdfapi/parser/cpdf_simple_parser.h"

#include <vector>

#include "core/fpdfapi/parser/fpdf_parser_utility.h"
     10 
     11 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize)
     12     : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {}
     13 
     14 CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str)
     15     : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
     16 
     17 std::pair<const uint8_t*, uint32_t> CPDF_SimpleParser::ParseWord() {
     18   const uint8_t* pStart = nullptr;
     19   uint8_t dwSize = 0;
     20   uint8_t ch;
     21   while (1) {
     22     if (m_dwSize <= m_dwCurPos)
     23       return std::make_pair(pStart, dwSize);
     24     ch = m_pData[m_dwCurPos++];
     25     while (PDFCharIsWhitespace(ch)) {
     26       if (m_dwSize <= m_dwCurPos)
     27         return std::make_pair(pStart, dwSize);
     28       ch = m_pData[m_dwCurPos++];
     29     }
     30 
     31     if (ch != '%')
     32       break;
     33 
     34     while (1) {
     35       if (m_dwSize <= m_dwCurPos)
     36         return std::make_pair(pStart, dwSize);
     37       ch = m_pData[m_dwCurPos++];
     38       if (PDFCharIsLineEnding(ch))
     39         break;
     40     }
     41   }
     42 
     43   uint32_t start_pos = m_dwCurPos - 1;
     44   pStart = m_pData + start_pos;
     45   if (PDFCharIsDelimiter(ch)) {
     46     if (ch == '/') {
     47       while (1) {
     48         if (m_dwSize <= m_dwCurPos)
     49           return std::make_pair(pStart, dwSize);
     50         ch = m_pData[m_dwCurPos++];
     51         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
     52           m_dwCurPos--;
     53           dwSize = m_dwCurPos - start_pos;
     54           return std::make_pair(pStart, dwSize);
     55         }
     56       }
     57     } else {
     58       dwSize = 1;
     59       if (ch == '<') {
     60         if (m_dwSize <= m_dwCurPos)
     61           return std::make_pair(pStart, dwSize);
     62         ch = m_pData[m_dwCurPos++];
     63         if (ch == '<')
     64           dwSize = 2;
     65         else
     66           m_dwCurPos--;
     67       } else if (ch == '>') {
     68         if (m_dwSize <= m_dwCurPos)
     69           return std::make_pair(pStart, dwSize);
     70         ch = m_pData[m_dwCurPos++];
     71         if (ch == '>')
     72           dwSize = 2;
     73         else
     74           m_dwCurPos--;
     75       }
     76     }
     77     return std::make_pair(pStart, dwSize);
     78   }
     79 
     80   dwSize = 1;
     81   while (1) {
     82     if (m_dwSize <= m_dwCurPos)
     83       return std::make_pair(pStart, dwSize);
     84     ch = m_pData[m_dwCurPos++];
     85 
     86     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
     87       m_dwCurPos--;
     88       break;
     89     }
     90     dwSize++;
     91   }
     92   return std::make_pair(pStart, dwSize);
     93 }
     94 
     95 ByteStringView CPDF_SimpleParser::GetWord() {
     96   const uint8_t* pStart;
     97   uint32_t dwSize;
     98   std::tie(pStart, dwSize) = ParseWord();
     99   if (dwSize == 1 && pStart[0] == '<') {
    100     while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
    101       m_dwCurPos++;
    102     }
    103     if (m_dwCurPos < m_dwSize) {
    104       m_dwCurPos++;
    105     }
    106     return ByteStringView(pStart,
    107                           static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
    108   }
    109   if (dwSize == 1 && pStart[0] == '(') {
    110     int level = 1;
    111     while (m_dwCurPos < m_dwSize) {
    112       if (m_pData[m_dwCurPos] == ')') {
    113         level--;
    114         if (level == 0) {
    115           break;
    116         }
    117       }
    118       if (m_pData[m_dwCurPos] == '\\') {
    119         if (m_dwSize <= m_dwCurPos) {
    120           break;
    121         }
    122         m_dwCurPos++;
    123       } else if (m_pData[m_dwCurPos] == '(') {
    124         level++;
    125       }
    126       if (m_dwSize <= m_dwCurPos) {
    127         break;
    128       }
    129       m_dwCurPos++;
    130     }
    131     if (m_dwCurPos < m_dwSize) {
    132       m_dwCurPos++;
    133     }
    134     return ByteStringView(pStart,
    135                           static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
    136   }
    137   return ByteStringView(pStart, dwSize);
    138 }
    139 
    140 bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token,
    141                                               int nParams) {
    142   nParams++;
    143   uint32_t* pBuf = FX_Alloc(uint32_t, nParams);
    144   int buf_index = 0;
    145   int buf_count = 0;
    146   m_dwCurPos = 0;
    147   while (1) {
    148     pBuf[buf_index++] = m_dwCurPos;
    149     if (buf_index == nParams) {
    150       buf_index = 0;
    151     }
    152     buf_count++;
    153     if (buf_count > nParams) {
    154       buf_count = nParams;
    155     }
    156     ByteStringView word = GetWord();
    157     if (word.IsEmpty()) {
    158       FX_Free(pBuf);
    159       return false;
    160     }
    161     if (word == token) {
    162       if (buf_count < nParams) {
    163         continue;
    164       }
    165       m_dwCurPos = pBuf[buf_index];
    166       FX_Free(pBuf);
    167       return true;
    168     }
    169   }
    170   return false;
    171 }
    172