1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_simple_parser.h" 8 9 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 10 11 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize) 12 : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {} 13 14 CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) 15 : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {} 16 17 std::pair<const uint8_t*, uint32_t> CPDF_SimpleParser::ParseWord() { 18 const uint8_t* pStart = nullptr; 19 uint8_t dwSize = 0; 20 uint8_t ch; 21 while (1) { 22 if (m_dwSize <= m_dwCurPos) 23 return std::make_pair(pStart, dwSize); 24 ch = m_pData[m_dwCurPos++]; 25 while (PDFCharIsWhitespace(ch)) { 26 if (m_dwSize <= m_dwCurPos) 27 return std::make_pair(pStart, dwSize); 28 ch = m_pData[m_dwCurPos++]; 29 } 30 31 if (ch != '%') 32 break; 33 34 while (1) { 35 if (m_dwSize <= m_dwCurPos) 36 return std::make_pair(pStart, dwSize); 37 ch = m_pData[m_dwCurPos++]; 38 if (PDFCharIsLineEnding(ch)) 39 break; 40 } 41 } 42 43 uint32_t start_pos = m_dwCurPos - 1; 44 pStart = m_pData + start_pos; 45 if (PDFCharIsDelimiter(ch)) { 46 if (ch == '/') { 47 while (1) { 48 if (m_dwSize <= m_dwCurPos) 49 return std::make_pair(pStart, dwSize); 50 ch = m_pData[m_dwCurPos++]; 51 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 52 m_dwCurPos--; 53 dwSize = m_dwCurPos - start_pos; 54 return std::make_pair(pStart, dwSize); 55 } 56 } 57 } else { 58 dwSize = 1; 59 if (ch == '<') { 60 if (m_dwSize <= m_dwCurPos) 61 return std::make_pair(pStart, dwSize); 62 ch = m_pData[m_dwCurPos++]; 63 if (ch == '<') 64 dwSize = 2; 65 else 66 m_dwCurPos--; 67 } else if (ch == '>') { 68 if (m_dwSize <= m_dwCurPos) 69 return std::make_pair(pStart, dwSize); 70 ch = m_pData[m_dwCurPos++]; 71 if (ch == '>') 72 dwSize = 2; 73 else 74 m_dwCurPos--; 75 } 76 } 77 return std::make_pair(pStart, dwSize); 78 } 79 80 dwSize = 1; 81 while (1) { 82 if (m_dwSize <= m_dwCurPos) 83 return std::make_pair(pStart, dwSize); 84 ch = m_pData[m_dwCurPos++]; 85 86 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 87 m_dwCurPos--; 88 break; 89 } 90 dwSize++; 91 } 92 return std::make_pair(pStart, dwSize); 93 } 94 95 ByteStringView CPDF_SimpleParser::GetWord() { 96 const uint8_t* pStart; 97 uint32_t dwSize; 98 std::tie(pStart, dwSize) = ParseWord(); 99 if (dwSize == 1 && pStart[0] == '<') { 100 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { 101 m_dwCurPos++; 102 } 103 if (m_dwCurPos < m_dwSize) { 104 m_dwCurPos++; 105 } 106 return ByteStringView(pStart, 107 static_cast<size_t>(m_dwCurPos - (pStart - m_pData))); 108 } 109 if (dwSize == 1 && pStart[0] == '(') { 110 int level = 1; 111 while (m_dwCurPos < m_dwSize) { 112 if (m_pData[m_dwCurPos] == ')') { 113 level--; 114 if (level == 0) { 115 break; 116 } 117 } 118 if (m_pData[m_dwCurPos] == '\\') { 119 if (m_dwSize <= m_dwCurPos) { 120 break; 121 } 122 m_dwCurPos++; 123 } else if (m_pData[m_dwCurPos] == '(') { 124 level++; 125 } 126 if (m_dwSize <= m_dwCurPos) { 127 break; 128 } 129 m_dwCurPos++; 130 } 131 if (m_dwCurPos < m_dwSize) { 132 m_dwCurPos++; 133 } 134 return ByteStringView(pStart, 135 static_cast<size_t>(m_dwCurPos - (pStart - m_pData))); 136 } 137 return ByteStringView(pStart, dwSize); 138 } 139 140 bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, 141 int nParams) { 142 nParams++; 143 uint32_t* pBuf = FX_Alloc(uint32_t, nParams); 144 int buf_index = 0; 145 int buf_count = 0; 146 m_dwCurPos = 0; 147 while (1) { 148 pBuf[buf_index++] = m_dwCurPos; 149 if (buf_index == nParams) { 150 buf_index = 0; 151 } 152 buf_count++; 153 if (buf_count > nParams) { 154 buf_count = nParams; 155 } 156 ByteStringView word = GetWord(); 157 if (word.IsEmpty()) { 158 FX_Free(pBuf); 159 return false; 160 } 161 if (word == token) { 162 if (buf_count < nParams) { 163 continue; 164 } 165 m_dwCurPos = pBuf[buf_index]; 166 FX_Free(pBuf); 167 return true; 168 } 169 } 170 return false; 171 } 172