1 /* 2 * Copyright 2018 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkPDFDocumentPriv.h" 9 #include "SkPDFTag.h" 10 11 // Table 333 in PDF 32000-1:2008 12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) { 13 switch (type) { 14 #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X 15 M(Document); 16 M(Part); 17 M(Art); 18 M(Sect); 19 M(Div); 20 M(BlockQuote); 21 M(Caption); 22 M(TOC); 23 M(TOCI); 24 M(Index); 25 M(NonStruct); 26 M(Private); 27 M(H); 28 M(H1); 29 M(H2); 30 M(H3); 31 M(H4); 32 M(H5); 33 M(H6); 34 M(P); 35 M(L); 36 M(LI); 37 M(Lbl); 38 M(LBody); 39 M(Table); 40 M(TR); 41 M(TH); 42 M(TD); 43 M(THead); 44 M(TBody); 45 M(TFoot); 46 M(Span); 47 M(Quote); 48 M(Note); 49 M(Reference); 50 M(BibEntry); 51 M(Code); 52 M(Link); 53 M(Annot); 54 M(Ruby); 55 M(RB); 56 M(RT); 57 M(RP); 58 M(Warichu); 59 M(WT); 60 M(WP); 61 M(Figure); 62 M(Formula); 63 M(Form); 64 #undef M 65 } 66 SK_ABORT("bad tag"); 67 return ""; 68 } 69 70 struct SkPDFTagNode { 71 SkPDFTagNode* fChildren = nullptr; 72 size_t fChildCount = 0; 73 struct MarkedContentInfo { 74 unsigned fPageIndex; 75 int fMarkId; 76 }; 77 SkTArray<MarkedContentInfo> fMarkedContent; 78 int fNodeId; 79 SkPDF::DocumentStructureType fType; 80 SkPDFIndirectReference fRef; 81 enum State { 82 kUnknown, 83 kYes, 84 kNo, 85 } fCanDiscard = kUnknown; 86 }; 87 88 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {} 89 90 SkPDFTagTree::~SkPDFTagTree() = default; 91 92 static void copy(const SkPDF::StructureElementNode& node, 93 SkPDFTagNode* dst, 94 SkArenaAlloc* arena, 95 SkTHashMap<int, SkPDFTagNode*>* nodeMap) { 96 nodeMap->set(node.fNodeId, dst); 97 size_t childCount = node.fChildCount; 98 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount); 99 dst->fChildCount = childCount; 100 dst->fNodeId = node.fNodeId; 101 dst->fType = node.fType; 102 dst->fChildren = children; 103 for (size_t i = 0; i < childCount; ++i) { 104 copy(node.fChildren[i], &children[i], arena, nodeMap); 105 } 106 } 107 108 void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) { 109 if (node) { 110 fRoot = fArena.make<SkPDFTagNode>(); 111 copy(*node, fRoot, &fArena, &fNodeMap); 112 } 113 } 114 115 void SkPDFTagTree::reset() { 116 fArena.reset(); 117 fNodeMap.reset(); 118 fMarksPerPage.reset(); 119 fRoot = nullptr; 120 } 121 122 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) { 123 if (!fRoot) { 124 return -1; 125 } 126 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId); 127 if (!tagPtr) { 128 return -1; 129 } 130 SkPDFTagNode* tag = *tagPtr; 131 SkASSERT(tag); 132 while (fMarksPerPage.size() < pageIndex + 1) { 133 fMarksPerPage.push_back(); 134 } 135 SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex]; 136 int markId = pageMarks.count(); 137 tag->fMarkedContent.push_back({pageIndex, markId}); 138 pageMarks.push_back(tag); 139 return markId; 140 } 141 142 static bool can_discard(SkPDFTagNode* node) { 143 if (node->fCanDiscard == SkPDFTagNode::kYes) { 144 return true; 145 } 146 if (node->fCanDiscard == SkPDFTagNode::kNo) { 147 return false; 148 } 149 if (!node->fMarkedContent.empty()) { 150 node->fCanDiscard = SkPDFTagNode::kNo; 151 return false; 152 } 153 for (size_t i = 0; i < node->fChildCount; ++i) { 154 if (!can_discard(&node->fChildren[i])) { 155 node->fCanDiscard = SkPDFTagNode::kNo; 156 return false; 157 } 158 } 159 node->fCanDiscard = SkPDFTagNode::kYes; 160 return true; 161 } 162 163 164 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent, 165 SkPDFTagNode* node, 166 SkPDFDocument* doc) { 167 SkPDFIndirectReference ref = doc->reserveRef(); 168 std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray(); 169 SkPDFTagNode* children = node->fChildren; 170 size_t childCount = node->fChildCount; 171 for (size_t i = 0; i < childCount; ++i) { 172 SkPDFTagNode* child = &children[i]; 173 if (!(can_discard(child))) { 174 kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc)); 175 } 176 } 177 for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) { 178 std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR"); 179 mcr->insertRef("Pg", doc->getPage(info.fPageIndex)); 180 mcr->insertInt("MCID", info.fMarkId); 181 kids->appendObject(std::move(mcr)); 182 } 183 node->fRef = ref; 184 SkPDFDict dict("StructElem"); 185 dict.insertName("S", tag_name_from_type(node->fType)); 186 dict.insertRef("P", parent); 187 dict.insertObject("K", std::move(kids)); 188 return doc->emit(dict, ref); 189 } 190 191 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) { 192 if (!fRoot) { 193 return SkPDFIndirectReference(); 194 } 195 if (can_discard(fRoot)) { 196 SkDEBUGFAIL("PDF has tag tree but no marked content."); 197 } 198 SkPDFIndirectReference ref = doc->reserveRef(); 199 200 unsigned pageCount = SkToUInt(doc->pageCount()); 201 202 // Build the StructTreeRoot. 203 SkPDFDict structTreeRoot("StructTreeRoot"); 204 structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc)); 205 structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount)); 206 207 // Build the parent tree, which is a mapping from the marked 208 // content IDs on each page to their corressponding tags. 209 SkPDFDict parentTree("ParentTree"); 210 auto parentTreeNums = SkPDFMakeArray(); 211 212 SkASSERT(fMarksPerPage.size() <= pageCount); 213 for (size_t j = 0; j < fMarksPerPage.size(); ++j) { 214 const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j]; 215 SkPDFArray markToTagArray; 216 for (SkPDFTagNode* mark : pageMarks) { 217 SkASSERT(mark->fRef); 218 markToTagArray.appendRef(mark->fRef); 219 } 220 parentTreeNums->appendInt(j); 221 parentTreeNums->appendRef(doc->emit(markToTagArray)); 222 } 223 parentTree.insertObject("Nums", std::move(parentTreeNums)); 224 structTreeRoot.insertRef("ParentTree", doc->emit(parentTree)); 225 return doc->emit(structTreeRoot, ref); 226 } 227 228