Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2018 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkPDFDocumentPriv.h"
      9 #include "SkPDFTag.h"
     10 
     11 // Table 333 in PDF 32000-1:2008
     12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
     13     switch (type) {
     14         #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
     15         M(Document);
     16         M(Part);
     17         M(Art);
     18         M(Sect);
     19         M(Div);
     20         M(BlockQuote);
     21         M(Caption);
     22         M(TOC);
     23         M(TOCI);
     24         M(Index);
     25         M(NonStruct);
     26         M(Private);
     27         M(H);
     28         M(H1);
     29         M(H2);
     30         M(H3);
     31         M(H4);
     32         M(H5);
     33         M(H6);
     34         M(P);
     35         M(L);
     36         M(LI);
     37         M(Lbl);
     38         M(LBody);
     39         M(Table);
     40         M(TR);
     41         M(TH);
     42         M(TD);
     43         M(THead);
     44         M(TBody);
     45         M(TFoot);
     46         M(Span);
     47         M(Quote);
     48         M(Note);
     49         M(Reference);
     50         M(BibEntry);
     51         M(Code);
     52         M(Link);
     53         M(Annot);
     54         M(Ruby);
     55         M(RB);
     56         M(RT);
     57         M(RP);
     58         M(Warichu);
     59         M(WT);
     60         M(WP);
     61         M(Figure);
     62         M(Formula);
     63         M(Form);
     64         #undef M
     65     }
     66     SK_ABORT("bad tag");
     67     return "";
     68 }
     69 
     70 struct SkPDFTagNode {
     71     SkPDFTagNode* fChildren = nullptr;
     72     size_t fChildCount = 0;
     73     struct MarkedContentInfo {
     74         unsigned fPageIndex;
     75         int fMarkId;
     76     };
     77     SkTArray<MarkedContentInfo> fMarkedContent;
     78     int fNodeId;
     79     SkPDF::DocumentStructureType fType;
     80     SkPDFIndirectReference fRef;
     81     enum State {
     82         kUnknown,
     83         kYes,
     84         kNo,
     85     } fCanDiscard = kUnknown;
     86 };
     87 
     88 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
     89 
     90 SkPDFTagTree::~SkPDFTagTree() = default;
     91 
     92 static void copy(const SkPDF::StructureElementNode& node,
     93                  SkPDFTagNode* dst,
     94                  SkArenaAlloc* arena,
     95                  SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
     96     nodeMap->set(node.fNodeId, dst);
     97     size_t childCount = node.fChildCount;
     98     SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
     99     dst->fChildCount = childCount;
    100     dst->fNodeId = node.fNodeId;
    101     dst->fType = node.fType;
    102     dst->fChildren = children;
    103     for (size_t i = 0; i < childCount; ++i) {
    104         copy(node.fChildren[i], &children[i], arena, nodeMap);
    105     }
    106 }
    107 
    108 void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
    109     if (node) {
    110         fRoot = fArena.make<SkPDFTagNode>();
    111         copy(*node, fRoot, &fArena, &fNodeMap);
    112     }
    113 }
    114 
    115 void SkPDFTagTree::reset() {
    116     fArena.reset();
    117     fNodeMap.reset();
    118     fMarksPerPage.reset();
    119     fRoot = nullptr;
    120 }
    121 
    122 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
    123     if (!fRoot) {
    124         return -1;
    125     }
    126     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
    127     if (!tagPtr) {
    128         return -1;
    129     }
    130     SkPDFTagNode* tag = *tagPtr;
    131     SkASSERT(tag);
    132     while (fMarksPerPage.size() < pageIndex + 1) {
    133         fMarksPerPage.push_back();
    134     }
    135     SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
    136     int markId = pageMarks.count();
    137     tag->fMarkedContent.push_back({pageIndex, markId});
    138     pageMarks.push_back(tag);
    139     return markId;
    140 }
    141 
    142 static bool can_discard(SkPDFTagNode* node) {
    143     if (node->fCanDiscard == SkPDFTagNode::kYes) {
    144         return true;
    145     }
    146     if (node->fCanDiscard == SkPDFTagNode::kNo) {
    147         return false;
    148     }
    149     if (!node->fMarkedContent.empty()) {
    150         node->fCanDiscard = SkPDFTagNode::kNo;
    151         return false;
    152     }
    153     for (size_t i = 0; i < node->fChildCount; ++i) {
    154         if (!can_discard(&node->fChildren[i])) {
    155             node->fCanDiscard = SkPDFTagNode::kNo;
    156             return false;
    157         }
    158     }
    159     node->fCanDiscard = SkPDFTagNode::kYes;
    160     return true;
    161 }
    162 
    163 
    164 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
    165                                                 SkPDFTagNode* node,
    166                                                 SkPDFDocument* doc) {
    167     SkPDFIndirectReference ref = doc->reserveRef();
    168     std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
    169     SkPDFTagNode* children = node->fChildren;
    170     size_t childCount = node->fChildCount;
    171     for (size_t i = 0; i < childCount; ++i) {
    172         SkPDFTagNode* child = &children[i];
    173         if (!(can_discard(child))) {
    174             kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
    175         }
    176     }
    177     for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
    178         std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
    179         mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
    180         mcr->insertInt("MCID", info.fMarkId);
    181         kids->appendObject(std::move(mcr));
    182     }
    183     node->fRef = ref;
    184     SkPDFDict dict("StructElem");
    185     dict.insertName("S", tag_name_from_type(node->fType));
    186     dict.insertRef("P", parent);
    187     dict.insertObject("K", std::move(kids));
    188     return doc->emit(dict, ref);
    189 }
    190 
    191 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
    192     if (!fRoot) {
    193         return SkPDFIndirectReference();
    194     }
    195     if (can_discard(fRoot)) {
    196         SkDEBUGFAIL("PDF has tag tree but no marked content.");
    197     }
    198     SkPDFIndirectReference ref = doc->reserveRef();
    199 
    200     unsigned pageCount = SkToUInt(doc->pageCount());
    201 
    202     // Build the StructTreeRoot.
    203     SkPDFDict structTreeRoot("StructTreeRoot");
    204     structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
    205     structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
    206 
    207     // Build the parent tree, which is a mapping from the marked
    208     // content IDs on each page to their corressponding tags.
    209     SkPDFDict parentTree("ParentTree");
    210     auto parentTreeNums = SkPDFMakeArray();
    211 
    212     SkASSERT(fMarksPerPage.size() <= pageCount);
    213     for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
    214         const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
    215         SkPDFArray markToTagArray;
    216         for (SkPDFTagNode* mark : pageMarks) {
    217             SkASSERT(mark->fRef);
    218             markToTagArray.appendRef(mark->fRef);
    219         }
    220         parentTreeNums->appendInt(j);
    221         parentTreeNums->appendRef(doc->emit(markToTagArray));
    222     }
    223     parentTree.insertObject("Nums", std::move(parentTreeNums));
    224     structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
    225     return doc->emit(structTreeRoot, ref);
    226 }
    227 
    228