/* * {{{ header & license * Copyright (c) 2016 Stanimir Stamenkov * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * }}} */ package org.xhtmlrenderer.pdf; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.traversal.DocumentTraversal; import org.w3c.dom.traversal.NodeFilter; import org.w3c.dom.traversal.NodeIterator; import org.xhtmlrenderer.pdf.ITextOutputDevice.Bookmark; import org.xhtmlrenderer.render.Box; class HTMLOutline { private static final Pattern HEADING = Pattern.compile("h([1-6])", Pattern.CASE_INSENSITIVE); /** sectioning roots */ private static final Pattern ROOT = Pattern.compile("blockquote|details|fieldset|figure|td", Pattern.CASE_INSENSITIVE); private static final Pattern WS = Pattern.compile("\\s+"); private static final int MAX_NAME_LENGTH = 200; private final HTMLOutline parent; private final int level; private final Bookmark bookmark; private HTMLOutline() { this(0, "root", null); } private HTMLOutline(int level, String name, HTMLOutline parent) { this.level = level; this.bookmark = new Bookmark(name, ""); this.parent = parent; if (parent != null) { parent.bookmark.addChild(bookmark); } } /** * Creates a bookmark list of the document outline generated for the given * element context (usually the root document element). *

* The current algorithm is more simple than the one suggested in the HTML5 * specification such as it is not affected by * sectioning * content but just the heading level. For * example:

*
     * <body>
     *   <h1>Foo</h1>
     *   <h3>Bar</h3>
     *   <blockquote>
     *     <h5>Bla</h5>
     *   </blockquote>
     *   <p>Baz</p>
     *   <h2>Quux</h2>
     *   <section>
     *     <h3>Thud</h3>
     *   </section>
     *   <h4>Grunt</h4>
     * </body>
*

* Should generate outline as:

*
    *
  1. Foo *
      *
    1. Bar
    2. *
    3. Quux
    4. *
    5. Thud
    6. *
    7. Grunt
    8. *
  2. *
*

* But it generates outline as:

*
    *
  1. Foo *
      *
    1. Bar
    2. *
    3. Quux *
        *
      1. Thud *
          *
        1. Grunt
        2. *
      2. *
    4. *
  2. *
* *

Example document customizations

* *
Include non-heading element as bookmark (level 4)
*
     * <strong data-pdf-bookmark="4">Foo bar</strong>
* *
Specify bookmark name
*
     * <tr data-pdf-bookmark="5" data-pdf-bookmark-name="Bar baz">...</tr>
* *
Exclude individual heading from bookmarks
*
     * <h3 data-pdf-bookmark="none">Baz qux</h3>
* *
Prevent automatic bookmarks for the whole of the document
*
     * <html data-pdf-bookmark="exclude">...</html>
* * @param context the top element a sectioning outline would be generated for; * @param box box hierarchy the outline bookmarks would get mapped into. * @return Bookmarks of the outline generated for the given element context. * @see Creating an outline */ public static List generate(Element context, Box box) { NodeIterator iterator = NestedSectioningFilter.iterator(context); HTMLOutline root = new HTMLOutline(); HTMLOutline current = root; Map map = new IdentityHashMap(); for (Element element = (Element) iterator.nextNode(); element != null; element = (Element) iterator.nextNode()) { int level; try { level = Integer.parseInt(getOutlineLevel(element)); if (level < 1) { continue; // Illegal value } } catch (NumberFormatException e) { continue; // Invalid value } String name = getBookmarkName(element); while (current.level >= level) { current = current.parent; } current = new HTMLOutline(level, name, current); map.put(element, current.bookmark); } initBoxRefs(map, box); return root.bookmark.getChildren(); } // generate(Element, Box) : List private static void initBoxRefs(Map map, Box box) { Bookmark bookmark = map.get(box.getElement()); if (bookmark != null) { bookmark.setBox(box); } for (int i = 0, len = box.getChildCount(); i < len; i++) { initBoxRefs(map, box.getChild(i)); } } private static String getBookmarkName(Element element) { String name = element.getAttribute("data-pdf-bookmark-name").trim(); if (name.isEmpty()) { name = element.getTextContent(); } name = WS.matcher(name.trim()).replaceAll(" "); if (name.length() > MAX_NAME_LENGTH) { name = name.substring(0, MAX_NAME_LENGTH); } return name; } static String getOutlineLevel(Element element) { String bookmark = element.getAttribute("data-pdf-bookmark").trim(); if (bookmark.isEmpty()) { Matcher heading = HEADING.matcher(element.getTagName()); if (heading.matches()) { bookmark = heading.group(1); } else if (ROOT.matcher(element.getTagName()).matches()) { bookmark = "exclude"; } else { bookmark = "none"; } } return bookmark; } private static class NestedSectioningFilter implements NodeFilter { static final NestedSectioningFilter INSTANCE = new NestedSectioningFilter(); static NodeIterator iterator(Element root) { return ((DocumentTraversal) root.getOwnerDocument()) .createNodeIterator(root, SHOW_ELEMENT, INSTANCE, true); } @Override public short acceptNode(Node n) { String outlineLevel = getOutlineLevel((Element) n); if (outlineLevel.equalsIgnoreCase("none")) { return FILTER_SKIP; } return outlineLevel.equalsIgnoreCase("exclude") ? FILTER_REJECT : FILTER_ACCEPT; } } // class NestedSectioningFilter } // class HTMLOutline