/*
 * {{{ header & license
 * Copyright (c) 2016 Stanimir Stamenkov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 * }}}
 */
package org.xhtmlrenderer.pdf;

import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;

import org.xhtmlrenderer.pdf.ITextOutputDevice.Bookmark;
import org.xhtmlrenderer.render.Box;

class HTMLOutline {

    /**
     * Pattern for the heading tag names {@code h1} to {@code h6}, ignoring
     * case.  Group 1 captures the level digit.
     */
    private static final Pattern HEADING =
            Pattern.compile("h([1-6])", Pattern.CASE_INSENSITIVE);

    /** Pattern for the tag names treated as sectioning roots, ignoring case. */
    private static final Pattern ROOT =
            Pattern.compile("blockquote|details|fieldset|figure|td",
                            Pattern.CASE_INSENSITIVE);

    /** Pattern for a run of one or more whitespace characters. */
    private static final Pattern WS = Pattern.compile("\\s+");

    // NOTE(review): presumably the upper bound on a bookmark name's length;
    // the code using it is outside this part of the file - confirm.
    private static final int MAX_NAME_LENGTH = 200;

    /** Enclosing outline node, or {@code null} when none was given. */
    private final HTMLOutline parent;

    /** Level supplied at construction; the no-arg constructor uses 0. */
    private final int level;

    /** Bookmark created for this node; registered as a child of the parent's bookmark. */
    private final Bookmark bookmark;

    /**
     * Creates a parentless node at level 0 whose bookmark is named
     * {@code "root"}.
     */
    private HTMLOutline() {
        this(0, "root", null);
    }

    /**
     * Creates an outline node and, when a parent is supplied, attaches the
     * node's bookmark to the parent's bookmark.
     *
     * @param  level  the level to record for this node;
     * @param  name  the name given to the new bookmark;
     * @param  parent  the enclosing node, or {@code null} for none.
     */
    private HTMLOutline(int level, String name, HTMLOutline parent) {
        // Second Bookmark argument is always the empty string here
        // (presumably the destination - confirm against Bookmark).
        final Bookmark created = new Bookmark(name, "");
        this.parent = parent;
        this.level = level;
        this.bookmark = created;
        if (parent == null) {
            return; // nothing to attach to
        }
        parent.bookmark.addChild(created);
    }

    /**
     * Creates a bookmark list
of the document outline generated for the given * element context (usually the root document element). *
     * <p>
     * The current algorithm is simpler than the one suggested in the HTML5
     * specification, in that it is not affected by sectioning content but
     * only by the heading level. For example:
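     * <pre>
     * &lt;body&gt;
     *   &lt;h1&gt;Foo&lt;/h1&gt;
     *   &lt;h3&gt;Bar&lt;/h3&gt;
     *   &lt;blockquote&gt;
     *     &lt;h5&gt;Bla&lt;/h5&gt;
     *   &lt;/blockquote&gt;
     *   &lt;p&gt;Baz&lt;/p&gt;
     *   &lt;h2&gt;Quux&lt;/h2&gt;
     *   &lt;section&gt;
     *     &lt;h3&gt;Thud&lt;/h3&gt;
     *   &lt;/section&gt;
     *   &lt;h4&gt;Grunt&lt;/h4&gt;
     * &lt;/body&gt;</pre>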
* Should generate outline as:
** But it generates outline as:
** <strong data-pdf-bookmark="4">Foo bar</strong>* *
* <tr data-pdf-bookmark="5" data-pdf-bookmark-name="Bar baz">...</tr>* *
* <h3 data-pdf-bookmark="none">Baz qux</h3>* *
* <html data-pdf-bookmark="exclude">...</html>* * @param context the top element a sectioning outline would be generated for; * @param box box hierarchy the outline bookmarks would get mapped into. * @return Bookmarks of the outline generated for the given element context. * @see Creating an outline */ public static List