// COSWriterCompressionPool.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdfwriter.compress;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdfparser.PDFXRefStream;
import org.apache.pdfbox.pdmodel.PDDocument;

/**
 * An instance of this class compresses the contents of a given {@link PDDocument}.
 * <p>
 * On construction the pool walks the object graph reachable from the trailer's {@code /Root} and {@code /Info}
 * entries and sorts every registered object into one of two groups: objects that have to be written as top level
 * objects and objects that may be packed into a compressed {@link COSWriterObjectStream}.
 * </p>
 *
 * @author Christian Appl
 */
public class COSWriterCompressionPool
{

    public static final float MINIMUM_SUPPORTED_VERSION = 1.6f;

    private final PDDocument document;
    private final CompressParameters parameters;

    private final COSObjectPool objectPool;

    // The keys of all objects, that shall be directly appended to the document's top level container.
    private final List<COSObjectKey> topLevelObjects = new ArrayList<>();
    // The keys of all objects, that may be appended to an object stream.
    private final List<COSObjectKey> objectStreamObjects = new ArrayList<>();
    // All arrays and dictionaries already encountered as element values. Only queried for membership, hence a
    // hash based set instead of a list, to avoid a linear scan for every visited dictionary.
    private final Set<COSBase> allDirectObjects = new HashSet<>();

    /**
     * <p>
     * Constructs an object that can be used to compress the contents of a given {@link PDDocument}. It provides the
     * means to:
     * </p>
     * <ul>
     * <li>Compress the COSStructure of the document, by streaming {@link COSBase}s to compressed
     * {@link COSWriterObjectStream}s</li>
     * </ul>
     * <p>
     * The structure reachable from the trailer's {@code /Root} and {@code /Info} entries is registered immediately
     * and both resulting key lists are sorted afterwards.
     * </p>
     *
     * @param document The document, that shall be compressed.
     * @param parameters The configuration of the compression operations, that shall be applied. If {@code null},
     * a default constructed {@link CompressParameters} instance is used.
     * @throws IOException Shall be thrown if a compression operation failed.
     */
    public COSWriterCompressionPool(PDDocument document, CompressParameters parameters)
            throws IOException
    {
        this.document = document;
        this.parameters = parameters != null ? parameters : new CompressParameters();

        COSDocument cosDocument = document.getDocument();
        objectPool = new COSObjectPool(cosDocument.getHighestXRefObjectNumber());

        // Initialize object pool. Missing trailer entries yield null, which addStructure silently ignores.
        COSDictionary trailer = cosDocument.getTrailer();
        addStructure(trailer.getItem(COSName.ROOT));
        addStructure(trailer.getItem(COSName.INFO));

        Collections.sort(objectStreamObjects);
        Collections.sort(topLevelObjects);
    }

    /**
     * Adds the given {@link COSBase} to this pool, using the given {@link COSObjectKey} as its referenceable ID.
     * This method shall determine an appropriate key, for yet unregistered objects, to register them. Depending on
     * the type of object, it shall either be appended as-is or shall be appended to a compressed
     * {@link COSWriterObjectStream}.
     *
     * @param key The {@link COSObjectKey} that shall be used as the {@link COSBase}s ID, if possible.
     * @param base The {@link COSBase}, that shall be registered in this pool.
     * @return The dereferenced object (the content of a {@link COSObject}, otherwise {@code base} itself); may be
     * {@code null} for a hollow {@link COSObject}.
     */
    private COSBase addObjectToPool(COSObjectKey key, COSBase base)
    {
        // Drop hollow objects.
        COSBase current = base instanceof COSObject ? ((COSObject) base).getObject() : base;
        // To avoid mixing up indirect COSInteger objects holding the same value, a given key takes precedence:
        // the pool is asked for the key and only falls back to a lookup by object if no key is available.
        // The same is always true for COSFloat, COSBoolean and COSName and under certain circumstances for the
        // remaining types as well.
        if (current == null //
                || (key != null && objectPool.contains(key)) //
                || (key == null && objectPool.contains(current)))
        {
            return current;
        }

        // Decide on the target list before registering, then determine the object key.
        boolean topLevel = isTopLevelOnly(key, current);
        COSObjectKey actualKey = objectPool.put(key, current);
        if (actualKey != null)
        {
            if (topLevel)
            {
                topLevelObjects.add(actualKey);
            }
            else
            {
                // Append it to an object stream.
                objectStreamObjects.add(actualKey);
            }
        }
        return current;
    }

    /**
     * Checks whether the given object can not be appended to an object stream.
     * <ul>
     * <li>An object stream shall only contain generation 0 objects.</li>
     * <li>It shall never contain other streams.</li>
     * <li>It shall never contain the encryption dictionary.</li>
     * <li>It shall never contain the document's root dictionary. (relevant for document encryption)</li>
     * </ul>
     *
     * @param key The key the object shall be registered for, may be {@code null}.
     * @param current The dereferenced object, that shall be checked.
     * @return True, if the object must be written as a top level object.
     */
    private boolean isTopLevelOnly(COSObjectKey key, COSBase current)
    {
        if (key != null && key.getGeneration() != 0)
        {
            return true;
        }
        if (current instanceof COSStream)
        {
            return true;
        }
        if (document.getEncryption() != null
                && current == document.getEncryption().getCOSObject())
        {
            return true;
        }
        return current == document.getDocument().getTrailer().getCOSDictionary(COSName.ROOT);
    }

    /**
     * Attempts to find yet unregistered streams and dictionaries in the given structure. Streams, non direct
     * dictionaries and resolvable {@link COSObject}s are registered in the pool; arrays and dictionaries are
     * traversed recursively afterwards.
     *
     * @param current The object to be added for compressing, {@code null} is ignored.
     * @throws IOException Shall be thrown, if compressing the object failed.
     */
    private void addStructure(COSBase current) throws IOException
    {
        COSBase base = current;
        if (current instanceof COSStream
                || (current instanceof COSDictionary && !current.isDirect()))
        {
            base = addObjectToPool(current.getKey(), current);
        }
        else if (current instanceof COSObject)
        {
            // Hollow references (no resolvable content) are neither registered nor traversed.
            base = ((COSObject) current).getObject();
            if (base != null)
            {
                base = addObjectToPool(current.getKey(), current);
            }
        }

        if (base instanceof COSArray)
        {
            addElements(((COSArray) base).iterator());
        }
        else if (base instanceof COSDictionary)
        {
            addElements(((COSDictionary) base).getValues().iterator());
        }
    }

    /**
     * Walks the given element values and descends into every array, every not yet visited dictionary and every
     * resolvable {@link COSObject} whose key is not yet registered in the pool.
     *
     * @param elements The values of an array or a dictionary.
     * @throws IOException Shall be thrown, if compressing an element failed.
     */
    private void addElements(Iterator<COSBase> elements) throws IOException
    {
        while (elements.hasNext())
        {
            COSBase value = elements.next();
            // Note: only dictionaries are skipped when seen before, arrays are always traversed again.
            if (value instanceof COSArray
                    || (value instanceof COSDictionary
                    && !allDirectObjects.contains(value)))
            {
                allDirectObjects.add(value);
                addStructure(value);
            }
            else if (value instanceof COSObject)
            {
                COSObject cosObject = (COSObject) value;
                COSObjectKey objectKey = cosObject.getKey();
                if (objectKey != null && objectPool.contains(objectKey))
                {
                    continue;
                }
                if (cosObject.getObject() != null)
                {
                    addStructure(value);
                }
            }
        }
    }

    /**
     * Returns the {@link COSObjectKey}s of all objects, that must be added to the document's top level container.
     * Those objects are not valid to be added to an object stream.
     *
     * @return The internal (not copied) list of the keys of all top level objects.
     */
    public List<COSObjectKey> getTopLevelObjects()
    {
        return topLevelObjects;
    }

    /**
     * Returns the {@link COSObjectKey}s of all objects that can be appended to an object stream. This list is only
     * provided to enable reflections. Contained objects should indeed be added to a compressed document via an
     * object stream, as can be created via calling: {@link COSWriterCompressionPool#createObjectStreams()}
     *
     * @return The internal (not copied) list of the keys of all objects, that can be added to an object stream.
     */
    public List<COSObjectKey> getObjectStreamObjects()
    {
        return objectStreamObjects;
    }

    /**
     * Returns true, if the given {@link COSBase} is a registered object of this compression pool.
     *
     * @param object The object, that shall be checked.
     * @return True, if the given {@link COSBase} is a registered object of this compression pool.
     */
    public boolean contains(COSBase object)
    {
        return objectPool.contains(object);
    }

    /**
     * Returns the {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool.
     *
     * @param object The {@link COSBase} a {@link COSObjectKey} is registered for in this compression pool.
     * @return The {@link COSObjectKey}, that is registered for the given {@link COSBase} in this compression pool, if
     * such an object is contained.
     */
    public COSObjectKey getKey(COSBase object)
    {
        return objectPool.getKey(object);
    }

    /**
     * Returns the {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool.
     *
     * @param key The {@link COSObjectKey} a {@link COSBase} is registered for in this compression pool.
     * @return The {@link COSBase}, that is registered for the given {@link COSObjectKey} in this compression pool, if
     * such an object is contained.
     */
    public COSBase getObject(COSObjectKey key)
    {
        return objectPool.getObject(key);
    }

    /**
     * Returns the highest object number, that is registered in this compression pool.
     *
     * @return The highest object number, that is registered in this compression pool.
     */
    public long getHighestXRefObjectNumber()
    {
        return objectPool.getHighestXRefObjectNumber();
    }

    /**
     * Creates {@link COSWriterObjectStream}s for all currently registered objects of this pool, that have been marked
     * as fit for being compressed in this manner. Such object streams may be added to a PDF document and shall be
     * declared in a document's {@link PDFXRefStream} accordingly. The objects contained in such a stream must not be
     * added to the document separately.
     * <p>
     * A new object stream is started every {@code parameters.getObjectStreamSize()} objects.
     * </p>
     *
     * @return The created {@link COSWriterObjectStream}s for all currently registered compressible objects.
     */
    public List<COSWriterObjectStream> createObjectStreams()
    {
        List<COSWriterObjectStream> objectStreams = new ArrayList<>();
        // The configured size does not change while iterating, read it only once.
        int objectStreamSize = parameters.getObjectStreamSize();
        COSWriterObjectStream objectStream = null;
        for (int i = 0; i < objectStreamObjects.size(); i++)
        {
            COSObjectKey key = objectStreamObjects.get(i);
            // The null check always opens the first stream; the modulo opens a new one at every size boundary.
            if (objectStream == null || (i % objectStreamSize) == 0)
            {
                objectStream = new COSWriterObjectStream(this);
                objectStreams.add(objectStream);
            }
            objectStream.prepareStreamObject(key, objectPool.getObject(key));
        }
        return objectStreams;
    }
}