// PDFCloneUtility.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.multipdf;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.COSObjectable;

/**
 * Utility class used to clone PDF objects. It keeps track of objects it has
 * already cloned.
 *
 */
class PDFCloneUtility
{
    private static final Log LOG = LogFactory.getLog(PDFCloneUtility.class);

    private final PDDocument destination;
    // Maps each source object to the clone that was created for it.
    private final Map<Object,COSBase> clonedVersion = new HashMap<>();
    // Every object that has been handed out as a clone, so that a clone is never cloned again.
    private final Set<COSBase> clonedValues = new HashSet<>();
    // It might be useful to use IdentityHashMap like in PDFBOX-4477 for speed,
    // but we need a really huge file to test this. A test with the file from PDFBOX-4477
    // did not show a noticeable speed difference.

    /**
     * Creates a new instance for the given target document.
     * @param dest the destination PDF document that will receive the clones
     */
    PDFCloneUtility(PDDocument dest)
    {
        this.destination = dest;
    }

    /**
     * Returns the destination PDF document this cloner instance is set up for.
     * @return the destination PDF document
     */
    PDDocument getDestination()
    {
        return this.destination;
    }

    /**
     * Deep-clones the given object for inclusion into the destination document that was
     * passed to the constructor.
     * <br>
     * Objects that were already cloned by this instance are not cloned again: the previously
     * created clone is returned instead. An object that is itself a clone produced by this
     * instance is returned unchanged.
     * @param base the initial object as the root of the deep-clone operation; may be
     * {@code null}
     * @return the cloned instance of the base object, or {@code null} if base is {@code null}
     * @throws IOException if an I/O error occurs
     */
    COSBase cloneForNewDocument( Object base ) throws IOException
    {
        if( base == null )
        {
            return null;
        }
        COSBase retval = clonedVersion.get(base);
        if( retval != null )
        {
            //we are done, it has already been converted.
            return retval;
        }
        if (base instanceof COSBase && clonedValues.contains(base))
        {
            // Don't clone a clone
            return (COSBase) base;
        }
        if (base instanceof List)
        {
            // a plain java list becomes a COSArray of the cloned elements
            COSArray array = new COSArray();
            List<?> list = (List<?>) base;
            for (Object obj : list)
            {
                array.add(cloneForNewDocument(obj));
            }
            retval = array;
        }
        else if( base instanceof COSObjectable && !(base instanceof COSBase) )
        {
            // high-level wrapper: clone the COS object behind it
            retval = cloneForNewDocument( ((COSObjectable)base).getCOSObject() );
        }
        else if( base instanceof COSObject )
        {
            // indirect reference: clone the object it refers to
            COSObject object = (COSObject)base;
            retval = cloneForNewDocument( object.getObject() );
        }
        else if( base instanceof COSArray )
        {
            COSArray newArray = new COSArray();
            COSArray array = (COSArray)base;
            for( int i=0; i<array.size(); i++ )
            {
                COSBase value = array.get(i);
                if (hasSelfReference(base, value))
                {
                    // keep the self reference, but pointing to the clone
                    newArray.add(newArray);
                }
                else
                {
                    newArray.add(cloneForNewDocument(value));
                }
            }
            retval = newArray;
        }
        else if( base instanceof COSStream )
        {
            // must be tested before COSDictionary
            COSStream originalStream = (COSStream)base;
            COSStream stream = destination.getDocument().createCOSStream();
            // the raw data is copied as is
            try (OutputStream output = stream.createRawOutputStream();
                 InputStream input = originalStream.createRawInputStream())
            {
                IOUtils.copy(input, output);
            }
            // register the clone before descending into the entries, so that entries
            // referring back to this stream resolve to the clone
            clonedVersion.put( base, stream );
            for( Map.Entry<COSName, COSBase> entry :  originalStream.entrySet() )
            {
                COSBase value = entry.getValue();
                if (hasSelfReference(base, value))
                {
                    stream.setItem(entry.getKey(), stream);
                }
                else
                {
                    stream.setItem(entry.getKey(), cloneForNewDocument(value));
                }
            }
            retval = stream;
        }
        else if( base instanceof COSDictionary )
        {
            COSDictionary dic = (COSDictionary)base;
            COSDictionary newDic = new COSDictionary();
            retval = newDic;
            // register the clone before descending into the entries, so that entries
            // referring back to this dictionary resolve to the clone
            clonedVersion.put( base, newDic );
            for( Map.Entry<COSName, COSBase> entry : dic.entrySet() )
            {
                COSBase value = entry.getValue();
                if (hasSelfReference(base, value))
                {
                    newDic.setItem(entry.getKey(), newDic);
                }
                else
                {
                    newDic.setItem(entry.getKey(), cloneForNewDocument(value));
                }
            }
        }
        else
        {
            // any other COSBase is used as is
            retval = (COSBase)base;
        }
        clonedVersion.put( base, retval );
        clonedValues.add(retval);
        return retval;
    }

    /**
     * Merges two objects of the same type by deep-cloning its members.
     * <br>
     * Base and target must be instances of the same class.
     * <br>
     * Array elements of base are cloned and appended to the target array. Dictionary entries
     * of base are cloned into the target dictionary if the key is missing there, otherwise
     * the two values are merged recursively. Nothing is done if base is {@code null}, if base
     * and target are the same instance, or if base has a non-null entry in the clone map.
     * @param base the base object to be cloned
     * @param target the merge target
     * @throws IOException if an I/O error occurs
     */
    void cloneMerge( final COSObjectable base, COSObjectable target) throws IOException
    {
        if (base == null || base == target)
        {
            return;
        }
        COSBase retval = clonedVersion.get( base );
        if( retval != null )
        {
            return;
            //we are done, it has already been converted. // ### Is that correct for cloneMerge???
        }
        //TODO what when clone-merging a clone? Does it ever happen?
        if (!(base instanceof COSBase))
        {
            // high-level wrappers: merge the COS objects behind them
            cloneMerge(base.getCOSObject(), target.getCOSObject());
        }
        else if( base instanceof COSObject )
        {
            // resolve the indirect reference(s) and merge the referenced objects;
            // any other kind of target is ignored
            if(target instanceof COSObject)
            {
                cloneMerge(((COSObject) base).getObject(),((COSObject) target).getObject() );
            }
            else if (target instanceof COSDictionary || target instanceof COSArray)
            {
                cloneMerge(((COSObject) base).getObject(), target);
            }
        }
        else if( base instanceof COSArray )
        {
            if (target instanceof COSObject)
            {
                cloneMerge(base, ((COSObject) target).getObject());
            }
            else
            {
                // append clones of all base elements to the target array
                COSArray array = (COSArray) base;
                for (int i = 0; i < array.size(); i++)
                {
                    ((COSArray) target).add(cloneForNewDocument(array.get(i)));
                }
            }
        }
        else if( base instanceof COSStream )
        {
            // does that make sense???
            // NOTE: the new stream is only recorded as the clone of base,
            // it is not put into the target.
            COSStream originalStream = (COSStream)base;
            COSStream stream = destination.getDocument().createCOSStream();
            // both streams are managed here so that the input stream gets closed as well
            try (OutputStream output = stream.createOutputStream(originalStream.getFilters());
                 InputStream input = originalStream.createInputStream())
            {
                IOUtils.copy(input, output);
            }
            clonedVersion.put( base, stream );
            for( Map.Entry<COSName, COSBase> entry : originalStream.entrySet() )
            {
                stream.setItem(entry.getKey(), cloneForNewDocument(entry.getValue()));
            }
            retval = stream;
        }
        else if( base instanceof COSDictionary )
        {
            if (target instanceof COSObject)
            {
                cloneMerge(base, ((COSObject) target).getObject());
            }
            else
            {
                COSDictionary dic = (COSDictionary) base;
                COSDictionary targetDic = (COSDictionary) target;
                for (Map.Entry<COSName, COSBase> entry : dic.entrySet())
                {
                    COSName key = entry.getKey();
                    COSBase value = entry.getValue();
                    if (targetDic.getItem(key) != null)
                    {
                        // key exists on both sides: merge the two values
                        cloneMerge(value, targetDic.getItem(key));
                    }
                    else
                    {
                        targetDic.setItem(key, cloneForNewDocument(value));
                    }
                }
            }
        }
        else
        {
            retval = (COSBase)base;
        }
        // retval is only set by the stream branch and the fallback branch above; for all
        // other branches it is still null here, so a null mapping is recorded for base.
        clonedVersion.put( base, retval );
        clonedValues.add(retval);
    }

    /**
     * Check whether an element (of an array or a dictionary) points to its parent.
     * Only indirect references ({@link COSObject}) are examined. A warning is logged
     * when a self reference is found.
     *
     * @param parent COSArray or COSDictionary
     * @param value an element
     * @return true if value is an indirect reference that resolves to the parent instance
     */
    private boolean hasSelfReference(Object parent, COSBase value)
    {
        if (value instanceof COSObject)
        {
            COSBase actual = ((COSObject) value).getObject();
            if (actual == parent)
            {
                COSObject cosObj = ((COSObject) value);
                LOG.warn(parent.getClass().getSimpleName() + " object has a reference to itself: " +
                        cosObj.getObjectNumber() + " " + cosObj.getGenerationNumber() + " R");
                return true;
            }
        }
        return false;
    }
}