COSStream.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.cos;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.filter.DecodeOptions;
import org.apache.pdfbox.filter.Filter;
import org.apache.pdfbox.filter.FilterFactory;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessInputStream;
import org.apache.pdfbox.io.RandomAccessOutputStream;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.io.RandomAccessReadView;
import org.apache.pdfbox.io.ScratchFile;
/**
* This class represents a stream object in a PDF document.
*
* @author Ben Litchfield
*/
public class COSStream extends COSDictionary implements Closeable
{
    private static final Log LOG = LogFactory.getLog(COSStream.class);

    // backing store for data written through this object, in-memory or on-disk
    private RandomAccess randomAccess;
    // source of new buffers when stream data is written
    private ScratchFile scratchFile;
    // true if the scratch file was created by this instance and therefore must be closed by it
    private boolean closeScratchFile = false;
    // true while an OutputStream handed out by this instance is still open
    private boolean isWriting;
    // read-only view onto already existing encoded data
    private RandomAccessReadView randomAccessReadView;

    /**
     * Creates a new stream with an empty dictionary.
     * <p>
     * Try to avoid using this constructor because it creates a new scratch file in memory. Instead,
     * use {@link COSDocument#createCOSStream() document.getDocument().createCOSStream()} which will
     * use the existing scratch file (in memory or in temp file) of the document.
     * </p>
     */
    public COSStream()
    {
        this(null);
    }

    /**
     * Creates a new stream with an empty dictionary. Data is stored in the given scratch file.
     *
     * @param scratchFile Scratch file for writing stream data. May be null, in which case a
     * main-memory-only scratch file is created lazily on the first write.
     */
    public COSStream(ScratchFile scratchFile)
    {
        setInt(COSName.LENGTH, 0);
        this.scratchFile = scratchFile;
    }

    /**
     * Creates a new stream with an empty dictionary. Data is read from the given random access
     * view. Written data is stored in the given scratch file.
     *
     * @param scratchFile Scratch file for writing stream data.
     * @param randomAccessReadView view providing the encoded stream data; its length is stored
     * as the stream's /Length entry.
     * @throws IOException if the length of the random access view isn't available
     */
    public COSStream(ScratchFile scratchFile, RandomAccessReadView randomAccessReadView)
            throws IOException
    {
        this(scratchFile);
        this.randomAccessReadView = randomAccessReadView;
        // NOTE(review): the cast truncates lengths above Integer.MAX_VALUE
        setInt(COSName.LENGTH, (int) randomAccessReadView.length());
    }

    /**
     * Throws if the random access backing store has been closed. Helpful for catching cases where
     * a user tries to use a COSStream which has outlived its COSDocument.
     *
     * @throws IOException if the backing store exists and reports itself as closed
     */
    private void checkClosed() throws IOException
    {
        if (randomAccess != null && randomAccess.isClosed())
        {
            // Tip for debugging: look at the destination file with an editor, you'll see an
            // incomplete stream at the bottom.
            throw new IOException("COSStream has been closed and cannot be read. " +
                    "Perhaps its enclosing PDDocument has been closed?");
        }
    }

    /**
     * Returns the scratch file used for written data, lazily creating a main-memory-only
     * instance if none was supplied. A scratch file created here is owned by this stream and is
     * closed by {@link #close()}.
     */
    private ScratchFile getScratchFile()
    {
        if (scratchFile == null)
        {
            scratchFile = ScratchFile.getMainMemoryOnlyInstance();
            closeScratchFile = true;
        }
        return scratchFile;
    }

    /**
     * Returns a new InputStream which reads the encoded PDF stream data. Experts only!
     *
     * @return InputStream containing raw, encoded PDF stream data.
     * @throws IOException If the stream could not be read.
     * @throws IllegalStateException if an OutputStream of this COSStream is still open
     */
    public InputStream createRawInputStream() throws IOException
    {
        checkClosed();
        if (isWriting)
        {
            throw new IllegalStateException("Cannot read while there is an open stream writer");
        }
        // written data takes precedence over the read-only view
        if (randomAccess != null)
        {
            return new RandomAccessInputStream(randomAccess);
        }
        if (randomAccessReadView == null)
        {
            throw new IOException(
                    "Create InputStream called without data being written before to stream.");
        }
        randomAccessReadView.seek(0);
        return new RandomAccessInputStream(randomAccessReadView);
    }

    /**
     * Returns a new InputStream which reads the decoded stream data.
     *
     * @return InputStream containing decoded stream data.
     * @throws IOException If the stream could not be read.
     */
    public COSInputStream createInputStream() throws IOException
    {
        return createInputStream(DecodeOptions.DEFAULT);
    }

    /**
     * Returns a new InputStream which reads the decoded stream data, passing the given options
     * on to the decoding filters.
     *
     * @param options decode options handed to {@link COSInputStream}.
     * @return InputStream containing decoded stream data.
     * @throws IOException If the stream could not be read.
     */
    public COSInputStream createInputStream(DecodeOptions options) throws IOException
    {
        InputStream input = createRawInputStream();
        return COSInputStream.create(getFilterList(), this, input, options);
    }

    /**
     * Returns a new RandomAccessRead which reads the decoded stream data.
     * <p>
     * If the stream has filters, the data is decoded completely into memory first.
     * </p>
     *
     * @return RandomAccessRead containing decoded stream data.
     * @throws IOException If the stream could not be read or if the filter list contains the
     * same filter more than once.
     */
    public RandomAccessRead createView() throws IOException
    {
        List<Filter> filterList = getFilterList();
        if (filterList.isEmpty())
        {
            // unfiltered data: the existing view can be re-used directly if nothing was written
            if (randomAccess == null && randomAccessReadView != null)
            {
                return new RandomAccessReadView(randomAccessReadView, 0,
                        randomAccessReadView.length());
            }
            return new RandomAccessReadBuffer(createRawInputStream());
        }

        if (filterList.size() > 1)
        {
            // a filter chain naming the same filter twice is rejected
            Set<Filter> distinctFilters = new HashSet<>(filterList);
            if (distinctFilters.size() != filterList.size())
            {
                throw new IOException("Duplicate");
            }
        }

        InputStream stageInput = createRawInputStream();
        ByteArrayOutputStream stageOutput = new ByteArrayOutputStream(stageInput.available());
        // apply the filters one after another, the output of one stage being the input of the next
        for (int index = 0; index < filterList.size(); index++)
        {
            if (index > 0)
            {
                stageInput = new ByteArrayInputStream(stageOutput.toByteArray());
                stageOutput.reset();
            }
            try
            {
                filterList.get(index).decode(stageInput, stageOutput, this, index,
                        DecodeOptions.DEFAULT);
            }
            finally
            {
                IOUtils.closeQuietly(stageInput);
            }
        }
        return new RandomAccessReadBuffer(stageOutput.toByteArray());
    }

    /**
     * Returns a new OutputStream for writing stream data, using the current filters.
     *
     * @return OutputStream for un-encoded stream data.
     * @throws IOException If the output stream could not be created.
     */
    public OutputStream createOutputStream() throws IOException
    {
        return createOutputStream(null);
    }

    /**
     * Returns a new OutputStream for writing stream data, using the given filters. Any data
     * previously written to this stream is discarded.
     *
     * @param filters COSArray or COSName of filters to be used. If null, the filters currently
     * set in the dictionary are used.
     * @return OutputStream for un-encoded stream data.
     * @throws IOException If the output stream could not be created.
     * @throws IllegalStateException if another OutputStream of this COSStream is still open
     */
    public OutputStream createOutputStream(COSBase filters) throws IOException
    {
        checkClosed();
        if (isWriting)
        {
            throw new IllegalStateException("Cannot have more than one open stream writer.");
        }
        // apply filters, if any
        if (filters != null)
        {
            setItem(COSName.FILTER, filters);
        }
        OutputStream randomOut = new RandomAccessOutputStream(resetBackingStore());
        OutputStream cosOut = new COSOutputStream(getFilterList(), this, randomOut,
                getScratchFile());
        return trackWriter(cosOut);
    }

    /**
     * Returns a new OutputStream for writing encoded PDF data. Experts only! Any data previously
     * written to this stream is discarded.
     *
     * @return OutputStream for raw PDF stream data.
     * @throws IOException If the output stream could not be created.
     * @throws IllegalStateException if another OutputStream of this COSStream is still open
     */
    public OutputStream createRawOutputStream() throws IOException
    {
        checkClosed();
        if (isWriting)
        {
            throw new IllegalStateException("Cannot have more than one open stream writer.");
        }
        return trackWriter(new RandomAccessOutputStream(resetBackingStore()));
    }

    /**
     * Empties the existing backing store or, if there is none yet, allocates a new buffer from
     * the scratch file.
     *
     * @return the backing store that subsequent writes go to
     * @throws IOException if the buffer could not be cleared or created
     */
    private RandomAccess resetBackingStore() throws IOException
    {
        if (randomAccess != null)
        {
            randomAccess.clear();
        }
        else
        {
            randomAccess = getScratchFile().createBuffer();
        }
        return randomAccess;
    }

    /**
     * Marks this stream as being written and wraps the given target so that closing the
     * returned stream updates the /Length entry and releases the "writing" state again.
     *
     * @param target the stream that receives the written bytes
     * @return the stream to hand out to the caller
     */
    private OutputStream trackWriter(OutputStream target)
    {
        isWriting = true;
        return new FilterOutputStream(target)
        {
            // guards against the bookkeeping being run again by a repeated close()
            private boolean closed;

            @Override
            public void write(byte[] b, int off, int len) throws IOException
            {
                // delegate the whole chunk; FilterOutputStream would write it byte by byte
                this.out.write(b, off, len);
            }

            @Override
            public void close() throws IOException
            {
                if (closed)
                {
                    // closing an already closed stream has no effect, in particular it must not
                    // reset the state of a writer which was opened in the meantime
                    return;
                }
                closed = true;
                try
                {
                    super.close();
                    // NOTE(review): the cast truncates lengths above Integer.MAX_VALUE
                    setInt(COSName.LENGTH, (int) randomAccess.length());
                }
                finally
                {
                    // always release the writer state, otherwise a failed close would block
                    // every further read and write of this COSStream
                    isWriting = false;
                }
            }
        };
    }

    /**
     * Returns the list of filters named by the /Filter entry of this stream's dictionary.
     *
     * @return a new, possibly empty, list with one Filter per filter name, in dictionary order
     * @throws IOException if the filter array contains something other than a name, or if the
     * filter factory fails to provide a filter
     */
    private List<Filter> getFilterList() throws IOException
    {
        COSBase filters = getFilters();
        if (filters instanceof COSName)
        {
            List<Filter> singleFilter = new ArrayList<>(1);
            singleFilter.add(FilterFactory.INSTANCE.getFilter((COSName) filters));
            return singleFilter;
        }
        if (filters instanceof COSArray)
        {
            COSArray filterArray = (COSArray) filters;
            List<Filter> filterChain = new ArrayList<>(filterArray.size());
            for (int i = 0; i < filterArray.size(); i++)
            {
                COSBase entry = filterArray.get(i);
                if (!(entry instanceof COSName))
                {
                    throw new IOException("Forbidden type in filter array: " +
                            (entry == null ? "null" : entry.getClass().getName()));
                }
                filterChain.add(FilterFactory.INSTANCE.getFilter((COSName) entry));
            }
            return filterChain;
        }
        // neither a name nor an array: no filters to apply
        return new ArrayList<>();
    }

    /**
     * Returns the length of the encoded stream, as recorded in the /Length entry.
     *
     * @return length in bytes
     * @throws IllegalStateException if an OutputStream of this COSStream is still open
     */
    public long getLength()
    {
        if (isWriting)
        {
            throw new IllegalStateException("There is an open OutputStream associated with this " +
                    "COSStream. It must be closed before querying the " +
                    "length of this COSStream.");
        }
        return getInt(COSName.LENGTH, 0);
    }

    /**
     * This will return the filters to apply to the byte stream.
     * The method will return
     * <ul>
     * <li>null if no filters are to be applied
     * <li>a COSName if one filter is to be applied
     * <li>a COSArray containing COSNames if multiple filters are to be applied
     * </ul>
     *
     * @return the COSBase object representing the filters
     */
    public COSBase getFilters()
    {
        return getDictionaryObject(COSName.FILTER);
    }

    /**
     * Returns the contents of the stream as a PDF "text string".
     *
     * @return the decoded content as a string, or an empty string if the content could not be
     * read
     */
    public String toTextString()
    {
        try (InputStream input = createInputStream())
        {
            byte[] content = IOUtils.toByteArray(input);
            return new COSString(content).getString();
        }
        catch (IOException e)
        {
            // deliberately best-effort: callers get an empty string instead of an exception
            LOG.debug("An exception occurred trying to get the content - returning empty string instead", e);
            return "";
        }
    }

    @Override
    public void accept(ICOSVisitor visitor) throws IOException
    {
        visitor.visitFromStream(this);
    }

    /**
     * {@inheritDoc}
     *
     * Called by PDFBox when the PDDocument is closed, this closes the stream and removes the data.
     * You will usually not need this.
     *
     * @throws IOException if one of the underlying resources could not be closed
     */
    @Override
    public void close() throws IOException
    {
        // the nested finally blocks make sure that every resource gets its chance to be closed
        // even if closing one of the others fails
        try
        {
            if (closeScratchFile && scratchFile != null)
            {
                scratchFile.close();
                scratchFile = null;
            }
        }
        finally
        {
            try
            {
                // marks the scratch file pages as free
                if (randomAccess != null)
                {
                    randomAccess.close();
                    randomAccess = null;
                }
            }
            finally
            {
                if (randomAccessReadView != null)
                {
                    randomAccessReadView.close();
                    randomAccessReadView = null;
                }
            }
        }
    }

    /**
     * Indicates whether the stream contains any data or not.
     *
     * @return true if the stream contains any data
     */
    public boolean hasData()
    {
        return randomAccess != null || randomAccessReadView != null;
    }
}