PDType0Font.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;
import java.awt.geom.GeneralPath;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cmap.CMap;
import org.apache.fontbox.ttf.CmapLookup;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.ttf.model.GsubData;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
/**
* A Composite (Type 0) font.
*
* @author Ben Litchfield
*/
public class PDType0Font extends PDFont implements PDVectorFont
{
private static final Log LOG = LogFactory.getLog(PDType0Font.class);
private final PDCIDFont descendantFont;
private final Set<Integer> noUnicode = new HashSet<>();
private final GsubData gsubData;
private final CmapLookup cmapLookup;
private CMap cMap, cMapUCS2;
private boolean isCMapPredefined;
private boolean isDescendantCJK;
private PDCIDFontType2Embedder embedder;
private TrueTypeFont ttf;
/**
* Constructor for reading a Type0 font from a PDF file.
*
* @param fontDictionary The font dictionary according to the PDF specification.
* @throws IOException if the descendant font is missing.
*/
public PDType0Font(COSDictionary fontDictionary) throws IOException
{
super(fontDictionary);
gsubData = GsubData.NO_DATA_FOUND;
cmapLookup = null;
COSArray descendantFonts = dict.getCOSArray(COSName.DESCENDANT_FONTS);
if (descendantFonts == null)
{
throw new IOException("Missing descendant font array");
}
if (descendantFonts.size() == 0)
{
throw new IOException("Descendant font array is empty");
}
COSBase descendantFontDictBase = descendantFonts.getObject(0);
if (!(descendantFontDictBase instanceof COSDictionary))
{
throw new IOException("Missing descendant font dictionary");
}
if (!COSName.FONT.equals(
((COSDictionary) descendantFontDictBase).getCOSName(COSName.TYPE, COSName.FONT)))
{
throw new IOException("Missing or wrong type in descendant font dictionary");
}
descendantFont = PDFontFactory.createDescendantFont((COSDictionary) descendantFontDictBase, this);
readEncoding();
fetchCMapUCS2();
}
/**
* Private. Creates a new PDType0Font font for embedding.
*
* @param document
* @param ttf
* @param embedSubset
* @param closeTTF whether to close the ttf parameter after embedding. Must be true when the ttf
* parameter was created in the load() method, false when the ttf parameter was passed to the
* load() method.
* @param vertical
* @throws IOException
*/
private PDType0Font(PDDocument document, TrueTypeFont ttf, boolean embedSubset,
boolean closeTTF, boolean vertical) throws IOException
{
if (vertical)
{
ttf.enableVerticalSubstitutions();
}
gsubData = ttf.getGsubData();
cmapLookup = ttf.getUnicodeCmapLookup();
embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this, vertical);
descendantFont = embedder.getCIDFont();
readEncoding();
fetchCMapUCS2();
if (closeTTF)
{
if (embedSubset)
{
this.ttf = ttf;
document.registerTrueTypeFontForClosing(ttf);
}
else
{
// the TTF is fully loaded and it is safe to close the underlying data source
ttf.close();
}
}
}
/**
* Loads a TTF to be embedded and subset into a document as a Type 0 font. If you are loading a
* font for AcroForm, then use the 3-parameter constructor instead.
*
* @param doc The PDF document that will hold the embedded font.
* @param file A TrueType font.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font file.
*/
public static PDType0Font load(PDDocument doc, File file) throws IOException
{
return load(doc, new RandomAccessReadBufferedFile(file), true, false);
}
/**
* Loads a TTF to be embedded and subset into a document as a Type 0 font. If you are loading a
* font for AcroForm, then use the 3-parameter constructor instead.
*
* @param doc The PDF document that will hold the embedded font.
* @param input An input stream of a TrueType font. It will be closed before returning.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, InputStream input) throws IOException
{
return load(doc, input, true);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param input An input stream of a TrueType font. It will be closed before returning.
* @param embedSubset True if the font will be subset before embedding. Set this to false when
* creating a font for AcroForm.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, InputStream input, boolean embedSubset)
throws IOException
{
return load(doc, new RandomAccessReadBuffer(input), embedSubset, false);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param randomAccessRead source of a TrueType font.
* @param embedSubset True if the font will be subset before embedding. Set this to false when creating a font for
* AcroForm.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, RandomAccessRead randomAccessRead,
boolean embedSubset, boolean vertical) throws IOException
{
return new PDType0Font(doc, new TTFParser().parse(randomAccessRead), embedSubset, true,
vertical);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param ttf A TrueType font.
* @param embedSubset True if the font will be subset before embedding. Set this to false when creating a font for
* AcroForm.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, TrueTypeFont ttf, boolean embedSubset)
throws IOException
{
return new PDType0Font(doc, ttf, embedSubset, false, false);
}
/**
* Loads a TTF to be embedded into a document as a vertical Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param file A TrueType font.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font file.
*/
public static PDType0Font loadVertical(PDDocument doc, File file) throws IOException
{
return load(doc, new RandomAccessReadBufferedFile(file), true, true);
}
/**
* Loads a TTF to be embedded into a document as a vertical Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param input A TrueType font.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font loadVertical(PDDocument doc, InputStream input) throws IOException
{
return load(doc, new RandomAccessReadBuffer(input), true, true);
}
/**
* Loads a TTF to be embedded into a document as a vertical Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param input A TrueType font.
* @param embedSubset True if the font will be subset before embedding
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font loadVertical(PDDocument doc, InputStream input, boolean embedSubset)
throws IOException
{
return load(doc, new RandomAccessReadBuffer(input), embedSubset, true);
}
/**
* Loads a TTF to be embedded into a document as a vertical Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param ttf A TrueType font.
* @param embedSubset True if the font will be subset before embedding
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font loadVertical(PDDocument doc, TrueTypeFont ttf, boolean embedSubset)
throws IOException
{
return new PDType0Font(doc, ttf, embedSubset, false, true);
}
@Override
public void addToSubset(int codePoint)
{
if (!willBeSubset())
{
throw new IllegalStateException("This font was created with subsetting disabled");
}
embedder.addToSubset(codePoint);
}
public void addGlyphsToSubset(Set<Integer> glyphIds)
{
if (!willBeSubset())
{
throw new IllegalStateException("This font was created with subsetting disabled");
}
embedder.addGlyphIds(glyphIds);
}
@Override
public void subset() throws IOException
{
if (!willBeSubset())
{
throw new IllegalStateException("This font was created with subsetting disabled");
}
embedder.subset();
if (ttf != null)
{
ttf.close();
ttf = null;
}
}
@Override
public boolean willBeSubset()
{
return embedder != null && embedder.needsSubset();
}
/**
* Reads the font's Encoding entry, which should be a CMap name/stream.
*/
private void readEncoding() throws IOException
{
COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
if (encoding instanceof COSName)
{
// predefined CMap
COSName encodingName = (COSName) encoding;
cMap = CMapManager.getPredefinedCMap(encodingName.getName());
isCMapPredefined = true;
}
else if (encoding != null)
{
cMap = readCMap(encoding);
if (cMap == null)
{
throw new IOException("Missing required CMap");
}
else if (!cMap.hasCIDMappings())
{
LOG.warn("Invalid Encoding CMap in font " + getName());
}
}
// check if the descendant font is CJK
PDCIDSystemInfo ros = descendantFont.getCIDSystemInfo();
if (ros != null)
{
String ordering = ros.getOrdering();
isDescendantCJK = "Adobe".equals(ros.getRegistry()) &&
("GB1".equals(ordering) ||
"CNS1".equals(ordering) ||
"Japan1".equals(ordering) ||
"Korea1".equals(ordering));
}
}
/**
* Fetches the corresponding UCS2 CMap if the font's CMap is predefined.
*/
private void fetchCMapUCS2() throws IOException
{
// if the font is composite and uses a predefined cmap (excluding Identity-H/V)
// or whose descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
// Adobe-Korea1 character collection:
COSName name = dict.getCOSName(COSName.ENCODING);
if (isCMapPredefined && !(name == COSName.IDENTITY_H || name == COSName.IDENTITY_V) ||
isDescendantCJK)
{
// a) Map the character code to a CID using the font's CMap
// b) Obtain the ROS from the font's CIDSystemInfo
// c) Construct a second CMap name by concatenating the ROS in the format "R-O-UCS2"
// d) Obtain the CMap with the constructed name
// e) Map the CID according to the CMap from step d), producing a Unicode value
// todo: not sure how to interpret the PDF spec here, do we always override? or only when Identity-H/V?
String strName = null;
if (isDescendantCJK)
{
PDCIDSystemInfo cidSystemInfo = descendantFont.getCIDSystemInfo();
if (cidSystemInfo != null)
{
strName = cidSystemInfo.getRegistry() + "-" +
cidSystemInfo.getOrdering() + "-" +
cidSystemInfo.getSupplement();
}
}
else if (name != null)
{
strName = name.getName();
}
// try to find the corresponding Unicode (UC2) CMap
if (strName != null)
{
try
{
CMap prdCMap = CMapManager.getPredefinedCMap(strName);
String ucs2Name = prdCMap.getRegistry() + "-" + prdCMap.getOrdering() + "-UCS2";
cMapUCS2 = CMapManager.getPredefinedCMap(ucs2Name);
}
catch (IOException ex)
{
LOG.warn("Could not get " + strName + " UC2 map for font " + getName(), ex);
}
}
}
}
/**
* Returns the PostScript name of the font. if (!((COSDictionary) descendantFontDictBase).contai
*/
public String getBaseFont()
{
return dict.getNameAsString(COSName.BASE_FONT);
}
/**
* Returns the descendant font.
*/
public PDCIDFont getDescendantFont()
{
return descendantFont;
}
/**
* Returns the font's CMap.
*/
public CMap getCMap()
{
return cMap;
}
/**
* Returns the font's UCS2 CMap, only present this font uses a predefined CMap.
*/
public CMap getCMapUCS2()
{
return cMapUCS2;
}
@Override
public PDFontDescriptor getFontDescriptor()
{
return descendantFont.getFontDescriptor();
}
@Override
public Matrix getFontMatrix()
{
return descendantFont.getFontMatrix();
}
@Override
public boolean isVertical()
{
return cMap.getWMode() == 1;
}
@Override
public float getHeight(int code) throws IOException
{
return descendantFont.getHeight(code);
}
@Override
protected byte[] encode(int unicode) throws IOException
{
return descendantFont.encode(unicode);
}
@Override
public boolean hasExplicitWidth(int code) throws IOException
{
return descendantFont.hasExplicitWidth(code);
}
@Override
public float getAverageFontWidth()
{
return descendantFont.getAverageFontWidth();
}
@Override
public Vector getPositionVector(int code)
{
// units are always 1/1000 text space, font matrix is not used, see FOP-2252
return descendantFont.getPositionVector(code).scale(-1 / 1000f);
}
@Override
public Vector getDisplacement(int code) throws IOException
{
if (isVertical())
{
return new Vector(0, descendantFont.getVerticalDisplacementVectorY(code) / 1000f);
}
else
{
return super.getDisplacement(code);
}
}
@Override
public float getWidth(int code) throws IOException
{
return descendantFont.getWidth(code);
}
@Override
protected float getStandard14Width(int code)
{
throw new UnsupportedOperationException("not supported");
}
@Override
public float getWidthFromFont(int code) throws IOException
{
return descendantFont.getWidthFromFont(code);
}
@Override
public boolean isEmbedded()
{
return descendantFont.isEmbedded();
}
@Override
public String toUnicode(int code)
{
// try to use a ToUnicode CMap
String unicode = super.toUnicode(code);
if (unicode != null)
{
return unicode;
}
if ((isCMapPredefined || isDescendantCJK) && cMapUCS2 != null)
{
// if the font is composite and uses a predefined cmap (excluding Identity-H/V) then
// or if its descendant font uses Adobe-GB1/CNS1/Japan1/Korea1
// a) Map the character code to a character identifier (CID) according to the font?s CMap
int cid = codeToCID(code);
// e) Map the CID according to the CMap from step d), producing a Unicode value
return cMapUCS2.toUnicode(cid);
}
// PDFBOX-5324: try to get unicode from font cmap
if (descendantFont instanceof PDCIDFontType2)
{
TrueTypeFont font = ((PDCIDFontType2) descendantFont).getTrueTypeFont();
if (font != null)
{
try
{
CmapLookup cmap = font.getUnicodeCmapLookup(false);
if (cmap != null)
{
int gid;
if (descendantFont.isEmbedded())
{
// original PDFBOX-5324 supported only embedded fonts
gid = descendantFont.codeToGID(code);
}
else
{
// PDFBOX-5331: this bypasses the fallback attempt in
// PDCIDFontType2.codeToGID() which would bring a stackoverflow
gid = descendantFont.codeToCID(code);
}
List<Integer> codes = cmap.getCharCodes(gid);
if (codes != null && !codes.isEmpty())
{
return Character.toString((char) (int) codes.get(0));
}
}
}
catch (IOException e)
{
LOG.warn("get unicode from font cmap fail", e);
}
}
}
if (LOG.isWarnEnabled() && !noUnicode.contains(code))
{
// if no value has been produced, there is no way to obtain Unicode for the character.
String cid = "CID+" + codeToCID(code);
LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
// we keep track of which warnings have been issued, so we don't log multiple times
noUnicode.add(code);
}
return null;
}
@Override
public String getName()
{
return getBaseFont();
}
@Override
public BoundingBox getBoundingBox() throws IOException
{
// Will be cached by underlying font
return descendantFont.getBoundingBox();
}
@Override
public int readCode(InputStream in) throws IOException
{
return cMap.readCode(in);
}
/**
* Returns the CID for the given character code. If not found then CID 0 is returned.
*
* @param code character code
* @return CID
*/
public int codeToCID(int code)
{
return descendantFont.codeToCID(code);
}
/**
* Returns the GID for the given character code.
*
* @param code character code
* @return GID
*/
public int codeToGID(int code) throws IOException
{
return descendantFont.codeToGID(code);
}
@Override
public boolean isStandard14()
{
return false;
}
@Override
public boolean isDamaged()
{
return descendantFont.isDamaged();
}
@Override
public String toString()
{
String descendant = null;
if (getDescendantFont() != null)
{
descendant = getDescendantFont().getClass().getSimpleName();
}
return getClass().getSimpleName() + "/" + descendant + ", PostScript name: " + getBaseFont();
}
@Override
public GeneralPath getPath(int code) throws IOException
{
return descendantFont.getPath(code);
}
@Override
public GeneralPath getNormalizedPath(int code) throws IOException
{
return descendantFont.getNormalizedPath(code);
}
@Override
public boolean hasGlyph(int code) throws IOException
{
return descendantFont.hasGlyph(code);
}
public GsubData getGsubData()
{
return gsubData;
}
public byte[] encodeGlyphId(int glyphId)
{
return descendantFont.encodeGlyphId(glyphId);
}
public CmapLookup getCmapLookup()
{
return cmapLookup;
}
}