# Source code for fontTools.tfmLib

"""Module for reading TFM (TeX Font Metrics) files.

The TFM format is described in the TFtoPL WEB source code, whose typeset form
can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.

	>>> from fontTools.tfmLib import TFM
	>>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
	>>>
	>>> # Accessing an attribute gets you metadata.
	>>> tfm.checksum
	1274110073
	>>> tfm.designsize
	10.0
	>>> tfm.codingscheme
	'TeX text'
	>>> tfm.family
	'CMR'
	>>> tfm.seven_bit_safe_flag
	False
	>>> tfm.face
	234
	>>> tfm.extraheader
	{}
	>>> tfm.fontdimens
	{'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
	>>> # Accessing a character gets you its metrics.
	>>> # “width” is always available, other metrics are available only when
	>>> # applicable. All values are relative to “designsize”.
	>>> tfm.chars[ord("g")]
	{'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
	>>> # Kerning and ligature can be accessed as well.
	>>> tfm.kerning[ord("c")]
	{104: -0.02777862548828125, 107: -0.02777862548828125}
	>>> tfm.ligatures[ord("f")]
	{105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
"""

from types import SimpleNamespace

from fontTools.misc.sstruct import calcsize, unpack, unpack2

# sstruct layout of the preamble at the very start of a TFM file: twelve
# 16-bit signed integers ("h"), read with the ">" byte-order marker.  Every
# value is a count of 32-bit words (or a character code for bc/ec); TFM._read
# uses them to validate the file and to compute the offset of each subfile.
SIZES_FORMAT = """
    >
    lf: h    # length of the entire file, in words
    lh: h    # length of the header data, in words
    bc: h    # smallest character code in the font
    ec: h    # largest character code in the font
    nw: h    # number of words in the width table
    nh: h    # number of words in the height table
    nd: h    # number of words in the depth table
    ni: h    # number of words in the italic correction table
    nl: h    # number of words in the ligature/kern table
    nk: h    # number of words in the kern table
    ne: h    # number of words in the extensible character table
    np: h    # number of font parameter words
"""

# Size in bytes of the preamble described by SIZES_FORMAT; files shorter than
# this are rejected by TFM._read before anything is unpacked.
SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct fixed-point field spec used for every dimension read from the file
# (design size, character metrics, kerns, font parameters).
# NOTE(review): in sstruct's "x.yF" notation this should mean 12 integer bits
# and 20 fraction bits in one 32-bit word -- confirm against the sstruct docs.
FIXED_FORMAT = "12.20F"

# The header subfile may be truncated, so four progressively longer layouts
# are defined, each one embedding the previous layout and appending fields.
# TFM._read picks the longest layout that fits in the declared header length.
HEADER_FORMAT1 = f"""
    >
    checksum:            L
    designsize:          {FIXED_FORMAT}
"""

# HEADER_FORMAT1 plus the coding scheme string.
HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme:        40p
"""

# HEADER_FORMAT2 plus the font family string.
HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family:              20p
"""

# HEADER_FORMAT3 plus one more word: a boolean flag, two pad bytes and the
# face byte.
HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored:             x
    ignored:             x
    face:                B
"""

# Byte sizes of the four header layouts, compared against the header length
# (lh * 4 bytes) in TFM._read.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# sstruct layout of one step (one 32-bit word) of the ligature/kern program:
# four unsigned bytes.  TFM._read interprets skip_byte and op_byte against
# STOP_FLAG and KERN_FLAG respectively.
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""

# Keys used in TFM.fontdimens for the first seven font parameters, in file
# order.  Parameters without a known name are stored as "PARAMETER<n>".
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Keys for parameters 8 to 22 when the font type is MATHSY (coding scheme
# starting with "TEX MATH SY").
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Keys for parameters 8 to 13 when the font type is MATHEX (coding scheme
# starting with "TEX MATH EX").
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Values stored in TFM.fonttype; chosen from the upper-cased coding scheme
# prefix and used to select the names of the extra font parameters.
VANILLA = 0
MATHSY = 1
MATHEX = 2

# NOTE(review): these three are not referenced anywhere in the visible code;
# presumably they mirror TFtoPL's lig/kern step classification -- confirm
# before relying on them.
UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2

# Possible values of the 2-bit tag in a character's info word.  TFM._read
# interprets the accompanying remainder byte as: the start of the character's
# lig/kern program (LIG_TAG), the next larger character (LIST_TAG), or the
# index of an extensible recipe (EXT_TAG).
NO_TAG = 0
LIG_TAG = 1
LIST_TAG = 2
EXT_TAG = 3

# skip_byte >= STOP_FLAG ends a lig/kern program; skip_byte > STOP_FLAG marks
# a step that is not executed (or, on the first step, an indirect start).
STOP_FLAG = 128
# op_byte >= KERN_FLAG marks a kern step rather than a ligature step.
KERN_FLAG = 128


class TFMException(Exception):
    """Error raised when a TFM file fails a structural sanity check."""

    def __init__(self, message):
        """Create the exception.

        Args:
            message: Human-readable description of the problem; it becomes
                the exception's only argument.
        """
        super().__init__(message)
class TFM:
    """Parsed contents of a TFM (TeX Font Metrics) file.

    Args:
        file: Either an object with a ``read()`` method returning the raw
            bytes of the file, or a path that is opened in binary mode.

    Attributes set while reading:
        checksum, designsize, codingscheme, family, seven_bit_safe_flag,
        face: Header fields.  Fields missing from a short header keep the
            value obtained by unpacking an all-zero header.  A ``face``
            below 18 is replaced by a three-letter code such as ``"MRR"``.
        extraheader: Dict of header words beyond the standard 18, keyed
            ``"HEADER18"``, ``"HEADER19"``, ...
        fonttype: ``VANILLA``, ``MATHSY`` or ``MATHEX``, from the coding
            scheme.
        fontdimens: Dict mapping parameter names to values.
        right_boundary_char, left_boundary_char: Boundary characters
            declared by the ligature/kern table, or ``None``.
        chars: Dict mapping character codes to dicts of metrics
            (``"width"`` always; ``"height"``, ``"depth"``, ``"italic"``,
            ``"nextlarger"`` and ``"varchar"`` when applicable).
        kerning: ``{char: {next_char: kern}}``.
        ligatures: ``{char: {next_char: (operation, result_char)}}``.

    Raises:
        TFMException: If the file is too short or its subfile sizes are
            inconsistent.
    """

    def __init__(self, file):
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Read and parse ``file``, populating the instance attributes.

        Args:
            file: A readable binary file object or a path.

        Raises:
            TFMException: If a structural sanity check on the file fails.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # A zero length is rejected here too, matching the message; it could
        # never satisfy the size-sum check below anyway.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        #
        # Each character has a 4-byte info word: byte 0 is the width index,
        # byte 1 holds the height index (high nibble) and depth index (low
        # nibble), byte 2 holds the italic index (upper six bits) and the tag
        # (lower two bits), and byte 3 is the remainder.
        def char_info(c):
            # Byte offset of the info word of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # A character outside bc..ec or with width index 0 is absent.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            return data[char_info(c) + 2] // 4

        def tag(c):
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            # Byte offset of the extensible recipe of character c.
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            # Byte offset of step i of the ligature/kern table.
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # Unpack the lig/kern step starting at byte offset i.
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            # Byte offset of font parameter i (zero-based).
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Unpack one fixed-point word at byte offset index, storing it
            # under key in obj (or in a new dict), and return that object.
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the 24-byte sizes preamble.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Small face values are decoded into a three-letter code,
                # e.g. 0 -> "MRR" and 17 -> "LIE".
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Any header words beyond the standard ones are kept verbatim.
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        self.fontdimens = {}
        for i in range(sizes.np):
            # Fall back to a numbered name when no mnemonic applies.
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of the first step of its
        # ligature/kern program.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first step declares the right
            # boundary character.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A skip_byte of 255 in the last step declares a left boundary
            # program; it is recorded under the pseudo character code 256.
            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    # The recipe is four bytes: top, mid, bot, rep.  The
                    # first three are optional (0 means absent); rep is
                    # always recorded.
                    for i in range(4):
                        part = data[exten(c) + i]
                        if i == 3 or part > 0:
                            name = "rep"
                            if i == 0:
                                name = "top"
                            elif i == 1:
                                name = "mid"
                            elif i == 2:
                                name = "bot"
                            # A piece that is not in the font falls back to
                            # the character itself.
                            if nonexistent(part):
                                varchar[name] = c
                            else:
                                varchar[name] = part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            # If the first step has skip_byte > STOP_FLAG, the program
            # actually starts at the index stored in op_byte and remainder.
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                if cmd.skip_byte > STOP_FLAG:
                    # Step is not executed; the check below ends the loop.
                    pass
                else:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Build the mnemonic: "/" before and/or after
                            # "LIG", then one ">" per multiple of 4 in the
                            # code (e.g. 0 -> "LIG", 11 -> "/LIG/>>").
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
if __name__ == "__main__":
    import sys

    # Parse the TFM file named by the first command-line argument and dump
    # every parsed attribute, one per line, followed by the object's repr.
    tfm = TFM(sys.argv[1])
    report = [
        f"tfm.checksum={tfm.checksum}",
        f"tfm.designsize={tfm.designsize}",
        f"tfm.codingscheme={tfm.codingscheme}",
        f"tfm.fonttype={tfm.fonttype}",
        f"tfm.family={tfm.family}",
        f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}",
        f"tfm.face={tfm.face}",
        f"tfm.extraheader={tfm.extraheader}",
        f"tfm.fontdimens={tfm.fontdimens}",
        f"tfm.right_boundary_char={tfm.right_boundary_char}",
        f"tfm.left_boundary_char={tfm.left_boundary_char}",
        f"tfm.kerning={tfm.kerning}",
        f"tfm.ligatures={tfm.ligatures}",
        f"tfm.chars={tfm.chars}",
    ]
    print("\n".join(report))
    print(tfm)