Source code for fontTools.tfmLib

"""Module for reading TFM (TeX Font Metrics) files.

The TFM format is described in the TFtoPL WEB source code, whose typeset form
can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.

	>>> from fontTools.tfmLib import TFM
	>>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
	>>>
	>>> # Accessing an attribute gets you metadata.
	>>> tfm.checksum
	1274110073
	>>> tfm.designsize
	10.0
	>>> tfm.codingscheme
	'TeX text'
	>>> tfm.family
	'CMR'
	>>> tfm.seven_bit_safe_flag
	False
	>>> tfm.face
	234
	>>> tfm.extraheader
	{}
	>>> tfm.fontdimens
	{'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
	>>> # Accessing a character gets you its metrics.
	>>> # “width” is always available, other metrics are available only when
	>>> # applicable. All values are relative to “designsize”.
	>>> tfm.chars[ord("g")]
	{'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
	>>> # Kerning and ligature can be accessed as well.
	>>> tfm.kerning[ord("c")]
	{104: -0.02777862548828125, 107: -0.02777862548828125}
	>>> tfm.ligatures[ord("f")]
	{105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
"""

from types import SimpleNamespace

from fontTools.misc.sstruct import calcsize, unpack, unpack2

# sstruct layout of the record at the very start of a TFM file: twelve
# big-endian (">") 16-bit integers ("h") giving the length of the whole file
# and of each of its subfiles. TFM._read unpacks it into a namespace and uses
# the values, which count 32-bit words, to validate the file and to compute
# the offset of every subfile.
SIZES_FORMAT = """
    >
    lf: h    # length of the entire file, in words
    lh: h    # length of the header data, in words
    bc: h    # smallest character code in the font
    ec: h    # largest character code in the font
    nw: h    # number of words in the width table
    nh: h    # number of words in the height table
    nd: h    # number of words in the depth table
    ni: h    # number of words in the italic correction table
    nl: h    # number of words in the ligature/kern table
    nk: h    # number of words in the kern table
    ne: h    # number of words in the extensible character table
    np: h    # number of font parameter words
"""

# Size in bytes of the packed SIZES_FORMAT record. TFM._read rejects any
# input shorter than this before trying to unpack it.
SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct field spec used for every dimension read from the file: the design
# size, character widths/heights/depths/italic corrections, kerns and font
# parameters.
# NOTE(review): presumably "12.20F" is a 32-bit fixed-point number with
# 12 integer bits and 20 fraction bits -- confirm against fontTools.misc.sstruct.
FIXED_FORMAT = "12.20F"

# The header formats below are cumulative: each one embeds the previous one
# and appends more fields. TFM._read uses the longest format that fits in the
# header length (lh) declared by the file, so short headers are still parsed.

# Minimal header: checksum and design size.
HEADER_FORMAT1 = f"""
    >
    checksum:            L
    designsize:          {FIXED_FORMAT}
"""

# Adds the coding scheme, stored in a 40-byte "p" (length-prefixed) field.
HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme:        40p
"""

# Adds the font family, stored in a 20-byte "p" (length-prefixed) field.
HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family:              20p
"""

# Adds the final header word: a boolean flag, two pad bytes ("x") and the
# one-byte face code.
HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored:             x
    ignored:             x
    face:                B
"""

# Packed sizes in bytes of the four header variants; TFM._read compares them
# with the declared header length (lh * 4 bytes) to choose a format.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# sstruct layout of one instruction of the ligature/kern table: four unsigned
# bytes. TFM._read interprets them as follows: skip_byte controls program flow
# (compared against STOP_FLAG, and 255 marks boundary-character entries),
# next_char is the following character the step applies to, op_byte
# distinguishes kern steps (>= KERN_FLAG) from ligature steps, and remainder
# is the kern index low byte or the ligature's replacement character.
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""

# Names given to the font parameters by position when TFM._read fills
# TFM.fontdimens. Parameters without a name in these lists are stored under
# "PARAMETER<n>" (1-based).

# Names of the first seven parameters, used for every font type.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names of parameters 8 to 22 when the font type is MATHSY.
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names of parameters 8 to 13 when the font type is MATHEX.
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Values stored in TFM.fonttype. TFM._read derives it from the upper-cased
# coding scheme: a "TEX MATH SY" prefix gives MATHSY, a "TEX MATH EX" prefix
# gives MATHEX, anything else VANILLA.
VANILLA = 0
MATHSY = 1
MATHEX = 2

# NOTE(review): these three constants are not referenced by the code visible
# in this module; presumably they name lig/kern step reachability states
# carried over from TFtoPL -- confirm before relying on them.
UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2

# Values of a character's tag (the low two bits of the third byte of its
# char_info word). The tag tells TFM._read how to use the character's
# remainder byte.
NO_TAG = 0  # remainder is not used
LIG_TAG = 1  # remainder is the character's entry point in the lig/kern table
LIST_TAG = 2  # remainder is stored as the character's "nextlarger"
EXT_TAG = 3  # remainder selects the character's extensible recipe

# Thresholds for lig/kern instruction bytes: a skip_byte >= STOP_FLAG ends a
# lig/kern program, and an op_byte >= KERN_FLAG marks a kern step rather than
# a ligature step.
STOP_FLAG = 128
KERN_FLAG = 128



[docs]
class TFMException(Exception):
    """Error raised when a TFM file is too short or structurally invalid."""

    def __init__(self, message):
        # A message is mandatory; it is forwarded unchanged to Exception.
        Exception.__init__(self, message)




[docs]
class TFM:
    """Parsed contents of a TFM (TeX Font Metrics) file.

    The whole file is read and decoded by the constructor. Afterwards the
    instance exposes:

    ``checksum``, ``designsize``, ``codingscheme``, ``family``, ``seven_bit_safe_flag``, ``face``
        Header fields. Fields the file's header is too short to contain keep
        the zero/empty value obtained by unpacking an all-zero header.
        ``face`` is replaced by a three-letter code when its value is below 18
        and is left as an integer otherwise.
    ``extraheader``
        Dict of the header words following the standard header, keyed
        ``"HEADER18"``, ``"HEADER19"``, ...
    ``fonttype``
        ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from the coding scheme.
    ``fontdimens``
        Dict mapping font parameter names to their values.
    ``right_boundary_char``, ``left_boundary_char``
        Boundary characters declared by the lig/kern table, or ``None``.
        The left boundary, when present, is represented by the code 256.
    ``chars``
        Dict mapping each existing character code to a dict of its metrics:
        always ``"width"``, and ``"height"``, ``"depth"``, ``"italic"``,
        ``"nextlarger"`` and ``"varchar"`` when applicable.
    ``kerning``
        Dict mapping a character code to ``{next_char: kern}``.
    ``ligatures``
        Dict mapping a character code to ``{next_char: (op, result_char)}``
        where ``op`` is a string such as ``"LIG"`` or ``"/LIG>"``, or the raw
        integer op code when it is not one of the standard ligature codes.

    Args:
        file: Path of the TFM file, or an object with a ``read()`` method
            returning the file's bytes.

    Raises:
        TFMException: If the file is too short or its structure is invalid.
    """

    def __init__(self, file):
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Read ``file``, validate its structure and populate the attributes.

        Args:
            file: Path of the TFM file, or an object with a ``read()`` method
                returning the file's bytes.

        Raises:
            TFMException: If the data is shorter than the sizes record, or
                fails one of the structural sanity checks below.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # A zero length is rejected here so the check agrees with its message;
        # such a file could never satisfy the size sum check further down.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper function below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        def char_info(c):
            # Byte offset of the 4-byte char_info word of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # Out of the declared code range, or present with a zero width
            # index (which marks a character as absent).
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second char_info byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second char_info byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third char_info byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third char_info byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            # Byte offset of the extensible recipe selected by c's remainder.
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            # Byte offset of the i-th lig/kern instruction.
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # Decode the lig/kern instruction at byte offset i.
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            # Byte offset of the i-th (0-based) font parameter.
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Unpack one fixed-point value at byte offset `index` and store it
            # under `key` in `obj` (a new dict when obj is None); return obj.
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, bytes(HEADER_SIZE4), self)

        # The header starts right after the sizes record; use the longest
        # header layout that fits in the declared header length.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Decode the face byte into weight/slope/expansion letters.
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Any header words beyond the standard ones are kept verbatim.
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        # Name the font parameters by position; the names of parameters past
        # the seventh depend on the font type.
        self.fontdimens = {}
        for i in range(sizes.np):
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of its first lig/kern instruction.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first instruction declares the right
            # boundary character.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A skip_byte of 255 in the last instruction declares a left
            # boundary program; it is recorded under the pseudo code 256.
            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            # A zero width index means the character does not exist.
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    recipe = exten(c)
                    for i, name in enumerate(("top", "mid", "bot", "rep")):
                        part = data[recipe + i]
                        # The repeater is always recorded; the other pieces
                        # only when they are non-zero.
                        if i == 3 or part > 0:
                            # A piece that is not a character of this font is
                            # replaced by the character itself.
                            varchar[name] = c if nonexistent(part) else part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            # A first instruction with skip_byte above STOP_FLAG redirects to
            # the real start of the program.
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                # Instructions with skip_byte above STOP_FLAG carry no step.
                if cmd.skip_byte <= STOP_FLAG:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Spell out the standard code: an optional "/" on
                            # either side of "LIG", then one ">" per 4 in it.
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                # Otherwise skip_byte tells how many instructions to skip.
                i += cmd.skip_byte + 1



if __name__ == "__main__":
    import sys

    # Command-line use: parse the TFM file named by the first argument and
    # dump every parsed attribute, one "tfm.<name>=<value>" line each,
    # followed by the object's repr.
    tfm = TFM(sys.argv[1])
    report_lines = [
        f"tfm.checksum={tfm.checksum}",
        f"tfm.designsize={tfm.designsize}",
        f"tfm.codingscheme={tfm.codingscheme}",
        f"tfm.fonttype={tfm.fonttype}",
        f"tfm.family={tfm.family}",
        f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}",
        f"tfm.face={tfm.face}",
        f"tfm.extraheader={tfm.extraheader}",
        f"tfm.fontdimens={tfm.fontdimens}",
        f"tfm.right_boundary_char={tfm.right_boundary_char}",
        f"tfm.left_boundary_char={tfm.left_boundary_char}",
        f"tfm.kerning={tfm.kerning}",
        f"tfm.ligatures={tfm.ligatures}",
        f"tfm.chars={tfm.chars}",
    ]
    print(*report_lines, sep="\n")
    print(tfm)