Source code for fontTools.ttLib.tables._c_m_a_p

from fontTools.misc.textTools import bytesjoin, safeEval, readHex
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import getSearchRange
from fontTools.unicode import Unicode
from . import DefaultTable
import sys
import struct
import array
import logging


log = logging.getLogger(__name__)


def _make_map(font, chars, gids):
    assert len(chars) == len(gids)
    glyphNames = font.getGlyphNameMany(gids)
    cmap = {}
    for char, gid, name in zip(chars, gids, glyphNames):
        if gid == 0:
            continue
        cmap[char] = name
    return cmap



[docs]
class table__c_m_a_p(DefaultTable.DefaultTable):
    """Character to Glyph Index Mapping Table

    This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_
    table, which maps between input characters (in Unicode or other system encodings)
    and glyphs within the font. The ``cmap`` table contains one or more subtables
    which determine the mapping of of characters to glyphs across different platforms
    and encoding systems.

    ``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access
    to the subtables, although it is normally easier to retrieve individual subtables
    through the utility methods described below. To add new subtables to a font,
    first determine the subtable format (if in doubt use format 4 for glyphs within
    the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation
    Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``,
    and append them to the ``.tables`` list.

    Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap``
    attribute.

    Example::

            cmap4_0_3 = CmapSubtable.newSubtable(4)
            cmap4_0_3.platformID = 0
            cmap4_0_3.platEncID = 3
            cmap4_0_3.language = 0
            cmap4_0_3.cmap = { 0xC1: "Aacute" }

            cmap = newTable("cmap")
            cmap.tableVersion = 0
            cmap.tables = [cmap4_0_3]
    """


[docs]
    def getcmap(self, platformID, platEncID):
        """Returns the first subtable which matches the given platform and encoding.

        Args:
                platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh
                        (deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows.
                encodingID (int): Encoding ID. Interpretation depends on the platform ID.
                        See the OpenType specification for details.

        Returns:
                An object which is a subclass of :py:class:`CmapSubtable` if a matching
                subtable is found within the font, or ``None`` otherwise.
        """

        for subtable in self.tables:
            if subtable.platformID == platformID and subtable.platEncID == platEncID:
                return subtable
        return None  # not found



[docs]
    def getBestCmap(
        self,
        cmapPreferences=(
            (3, 10),
            (0, 6),
            (0, 4),
            (3, 1),
            (0, 3),
            (0, 2),
            (0, 1),
            (0, 0),
        ),
    ):
        """Returns the 'best' Unicode cmap dictionary available in the font
        or ``None``, if no Unicode cmap subtable is available.

        By default it will search for the following (platformID, platEncID)
        pairs in order::

                        (3, 10), # Windows Unicode full repertoire
                        (0, 6),  # Unicode full repertoire (format 13 subtable)
                        (0, 4),  # Unicode 2.0 full repertoire
                        (3, 1),  # Windows Unicode BMP
                        (0, 3),  # Unicode 2.0 BMP
                        (0, 2),  # Unicode ISO/IEC 10646
                        (0, 1),  # Unicode 1.1
                        (0, 0)   # Unicode 1.0

        This particular order matches what HarfBuzz uses to choose what
        subtable to use by default. This order prefers the largest-repertoire
        subtable, and among those, prefers the Windows-platform over the
        Unicode-platform as the former has wider support.

        This order can be customized via the ``cmapPreferences`` argument.
        """
        for platformID, platEncID in cmapPreferences:
            cmapSubtable = self.getcmap(platformID, platEncID)
            if cmapSubtable is not None:
                return cmapSubtable.cmap
        return None  # None of the requested cmap subtables were found



[docs]
    def buildReversed(self):
        """Builds a reverse mapping dictionary

        Iterates over all Unicode cmap tables and returns a dictionary mapping
        glyphs to sets of codepoints, such as::

                {
                        'one': {0x31}
                        'A': {0x41,0x391}
                }

        The values are sets of Unicode codepoints because
        some fonts map different codepoints to the same glyph.
        For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391
        GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph.
        """
        result = {}
        for subtable in self.tables:
            if subtable.isUnicode():
                for codepoint, name in subtable.cmap.items():
                    result.setdefault(name, set()).add(codepoint)
        return result


    def decompile(self, data, ttFont):
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4 + i * 8 : 4 + (i + 1) * 8]
            )
            platformID, platEncID = int(platformID), int(platEncID)
            format, length = struct.unpack(">HH", data[offset : offset + 4])
            if format in [8, 10, 12, 13]:
                format, reserved, length = struct.unpack(
                    ">HHL", data[offset : offset + 8]
                )
            elif format in [14]:
                format, length = struct.unpack(">HL", data[offset : offset + 6])

            if not length:
                log.error(
                    "cmap subtable is reported as having zero length: platformID %s, "
                    "platEncID %s, format %s offset %s. Skipping table.",
                    platformID,
                    platEncID,
                    format,
                    offset,
                )
                continue
            table = CmapSubtable.newSubtable(format)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset : offset + int(length)], ttFont)
            if offset in seenOffsets:
                table.data = None  # Mark as decompiled
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                seenOffsets[offset] = i
            tables.append(table)
        if ttFont.lazy is False:  # Be lazy for None and True
            self.ensureDecompiled()

    def ensureDecompiled(self, recurse=False):
        # The recurse argument is unused, but part of the signature of
        # ensureDecompiled across the library.
        for st in self.tables:
            st.ensureDecompiled()

    def compile(self, ttFont):
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
        numSubTables = len(self.tables)
        totalOffset = 4 + 8 * numSubTables
        data = struct.pack(">HH", self.tableVersion, numSubTables)
        tableData = b""
        seen = (
            {}
        )  # Some tables are the same object reference. Don't compile them twice.
        done = (
            {}
        )  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            offset = seen.get(id(table.cmap))
            if offset is None:
                chunk = table.compile(ttFont)
                offset = done.get(chunk)
                if offset is None:
                    offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(
                        tableData
                    )
                    tableData = tableData + chunk
            data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
        return data + tableData

    def toXML(self, writer, ttFont):
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if name[:12] != "cmap_format_":
            return
        if not hasattr(self, "tables"):
            self.tables = []
        format = safeEval(name[12:])
        table = CmapSubtable.newSubtable(format)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML(name, attrs, content, ttFont)
        self.tables.append(table)




[docs]
class CmapSubtable(object):
    """Base class for all cmap subtable formats.

    Subclasses which handle the individual subtable formats are named
    ``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass`
    to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a
    new subtable object for a given format.

    The object exposes a ``.cmap`` attribute, which contains a dictionary mapping
    character codepoints to glyph names.
    """


[docs]
    @staticmethod
    def getSubtableClass(format):
        """Return the subtable class for a format."""
        return cmap_classes.get(format, cmap_format_unknown)



[docs]
    @staticmethod
    def newSubtable(format):
        """Return a new instance of a subtable for the given format
        ."""
        subtableClass = CmapSubtable.getSubtableClass(format)
        return subtableClass(format)


    def __init__(self, format):
        self.format = format
        self.data = None
        self.ttFont = None
        self.platformID = None  #: The platform ID of this subtable
        self.platEncID = None  #: The encoding ID of this subtable (interpretation depends on ``platformID``)
        self.language = (
            None  #: The language ID of this subtable (Macintosh platform only)
        )

    def ensureDecompiled(self, recurse=False):
        # The recurse argument is unused, but part of the signature of
        # ensureDecompiled across the library.
        if self.data is None:
            return
        self.decompile(None, None)  # use saved data.
        self.data = None  # Once this table has been decompiled, make sure we don't
        # just return the original data. Also avoids recursion when
        # called with an attribute that the cmap subtable doesn't have.

    def __getattr__(self, attr):
        # allow lazy decompilation of subtables.
        if attr[:2] == "__":  # don't handle requests for member functions like '__lt__'
            raise AttributeError(attr)
        if self.data is None:
            raise AttributeError(attr)
        self.ensureDecompiled()
        return getattr(self, attr)

    def decompileHeader(self, data, ttFont):
        format, length, language = struct.unpack(">HHH", data[:6])
        assert (
            len(data) == length
        ), "corrupt cmap table format %d (data length: %d, header length: %d)" % (
            format,
            len(data),
            length,
        )
        self.format = int(format)
        self.length = int(length)
        self.language = int(language)
        self.data = data[6:]
        self.ttFont = ttFont

    def toXML(self, writer, ttFont):
        writer.begintag(
            self.__class__.__name__,
            [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("language", self.language),
            ],
        )
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()


[docs]
    def getEncoding(self, default=None):
        """Returns the Python encoding name for this cmap subtable based on its platformID,
        platEncID, and language.  If encoding for these values is not known, by default
        ``None`` is returned.  That can be overridden by passing a value to the ``default``
        argument.

        Note that if you want to choose a "preferred" cmap subtable, most of the time
        ``self.isUnicode()`` is what you want as that one only returns true for the modern,
        commonly used, Unicode-compatible triplets, not the legacy ones.
        """
        return getEncoding(self.platformID, self.platEncID, self.language, default)



[docs]
    def isUnicode(self):
        """Returns true if the characters are interpreted as Unicode codepoints."""
        return self.platformID == 0 or (
            self.platformID == 3 and self.platEncID in [0, 1, 10]
        )



[docs]
    def isSymbol(self):
        """Returns true if the subtable is for the Symbol encoding (3,0)"""
        return self.platformID == 3 and self.platEncID == 0


    def _writeCodes(self, codes, writer):
        isUnicode = self.isUnicode()
        for code, name in codes:
            writer.simpletag("map", code=hex(code), name=name)
            if isUnicode:
                writer.comment(Unicode[code])
            writer.newline()

    def __lt__(self, other):
        if not isinstance(other, CmapSubtable):
            return NotImplemented

        # implemented so that list.sort() sorts according to the spec.
        selfTuple = (
            getattr(self, "platformID", None),
            getattr(self, "platEncID", None),
            getattr(self, "language", None),
            self.__dict__,
        )
        otherTuple = (
            getattr(other, "platformID", None),
            getattr(other, "platEncID", None),
            getattr(other, "language", None),
            other.__dict__,
        )
        return selfTuple < otherTuple



class cmap_format_0(CmapSubtable):
    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"
        data = (
            self.data
        )  # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        gids = array.array("B")
        gids.frombytes(self.data)
        charCodes = list(range(len(gids)))
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        cmap = self.cmap
        assert set(cmap.keys()).issubset(range(256))
        getGlyphID = ttFont.getGlyphID
        valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)]

        gids = array.array("B", valueList)
        data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes()
        assert len(data) == 262
        return data

    def fromXML(self, name, attrs, content, ttFont):
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


subHeaderFormat = ">HHhH"


class SubHeader(object):
    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        self.glyphIndexArray = []


class cmap_format_2(CmapSubtable):
    def setIDDelta(self, subHeader):
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        minGI = subHeader.glyphIndexArray[0]
        for gid in subHeader.glyphIndexArray:
            if (gid != 0) and (gid < minGI):
                minGI = gid
        # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
        # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
        # We would like to pick an idDelta such that the first glyphArray GID is 1,
        # so that we are more likely to be able to combine glypharray GID subranges.
        # This means that we have a problem when minGI is > 32K
        # Since the final gi is reconstructed from the glyphArray GID by:
        #    (short)finalGID = (gid + idDelta) % 0x10000),
        # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
        # negative number to an unsigned short.

        if minGI > 1:
            if minGI > 0x7FFF:
                subHeader.idDelta = -(0x10000 - minGI) - 1
            else:
                subHeader.idDelta = minGI - 1
            idDelta = subHeader.idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    subHeader.glyphIndexArray[i] = gid - idDelta

    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"

        data = (
            self.data
        )  # decompileHeader assigns the data after the header to self.data
        subHeaderKeys = []
        maxSubHeaderindex = 0
        # get the key array, and determine the number of subHeaders.
        allKeys = array.array("H")
        allKeys.frombytes(data[:512])
        data = data[512:]
        if sys.byteorder != "big":
            allKeys.byteswap()
        subHeaderKeys = [key // 8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (
                subHeader.firstCode,
                subHeader.entryCount,
                subHeader.idDelta,
                subHeader.idRangeOffset,
            ) = struct.unpack(subHeaderFormat, data[pos : pos + 8])
            pos += 8
            giDataPos = pos + subHeader.idRangeOffset - 2
            giList = array.array("H")
            giList.frombytes(data[giDataPos : giDataPos + subHeader.entryCount * 2])
            if sys.byteorder != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)
        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to select
        # a subHeader. For any subheader but 0, the next byte is then mapped through the
        # selected subheader. If subheader Index 0 is selected, then the byte itself is
        # mapped through the subheader, and there is no second byte.
        # Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose length is entryCount.
        # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
        # referenced by another subheader.
        # The only subheader that will be referenced by more than one first-byte value is the subheader
        # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
        # 	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
        # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the
        # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
        # (e.g. glyph not in font).
        # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
        # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
        # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
        # Example for Logocut-Medium
        # first byte of charcode = 129; selects subheader 1.
        # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
        # second byte of charCode = 66
        # the index offset = 66-64 = 2.
        # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
        # [glyphIndexArray index], [subrange array index] = glyphIndex
        # [256], [0]=1 	from charcode [129, 64]
        # [257], [1]=2  	from charcode [129, 65]
        # [258], [2]=3  	from charcode [129, 66]
        # [259], [3]=4  	from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
        # add it to the glyphID to get the final glyphIndex
        # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

        self.data = b""
        cmap = {}
        notdefGI = 0
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (
                    firstByte >= subHeader.firstCode + subHeader.entryCount
                ):
                    continue  # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue  # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this first byte are
                # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
                # same as mapping it to .notdef.

        gids = list(cmap.values())
        charCodes = list(cmap.keys())
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        if self.data:
            return (
                struct.pack(">HHH", self.format, self.length, self.language) + self.data
            )
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = sorted(self.cmap.items())
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == "gid":
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except:
                            raise KeyError(name)

                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0,
        # which defines all char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code order, all the char codes with the
        # same first byte are in sequential order.

        subHeaderKeys = [
            kEmptyTwoCharCodeRange for x in range(256)
        ]  # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
        # with a cmap where all the one byte char codes map to notdef,
        # with the result that the subhead 0 would not get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        items = zip(charCodes, gids)
        for charCode, gid in items:
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if (
                firstbyte != lastFirstByte
            ):  # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
                    # for the indices matching the char codes.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            charCode = subHeader.firstCode + index
                            subHeaderKeys[charCode] = 0

                    assert subHeader.entryCount == len(
                        subHeader.glyphIndexArray
                    ), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between the last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we we added to the subheader array.
        self.setIDDelta(subHeader)

        # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
        # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
        # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
        # charcode 0 and GID 0.

        idRangeOffset = (
            len(subHeaderList) - 1
        ) * 8 + 2  # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = (
            len(subHeaderList) - 1
        )  # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if (
                    prevSubhead.glyphIndexArray == subHeader.glyphIndexArray
                ):  # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = (
                        prevSubhead.idRangeOffset - (index - j) * 8
                    )
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0:  # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (
                    idRangeOffset - 8
                ) + subHeader.entryCount * 2  # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = (
            6 + 512 + 8 * len(subHeaderList)
        )  # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            length = (
                length + len(subhead.glyphIndexArray) * 2
            )  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index * 8))
        for subhead in subHeaderList:
            dataList.append(
                struct.pack(
                    subHeaderFormat,
                    subhead.firstCode,
                    subhead.entryCount,
                    subhead.idDelta,
                    subhead.idRangeOffset,
                )
            )
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = bytesjoin(dataList)
        assert len(data) == length, (
            "Error: cmap format 2 is not same length as calculated! actual: "
            + str(len(data))
            + " calc : "
            + str(length)
        )
        return data

    def fromXML(self, name, attrs, content, ttFont):
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


cmap_format_4_format = ">7H"

# uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
# uint16  reservedPad                # This value should be zero
# uint16  startCode[segCount]        # Starting character code for each segment
# uint16  idDelta[segCount]          # Delta for all character codes in segment
# uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
# uint16  glyphIndexArray[variable]  # Glyph index array


def splitRange(startCode, endCode, cmap):
    # Try to split a range of character codes into subranges with consecutive
    # glyph IDs in such a way that the cmap4 subtable can be stored "most"
    # efficiently. I can't prove I've got the optimal solution, but it seems
    # to do well with the fonts I tested: none became bigger, many became smaller.
    if startCode == endCode:
        return [], [endCode]

    lastID = cmap[startCode]
    lastCode = startCode
    inOrder = None
    orderedBegin = None
    subRanges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]

        if glyphID - 1 == lastID:
            if inOrder is None or not inOrder:
                inOrder = 1
                orderedBegin = lastCode
        else:
            if inOrder:
                inOrder = 0
                subRanges.append((orderedBegin, lastCode))
                orderedBegin = None

        lastID = glyphID
        lastCode = code

    if inOrder:
        subRanges.append((orderedBegin, lastCode))
    assert lastCode == endCode

    # Now filter out those new subranges that would only make the data bigger.
    # A new segment cost 8 bytes, not using a new segment costs 2 bytes per
    # character.
    newRanges = []
    for b, e in subRanges:
        if b == startCode and e == endCode:
            break  # the whole range, we're fine
        if b == startCode or e == endCode:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            newRanges.append((b, e))
    subRanges = newRanges

    if not subRanges:
        return [], [endCode]

    if subRanges[0][0] != startCode:
        subRanges.insert(0, (startCode, subRanges[0][0] - 1))
    if subRanges[-1][1] != endCode:
        subRanges.append((subRanges[-1][1] + 1, endCode))

    # Fill the "holes" in the segments list -- those are the segments in which
    # the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(subRanges):
        if subRanges[i - 1][1] + 1 != subRanges[i][0]:
            subRanges.insert(i, (subRanges[i - 1][1] + 1, subRanges[i][0] - 1))
            i = i + 1
        i = i + 1

    # Transform the ranges into startCode/endCode lists.
    start = []
    end = []
    for b, e in subRanges:
        start.append(b)
        end.append(e)
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end


class cmap_format_4(CmapSubtable):
    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"

        data = (
            self.data
        )  # decompileHeader assigns the data after the header to self.data
        (segCountX2, searchRange, entrySelector, rangeShift) = struct.unpack(
            ">4H", data[:8]
        )
        data = data[8:]
        segCount = segCountX2 // 2

        allCodes = array.array("H")
        allCodes.frombytes(data)
        self.data = data = None

        if sys.byteorder != "big":
            allCodes.byteswap()

        # divide the data
        endCode = allCodes[:segCount]
        allCodes = allCodes[segCount + 1 :]  # the +1 is skipping the reservedPad field
        startCode = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idDelta = allCodes[:segCount]
        allCodes = allCodes[segCount:]
        idRangeOffset = allCodes[:segCount]
        glyphIndexArray = allCodes[segCount:]
        lenGIArray = len(glyphIndexArray)

        # build 2-byte character mapping
        charCodes = []
        gids = []
        for i in range(len(startCode) - 1):  # don't do 0xffff!
            start = startCode[i]
            delta = idDelta[i]
            rangeOffset = idRangeOffset[i]
            partial = rangeOffset // 2 - start + i - len(idRangeOffset)

            rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
            charCodes.extend(rangeCharCodes)
            if rangeOffset == 0:
                gids.extend(
                    [(charCode + delta) & 0xFFFF for charCode in rangeCharCodes]
                )
            else:
                for charCode in rangeCharCodes:
                    index = charCode + partial
                    assert index < lenGIArray, (
                        "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !"
                        % (i, index, lenGIArray)
                    )
                    if glyphIndexArray[index] != 0:  # if not missing glyph
                        glyphID = glyphIndexArray[index] + delta
                    else:
                        glyphID = 0  # missing glyph
                    gids.append(glyphID & 0xFFFF)

        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        if self.data:
            return (
                struct.pack(">HHH", self.format, self.length, self.language) + self.data
            )

        charCodes = list(self.cmap.keys())
        if not charCodes:
            startCode = [0xFFFF]
            endCode = [0xFFFF]
        else:
            charCodes.sort()
            names = [self.cmap[code] for code in charCodes]
            nameMap = ttFont.getReverseGlyphMap()
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                nameMap = ttFont.getReverseGlyphMap(rebuild=True)
                try:
                    gids = [nameMap[name] for name in names]
                except KeyError:
                    # allow virtual GIDs in format 4 tables
                    gids = []
                    for name in names:
                        try:
                            gid = nameMap[name]
                        except KeyError:
                            try:
                                if name[:3] == "gid":
                                    gid = int(name[3:])
                                else:
                                    gid = ttFont.getGlyphID(name)
                            except:
                                raise KeyError(name)

                        gids.append(gid)
            cmap = {}  # code:glyphID mapping
            for code, gid in zip(charCodes, gids):
                cmap[code] = gid

            # Build startCode and endCode lists.
            # Split the char codes in ranges of consecutive char codes, then split
            # each range in more ranges of consecutive/not consecutive glyph IDs.
            # See splitRange().
            lastCode = charCodes[0]
            endCode = []
            startCode = [lastCode]
            for charCode in charCodes[
                1:
            ]:  # skip the first code, it's the first start code
                if charCode == lastCode + 1:
                    lastCode = charCode
                    continue
                start, end = splitRange(startCode[-1], lastCode, cmap)
                startCode.extend(start)
                endCode.extend(end)
                startCode.append(charCode)
                lastCode = charCode
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
            startCode.append(0xFFFF)
            endCode.append(0xFFFF)

        # build up rest of cruft
        idDelta = []
        idRangeOffset = []
        glyphIndexArray = []
        for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
            indices = []
            for charCode in range(startCode[i], endCode[i] + 1):
                indices.append(cmap[charCode])
            if indices == list(range(indices[0], indices[0] + len(indices))):
                idDelta.append((indices[0] - startCode[i]) % 0x10000)
                idRangeOffset.append(0)
            else:
                idDelta.append(0)
                idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
                glyphIndexArray.extend(indices)
        idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
        idRangeOffset.append(0)

        # Insane.
        segCount = len(endCode)
        segCountX2 = segCount * 2
        searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

        charCodeArray = array.array("H", endCode + [0] + startCode)
        idDeltaArray = array.array("H", idDelta)
        restArray = array.array("H", idRangeOffset + glyphIndexArray)
        if sys.byteorder != "big":
            charCodeArray.byteswap()
        if sys.byteorder != "big":
            idDeltaArray.byteswap()
        if sys.byteorder != "big":
            restArray.byteswap()
        data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

        length = struct.calcsize(cmap_format_4_format) + len(data)
        header = struct.pack(
            cmap_format_4_format,
            self.format,
            length,
            self.language,
            segCountX2,
            searchRange,
            entrySelector,
            rangeShift,
        )
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            nameMap, attrsMap, dummyContent = element
            if nameMap != "map":
                assert 0, "Unrecognized keyword in cmap subtable"
            cmap[safeEval(attrsMap["code"])] = attrsMap["name"]


class cmap_format_6(CmapSubtable):
    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"

        data = (
            self.data
        )  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        # assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        gids = array.array("H")
        gids.frombytes(data[: 2 * int(entryCount)])
        if sys.byteorder != "big":
            gids.byteswap()
        self.data = data = None

        charCodes = list(range(firstCode, firstCode + len(gids)))
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        if self.data:
            return (
                struct.pack(">HHH", self.format, self.length, self.language) + self.data
            )
        cmap = self.cmap
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [
                ttFont.getGlyphID(cmap[code]) if code in cmap else 0 for code in codes
            ]
            gids = array.array("H", valueList)
            if sys.byteorder != "big":
                gids.byteswap()
            data = gids.tobytes()
        else:
            data = b""
            firstCode = 0
        header = struct.pack(
            ">HHHHH", 6, len(data) + 10, self.language, firstCode, len(codes)
        )
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_12_or_13(CmapSubtable):
    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert (
            len(data) == (16 + nGroups * 12) == (length)
        ), "corrupt cmap table format %d (data length: %d, header length: %d)" % (
            self.format,
            len(data),
            length,
        )
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"

        data = (
            self.data
        )  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for i in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(
                ">LLL", data[pos : pos + 12]
            )
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(list(range(startCharCode, endCharCode + 1)))
            gids.extend(self._computeGIDs(glyphID, lenGroup))
        self.data = data = None
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        if self.data:
            return (
                struct.pack(
                    ">HHLLL",
                    self.format,
                    self.reserved,
                    self.length,
                    self.language,
                    self.nGroups,
                )
                + self.data
            )
        charCodes = list(self.cmap.keys())
        names = list(self.cmap.values())
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == "gid":
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except:
                            raise KeyError(name)

                    gids.append(gid)

        cmap = {}  # code:glyphID mapping
        for code, gid in zip(charCodes, gids):
            cmap[code] = gid

        charCodes.sort()
        index = 0
        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        lastGlyphID = startGlyphID - self._format_step
        lastCharCode = startCharCode - 1
        nGroups = 0
        dataList = []
        maxIndex = len(charCodes)
        for index in range(maxIndex):
            charCode = charCodes[index]
            glyphID = cmap[charCode]
            if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                dataList.append(
                    struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)
                )
                startCharCode = charCode
                startGlyphID = glyphID
                nGroups = nGroups + 1
            lastGlyphID = glyphID
            lastCharCode = charCode
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
        nGroups = nGroups + 1
        data = bytesjoin(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups * 12) == (lengthSubtable - 16)
        return (
            struct.pack(
                ">HHLLL",
                self.format,
                self.reserved,
                lengthSubtable,
                self.language,
                nGroups,
            )
            + data
        )

    def toXML(self, writer, ttFont):
        writer.begintag(
            self.__class__.__name__,
            [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("reserved", self.reserved),
                ("length", self.length),
                ("language", self.language),
                ("nGroups", self.nGroups),
            ],
        )
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            cmap[safeEval(attrs["code"])] = attrs["name"]


class cmap_format_12(cmap_format_12_or_13):
    _format_step = 1

    def __init__(self, format=12):
        cmap_format_12_or_13.__init__(self, format)

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return list(range(startingGlyph, startingGlyph + numberOfGlyphs))

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode)


class cmap_format_13(cmap_format_12_or_13):
    _format_step = 0

    def __init__(self, format=13):
        cmap_format_12_or_13.__init__(self, format)

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return [startingGlyph] * numberOfGlyphs

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode)


def cvtToUVS(threeByteString):
    data = b"\0" + threeByteString
    (val,) = struct.unpack(">L", data)
    return val


def cvtFromUVS(val):
    assert 0 <= val < 0x1000000
    fourByteString = struct.pack(">L", val)
    return fourByteString[1:]


class cmap_format_14(CmapSubtable):
    def decompileHeader(self, data, ttFont):
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = (
            {}
        )  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(
                ">3sLL", data[recOffset : recOffset + 11]
            )
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                startOffset = defOVSOffset - 10
                (numValues,) = struct.unpack(">L", data[startOffset : startOffset + 4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(
                        ">3sB", data[startOffset : startOffset + 4]
                    )
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    baseUVList = list(range(firstBaseUV, firstBaseUV + cnt))
                    glyphList = [None] * cnt
                    localUVList = zip(baseUVList, glyphList)
                    try:
                        uvsDict[varUVS].extend(localUVList)
                    except KeyError:
                        uvsDict[varUVS] = list(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                (numRecs,) = struct.unpack(">L", data[startOffset : startOffset + 4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset : startOffset + 5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append((uv, glyphName))
                try:
                    uvsDict[varUVS].extend(localUVList)
                except KeyError:
                    uvsDict[varUVS] = localUVList

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        writer.begintag(
            self.__class__.__name__,
            [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
            ],
        )
        writer.newline()
        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        for uvs in uvsList:
            uvList = uvsDict[uvs]
            uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
            for uv, gname in uvList:
                attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
                if gname is not None:
                    attrs.append(("name", gname))
                writer.simpletag("map", attrs)
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        self.language = 0xFF  # provide a value so that CmapSubtable.__lt__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = (
                {}
            )  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
            uvsDict = self.uvsDict

        # For backwards compatibility reasons we accept "None" as an indicator
        # for "default mapping", unless the font actually has a glyph named
        # "None".
        _hasGlyphNamedNone = None

        for element in content:
            if not isinstance(element, tuple):
                continue
            name, attrs, content = element
            if name != "map":
                continue
            uvs = safeEval(attrs["uvs"])
            uv = safeEval(attrs["uv"])
            gname = attrs.get("name")
            if gname == "None":
                if _hasGlyphNamedNone is None:
                    _hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
                if not _hasGlyphNamedNone:
                    gname = None
            try:
                uvsDict[uvs].append((uv, gname))
            except KeyError:
                uvsDict[uvs] = [(uv, gname)]

    def compile(self, ttFont):
        if self.data:
            return (
                struct.pack(
                    ">HLL", self.format, self.length, self.numVarSelectorRecords
                )
                + self.data
            )

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = (
            10 + self.numVarSelectorRecords * 11
        )  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = [entry for entry in entryList if entry[1] is None]
            if defList:
                defList = [entry[0] for entry in defList]
                defOVSOffset = offset
                defList.sort()

                lastUV = defList[0]
                cnt = -1
                defRecs = []
                for defEntry in defList:
                    cnt += 1
                    if (lastUV + cnt) != defEntry:
                        rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1)
                        lastUV = defEntry
                        defRecs.append(rec)
                        cnt = 0

                rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
                defRecs.append(rec)

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs * 4
            else:
                defOVSOffset = 0

            ndefList = [entry for entry in entryList if entry[1] is not None]
            if ndefList:
                nonDefUVSOffset = offset
                ndefList.sort()
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs * 5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = bytesjoin(varSelectorRecords) + bytesjoin(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(
            ">HLL", self.format, self.length, self.numVarSelectorRecords
        )

        return headerdata + data


class cmap_format_unknown(CmapSubtable):
    def toXML(self, writer, ttFont):
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(
            cmapName,
            [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
            ],
        )
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (
                data is None and ttFont is None
            ), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        if self.data:
            return self.data
        else:
            return None


cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
    12: cmap_format_12,
    13: cmap_format_13,
    14: cmap_format_14,
}