A failed experiment: Python Pillow and a Cistercian font

I tried to use the Pillow library in Python to create images containing the FRBCistercian font, but the result was garbled. My guess is that Pillow can’t handle the way FRBCistercian works.

How does FRBCistercian work?

Cistercian numerals are a really interesting system for encoding four decimal digits into a single glyph. See https://cistercian.micahrl.com to see how they work.

Unfortunately, they are not part of Unicode yet, so there is not a portable way to build them. FRBCistercian uses Unicode’s “Private Use Area”, meaning that other fonts might use the same area for something conflicting or (more likely) the data will be interpreted as garbage or nothing at all.

To use FRBCistercian, you must encode the stave character followed by any of:

  • a character indicating the ones place (1, 2, 3, etc)
  • a character indicating the tens place (10, 20, 30, …)
  • a character indicating the hundreds place (100, 200, 300, …)
  • a character indicating the thousands place (1000, 2000, 3000, …)

The codepoints that correspond to those values are found in codepoints.tsv.

So to make a zero in FRBCistercian, simply encode the stave by itself: U+100002. To make a three, encode U+100002 U+100005. To make a 13, encode U+100002 U+10000C U+100005. Etc.

As an aside, this script marks the third implementation of a system to display FRBCistercian that I’ve written.

  1. The first is in Javascript for https://cistercian.micahrl.com. (I really should break this out so that the library has an NPM package and the website is separate.)
  2. The second is in Go HTML templates for hugo-theme-cistercian (see also the blog post with examples).
  3. And now this third, doomed implementation in Python (see below).

Who cares, show us how it broke

A screenshot of what broke

Sad!

The background is a blurred image related to the post, the title is at the top, and what is supposed to be Cistercian numerals is on the left at the middle of the image.

I assume the issue is that Pillow can’t handle the ligatures that FRBCistercian uses, but I don’t know for sure.

(Please don’t judge me too harshly for this image, as it is just a proof of concept – I would have made it look nicer if the font had rendered properly!)

The code I wrote

This was intended to be for an og:image generator for a hugo blog. The script would read the YAML frontmatter of the blog post Markdown to get the date and title, and get the first image in the hugo pagebundle (that is, sort images under research/blog-post-slug/*.{jpg,jpeg,png} alphabetically and use the first one), and make a card out of it.

#!/usr/bin/env python3

import argparse
import datetime
import logging
import math
import os
import pathlib
import pdb
import sys
import traceback
import typing

import yaml

from PIL import Image, ImageDraw, ImageFilter, ImageFont
from PIL.ImageOps import exif_transpose

# Log INFO and above; each record is prefixed with its timestamp,
# the name of the emitting logger, and the level.
logging.basicConfig(
    format="[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def idb_excepthook(type, value, tb):
    """Call an interactive debugger in post-mortem mode

    If you do "sys.excepthook = idb_excepthook", then an interactive debugger
    will be spawned at an unhandled exception.

    The three parameters are the (exception class, exception instance,
    traceback) triple that the interpreter hands to ``sys.excepthook``.

    When running inside an interactive interpreter (``sys.ps1`` exists) or
    when stderr is not a terminal, the default hook is called instead of the
    debugger.
    """
    if hasattr(sys, "ps1") or not sys.stderr.isatty():
        sys.__excepthook__(type, value, tb)
    else:
        traceback.print_exception(type, value, tb)
        # Blank line between the traceback and the debugger prompt.
        # (A bare ``print`` without parentheses is a no-op in Python 3.)
        print()
        # Debug the traceback we were given rather than relying on the
        # interpreter-global sys.last_* state that pdb.pm() reads.
        pdb.post_mortem(tb)


def resolvepath(path):
    """Return the canonical absolute form of a path

    A leading "~" is expanded, the result is normalized, and finally
    symbolic links are resolved.
    """
    expanded = os.path.expanduser(path)
    normalized = os.path.normpath(expanded)
    return os.path.realpath(normalized)


def find_first_image(path):
    """Find the first image under a path

    Directory entries are visited in sorted order, and the first regular
    file whose suffix is .jpeg, .jpg or .png (compared case-insensitively)
    is returned. Subdirectories are not searched.

    Returns the match as a pathlib.PurePath made of ``path`` joined with the
    entry name.

    Raises FileNotFoundError (a subclass of Exception) if the directory
    contains no such file.
    """
    path = pathlib.PurePath(path)
    for name in sorted(os.listdir(path)):
        candidate = path / name
        # Check the suffix first: it is a pure string comparison, whereas
        # isfile() has to hit the filesystem.
        if candidate.suffix.lower() not in (".jpeg", ".jpg", ".png"):
            continue
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(f"No image found in {path}")


def dumbparse_frontmatter(post_md: str):
    """Parse metadata from a post Markdown file with YAML frontmatter

    This is a very dumb way to do it but whatever: the first line of the file
    must be exactly "---", and every line up to the next "---" line is parsed
    as one YAML document.

    Returns a dict with the "title" and "date" values of the frontmatter.

    Raises ValueError if the opening or the closing "---" line is missing,
    and KeyError if the frontmatter has no "title" or "date" entry.
    """
    with open(post_md, encoding="utf-8") as postfp:
        lines = postfp.read().split("\n")
    if lines[0] != "---":
        # Report the path; the (already closed) file object is not useful.
        raise ValueError(f"No YAML metadata in post file {post_md}")
    try:
        # Search from index 1 so the opening delimiter is not matched again.
        closing = lines.index("---", 1)
    except ValueError:
        raise ValueError(
            f"Unterminated YAML metadata in post file {post_md}"
        ) from None
    frontmatter_yml = "\n".join(lines[1:closing])
    # NOTE(review): yaml.Loader can construct arbitrary Python objects.
    # Only run this on post files you trust, or consider yaml.SafeLoader.
    frontmatter = yaml.load(frontmatter_yml, yaml.Loader)
    return {
        "title": frontmatter["title"],
        "date": frontmatter["date"],
    }


def int2cistercian(num: int):
    """Return Unicode codepoints for FRBCistercian given a 4 digit int

    FRBCistercian uses the Private Use Area of Unicode.
    To get the right glyph, we have to map to this area.
    <https://github.com/ctrlcctrlv/FRBCistercian/blob/main/codepoints.tsv>

    The returned string always starts with the stave character (0x100002),
    followed by one character for each non-zero thousands, hundreds, tens
    and ones digit, in that order.

    Raises ValueError (a subclass of Exception) if num is outside 0-9999.
    """

    def get_digit(num: int, power: int):
        """Get the decimal digit at a power of ten.

        E.g:
            get_digit(456, 0) returns 6, which is in the ones place.
            get_digit(456, 1) returns 5, which is in the tens place
        """
        # Integer arithmetic keeps this exact; no float rounding involved.
        return (num // 10**power) % 10

    def kthu(num: int):
        """Return an array of digits representing places in a 1-4 digit number

        [thousands, hundreds, tens, ones] also called KTHU
        """
        if num < 0 or num > 9999:
            raise ValueError("kthu() only supports numbers between 0-9999 (inclusive)")
        return [get_digit(num, power) for power in (3, 2, 1, 0)]

    thousands, hundreds, tens, ones = kthu(num)
    # Every numeral starts with the stave; on its own it reads as zero.
    result = [0x100002]
    if thousands > 0:
        result.append(0x10001E + thousands - 1)
    if hundreds > 0:
        result.append(0x100015 + hundreds - 1)
    if tens > 0:
        result.append(0x10000C + tens - 1)
    if ones > 0:
        # We do not subtract 1 because there is a zero,
        # but there is no "zero ten" / "zero hundred" / "zero thousand".
        result.append(0x100002 + ones)

    return "".join(chr(codepoint) for codepoint in result)


def date2cistercian(dt: datetime.datetime):
    """Render a datetime as three space-separated Cistercian glyphs

    In order, the glyphs encode the year (YYYY), the month and day (MMDD),
    and the hour and minute (HHMM), each read as one decimal number.
    """
    glyphs = (
        int2cistercian(int(dt.strftime(fmt)))
        for fmt in ("%Y", "%m%d", "%H%M")
    )
    return " ".join(glyphs)


def generate_ogimage(
    srcimgpath: str,
    outpng: str,
    date: datetime.datetime,
    title: str,
    text_font_path: str = "themes/qualia/static/fonts/Roboto/Roboto-Bold.ttf",
    cistercian_font_path: str = "themes/qualia/static/fonts/FRBCistercian.otf",
    outwidth: int = 800,
):
    """Generate a PNG suitable for an og:image

    The source image is EXIF-transposed, scaled so that it covers a card of
    outwidth x int(outwidth / 1.91) pixels, centre-cropped to that size and
    blurred. The title is drawn in the top-left corner, and a Cistercian
    datestamp for ``date`` is drawn at the left edge, starting at mid-height.

    srcimgpath            Path of the source image
    outpng                Path the resulting image is saved to
    date                  Date to render as Cistercian numerals
    title                 Text drawn at the top of the card
    text_font_path        Font file used for the title
    cistercian_font_path  Font file used for the datestamp
    outwidth              Width of the card in pixels
    """

    # Going for a 1.91:1 aspect ratio with set width
    card_height = int(outwidth / 1.91)

    with open(srcimgpath, "rb") as imgfp:
        img = Image.open(imgfp)
        print(f"Raw image size: {img.size}")

        # This both removes EXIF rotation tags and returns a rotated image
        img = exif_transpose(img)
        print(f"EXIF-Transposed image size: {img.size}")

        # Scale the image so that it covers the whole card. Fitting the
        # width is tried first; if that leaves the image too short, fit the
        # height instead. Previously a narrow or short source was cropped
        # outside its own bounds, which pads the card with blank pixels.
        srcwidth, srcheight = img.size
        fitted_height = int(outwidth * srcheight / srcwidth)
        if fitted_height >= card_height:
            cover_size = (outwidth, fitted_height)
        else:
            cover_size = (
                max(outwidth, round(srcwidth * card_height / srcheight)),
                card_height,
            )
        if cover_size != img.size:
            img = img.resize(cover_size)
            print(f"Resized image size: {img.size}")

        # Centre-crop to exactly the card size.
        left = (img.size[0] - outwidth) // 2
        top = int((img.size[1] / 2) - (card_height / 2))
        bottom = int((img.size[1] / 2) + (card_height / 2))
        img = img.crop((left, top, left + outwidth, bottom))
        print(f"Cropped image size: {img.size}")

        img = img.filter(ImageFilter.GaussianBlur(3))

        # One drawing context is enough for both pieces of text.
        draw = ImageDraw.Draw(img)

        titlefont = ImageFont.truetype(text_font_path, 64)
        draw.text((20, 20), title, (0, 0, 0), font=titlefont)

        # NOTE(review): FRBCistercian presumably composes its glyphs through
        # OpenType shaping, which Pillow's basic layout engine does not
        # apply -- confirm whether this Pillow build has Raqm support.
        cistercianfont = ImageFont.truetype(cistercian_font_path, 64)
        y_center = int(card_height / 2)
        draw.text(
            (20, y_center), date2cistercian(date), (0, 0, 0), font=cistercianfont
        )

        img.save(outpng)


def parseargs(arguments: typing.List[str]):
    """Parse program arguments

    ``arguments`` is the argument list without the program name. The
    returned namespace has the attributes ``debug``, ``post`` and
    ``outfile``.
    """
    argparser = argparse.ArgumentParser(description="Generate og:image for a post")

    # Optional switch
    argparser.add_argument(
        "--debug",
        "-d",
        help="Launch a debugger on unhandled exception",
        action="store_true",
    )

    # Required positionals, in command-line order
    positionals = (
        (
            "post",
            "Path to the post directory, like content/blog/example-post; expected to have an index.md and at least one image file",
        ),
        ("outfile", "Path to the output file, which should be a PNG"),
    )
    for argname, helptext in positionals:
        argparser.add_argument(argname, help=helptext)

    return argparser.parse_args(arguments)


def main(*arguments):
    """Main program

    ``arguments`` is an argv-style sequence: the first item (the program
    name) is skipped and the rest is parsed as command-line arguments.
    """
    options = parseargs(arguments[1:])
    if options.debug:
        # Drop into a debugger on any unhandled exception
        sys.excepthook = idb_excepthook

    index_md = os.path.join(options.post, "index.md")
    metadata = dumbparse_frontmatter(index_md)
    source_image = find_first_image(options.post)
    destination = resolvepath(options.outfile)

    generate_ogimage(source_image, destination, metadata["date"], metadata["title"])
    print(f"Generated image at {destination}")


# Script entry point: pass the full argv (program name included) to main()
# and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(*sys.argv)
    sys.exit(exit_status)