#! /usr/bin/env python3

##############################################
# Create a LaTeX document that presents      #
# tables of every character in every font    #
# used by the Comprehensive LaTeX Symbol     #
# List.                                      #
#                                            #
# Author: Scott Pakin                        #
##############################################

import argparse
import re
import subprocess
import sys
from pathlib import Path


def info(msg):
    'Report an informational message to the user (on standard error).'
    sys.stderr.write('INFO: %s\n' % msg)


def kpsewhich(fname, check=True):
    '''Find a filename in the TeX tree.

    Return the whitespace-stripped standard output of "kpsewhich FNAME".
    With check=True a non-zero exit status raises
    subprocess.CalledProcessError; with check=False the exit status is
    ignored and whatever was printed (possibly the empty string) is
    returned.'''
    proc = subprocess.run(['kpsewhich', str(fname)],
                          capture_output=True,
                          check=check,
                          encoding='utf-8')
    return proc.stdout.strip()


# Match the font full-name output from fc-query.
fullname_re = re.compile(r'^\s*fullname:.*\"([^\"]+)\"\(s\)\s*$', re.MULTILINE)

# Match Berry-named fonts, including those with a Math Design prefix.
berry_re = re.compile(r'^(md)?(.)(..)(.)(.?)([78m].)$')

# Match a successful open() or openat() of a font file in an strace log
# whose lines begin with a process ID.  Group 1 is the quoted filename.
strace_open_re = re.compile(
    r'^\d+ open(?:at)?\([^\"]*\"(.+\.(otf|ttf|vf|tfm))\"([^\"]+)\) = \d+$')


def expand_berry_name(stem):
    '''Expand a Berry-named font, returning either a human-friendly
    string or None.

    STEM is a font filename without its extension (e.g., "ptmr8r").
    None is returned if STEM does not look like a Berry name or if any
    of its components is not in the small tables below.'''
    # Define a small subset of font-name components based on Berry's
    # "Fontname" document.  If we included more Berry-named fonts, it
    # instead would be worth parsing supplier.map, weight.map, variant.map,
    # etc.
    suppliers = {  # Currently unused; see the comment below.
        'p': 'Adobe',
    }
    typefaces = {
        'tm': 'Times',
        'ut': 'Utopia',
        'zc': 'Zapf Chancery',
    }
    weights = {
        'b': 'Bold',
        'm': 'Medium',
        'r': 'Roman',
    }
    variants = {
        '': '',
        'i': 'Italic',
    }
    encodings = {
        '7v': r'in \\TeX{} Math Extension Encoding',
        '7y': r'in \\TeX{} Math Symbol Encoding',
        '8c': r'in \\TeX{} Text Companion Encoding',
        '8r': r'in \\TeX{} Base 1 Encoding',
        '8t': r'in Cork Encoding',
        'ma': r'Symbols A',   # Not a Berry name
        'mb': r'Symbols B',   # Not a Berry name
    }

    # Parse each font-name component.  For now, we omit the supplier
    # because I suspect that font maps typically remap fonts from
    # commercial suppliers to free variants.
    match = berry_re.match(stem)
    if match is None:
        return None
    face, wt, var, enc = match[3], match[4], match[5], match[6]
    try:
        parts = [typefaces[face], weights[wt], variants[var], encodings[enc]]
    except KeyError:
        # At least one component is unknown to us.
        return None
    if match[1] == 'md':
        parts.insert(0, 'Math Design')

    # Return the concatenation of all non-empty components.  (The empty
    # variant would otherwise leave a doubled space in the name.)
    return ' '.join(p for p in parts if p)


def read_afm_font_name(fname):
    '''Read a FullName from a .afm file.

    Raise RuntimeError if the file contains no FullName line.'''
    # Undecodable bytes (e.g., in a Notice line) are replaced rather than
    # allowed to abort the whole run; FullName itself is expected to be
    # plain ASCII.
    with open(str(fname), errors='replace') as r:
        for ln in r:
            fields = ln.split(None, 1)
            if len(fields) == 2 and fields[0] == 'FullName':
                return fields[1].strip()
    raise RuntimeError(f'FullName not found in {fname}')


def get_font_name(fname):
    '''Return the human-friendly name of a font or None if it couldn't be
    determined.

    FNAME is a pathlib.Path naming a font file.  The name comes, in
    order of preference, from a table of hard-wired names, from
    expanding a Berry name, or from querying the font file and any
    like-named .pfb or .afm files found by kpsewhich.'''
    stem = fname.stem

    # All Computer Modern fonts report their FullName as "Computer Modern
    # Medium".  All Euler and AMS fonts report their FullName as "Euler
    # Medium".  Boisik fonts use a similar naming pattern as a Computer
    # Modern but don't define a FullName.  Define more meaningful names for
    # all of these.
    stem2name = {
        'bskarr10': 'Boisik Arrows',
        'bskex10': 'Boisik Math Extension',
        'bskma10': 'Boisik Additional Math Symbols',
        'bskmi10': 'Boisik Math Italic',
        'bskms10': r'Boisik \\AMS{} Math Symbols',
        'bsksy10': 'Boisik Math Symbols',
        'cmex10': 'Computer Modern Math Extension',
        'cmmi10': 'Computer Modern Math Italic',
        'cmsy10': 'Computer Modern Symbol',
        'eufm10': 'Euler Fraktur Medium',
        'eusm10': 'Euler Script Medium',
        'msam10': r'\\AMS{} Extra Math Symbols Group 1',
        'msbm10': r'\\AMS{} Extra Math Symbols Group 2',
    }
    try:
        return stem2name[stem]
    except KeyError:
        pass

    # Process Berry-named fonts.
    berry = expand_berry_name(stem)
    if berry is not None:
        return berry

    # Favor .pfb and .afm files because .tfm and .vf have no name
    # information.  In at least one case (countriesofeurope), the .pfb
    # provides a more human-friendly name than the .otf.
    # NOTE(review): the font file itself is nevertheless queried first, so
    # an .otf that fc-query can read wins over its .pfb -- confirm that
    # this ordering is intended.
    name_list = [fname]
    psname = kpsewhich(stem + '.pfb', check=False)
    if psname != '':
        name_list.append(psname)
    afmname = kpsewhich(stem + '.afm', check=False)
    if afmname != '':
        name_list.append(afmname)
    for nm in name_list:
        # Parse .afm files ourselves.
        if Path(nm).suffix == '.afm':
            return read_afm_font_name(nm)

        # In all other cases, use fc-query to find the font name.
        proc = subprocess.run(['fc-query', str(nm)],
                              capture_output=True,
                              encoding='utf-8')
        if proc.returncode != 0:
            continue
        match = fullname_re.search(proc.stdout)
        if match is None:
            continue

        # Perform various clean-up operations on the font name.
        name = match[1]
        name = name.replace(r'\040', ' ')
        name = name.replace(r'TeX', r'\\TeX{}')
        return name
    return None


def parse_strace_open_line(ln):
    '''Return the absolute Path of the font file that one strace log
    line reports as successfully opened, or None if the line is not a
    successful open()/openat() of an .otf, .ttf, .vf, or .tfm file.'''
    match = strace_open_re.match(ln)
    if match is None:
        return None
    return Path(match[1]).absolute()


def read_strace_font_files(strace):
    '''Return the set of all font files opened in a prior lualatex run.

    STRACE names an strace log whose lines start with a process ID.
    Both open() and openat() calls are recognized.'''
    font_set = set()
    with open(strace) as r:
        for ln in r:
            path = parse_strace_open_line(ln)
            if path is not None:
                font_set.add(path)
    return font_set


def filter_by_format(fonts):
    '''Filter a set of fonts to include only one font of each format.
    For example, if yhcmex10.vf and yhcmex10.tfm both exist, select
    yhcmex10.vf.

    Raise KeyError for a file whose suffix is not in the priority
    table.'''
    # Define an extension priority order, with larger numbers implying
    # higher priority.
    priority = {
        '.otf': 4,
        '.ttf': 3,
        '.vf': 2,
        '.tfm': 1,
    }

    # Group fonts by stem.
    stem2pripath = {}    # Map from stem to (priority, full path)
    for fn in fonts:
        stem = fn.stem
        pri = priority[fn.suffix]
        # Keep the first occurrence of a stem or any later occurrence
        # with a strictly higher priority.
        if stem not in stem2pripath or pri > stem2pripath[stem][0]:
            stem2pripath[stem] = (pri, fn)

    # Return the reduced set of filenames.
    return {elt[1] for elt in stem2pripath.values()}


def filter_by_size(fonts):
    '''Filter a set of fonts to include only one size of each typeface.
    For example, if cmbx5, cmbx7, and cmbx10 all exist, select cmbx10 (the
    closest to 10 pt.).'''
    substem2sizepath = {}  # Map from stem fragment to (font size, full path)
    num_re = re.compile(r'^(.*\D)(\d+)$')
    for fn in fonts:
        # Split the font name into a substem and a size.  For example,
        # ".../fonts/tfm/public/fourier/futr8r.tfm" has substem "futr"
        # and size "8".
        stem = fn.stem
        match = num_re.match(stem)
        if match is None:
            substem, size = stem, 10
        else:
            substem, size = match[1], int(match[2])
        if size >= 100:
            size /= 100   # Example: ecrm1728.tfm --> 17.28 pt.

        # Retain the substem whose size is closest to 10pt.
        if substem in ['knot']:
            # Special case to retain all of knot1.tfm, knot2.tfm,
            # knot3.tfm, ...
            substem2sizepath[stem] = (10, fn)
        elif substem in substem2sizepath:
            # Substem already exists.  Keep the better size.
            old_size = substem2sizepath[substem][0]
            if (size - 10)**2 < (old_size - 10)**2:
                substem2sizepath[substem] = (size, fn)
        else:
            # First occurrence of substem
            substem2sizepath[substem] = (size, fn)

    # Return the reduced set of filenames.
    return {elt[1] for elt in substem2sizepath.values()}


def filter_by_variant(fonts):
    '''Filter a set of fonts to include only one variant of each typeface.
    For example, if Erewhon-Bold, Erewhon-BoldItalic, Erewhon-BoldSlanted,
    Erewhon-Italic, Erewhon-Regular, and Erewhon-RegularSlanted all exist,
    select Erewhon-Regular as the most representative typeface.'''
    # Prefer FONTNAME, FONTNAME-Regular, and FONTNAME-Book over other
    # FONTNAME-VARIANT stems.
    deletable = set()
    stem2path = {fn.stem: fn for fn in fonts}
    dash_re = re.compile(r'^([^-]+)-([^-]+)(.*)$', re.IGNORECASE)
    style_re = re.compile(r'(Italic|Bold|Slanted|Medium|Oblique)',
                          re.IGNORECASE)
    for fn in fonts:
        # Look for fonts we want to discard.
        stem = fn.stem
        match = dash_re.match(stem)
        if match is None:
            continue
        base, style, suffix = match[1], match[2], match[3]
        if style_re.search(style) is None:
            continue

        # Mark the font for deletion only if a more "baseline" version exists.
        for new_style in [
                '',
                '-Regular',
                '-regular',
                '-Book',
                '-book',
        ]:
            if base + new_style + suffix in stem2path:
                deletable.add(stem)
                break
    for rm in deletable:
        del stem2path[rm]

    # Return the reduced set of filenames.
    return set(stem2path.values())


def filter_special_cases(fonts):
    '''Remove fonts that are known to be too similar to other fonts.  For
    example, roman fonts with the same Berry-named encoding (e.g., 8r) tend
    to be redundant.

    Stems named below that do not occur in FONTS are silently ignored.'''
    deletable = set()    # Set of fonts to delete, named by stem

    # Plan to delete fonts that don't build properly as standalone fonts.
    deletable.update([
        'futr8x',
        'futr9c',
        'md-gmr8y',
        'mdugmr8c',
    ])

    # Plan to delete redundant fonts.
    deletable.update([
        'cmss17',
        'cmti10',
        'cmtt10',
        'futri8r',
    ])

    # Delete all Helvetica if we have any Times.
    stem2path = {fn.stem: fn for fn in fonts}
    if any(s[:3] == 'ptm' for s in stem2path):
        deletable.update(s for s in stem2path if s[:3] == 'phv')

    # Define a mapping from fonts to delete to fonts to keep instead.
    # NOTE(review): 'md-utr8y' was previously listed three times (keeping
    # mdputr7y, mdputr8c, and mdputrma) and 'rtxptmr' twice (keeping tcxr
    # and txr).  Python retains only the last value for a repeated key,
    # so only those effective entries appear here.  Confirm whether "any
    # of these" was intended.
    del2keep = {
        'bbmss10': 'bbm10',         # Delete sans-serif.
        'bbmtt10': 'bbm10',         # Delete monospace.
        'cmbsy10': 'cmsy10',        # Delete bold.
        'cmbx10': 'cmr10',          # Delete bold.
        'cmmib10': 'cmmi10',        # Delete bold.
        'cmphi10': 'cmph10',        # Delete italic.
        'EBGaramond-Regular-lf-t1--base': 'EBGaramond-Regular-lf-t1',  # Delete TFM; keep VF.
        'EBGaramond-Regular-lf-t1': 'ecrm1000',  # Delete Garamond, keep European Computer Modern.
        'eccc1000': 'ecrm1000',     # Delete small caps.
        'ec-lmbx10': 'ec-lmr10',    # Delete bold.
        'ec-lmcsc10': 'ec-lmr10',   # Delete small caps.
        'ec-lmri10': 'ec-lmr10',    # Delete italic.
        'ec-lmro10': 'ec-lmr10',    # Delete oblique.
        'ec-lmss10': 'ec-lmr10',    # Delete sans-serif.
        'ec-lmsso10': 'ec-lmr10',   # Delete sans-serif oblique.
        'ec-lmtt10': 'ec-lmr10',    # Delete monospace.
        'ecrb1000': 'ecrm1000',     # Delete bold.
        'ecsx1000': 'ecrm1000',     # Delete bold sans-serif.
        'ecti1000': 'ecrm1000',     # Delete italic.
        'fourier-mlit': 'fourier-ml',  # Delete italic.
        'futmii': 'cmmi10',         # Delete Fourier; keep CM.
        'futr8r': 'ptmr8r',         # Delete Fourier; keep Times.
        'futri8r': 'ptmri8r',       # Delete Fourier; keep Times.
        'logosl10': 'logo10',       # Delete slanted.
        'mdbchr7v': 'mdputr7v',     # Delete Charter, keep Utopia.
        'mdbchr8c': 'mdputr8c',     # Delete Charter, keep Utopia.
        'md-chr7v': 'mdbchr7v',     # Delete TFM; keep VF.
        'md-chr8c': 'mdbchr8c',     # Delete TFM; keep VF.
        'md-chr8y': 'mdbchr8c',     # Delete TFM; keep VF.
        'md-gmr7v': 'mdugmr7v',     # Delete TFM; keep VF.
        'md-gmr8c': 'mdugmr8c',     # Delete TFM; keep VF.
        'md-gmr8y': 'mdugmr8c',     # Delete TFM; keep VF.
        'mdugmr7v': 'mdputr7v',     # Delete Garamond, keep Utopia.
        'md-utr7v': 'mdputr7v',     # Delete TFM; keep VF.
        'md-utr7y': 'mdputr7y',     # Delete TFM; keep VF.
        'md-utr8c': 'mdputr8c',     # Delete TFM; keep VF.
        'md-utr8y': 'mdputrma',     # Delete TFM; keep VF.
        'md-utrma': 'mdputrma',     # Delete TFM; keep VF.
        'md-utrmb': 'mdputrmb',     # Delete TFM; keep VF.
        'NewCM08-Book': 'NewCM10-Book',  # Delete 8 pt.; keep 10 pt.
        'NewCMSans08-Book': 'NewCMSans10-Book',  # Delete 8 pt.; keep 10 pt.
        'rrsfso10': 'rsfso10',      # Delete raw.
        'rtcxr': 'tcxr',            # Delete raw.
        'rtxi': 't1xi',             # Delete raw.
        'rtxptmri': 't1xi',         # Delete raw.
        'rtxptmr': 'txr',           # Delete raw.
        'rtxr': 'txr',              # Delete raw.
        'stix-mathrm-bold': 'stix-mathrm',  # Delete bold.
        't1xi': 't1xr',             # Delete italic.
        't1xr': 'ecrm1000',         # Delete txfonts, keep European Computer Modern.
        'tcrm1000': 'ts1-lmr10',    # Delete European Computer Modern, keep Latin Modern, which has a few more characters
        'tcxr': 'tcrm1000',         # Delete txfonts, keep European Computer Modern.
        'ts1-lmtt10': 'ts1-lmr10',  # Delete monospace.
        'txbexa': 'txexa',          # Delete bold.
        'txbmia': 'txmia',          # Delete bold.
        'txbsyc': 'txsyc',          # Delete bold.
        'txr': 't1xr',              # Delete sparser encoding.
        'txsy': 'cmsy10',           # Delete txfonts, keep Computer Modern.
        'wasyb10': 'wasy10',        # Delete bold.
    }

    # Remove all "delete" fonts if the corresponding "keep" font exists.
    # The test is against the unreduced input, so a "keep" font counts
    # even if it is itself going to be deleted.
    for stem in stem2path:
        keep = del2keep.get(stem)
        if keep is not None and keep in stem2path:
            deletable.add(stem)

    # Return the reduced set of filenames.  Many of the stems planned for
    # deletion are listed unconditionally and need not be present.
    return {fn for stem, fn in stem2path.items() if stem not in deletable}


def _log_discarded(before, after, use_name=False):
    '''Report, as a sorted and numbered list, every font in BEFORE that is
    missing from AFTER.  Fonts are labeled by stem or, if USE_NAME is
    true, by full filename.'''
    removed = set(before) - set(after)
    labels = sorted(fn.name if use_name else fn.stem for fn in removed)
    for i, rm in enumerate(labels):
        info(' [%d] %s' % (i + 1, rm))


def filter_fonts(fonts):
    '''Discard redundant fonts from a collection of font paths, logging
    what each filtering stage removed, and return the surviving set.'''
    info('Discarding redundant font formats')
    fewer_fonts = filter_by_format(fonts)
    # Only this stage reports full filenames because the stems of the
    # discarded and retained files are identical.
    _log_discarded(fonts, fewer_fonts, use_name=True)

    info('Discarding redundant file sizes')
    fonts = fewer_fonts
    fewer_fonts = filter_by_size(fonts)
    _log_discarded(fonts, fewer_fonts)

    info('Discarding redundant typeface variants')
    fonts = fewer_fonts
    fewer_fonts = filter_by_variant(fonts)
    _log_discarded(fonts, fewer_fonts)

    info('Discarding non-exemplar typefaces')
    fonts = fewer_fonts
    fewer_fonts = filter_special_cases(fonts)
    _log_discarded(fonts, fewer_fonts)
    return fewer_fonts


def write_document_intro(w):
    '''Output all the LaTeX text that precedes the font tables proper.

    W is any object with a write() method that accepts a string.'''
    w.write(r'''%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is a generated file.  DO NOT EDIT. %
% Edit makerawtables instead.             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass{article}
\usepackage[margin=1in]{geometry}
\usepackage{longtable}
\usepackage[longtable]{multirow}
\usepackage{booktabs}
\usepackage{array}
\usepackage{needspace}
\usepackage{fancyhdr}
\usepackage{ifsym}
\usepackage{hvlogos}
\usepackage{hyperref}

% Define this document's metadata.
\hypersetup{%
  pdftitle={Raw Font Tables},
  pdfauthor={Scott Pakin},
  pdfsubject={Tables of fonts used in the Comprehensive LaTeX Symbol List},
  pdfkeywords={font tables, symbols, glyphs, characters, TeX, LaTeX},
  baseurl={http://mirror.ctan.org/info/symbols/comprehensive/}
}

% Before we load any fonts, ensure that METAFONT fonts will be
% compiled to 1200 DPI .pk files.  This code assumes a not-too-old TeX
% distribution.
\RequirePackage{iftex}
\ifluatex
  \directlua{
    pdf.setpkresolution(1200)
    os.setenv("MAKETEX_MODE", "ljfzzz")
    os.setenv("BDPI", "1200")
  }
\else
  \ifpdftex
    \pdfpkresolution=1200
    \pdfpkmode{ljfzzz}
  \fi
\fi

% Prepare fonts we'll need throughout the text.
\newcommand*{\symchar}[1]{{\usefont{OMS}{cmsy}{m}{n}\char#1}}
\font\sectionfont=cminch

% \CLSL is a shortcut for "Comprehensive LaTeX Symbol List".
\newcommand*{\CLSL}{\textit{Comprehensive \LaTeX\ Symbol List}}

% Typeset a control sequence.
\newcommand*{\cs}[1]{\hbox{\texttt{\expandafter\string\csname#1\endcsname}}}

% Define a thicker horizontal rule.
\newcommand*{\thickhline}{\specialrule{1pt}{0pt}{0pt}}

% Inhibit section numbering.
\setcounter{secnumdepth}{0}

% Report each page's first and last table in the page header.
\renewcommand{\sectionmark}[1]{}
\fancyhead[L]{\itshape\rightmark}
\fancyhead[C]{}
\fancyhead[R]{\itshape\leftmark}

% Start a section for a new letter of the alphabet.
\newcommand*{\beginletter}[1]{%
  \clearpage
  \phantomsection
  \addcontentsline{toc}{section}{#1}
  \begin{center}
    \sectionfont#1\par
  \end{center}
}

% Load Lua code for generating font tables.
\directlua{dofile("rawtables.lua")}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

\begin{center}
{\Large\bfseries Raw Font Tables\par}
\bigskip
Scott Pakin, \textit{scott+clsl@pakin.org}\par
\bigskip
\today
\vspace{1cm}
\end{center}

\phantomsection
\addcontentsline{toc}{section}{Introduction}
This document presents, in alphabetical order, font tables for
all\footnote{Redundant fonts are omitted.  For example, bold and
italic variants are deemed redundant with the corresponding roman
font; different-sized fonts for the same typeface are deemed
redundant; and fonts with similar typefaces but the same encoding are
deemed redundant.} of the fonts that appear in the \CLSL.  It was
mechanically produced using a script that extracts the list of fonts
used by the \CLSL\ and typesets all characters in each font in a
per-font table.

The purpose of this document is to provide a companion mechanism for
locating symbols by organizing the myriad symbols available to \TeX\
and \LaTeX\ by font family rather than by \LaTeX\ symbol name.  It may
also reveal some unnamed symbols---or symbols overlooked by the \CLSL.
On the other hand, not every symbol shown in the \CLSL\ appears in
this document.  Some symbols are defined by juxtaposing multiple other
symbols; some symbols are defined in terms of graphics primitives
instead of fonts.  The tables shown in this document are strictly
those that correspond to ``true'' fonts---glyphs drawn in \METAFONT,
OpenType, TrueType, or PostScript Type~1.

In each table, characters are numbered in base~16 (hexadecimal).  A
character's hexadecimal position is formed by taking the first
hexadecimal digits from a table's left column and the final
hexadecimal digit from either the top or the bottom row, based on
whether the character lies in the upper or lower row associated with
the first hexadecimal digits.  To clarify this description with an
example, the ``\symchar{"34}'' symbol in the \texttt{cmsy10} table can
be produced by \texttt{\string\char"34}.  The ``\symchar{"3C}'' symbol
that lies directly beneath that in the table can be produced by
\texttt{\string\char"3C}.  The decimal equivalents of these are
\texttt{\string\char52} and \texttt{\string\char60}, and their
character equivalents are ``\texttt{4}'' and ``\texttt{<}'',
respectively.

\font\txexa=txexa at 10pt
\def\sqiiint{%
  \setbox0=\hbox{\txexa\char"52}%
  \raise 10pt\box0\relax
}

To put this means of character usage in context, suppose we want to
typeset \cs{sqiiint} (``\sqiiint\kern3pt'') from the \textsf{txfonts}
package.  \textsf{txfonts} is a large package that redefines all text
and math fonts, which may not be desirable if all that is desired is
to typeset a single symbol.  The following explains how to typeset a
single \textsf{txfonts} symbol without having to load the entire
package.  We observe that the symbol in question is character 52
hexadecimal in the \texttt{txexa} table in this document.  The first
step is to associate \texttt{txexa} with a \TeX\ control sequence;
here we call it \cs{myfont}:

\begin{verbatim}
\font\myfont=txexa at 10pt
\end{verbatim}

\noindent
(If our document were typeset in a font size other than 10~pt., we
would specify that size in the above.)  We then define a macro, here
\cs{mysqiiint}, that sets the font and typesets a single character:

\begin{verbatim}
\newcommand*{\mysqiiint}{{\myfont\char"52}}
\end{verbatim}

The extra pair of curly braces in the above limits the font change to
the single character we want to typeset.  We can now use
\cs{mysqiiint} without having to load the \textsf{txfonts} package.

Alas, in this case the symbol winds up being typeset below the
baseline.  This is an artifact of typesetting a mathematical symbol
outside of math mode.  The solution is to explicitly raise the symbol
to the desired height:

\begin{verbatim}
\newcommand*{\mysqiiint}{\raisebox{10pt}{\myfont\char"52}}
\end{verbatim}

Note that the \textsf{amstext} package's \cs{text} command is a useful
mechanism for typesetting text characters in math mode.

\clearpage
\pagestyle{fancy}
''')


def write_font_tables(w, fonts):
    '''Produce one Lua render_table invocation per font and one
    \\beginletter invocation each time a font name begins with a new
    letter of the alphabet.

    W is any object with a write() method; FONTS is an iterable of
    pathlib.Path objects, which are processed in case-insensitive
    order of their stems.'''
    prev_letter = '?'
    for fn in sorted(fonts, key=lambda s: s.stem.upper()):
        # Indicate when the first letter of the font name changes.
        letter = fn.stem[0].upper()
        if letter != prev_letter:
            w.write('\n\\beginletter{%s}\n' % letter)
            prev_letter = letter

        # Acquire a human-friendly font name.
        font_name = get_font_name(fn)
        name_arg = '' if font_name is None else f', name="{font_name}"'

        # Produce a font table.
        if fn.name == 'Asap-Symbol.otf':
            # Asap-Symbol defines all of its symbols in feature sets
            # rather than as unique Unicode code points.
            features = ['"ss%02d"' % i for i in range(1, 7)]
            w.write('\\directlua{render_table{"%s", features={%s}%s}}\n' %
                    (fn, ', '.join(features), name_arg))
        else:
            w.write('\\directlua{render_table{"%s"%s}}\n' % (fn, name_arg))


def write_document(w, fonts):
    'Write the complete LuaLaTeX document for a set of fonts to W.'
    # Write the document text.
    write_document_intro(w)

    # Write Lua calls to generate all font tables.
    write_font_tables(w, fonts)

    # Finish the document.
    w.write('\\end{document}\n')


def main():
    'Parse the command line and generate the font-table document.'
    # Parse the command line.
    parser = argparse.ArgumentParser(
        description='Generate a LaTeX file of font tables')
    parser.add_argument('strace',
                        help='Log from building the CLSL under strace')
    parser.add_argument('-o', '--output',
                        help='LuaLaTeX file to generate (default: stdout)')
    cl_args = parser.parse_args()

    # Read a set of fonts from the command line.
    fonts = read_strace_font_files(cl_args.strace)

    # Include additional fonts that are used by the Comprehensive LaTeX
    # Symbol List in a faked form (e.g., via conversion to graphics).
    for extra_font in ['emmentaler-11.otf']:
        # Don't let a missing extra font abort the run; just skip it.
        fn = kpsewhich(extra_font, check=False)
        if fn == '':
            continue
        fonts.add(Path(fn))

    # Elide "uninteresting" fonts.  Report the log's name as parsed by
    # argparse; sys.argv[1] is wrong when options precede it.
    info('Encountered %d unique font filenames in %s' %
         (len(fonts), cl_args.strace))
    fonts = filter_fonts(fonts)
    info('Planning on typesetting the remaining %d fonts:' % len(fonts))
    for i, font in enumerate(sorted([fn.stem for fn in fonts],
                                    key=lambda s: s.upper())):
        info(' [%d] %s' % (i + 1, font))

    # Write the document, closing the output file even on failure.
    if cl_args.output is None:
        write_document(sys.stdout, fonts)
    else:
        with open(cl_args.output, 'w', encoding='utf-8') as w:
            write_document(w, fonts)


if __name__ == '__main__':
    main()