diff options
author | Koji Ishii <kojii@google.com> | 2021-04-25 07:03:56 +0000 |
---|---|---|
committer | Koji Ishii <kojii@google.com> | 2021-04-26 08:14:00 +0000 |
commit | 29a83996d5f70a2f88902edba123e1e156cb3b78 (patch) | |
tree | e84b117bb73b00dbd11e9be11137c452c26aead8 | |
parent | cee84e95552b1cbb76072543ebb0bded794fe809 (diff) |
Fix subset_noto_cjk.py to run on Python3
One of the dependencies of `noto-fonts/cjk/subset_noto_cjk.py`
now requires Python3, but the script requires Python2. This
patch fixes it to run on Python3.
This patch makes the following changes:
1. Syntactic fixes for Python3.
2. `nototools.ttc_utils` requires a few fixes for Python3 and
for recent AFDKO, such as [PR536]. This patch uses
`TTCollection` from `fonttools` by default instead; the
`--use-ttc-utils` option selects the `nototools.ttc_utils`
engine.
3. Command-line parsing now uses `argparse` and supports
both engines, to ensure the transition does not cause
regressions.
Locally verified that the script with up-to-date `nototools`
on Python3.8.5 can generate exactly the same tables from Noto
CJK 2.001 except the timestamp and the checksum in the `head`
table.
[PR536]: https://github.com/googlefonts/nototools/pull/536
Change-Id: Ic2ebbb20d5344e70a5a48873410f00e681ad909f
-rwxr-xr-x | cjk/subset_noto_cjk.py | 63 |
1 files changed, 53 insertions, 10 deletions
diff --git a/cjk/subset_noto_cjk.py b/cjk/subset_noto_cjk.py index 5a324bf..f69248b 100755 --- a/cjk/subset_noto_cjk.py +++ b/cjk/subset_noto_cjk.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # coding=UTF-8 # # Copyright 2016 Google Inc. All rights reserved. @@ -17,7 +17,10 @@ """Create a curated subset of Noto CJK for Android.""" +import argparse +import logging import os +from pathlib import Path from fontTools import ttLib from nototools import font_data @@ -73,10 +76,12 @@ ANDROID_EMOJI = { } # We don't want support for ASCII control chars. -CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F'); +CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F') EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS) +TTC_NAMES = ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc') + def remove_from_cmap(infile, outfile, exclude=frozenset()): """Removes a set of characters from a font file's cmap table.""" @@ -85,19 +90,57 @@ def remove_from_cmap(infile, outfile, exclude=frozenset()): font.save(outfile) -TEMP_DIR = 'subsetted' - -def remove_codepoints_from_ttc(ttc_name): - otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR) +def remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir): + otf_names = ttc_utils.ttcfile_extract(ttc_name, out_dir) - with tool_utils.temp_chdir(TEMP_DIR): + with tool_utils.temp_chdir(out_dir): for index, otf_name in enumerate(otf_names): - print 'Subsetting %s...' 
% otf_name + logging.info('Subsetting %s...', otf_name) remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS) ttc_utils.ttcfile_build(ttc_name, otf_names) for f in otf_names: os.remove(f) -remove_codepoints_from_ttc('NotoSansCJK-Regular.ttc') -remove_codepoints_from_ttc('NotoSerifCJK-Regular.ttc') +def remove_codepoints_from_ttc(ttc_path, out_dir): + """Removes a set of characters from a TTC font file's cmap table.""" + logging.info('Loading %s', ttc_path) + ttc = ttLib.ttCollection.TTCollection(ttc_path) + + logging.info('Subsetting %d fonts in the collection', len(ttc)) + for font in ttc: + font_data.delete_from_cmap(font, EXCLUDED_CODEPOINTS) + + out_path = out_dir / ttc_path.name + logging.info('Saving to %s', out_path) + ttc.save(out_path) + logging.info('Size: %d --> %d, delta=%d', + ttc_path.stat().st_size, + out_path.stat().st_size, + out_path.stat().st_size - ttc_path.stat().st_size) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('input', default='.', nargs='?') + parser.add_argument('-o', '--output', default='subsetted') + parser.add_argument('--use-ttc-utils', action='store_true') + parser.add_argument('-v', '--verbose', action='count') + args = parser.parse_args() + if args.verbose: + if args.verbose > 1: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + in_dir = Path(args.input) + out_dir = Path(args.output) + out_dir.mkdir(parents=True, exist_ok=True) + for ttc_name in TTC_NAMES: + if args.use_ttc_utils: + remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir) + else: + remove_codepoints_from_ttc(in_dir / ttc_name, out_dir) + + +if __name__ == "__main__": + main() |