1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
#!/usr/bin/env python3
# coding=UTF-8
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a curated subset of Noto CJK for Android."""
import argparse
import logging
import os
from pathlib import Path
from fontTools import ttLib
from nototools import font_data
from nototools import tool_utils
from nototools import ttc_utils
# Code points covered by the Noto CJK fonts for which UTR #51 recommends
# emoji-style presentation by default.  They are folded into
# EXCLUDED_CODEPOINTS and therefore removed from the CJK cmap tables.
EMOJI_IN_CJK = {
    # Miscellaneous Symbols
    0x26BD,   # ⚽ SOCCER BALL
    0x26BE,   # ⚾ BASEBALL

    # Enclosed Alphanumeric Supplement
    0x1F18E,  # 🆎 NEGATIVE SQUARED AB
    0x1F191,  # 🆑 SQUARED CL
    0x1F192,  # 🆒 SQUARED COOL
    0x1F193,  # 🆓 SQUARED FREE
    0x1F194,  # 🆔 SQUARED ID
    0x1F195,  # 🆕 SQUARED NEW
    0x1F196,  # 🆖 SQUARED NG
    0x1F197,  # 🆗 SQUARED OK
    0x1F198,  # 🆘 SQUARED SOS
    0x1F199,  # 🆙 SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # 🆚 SQUARED VS

    # Enclosed Ideographic Supplement
    0x1F201,  # 🈁 SQUARED KATAKANA KOKO
    0x1F21A,  # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250,  # 🉐 CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # 🉑 CIRCLED IDEOGRAPH ACCEPT
}
# Code points that Android renders emoji-style by project decision, despite
# the recommendation in UTR #51.  They are folded into EXCLUDED_CODEPOINTS
# and therefore removed from the CJK cmap tables as well.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
# We don't want support for ASCII control chars.
# NOTE(review): '0000-001F' presumably denotes the inclusive hexadecimal range
# U+0000..U+001F, and the result must be set-like for the `|` below to work;
# confirm against tool_utils.parse_int_ranges.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')
# Every code point to strip from the fonts' cmap tables: the union of both
# emoji sets and CONTROL_CHARS, materialised as a sorted list.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)
# File names of the font collections that main() processes, in order.
TTC_NAMES = ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc')
def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Strip a set of characters from a single font file's cmap table.

    Args:
        infile: Font file to load with ``ttLib.TTFont``.
        outfile: Where the modified font is saved.  Callers in this file pass
            the same path as ``infile``.
        exclude: Code points handed to ``font_data.delete_from_cmap``.  The
            default empty set removes nothing.
    """
    loaded_font = ttLib.TTFont(infile)
    font_data.delete_from_cmap(loaded_font, exclude)
    loaded_font.save(outfile)
def remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir):
    """Subset a TTC by splitting it apart and rebuilding it with ttc_utils.

    The collection is extracted into ``out_dir``, EXCLUDED_CODEPOINTS is
    removed from each extracted font in place, the fonts are rebuilt into a
    collection named ``ttc_name``, and the intermediate font files are
    deleted.

    Args:
        ttc_name: Name of the collection.  It is used both as the input given
            to ``ttc_utils.ttcfile_extract`` and as the output given to
            ``ttc_utils.ttcfile_build``.
        out_dir: Directory that receives the extracted fonts and, since the
            rebuild runs with it as the working directory, the output.
    """
    extracted = ttc_utils.ttcfile_extract(ttc_name, out_dir)

    # The extracted names are used as paths relative to out_dir, so all
    # remaining work happens with out_dir as the working directory.
    with tool_utils.temp_chdir(out_dir):
        for otf_name in extracted:
            logging.info('Subsetting %s...', otf_name)
            remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)

        ttc_utils.ttcfile_build(ttc_name, extracted)

        # Only the rebuilt collection is kept.
        for leftover in extracted:
            os.remove(leftover)
def remove_codepoints_from_ttc(ttc_path, out_dir):
    """Strip EXCLUDED_CODEPOINTS from every font of a TTC and save a copy.

    Args:
        ttc_path: ``pathlib.Path`` of the source collection.
        out_dir: ``pathlib.Path`` of the directory to write to.  The output
            keeps the file name of ``ttc_path``.
    """
    logging.info('Loading %s', ttc_path)
    collection = ttLib.ttCollection.TTCollection(ttc_path)

    logging.info('Subsetting %d fonts in the collection', len(collection))
    for member in collection:
        font_data.delete_from_cmap(member, EXCLUDED_CODEPOINTS)

    destination = out_dir / ttc_path.name
    logging.info('Saving to %s', destination)
    collection.save(destination)

    # Report how much the file changed in size.
    size_before = ttc_path.stat().st_size
    size_after = destination.stat().st_size
    logging.info('Size: %d --> %d, delta=%d',
                 size_before,
                 size_after,
                 size_after - size_before)
def main():
    """Parse the command line and subset every collection in TTC_NAMES.

    Logging is only configured when ``-v`` is given: one ``-v`` selects INFO,
    two or more select DEBUG.  The output directory is created if missing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'input', default='.', nargs='?',
        help='directory containing the source .ttc files '
             '(default: current directory)')
    parser.add_argument(
        '-o', '--output', default='subsetted',
        help='directory the subsetted .ttc files are written to; created '
             'if it does not exist (default: %(default)s)')
    parser.add_argument(
        '--use-ttc-utils', action='store_true',
        help='extract, subset and rebuild each collection through '
             'nototools.ttc_utils instead of fontTools TTCollection; in '
             'this mode the collections are looked up by bare file name')
    parser.add_argument(
        '-v', '--verbose', action='count',
        help='log progress at INFO level; repeat (-vv) for DEBUG')
    args = parser.parse_args()

    # args.verbose is None when -v is absent; logging then stays unconfigured.
    if args.verbose:
        level = logging.DEBUG if args.verbose > 1 else logging.INFO
        logging.basicConfig(level=level)

    in_dir = Path(args.input)
    out_dir = Path(args.output)
    out_dir.mkdir(parents=True, exist_ok=True)

    for ttc_name in TTC_NAMES:
        if args.use_ttc_utils:
            # NOTE(review): only the bare file name is passed here, so the
            # `input` directory is not applied in this mode.  Presumably the
            # file is resolved against the current working directory; confirm
            # against ttc_utils.ttcfile_extract before changing.
            remove_codepoints_from_ttc_using_ttc_utils(ttc_name, out_dir)
        else:
            remove_codepoints_from_ttc(in_dir / ttc_name, out_dir)


if __name__ == "__main__":
    main()
|