summaryrefslogtreecommitdiff
path: root/tools/stringslint
diff options
context:
space:
mode:
authorJeff Sharkey <jsharkey@android.com>2018-06-05 15:55:45 -0600
committerJeff Sharkey <jsharkey@android.com>2018-06-06 09:51:01 -0600
commit47c792435ecf87f4ff293207ee0054e2968db979 (patch)
treefaee775326ec4fdf4ec8feb7ae3484f9b76085d8 /tools/stringslint
parent7f6641b890e73d972548ca13b4871dadfed980c0 (diff)
More string lint checks.
Expand strings into samples that are representative of what'll be shown in UI, and warn if they're longer than defined CHAR LIMIT. Guide all-caps strings towards android:textAllCaps if actions. Warn when strings mention "phone" without defining product-specific variants. Bug: 76097999 Test: manual Change-Id: I08046ecf531b2d616cb357639e804b73d6a12bea
Diffstat (limited to 'tools/stringslint')
-rw-r--r--tools/stringslint/stringslint.py99
1 files changed, 83 insertions, 16 deletions
diff --git a/tools/stringslint/stringslint.py b/tools/stringslint/stringslint.py
index d637ff346c82..03c0b9af66a0 100644
--- a/tools/stringslint/stringslint.py
+++ b/tools/stringslint/stringslint.py
@@ -20,11 +20,22 @@ a previous strings file, if provided.
Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml
+
+In general:
+* Errors signal issues that must be fixed before submitting, and are only
+ used when there are no false-positives.
+* Warnings signal issues that might need to be fixed, but need manual
+ inspection due to risk of false-positives.
+* Info signal issues that should be fixed to match best-practices, such
+ as providing comments to aid translation.
"""
-import re, sys
+import re, sys, codecs
import lxml.etree as ET
+reload(sys)
+sys.setdefaultencoding('utf8')
+
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
@@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
warnings = None
-def warn(tag, msg, actual, expected):
+def warn(tag, msg, actual, expected, color=YELLOW):
global warnings
key = "%s:%d" % (tag.attrib["name"], hash(msg))
- value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True),
+ value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
tag.sourceline,
tag.attrib["name"],
format(reset=True),
@@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected):
format(reset=True))
warnings[key] = value
+
+def error(tag, msg, actual, expected):
+ warn(tag, msg, actual, expected, RED)
+
+def info(tag, msg, actual, expected):
+ warn(tag, msg, actual, expected, CYAN)
+
+# Escaping logic borrowed from https://stackoverflow.com/a/24519338
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'"abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+def decode_escapes(s):
+ def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode-escape')
+
+ s = re.sub(r"\n\s*", " ", s)
+ s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+ s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
+ s = re.sub(r"\^\d+", "____", s)
+ s = re.sub(r"<br/?>", "\n", s)
+ s = re.sub(r"</?[a-z]+>", "", s)
+ return s
+
+def sample_iter(tag):
+ if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
+ yield tag.attrib["example"]
+ elif tag.text:
+ yield decode_escapes(tag.text)
+ for e in tag:
+ for v in sample_iter(e):
+ yield v
+ if e.tail:
+ yield decode_escapes(e.tail)
+
def lint(path):
global warnings
warnings = {}
@@ -80,35 +131,45 @@ def lint(path):
comment = last_comment
last_comment = None
+ # Prepare string for analysis
+ text = "".join(child.itertext())
+ sample = "".join(sample_iter(child)).strip().strip("'\"")
+
# Validate comment
if comment is None:
- warn(child, "Missing string comment to aid translation",
+ info(child, "Missing string comment to aid translation",
None, None)
continue
if "do not translate" in comment.text.lower():
continue
if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
continue
- if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None:
- warn(child, "Missing CHAR LIMIT to aid translation",
+
+ limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
+ if limit is None:
+ info(child, "Missing CHAR LIMIT to aid translation",
repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
+ elif re.match("\d+", limit.group(1)):
+ limit = int(limit.group(1))
+ if len(sample) > limit:
+ warn(child, "Expanded string length is larger than CHAR LIMIT",
+ sample, None)
# Look for common mistakes/substitutions
- text = "".join(child.itertext()).strip()
if "'" in text:
- warn(child, "Turned quotation mark glyphs are more polished",
+ error(child, "Turned quotation mark glyphs are more polished",
text, "This doesn\u2019t need to \u2018happen\u2019 today")
if '"' in text and not text.startswith('"') and text.endswith('"'):
- warn(child, "Turned quotation mark glyphs are more polished",
+ error(child, "Turned quotation mark glyphs are more polished",
text, "This needs to \u201chappen\u201d today")
if "..." in text:
- warn(child, "Ellipsis glyph is more polished",
+ error(child, "Ellipsis glyph is more polished",
text, "Loading\u2026")
if "wi-fi" in text.lower():
- warn(child, "Non-breaking glyph is more polished",
+ error(child, "Non-breaking glyph is more polished",
text, "Wi\u2011Fi")
if "wifi" in text.lower():
- warn(child, "Using non-standard spelling",
+ error(child, "Using non-standard spelling",
text, "Wi\u2011Fi")
if re.search("\d-\d", text):
warn(child, "Ranges should use en dash glyph",
@@ -119,11 +180,17 @@ def lint(path):
if ". " in text:
warn(child, "Only use single space between sentences",
text, "First idea. Second idea.")
+ if re.match(r"^[A-Z\s]{5,}$", text):
+ warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
+ text, "Refresh data")
+ if " phone " in text and "product" not in child.attrib:
+ warn(child, "Strings mentioning phones should have variants for tablets",
+ text, None)
# When more than one substitution, require indexes
if len(re.findall("%[^%]", text)) > 1:
if len(re.findall("%[^\d]", text)) > 0:
- warn(child, "Substitutions must be indexed",
+ error(child, "Substitutions must be indexed",
text, "Add %1$s to %2$s")
# Require xliff substitutions
@@ -132,15 +199,15 @@ def lint(path):
if gc.tail and re.search("%[^%]", gc.tail): badsub = True
if re.match("{.*xliff.*}g", gc.tag):
if "id" not in gc.attrib:
- warn(child, "Substitutions must define id attribute",
+ error(child, "Substitutions must define id attribute",
None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
if "example" not in gc.attrib:
- warn(child, "Substitutions must define example attribute",
+ error(child, "Substitutions must define example attribute",
None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
else:
if gc.text and re.search("%[^%]", gc.text): badsub = True
if badsub:
- warn(child, "Substitutions must be inside xliff tags",
+ error(child, "Substitutions must be inside xliff tags",
text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
return warnings