diff options
author | Jeff Sharkey <jsharkey@android.com> | 2018-06-05 15:55:45 -0600 |
---|---|---|
committer | Jeff Sharkey <jsharkey@android.com> | 2018-06-06 09:51:01 -0600 |
commit | 47c792435ecf87f4ff293207ee0054e2968db979 (patch) | |
tree | faee775326ec4fdf4ec8feb7ae3484f9b76085d8 /tools/stringslint | |
parent | 7f6641b890e73d972548ca13b4871dadfed980c0 (diff) |
More string lint checks.
Expand strings into samples that are representative of what'll be
shown in UI, and warn if they're longer than defined CHAR LIMIT.
Guide all-caps strings towards android:textAllCaps if actions.
Warn when strings mention "phone" without defining product-specific
variants.
Bug: 76097999
Test: manual
Change-Id: I08046ecf531b2d616cb357639e804b73d6a12bea
Diffstat (limited to 'tools/stringslint')
-rw-r--r-- | tools/stringslint/stringslint.py | 99 |
1 files changed, 83 insertions, 16 deletions
diff --git a/tools/stringslint/stringslint.py b/tools/stringslint/stringslint.py index d637ff346c82..03c0b9af66a0 100644 --- a/tools/stringslint/stringslint.py +++ b/tools/stringslint/stringslint.py @@ -20,11 +20,22 @@ a previous strings file, if provided. Usage: stringslint.py strings.xml Usage: stringslint.py strings.xml old_strings.xml + +In general: +* Errors signal issues that must be fixed before submitting, and are only + used when there are no false-positives. +* Warnings signal issues that might need to be fixed, but need manual + inspection due to risk of false-positives. +* Info signal issues that should be fixed to match best-practices, such + as providing comments to aid translation. """ -import re, sys +import re, sys, codecs import lxml.etree as ET +reload(sys) +sys.setdefaultencoding('utf8') + BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): @@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): warnings = None -def warn(tag, msg, actual, expected): +def warn(tag, msg, actual, expected, color=YELLOW): global warnings key = "%s:%d" % (tag.attrib["name"], hash(msg)) - value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True), + value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True), tag.sourceline, tag.attrib["name"], format(reset=True), @@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected): format(reset=True)) warnings[key] = value + +def error(tag, msg, actual, expected): + warn(tag, msg, actual, expected, RED) + +def info(tag, msg, actual, expected): + warn(tag, msg, actual, expected, CYAN) + +# Escaping logic borrowed from https://stackoverflow.com/a/24519338 +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + s = re.sub(r"\n\s*", " ", s) + s = ESCAPE_SEQUENCE_RE.sub(decode_match, s) + s = re.sub(r"%(\d+\$)?[a-z]", "____", s) + s = re.sub(r"\^\d+", "____", s) + s = re.sub(r"<br/?>", "\n", s) + s = re.sub(r"</?[a-z]+>", "", s) + return s + +def sample_iter(tag): + if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib: + yield tag.attrib["example"] + elif tag.text: + yield decode_escapes(tag.text) + for e in tag: + for v in sample_iter(e): + yield v + if e.tail: + yield decode_escapes(e.tail) + def lint(path): global warnings warnings = {} @@ -80,35 +131,45 @@ def lint(path): comment = last_comment last_comment = None + # Prepare string for analysis + text = "".join(child.itertext()) + sample = "".join(sample_iter(child)).strip().strip("'\"") + # Validate comment if comment is None: - warn(child, "Missing string comment to aid translation", + info(child, "Missing string comment to aid translation", None, None) continue if "do not translate" in comment.text.lower(): continue if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false": continue - if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None: - warn(child, "Missing CHAR LIMIT to aid translation", + + limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) + if limit is None: + info(child, "Missing CHAR LIMIT to aid translation", repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->") + elif re.match("\d+", limit.group(1)): + limit = int(limit.group(1)) + if len(sample) > limit: + warn(child, "Expanded string length is larger than CHAR LIMIT", + sample, None) # Look for common mistakes/substitutions - text = "".join(child.itertext()).strip() if "'" in text: - warn(child, "Turned quotation mark glyphs are more polished", + error(child, "Turned quotation mark glyphs are more polished", text, "This doesn\u2019t need to \u2018happen\u2019 today") if '"' in text and not text.startswith('"') and text.endswith('"'): - warn(child, "Turned quotation mark glyphs are more polished", + error(child, "Turned quotation mark glyphs are more polished", text, "This needs to \u201chappen\u201d today") if "..." in text: - warn(child, "Ellipsis glyph is more polished", + error(child, "Ellipsis glyph is more polished", text, "Loading\u2026") if "wi-fi" in text.lower(): - warn(child, "Non-breaking glyph is more polished", + error(child, "Non-breaking glyph is more polished", text, "Wi\u2011Fi") if "wifi" in text.lower(): - warn(child, "Using non-standard spelling", + error(child, "Using non-standard spelling", text, "Wi\u2011Fi") if re.search("\d-\d", text): warn(child, "Ranges should use en dash glyph", @@ -119,11 +180,17 @@ def lint(path): if ". " in text: warn(child, "Only use single space between sentences", text, "First idea. Second idea.") + if re.match(r"^[A-Z\s]{5,}$", text): + warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym", + text, "Refresh data") + if " phone " in text and "product" not in child.attrib: + warn(child, "Strings mentioning phones should have variants for tablets", + text, None) # When more than one substitution, require indexes if len(re.findall("%[^%]", text)) > 1: if len(re.findall("%[^\d]", text)) > 0: - warn(child, "Substitutions must be indexed", + error(child, "Substitutions must be indexed", text, "Add %1$s to %2$s") # Require xliff substitutions @@ -132,15 +199,15 @@ def lint(path): if gc.tail and re.search("%[^%]", gc.tail): badsub = True if re.match("{.*xliff.*}g", gc.tag): if "id" not in gc.attrib: - warn(child, "Substitutions must define id attribute", + error(child, "Substitutions must define id attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") if "example" not in gc.attrib: - warn(child, "Substitutions must define example attribute", + error(child, "Substitutions must define example attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") else: if gc.text and re.search("%[^%]", gc.text): badsub = True if badsub: - warn(child, "Substitutions must be inside xliff tags", + error(child, "Substitutions must be inside xliff tags", text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") return warnings |