summaryrefslogtreecommitdiff
path: root/tools/stringslint/stringslint.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/stringslint/stringslint.py')
-rw-r--r--tools/stringslint/stringslint.py99
1 files changed, 83 insertions, 16 deletions
diff --git a/tools/stringslint/stringslint.py b/tools/stringslint/stringslint.py
index d637ff346c82..03c0b9af66a0 100644
--- a/tools/stringslint/stringslint.py
+++ b/tools/stringslint/stringslint.py
@@ -20,11 +20,22 @@ a previous strings file, if provided.
Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml
+
+In general:
+* Errors signal issues that must be fixed before submitting, and are only
+ used when there are no false-positives.
+* Warnings signal issues that might need to be fixed, but need manual
+ inspection due to risk of false-positives.
+* Info signal issues that should be fixed to match best-practices, such
+ as providing comments to aid translation.
"""
-import re, sys
+import re, sys, codecs
import lxml.etree as ET
+reload(sys)
+sys.setdefaultencoding('utf8')
+
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
@@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
warnings = None
-def warn(tag, msg, actual, expected):
+def warn(tag, msg, actual, expected, color=YELLOW):
global warnings
key = "%s:%d" % (tag.attrib["name"], hash(msg))
- value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True),
+ value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
tag.sourceline,
tag.attrib["name"],
format(reset=True),
@@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected):
format(reset=True))
warnings[key] = value
+
+def error(tag, msg, actual, expected):
+ warn(tag, msg, actual, expected, RED)
+
+def info(tag, msg, actual, expected):
+ warn(tag, msg, actual, expected, CYAN)
+
+# Escaping logic borrowed from https://stackoverflow.com/a/24519338
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'"abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+def decode_escapes(s):
+ def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode-escape')
+
+ s = re.sub(r"\n\s*", " ", s)
+ s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+ s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
+ s = re.sub(r"\^\d+", "____", s)
+ s = re.sub(r"<br/?>", "\n", s)
+ s = re.sub(r"</?[a-z]+>", "", s)
+ return s
+
+def sample_iter(tag):
+ if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
+ yield tag.attrib["example"]
+ elif tag.text:
+ yield decode_escapes(tag.text)
+ for e in tag:
+ for v in sample_iter(e):
+ yield v
+ if e.tail:
+ yield decode_escapes(e.tail)
+
def lint(path):
global warnings
warnings = {}
@@ -80,35 +131,45 @@ def lint(path):
comment = last_comment
last_comment = None
+ # Prepare string for analysis
+ text = "".join(child.itertext())
+ sample = "".join(sample_iter(child)).strip().strip("'\"")
+
# Validate comment
if comment is None:
- warn(child, "Missing string comment to aid translation",
+ info(child, "Missing string comment to aid translation",
None, None)
continue
if "do not translate" in comment.text.lower():
continue
if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
continue
- if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None:
- warn(child, "Missing CHAR LIMIT to aid translation",
+
+ limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
+ if limit is None:
+ info(child, "Missing CHAR LIMIT to aid translation",
repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
+ elif re.match("\d+", limit.group(1)):
+ limit = int(limit.group(1))
+ if len(sample) > limit:
+ warn(child, "Expanded string length is larger than CHAR LIMIT",
+ sample, None)
# Look for common mistakes/substitutions
- text = "".join(child.itertext()).strip()
if "'" in text:
- warn(child, "Turned quotation mark glyphs are more polished",
+ error(child, "Turned quotation mark glyphs are more polished",
text, "This doesn\u2019t need to \u2018happen\u2019 today")
if '"' in text and not text.startswith('"') and text.endswith('"'):
- warn(child, "Turned quotation mark glyphs are more polished",
+ error(child, "Turned quotation mark glyphs are more polished",
text, "This needs to \u201chappen\u201d today")
if "..." in text:
- warn(child, "Ellipsis glyph is more polished",
+ error(child, "Ellipsis glyph is more polished",
text, "Loading\u2026")
if "wi-fi" in text.lower():
- warn(child, "Non-breaking glyph is more polished",
+ error(child, "Non-breaking glyph is more polished",
text, "Wi\u2011Fi")
if "wifi" in text.lower():
- warn(child, "Using non-standard spelling",
+ error(child, "Using non-standard spelling",
text, "Wi\u2011Fi")
if re.search("\d-\d", text):
warn(child, "Ranges should use en dash glyph",
@@ -119,11 +180,17 @@ def lint(path):
if ". " in text:
warn(child, "Only use single space between sentences",
text, "First idea. Second idea.")
+ if re.match(r"^[A-Z\s]{5,}$", text):
+ warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
+ text, "Refresh data")
+ if " phone " in text and "product" not in child.attrib:
+ warn(child, "Strings mentioning phones should have variants for tablets",
+ text, None)
# When more than one substitution, require indexes
if len(re.findall("%[^%]", text)) > 1:
if len(re.findall("%[^\d]", text)) > 0:
- warn(child, "Substitutions must be indexed",
+ error(child, "Substitutions must be indexed",
text, "Add %1$s to %2$s")
# Require xliff substitutions
@@ -132,15 +199,15 @@ def lint(path):
if gc.tail and re.search("%[^%]", gc.tail): badsub = True
if re.match("{.*xliff.*}g", gc.tag):
if "id" not in gc.attrib:
- warn(child, "Substitutions must define id attribute",
+ error(child, "Substitutions must define id attribute",
None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
if "example" not in gc.attrib:
- warn(child, "Substitutions must define example attribute",
+ error(child, "Substitutions must define example attribute",
None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
else:
if gc.text and re.search("%[^%]", gc.text): badsub = True
if badsub:
- warn(child, "Substitutions must be inside xliff tags",
+ error(child, "Substitutions must be inside xliff tags",
text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
return warnings