1 files changed, 83 insertions, 16 deletions
diff --git a/tools/stringslint/stringslint.py b/tools/stringslint/stringslint.py
index d637ff346c82..03c0b9af66a0 100644
--- a/tools/stringslint/stringslint.py
+++ b/tools/stringslint/stringslint.py
@@ -20,11 +20,22 @@ a previous strings file, if provided.
 
 Usage: stringslint.py strings.xml
 Usage: stringslint.py strings.xml old_strings.xml
+
+In general:
+* Errors signal issues that must be fixed before submitting, and are only
+  used when there is no risk of false positives.
+* Warnings signal issues that might need to be fixed, but need manual
+  inspection due to the risk of false positives.
+* Info messages signal issues that should be fixed to match best practices,
+  such as providing comments to aid translation.
 """
 
-import re, sys
+import re, sys, codecs
 import lxml.etree as ET
 
+reload(sys)  # Python 2 only: makes sys.setdefaultencoding reachable again
+sys.setdefaultencoding('utf8')  # implicit str/unicode conversions use UTF-8
+
 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
 
 def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
@@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
 
 warnings = None
 
-def warn(tag, msg, actual, expected):
+def warn(tag, msg, actual, expected, color=YELLOW):
     global warnings
     key = "%s:%d" % (tag.attrib["name"], hash(msg))
-    value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True),
+    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                        tag.sourceline,
                                        tag.attrib["name"],
                                        format(reset=True),
@@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected):
                                                                format(reset=True))
     warnings[key] = value
 
+
+def error(tag, msg, actual, expected):  # same record as warn(), shown in red
+    warn(tag, msg, actual, expected, color=RED)
+
+def info(tag, msg, actual, expected):  # same record as warn(), shown in cyan
+    warn(tag, msg, actual, expected, color=CYAN)
+
+# Matches a single backslash escape; logic borrowed from https://stackoverflow.com/a/24519338
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\'"abfnrtv]  # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+def decode_escapes(s):
+    """Collapse newlines, decode backslash escapes, replace placeholders/tags."""
+    # A newline plus any whitespace that follows it becomes a single space.
+    out = re.sub(r"\n\s*", " ", s)
+    out = ESCAPE_SEQUENCE_RE.sub(
+        lambda m: codecs.decode(m.group(0), 'unicode-escape'), out)
+    # %s / %1$d / ^1 placeholders -> "____"; <br> -> newline; bare tags dropped.
+    for pattern, repl in ((r"%(\d+\$)?[a-z]", "____"), (r"\^\d+", "____"),
+                          (r"<br/?>", "\n"), (r"</?[a-z]+>", "")):
+        out = re.sub(pattern, repl, out)
+    return out
+
+def sample_iter(tag):  # yields the text pieces of tag and its descendants
+    if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
+        yield tag.attrib["example"]  # prefer the example over the placeholder
+    elif tag.text and not isinstance(tag, ET._Comment):  # skip comment bodies
+        yield decode_escapes(tag.text)
+    for e in tag:
+        for v in sample_iter(e):
+            yield v
+        if e.tail:
+            yield decode_escapes(e.tail)
+
 def lint(path):
     global warnings
     warnings = {}
@@ -80,35 +131,45 @@ def lint(path):
             comment = last_comment
             last_comment = None
 
+            # Prepare string for analysis
+            text = "".join(child.itertext())
+            sample = "".join(sample_iter(child)).strip().strip("'\"")
+
             # Validate comment
             if comment is None:
-                warn(child, "Missing string comment to aid translation",
+                info(child, "Missing string comment to aid translation",
                      None, None)
                 continue
             if "do not translate" in comment.text.lower():
                 continue
             if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
                 continue
-            if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None:
-                warn(child, "Missing CHAR LIMIT to aid translation",
+
+            limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
+            if limit is None:
+                info(child, "Missing CHAR LIMIT to aid translation",
                      repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
+            elif re.match("\d+", limit.group(1)):
+                limit = int(limit.group(1))
+                if len(sample) > limit:
+                    warn(child, "Expanded string length is larger than CHAR LIMIT",
+                        sample, None)
 
             # Look for common mistakes/substitutions
-            text = "".join(child.itertext()).strip()
             if "'" in text:
-                warn(child, "Turned quotation mark glyphs are more polished",
+                error(child, "Turned quotation mark glyphs are more polished",
                      text, "This doesn\u2019t need to \u2018happen\u2019 today")
             if '"' in text and not text.startswith('"') and text.endswith('"'):
-                warn(child, "Turned quotation mark glyphs are more polished",
+                error(child, "Turned quotation mark glyphs are more polished",
                      text, "This needs to \u201chappen\u201d today")
             if "..." in text:
-                warn(child, "Ellipsis glyph is more polished",
+                error(child, "Ellipsis glyph is more polished",
                      text, "Loading\u2026")
             if "wi-fi" in text.lower():
-                warn(child, "Non-breaking glyph is more polished",
+                error(child, "Non-breaking glyph is more polished",
                      text, "Wi\u2011Fi")
             if "wifi" in text.lower():
-                warn(child, "Using non-standard spelling",
+                error(child, "Using non-standard spelling",
                      text, "Wi\u2011Fi")
             if re.search("\d-\d", text):
                 warn(child, "Ranges should use en dash glyph",
@@ -119,11 +180,17 @@ def lint(path):
             if ".  " in text:
                 warn(child, "Only use single space between sentences",
                      text, "First idea. Second idea.")
+            if re.match(r"^[A-Z\s]{5,}$", text):
+                warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
+                     text, "Refresh data")
+            if " phone " in text and "product" not in child.attrib:
+                warn(child, "Strings mentioning phones should have variants for tablets",
+                     text, None)
 
             # When more than one substitution, require indexes
             if len(re.findall("%[^%]", text)) > 1:
                 if len(re.findall("%[^\d]", text)) > 0:
-                    warn(child, "Substitutions must be indexed",
+                    error(child, "Substitutions must be indexed",
                          text, "Add %1$s to %2$s")
 
             # Require xliff substitutions
@@ -132,15 +199,15 @@ def lint(path):
                 if gc.tail and re.search("%[^%]", gc.tail): badsub = True
                 if re.match("{.*xliff.*}g", gc.tag):
                     if "id" not in gc.attrib:
-                        warn(child, "Substitutions must define id attribute",
+                        error(child, "Substitutions must define id attribute",
                              None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                     if "example" not in gc.attrib:
-                        warn(child, "Substitutions must define example attribute",
+                        error(child, "Substitutions must define example attribute",
                              None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                 else:
                     if gc.text and re.search("%[^%]", gc.text): badsub = True
                 if badsub:
-                    warn(child, "Substitutions must be inside xliff tags",
+                    error(child, "Substitutions must be inside xliff tags",
                          text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
 
     return warnings