Loading tools/stringslint/stringslint.py +83 −16 Original line number Diff line number Diff line Loading @@ -20,11 +20,22 @@ a previous strings file, if provided. Usage: stringslint.py strings.xml Usage: stringslint.py strings.xml old_strings.xml In general: * Errors signal issues that must be fixed before submitting, and are only used when there are no false-positives. * Warnings signal issues that might need to be fixed, but need manual inspection due to risk of false-positives. * Info signal issues that should be fixed to match best-practices, such as providing comments to aid translation. """ import re, sys import re, sys, codecs import lxml.etree as ET reload(sys) sys.setdefaultencoding('utf8') BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): Loading @@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): warnings = None def warn(tag, msg, actual, expected): def warn(tag, msg, actual, expected, color=YELLOW): global warnings key = "%s:%d" % (tag.attrib["name"], hash(msg)) value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True), value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True), tag.sourceline, tag.attrib["name"], format(reset=True), Loading @@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected): format(reset=True)) warnings[key] = value def error(tag, msg, actual, expected): warn(tag, msg, actual, expected, RED) def info(tag, msg, actual, expected): warn(tag, msg, actual, expected, CYAN) # Escaping logic borrowed from https://stackoverflow.com/a/24519338 ESCAPE_SEQUENCE_RE = re.compile(r''' ( \\U........ # 8-digit hex escapes | \\u.... # 4-digit hex escapes | \\x.. # 2-digit hex escapes | \\[0-7]{1,3} # Octal escapes | \\N\{[^}]+\} # Unicode characters by name | \\[\\'"abfnrtv] # Single-character escapes )''', re.UNICODE | re.VERBOSE) def decode_escapes(s): def decode_match(match): return codecs.decode(match.group(0), 'unicode-escape') s = re.sub(r"\n\s*", " ", s) s = ESCAPE_SEQUENCE_RE.sub(decode_match, s) s = re.sub(r"%(\d+\$)?[a-z]", "____", s) s = re.sub(r"\^\d+", "____", s) s = re.sub(r"<br/?>", "\n", s) s = re.sub(r"</?[a-z]+>", "", s) return s def sample_iter(tag): if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib: yield tag.attrib["example"] elif tag.text: yield decode_escapes(tag.text) for e in tag: for v in sample_iter(e): yield v if e.tail: yield decode_escapes(e.tail) def lint(path): global warnings warnings = {} Loading @@ -80,35 +131,45 @@ def lint(path): comment = last_comment last_comment = None # Prepare string for analysis text = "".join(child.itertext()) sample = "".join(sample_iter(child)).strip().strip("'\"") # Validate comment if comment is None: warn(child, "Missing string comment to aid translation", info(child, "Missing string comment to aid translation", None, None) continue if "do not translate" in comment.text.lower(): continue if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false": continue if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None: warn(child, "Missing CHAR LIMIT to aid translation", limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) if limit is None: info(child, "Missing CHAR LIMIT to aid translation", repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->") elif re.match("\d+", limit.group(1)): limit = int(limit.group(1)) if len(sample) > limit: warn(child, "Expanded string length is larger than CHAR LIMIT", sample, None) # Look for common mistakes/substitutions text = "".join(child.itertext()).strip() if "'" in text: warn(child, "Turned quotation mark glyphs are more polished", error(child, "Turned quotation mark glyphs are more polished", text, "This doesn\u2019t need to \u2018happen\u2019 today") if '"' in text and not text.startswith('"') and text.endswith('"'): warn(child, "Turned quotation mark glyphs are more polished", error(child, "Turned quotation mark glyphs are more polished", text, "This needs to \u201chappen\u201d today") if "..." in text: warn(child, "Ellipsis glyph is more polished", error(child, "Ellipsis glyph is more polished", text, "Loading\u2026") if "wi-fi" in text.lower(): warn(child, "Non-breaking glyph is more polished", error(child, "Non-breaking glyph is more polished", text, "Wi\u2011Fi") if "wifi" in text.lower(): warn(child, "Using non-standard spelling", error(child, "Using non-standard spelling", text, "Wi\u2011Fi") if re.search("\d-\d", text): warn(child, "Ranges should use en dash glyph", Loading @@ -119,11 +180,17 @@ def lint(path): if ". " in text: warn(child, "Only use single space between sentences", text, "First idea. Second idea.") if re.match(r"^[A-Z\s]{5,}$", text): warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym", text, "Refresh data") if " phone " in text and "product" not in child.attrib: warn(child, "Strings mentioning phones should have variants for tablets", text, None) # When more than one substitution, require indexes if len(re.findall("%[^%]", text)) > 1: if len(re.findall("%[^\d]", text)) > 0: warn(child, "Substitutions must be indexed", error(child, "Substitutions must be indexed", text, "Add %1$s to %2$s") # Require xliff substitutions Loading @@ -132,15 +199,15 @@ def lint(path): if gc.tail and re.search("%[^%]", gc.tail): badsub = True if re.match("{.*xliff.*}g", gc.tag): if "id" not in gc.attrib: warn(child, "Substitutions must define id attribute", error(child, "Substitutions must define id attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") if "example" not in gc.attrib: warn(child, "Substitutions must define example attribute", error(child, "Substitutions must define example attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") else: if gc.text and re.search("%[^%]", gc.text): badsub = True if badsub: warn(child, "Substitutions must be inside xliff tags", error(child, "Substitutions must be inside xliff tags", text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") return warnings Loading Loading
tools/stringslint/stringslint.py +83 −16 Original line number Diff line number Diff line Loading @@ -20,11 +20,22 @@ a previous strings file, if provided. Usage: stringslint.py strings.xml Usage: stringslint.py strings.xml old_strings.xml In general: * Errors signal issues that must be fixed before submitting, and are only used when there are no false-positives. * Warnings signal issues that might need to be fixed, but need manual inspection due to risk of false-positives. * Info signal issues that should be fixed to match best-practices, such as providing comments to aid translation. """ import re, sys import re, sys, codecs import lxml.etree as ET reload(sys) sys.setdefaultencoding('utf8') BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): Loading @@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): warnings = None def warn(tag, msg, actual, expected): def warn(tag, msg, actual, expected, color=YELLOW): global warnings key = "%s:%d" % (tag.attrib["name"], hash(msg)) value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True), value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True), tag.sourceline, tag.attrib["name"], format(reset=True), Loading @@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected): format(reset=True)) warnings[key] = value def error(tag, msg, actual, expected): warn(tag, msg, actual, expected, RED) def info(tag, msg, actual, expected): warn(tag, msg, actual, expected, CYAN) # Escaping logic borrowed from https://stackoverflow.com/a/24519338 ESCAPE_SEQUENCE_RE = re.compile(r''' ( \\U........ # 8-digit hex escapes | \\u.... # 4-digit hex escapes | \\x.. # 2-digit hex escapes | \\[0-7]{1,3} # Octal escapes | \\N\{[^}]+\} # Unicode characters by name | \\[\\'"abfnrtv] # Single-character escapes )''', re.UNICODE | re.VERBOSE) def decode_escapes(s): def decode_match(match): return codecs.decode(match.group(0), 'unicode-escape') s = re.sub(r"\n\s*", " ", s) s = ESCAPE_SEQUENCE_RE.sub(decode_match, s) s = re.sub(r"%(\d+\$)?[a-z]", "____", s) s = re.sub(r"\^\d+", "____", s) s = re.sub(r"<br/?>", "\n", s) s = re.sub(r"</?[a-z]+>", "", s) return s def sample_iter(tag): if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib: yield tag.attrib["example"] elif tag.text: yield decode_escapes(tag.text) for e in tag: for v in sample_iter(e): yield v if e.tail: yield decode_escapes(e.tail) def lint(path): global warnings warnings = {} Loading @@ -80,35 +131,45 @@ def lint(path): comment = last_comment last_comment = None # Prepare string for analysis text = "".join(child.itertext()) sample = "".join(sample_iter(child)).strip().strip("'\"") # Validate comment if comment is None: warn(child, "Missing string comment to aid translation", info(child, "Missing string comment to aid translation", None, None) continue if "do not translate" in comment.text.lower(): continue if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false": continue if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None: warn(child, "Missing CHAR LIMIT to aid translation", limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) if limit is None: info(child, "Missing CHAR LIMIT to aid translation", repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->") elif re.match("\d+", limit.group(1)): limit = int(limit.group(1)) if len(sample) > limit: warn(child, "Expanded string length is larger than CHAR LIMIT", sample, None) # Look for common mistakes/substitutions text = "".join(child.itertext()).strip() if "'" in text: warn(child, "Turned quotation mark glyphs are more polished", error(child, "Turned quotation mark glyphs are more polished", text, "This doesn\u2019t need to \u2018happen\u2019 today") if '"' in text and not text.startswith('"') and text.endswith('"'): warn(child, "Turned quotation mark glyphs are more polished", error(child, "Turned quotation mark glyphs are more polished", text, "This needs to \u201chappen\u201d today") if "..." in text: warn(child, "Ellipsis glyph is more polished", error(child, "Ellipsis glyph is more polished", text, "Loading\u2026") if "wi-fi" in text.lower(): warn(child, "Non-breaking glyph is more polished", error(child, "Non-breaking glyph is more polished", text, "Wi\u2011Fi") if "wifi" in text.lower(): warn(child, "Using non-standard spelling", error(child, "Using non-standard spelling", text, "Wi\u2011Fi") if re.search("\d-\d", text): warn(child, "Ranges should use en dash glyph", Loading @@ -119,11 +180,17 @@ def lint(path): if ". " in text: warn(child, "Only use single space between sentences", text, "First idea. Second idea.") if re.match(r"^[A-Z\s]{5,}$", text): warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym", text, "Refresh data") if " phone " in text and "product" not in child.attrib: warn(child, "Strings mentioning phones should have variants for tablets", text, None) # When more than one substitution, require indexes if len(re.findall("%[^%]", text)) > 1: if len(re.findall("%[^\d]", text)) > 0: warn(child, "Substitutions must be indexed", error(child, "Substitutions must be indexed", text, "Add %1$s to %2$s") # Require xliff substitutions Loading @@ -132,15 +199,15 @@ def lint(path): if gc.tail and re.search("%[^%]", gc.tail): badsub = True if re.match("{.*xliff.*}g", gc.tag): if "id" not in gc.attrib: warn(child, "Substitutions must define id attribute", error(child, "Substitutions must define id attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") if "example" not in gc.attrib: warn(child, "Substitutions must define example attribute", error(child, "Substitutions must define example attribute", None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") else: if gc.text and re.search("%[^%]", gc.text): badsub = True if badsub: warn(child, "Substitutions must be inside xliff tags", error(child, "Substitutions must be inside xliff tags", text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") return warnings Loading