Merge "More string lint checks." (da2432bb) · Commits · e / os / android_frameworks_base

tools/stringslint/stringslint.py

+83 −16

Original line number	Diff line number	Diff line
		@@ -20,11 +20,22 @@ a previous strings file, if provided.

		Usage: stringslint.py strings.xml
		Usage: stringslint.py strings.xml old_strings.xml

		In general:
		* Errors signal issues that must be fixed before submitting, and are only
		used when there are no false-positives.
		* Warnings signal issues that might need to be fixed, but need manual
		inspection due to risk of false-positives.
		* Info signal issues that should be fixed to match best-practices, such
		as providing comments to aid translation.
		"""

		import re, sys
		import re, sys, codecs
		import lxml.etree as ET

		reload(sys)
		sys.setdefaultencoding('utf8')

		BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

		def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
		@@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):

		warnings = None

		def warn(tag, msg, actual, expected):
		def warn(tag, msg, actual, expected, color=YELLOW):
		global warnings
		key = "%s:%d" % (tag.attrib["name"], hash(msg))
		value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True),
		value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
		tag.sourceline,
		tag.attrib["name"],
		format(reset=True),
		@@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected):
		format(reset=True))
		warnings[key] = value


		def error(tag, msg, actual, expected):
		warn(tag, msg, actual, expected, RED)

		def info(tag, msg, actual, expected):
		warn(tag, msg, actual, expected, CYAN)

		# Escaping logic borrowed from https://stackoverflow.com/a/24519338
		ESCAPE_SEQUENCE_RE = re.compile(r'''
		( \\U........ # 8-digit hex escapes
		\| \\u.... # 4-digit hex escapes
		\| \\x.. # 2-digit hex escapes
		\| \\[0-7]{1,3} # Octal escapes
		\| \\N\{[^}]+\} # Unicode characters by name
		\| \\[\\'"abfnrtv] # Single-character escapes
		)''', re.UNICODE \| re.VERBOSE)

		def decode_escapes(s):
		def decode_match(match):
		return codecs.decode(match.group(0), 'unicode-escape')

		s = re.sub(r"\n\s*", " ", s)
		s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
		s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
		s = re.sub(r"\^\d+", "____", s)
		s = re.sub(r"<br/?>", "\n", s)
		s = re.sub(r"</?[a-z]+>", "", s)
		return s

		def sample_iter(tag):
		if not isinstance(tag, ET._Comment) and re.match("{.xliff.}g", tag.tag) and "example" in tag.attrib:
		yield tag.attrib["example"]
		elif tag.text:
		yield decode_escapes(tag.text)
		for e in tag:
		for v in sample_iter(e):
		yield v
		if e.tail:
		yield decode_escapes(e.tail)

		def lint(path):
		global warnings
		warnings = {}
		@@ -80,35 +131,45 @@ def lint(path):
		comment = last_comment
		last_comment = None

		# Prepare string for analysis
		text = "".join(child.itertext())
		sample = "".join(sample_iter(child)).strip().strip("'\"")

		# Validate comment
		if comment is None:
		warn(child, "Missing string comment to aid translation",
		info(child, "Missing string comment to aid translation",
		None, None)
		continue
		if "do not translate" in comment.text.lower():
		continue
		if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
		continue
		if re.search("CHAR[ _-]LIMIT=(\d+\|NONE\|none)", comment.text) is None:
		warn(child, "Missing CHAR LIMIT to aid translation",

		limit = re.search("CHAR[ _-]LIMIT=(\d+\|NONE\|none)", comment.text)
		if limit is None:
		info(child, "Missing CHAR LIMIT to aid translation",
		repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
		elif re.match("\d+", limit.group(1)):
		limit = int(limit.group(1))
		if len(sample) > limit:
		warn(child, "Expanded string length is larger than CHAR LIMIT",
		sample, None)

		# Look for common mistakes/substitutions
		text = "".join(child.itertext()).strip()
		if "'" in text:
		warn(child, "Turned quotation mark glyphs are more polished",
		error(child, "Turned quotation mark glyphs are more polished",
		text, "This doesn\u2019t need to \u2018happen\u2019 today")
		if '"' in text and not text.startswith('"') and text.endswith('"'):
		warn(child, "Turned quotation mark glyphs are more polished",
		error(child, "Turned quotation mark glyphs are more polished",
		text, "This needs to \u201chappen\u201d today")
		if "..." in text:
		warn(child, "Ellipsis glyph is more polished",
		error(child, "Ellipsis glyph is more polished",
		text, "Loading\u2026")
		if "wi-fi" in text.lower():
		warn(child, "Non-breaking glyph is more polished",
		error(child, "Non-breaking glyph is more polished",
		text, "Wi\u2011Fi")
		if "wifi" in text.lower():
		warn(child, "Using non-standard spelling",
		error(child, "Using non-standard spelling",
		text, "Wi\u2011Fi")
		if re.search("\d-\d", text):
		warn(child, "Ranges should use en dash glyph",
		@@ -119,11 +180,17 @@ def lint(path):
		if ". " in text:
		warn(child, "Only use single space between sentences",
		text, "First idea. Second idea.")
		if re.match(r"^[A-Z\s]{5,}$", text):
		warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
		text, "Refresh data")
		if " phone " in text and "product" not in child.attrib:
		warn(child, "Strings mentioning phones should have variants for tablets",
		text, None)

		# When more than one substitution, require indexes
		if len(re.findall("%[^%]", text)) > 1:
		if len(re.findall("%[^\d]", text)) > 0:
		warn(child, "Substitutions must be indexed",
		error(child, "Substitutions must be indexed",
		text, "Add %1$s to %2$s")

		# Require xliff substitutions
		@@ -132,15 +199,15 @@ def lint(path):
		if gc.tail and re.search("%[^%]", gc.tail): badsub = True
		if re.match("{.xliff.}g", gc.tag):
		if "id" not in gc.attrib:
		warn(child, "Substitutions must define id attribute",
		error(child, "Substitutions must define id attribute",
		None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
		if "example" not in gc.attrib:
		warn(child, "Substitutions must define example attribute",
		error(child, "Substitutions must define example attribute",
		None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
		else:
		if gc.text and re.search("%[^%]", gc.text): badsub = True
		if badsub:
		warn(child, "Substitutions must be inside xliff tags",
		error(child, "Substitutions must be inside xliff tags",
		text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")

		return warnings