Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da2432bb authored by TreeHugger Robot's avatar TreeHugger Robot Committed by Android (Google) Code Review
Browse files

Merge "More string lint checks."

parents 7bb9fcbc 47c79243
Loading
Loading
Loading
Loading
+83 −16
Original line number Diff line number Diff line
@@ -20,11 +20,22 @@ a previous strings file, if provided.

Usage: stringslint.py strings.xml
Usage: stringslint.py strings.xml old_strings.xml

In general:
* Errors signal issues that must be fixed before submitting, and are only
  used when there are no false-positives.
* Warnings signal issues that might need to be fixed, but need manual
  inspection due to risk of false-positives.
* Info signal issues that should be fixed to match best-practices, such
  as providing comments to aid translation.
"""

import re, sys
import re, sys, codecs
import lxml.etree as ET

reload(sys)
sys.setdefaultencoding('utf8')

BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
@@ -43,10 +54,10 @@ def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):

warnings = None

def warn(tag, msg, actual, expected):
def warn(tag, msg, actual, expected, color=YELLOW):
    global warnings
    key = "%s:%d" % (tag.attrib["name"], hash(msg))
    value = "%sLine %d: '%s':%s %s" % (format(fg=YELLOW, bold=True),
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       tag.attrib["name"],
                                       format(reset=True),
@@ -59,6 +70,46 @@ def warn(tag, msg, actual, expected):
                                                               format(reset=True))
    warnings[key] = value


def error(tag, msg, actual, expected):
    """Record an issue for ``tag`` through ``warn``, highlighted in RED.

    Takes the same ``tag``, ``msg``, ``actual`` and ``expected`` arguments
    as ``warn`` and forwards them unchanged.
    """
    warn(tag, msg, actual, expected, color=RED)

def info(tag, msg, actual, expected):
    """Record an issue for ``tag`` through ``warn``, highlighted in CYAN.

    Takes the same ``tag``, ``msg``, ``actual`` and ``expected`` arguments
    as ``warn`` and forwards them unchanged.
    """
    warn(tag, msg, actual, expected, color=CYAN)

# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# The hex alternatives only match real hex digits so that stray text such as
# "\user" or "\xZZ" is not handed to the unicode-escape decoder.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}   # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}   # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'"abfnrtv]     # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)

def decode_escapes(s):
    """Return an approximation of how the raw string text ``s`` is displayed.

    Steps, in order:
      1. A newline plus any following whitespace collapses to one space.
      2. Backslash escape sequences are decoded; a sequence that cannot be
         decoded (for example an unknown ``\\N{...}`` name) is left as-is
         instead of raising.
      3. ``%``-style substitutions (optionally indexed, e.g. ``%1$s``) and
         ``^1``-style placeholders are replaced by ``____``.
      4. ``<br>`` / ``<br/>`` become a newline and other simple lowercase
         tags are removed.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeDecodeError:
            # Malformed escape: keep the original text rather than crash.
            return escape

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s

def sample_iter(tag):
    """Yield, in document order, the text fragments forming a sample of ``tag``.

    A non-comment node whose tag matches the xliff ``g`` pattern and that
    carries an ``example`` attribute contributes that example verbatim;
    otherwise the node's own text (if any) is passed through
    ``decode_escapes``. Each child is then visited recursively, followed by
    the child's decoded tail text when present.
    """
    is_comment = isinstance(tag, ET._Comment)
    # The comment check must come first: tag.tag is only matched against the
    # pattern for non-comment nodes.
    if (not is_comment
            and re.match("{.*xliff.*}g", tag.tag)
            and "example" in tag.attrib):
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)

    for child in tag:
        for fragment in sample_iter(child):
            yield fragment
        tail = child.tail
        if tail:
            yield decode_escapes(tail)

def lint(path):
    global warnings
    warnings = {}
@@ -80,35 +131,45 @@ def lint(path):
            comment = last_comment
            last_comment = None

            # Prepare string for analysis
            text = "".join(child.itertext())
            sample = "".join(sample_iter(child)).strip().strip("'\"")

            # Validate comment
            if comment is None:
                warn(child, "Missing string comment to aid translation",
                info(child, "Missing string comment to aid translation",
                     None, None)
                continue
            if "do not translate" in comment.text.lower():
                continue
            if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
                continue
            if re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) is None:
                warn(child, "Missing CHAR LIMIT to aid translation",

            limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
            if limit is None:
                info(child, "Missing CHAR LIMIT to aid translation",
                     repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
            elif re.match("\d+", limit.group(1)):
                limit = int(limit.group(1))
                if len(sample) > limit:
                    warn(child, "Expanded string length is larger than CHAR LIMIT",
                        sample, None)

            # Look for common mistakes/substitutions
            text = "".join(child.itertext()).strip()
            if "'" in text:
                warn(child, "Turned quotation mark glyphs are more polished",
                error(child, "Turned quotation mark glyphs are more polished",
                     text, "This doesn\u2019t need to \u2018happen\u2019 today")
            if '"' in text and not text.startswith('"') and text.endswith('"'):
                warn(child, "Turned quotation mark glyphs are more polished",
                error(child, "Turned quotation mark glyphs are more polished",
                     text, "This needs to \u201chappen\u201d today")
            if "..." in text:
                warn(child, "Ellipsis glyph is more polished",
                error(child, "Ellipsis glyph is more polished",
                     text, "Loading\u2026")
            if "wi-fi" in text.lower():
                warn(child, "Non-breaking glyph is more polished",
                error(child, "Non-breaking glyph is more polished",
                     text, "Wi\u2011Fi")
            if "wifi" in text.lower():
                warn(child, "Using non-standard spelling",
                error(child, "Using non-standard spelling",
                     text, "Wi\u2011Fi")
            if re.search("\d-\d", text):
                warn(child, "Ranges should use en dash glyph",
@@ -119,11 +180,17 @@ def lint(path):
            if ".  " in text:
                warn(child, "Only use single space between sentences",
                     text, "First idea. Second idea.")
            if re.match(r"^[A-Z\s]{5,}$", text):
                warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                     text, "Refresh data")
            if " phone " in text and "product" not in child.attrib:
                warn(child, "Strings mentioning phones should have variants for tablets",
                     text, None)

            # When more than one substitution, require indexes
            if len(re.findall("%[^%]", text)) > 1:
                if len(re.findall("%[^\d]", text)) > 0:
                    warn(child, "Substitutions must be indexed",
                    error(child, "Substitutions must be indexed",
                         text, "Add %1$s to %2$s")

            # Require xliff substitutions
@@ -132,15 +199,15 @@ def lint(path):
                if gc.tail and re.search("%[^%]", gc.tail): badsub = True
                if re.match("{.*xliff.*}g", gc.tag):
                    if "id" not in gc.attrib:
                        warn(child, "Substitutions must define id attribute",
                        error(child, "Substitutions must define id attribute",
                             None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                    if "example" not in gc.attrib:
                        warn(child, "Substitutions must define example attribute",
                        error(child, "Substitutions must define example attribute",
                             None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                else:
                    if gc.text and re.search("%[^%]", gc.text): badsub = True
                if badsub:
                    warn(child, "Substitutions must be inside xliff tags",
                    error(child, "Substitutions must be inside xliff tags",
                         text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")

    return warnings