Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit aa0255da authored by Adam Lesinski's avatar Adam Lesinski
Browse files

AAPT2: Add tool to fix positional argument issues

Change-Id: Ie8f733b70d561667cd25b38fb4c09d0837ef9b7f
parent 1ce859ee
Loading
Loading
Loading
Loading
+0 −0

Empty file added.

+132 −0
Original line number Diff line number Diff line
#!/usr/bin/env python

import os
import os.path
import sys
import tempfile
import xml.parsers.expat

"""
Scans each resource file in res/values/ looking for duplicates.
All but the last occurrence of resource definition are removed.
This creates no semantic changes, the resulting APK when built
should contain the same definition.
Looks for duplicate resource definitions and removes all but the last one.
"""

class Duplicate:
    """Plain record describing where one resource definition sits in the XML.

    ``start`` and ``end`` are (zero-based line, column) tuples; ``depth`` is the
    element nesting depth at which the definition was opened.
    """
    def __init__(self, name, product, depth, start, end):
        self.name, self.product = name, product
        self.depth = depth
        self.start, self.end = start, end

class ResourceDefinitionLocator:
    """expat callback object that collects every resource definition it sees.

    After parsing, ``resource_definitions`` maps "<type>/<name>:<product>" to
    the list of Duplicate records found for that key, in document order.
    """
    def __init__(self, parser):
        self.resource_definitions = {}
        self._parser = parser
        self._depth = 0
        self._current_resource = None

    def start_element(self, tag_name, attrs):
        """Open a new Duplicate record for a resource element at depth 2."""
        self._depth += 1
        if self._depth != 2:
            return
        # These depth-2 tags are not recorded as resource definitions.
        if tag_name in ["public", "java-symbol", "eat-comment", "skip"]:
            return

        product = attrs.get("product", "")
        # <item> carries its resource type in an attribute; every other tag
        # is named after the type itself.
        type_name = attrs["type"] if tag_name == "item" else tag_name
        resource_name = "{0}/{1}".format(type_name, attrs["name"])

        opened_at = (self._parser.CurrentLineNumber - 1,
                     self._parser.CurrentColumnNumber)
        self._current_resource = Duplicate(
            resource_name, product, self._depth, opened_at, None)

    def end_element(self, tag_name):
        """Close the open record (if this tag ends it) and file it by key."""
        resource = self._current_resource
        if resource and self._depth == resource.depth:
            # The parser position is the start of the closing tag, so skip
            # over the tag name plus the "</" and ">" characters (3 in total).
            # NOTE(review): an empty-element tag (<foo/>) has no separate
            # closing tag, so this arithmetic may not apply there -- confirm.
            closed_line = self._parser.CurrentLineNumber - 1
            closed_col = self._parser.CurrentColumnNumber + 3 + len(tag_name)
            resource.end = (closed_line, closed_col)

            key_name = "{0}:{1}".format(resource.name, resource.product)
            self.resource_definitions.setdefault(key_name, []).append(resource)
            self._current_resource = None
        self._depth -= 1
import os.path
import xml.parsers.expat

def remove_duplicates(xml_path):
    """Reads the input file and generates an output file with any duplicate
    resources removed, keeping the last occurring definition and removing
    the others. The output is written to a temporary and then renamed
    to the original file name.
    """
    input = ""
    with open(xml_path) as fin:
        input = fin.read()
class DuplicateRemover:
    def matches(self, file_path):
        dirname, basename = os.path.split(file_path)
        dirname = os.path.split(dirname)[1]
        return dirname.startswith("values") and basename.endswith(".xml")

    def consume(self, xml_path, input):
        parser = xml.parsers.expat.ParserCreate("utf-8")
        parser.returns_unicode = True
        tracker = ResourceDefinitionLocator(parser)
@@ -103,8 +37,7 @@ def remove_duplicates(xml_path):
        output_lines = []
        current_line = ""
        for definition in duplicates:
        print "{0}:{1}:{2}: removing duplicate resource '{3}'".format(
                xml_path, definition.start[0] + 1, definition.start[1], definition.name)
            print "{0}: removing duplicate resource '{3}'".format( xml_path, definition.name)

            if last_line_no < definition.start[0]:
                # The next definition is on a new line, so write what we have
@@ -121,7 +54,8 @@ def remove_duplicates(xml_path):
                output_lines.append(input_lines[line_to_copy])

            # Add to the existing line we're building, by including the prefix of this line
        # and skipping the lines and characters until the end of this duplicate definition.
            # and skipping the lines and characters until the end of this duplicate
            # definition.
            last_line_no = definition.start[0]
            current_line += input_lines[last_line_no][last_col_no:definition.start[1]]
            last_line_no = definition.end[0]
@@ -138,44 +72,61 @@ def remove_duplicates(xml_path):
            output_lines.append(input_lines[line_to_copy])

        if len(duplicates) > 0:
        print "{0}: writing deduped copy...".format(xml_path)

        # Write the lines to a temporary file.
        dirname, basename = os.path.split(xml_path)
        temp_name = ""
        with tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False) as temp:
            temp_name = temp.name
            for line in output_lines:
                temp.write(line.encode('utf-8'))

        # Now rename that file to the original so we have an atomic write that is consistent.
        os.rename(temp.name, xml_path)

def enumerate_files(res_path):
    """Enumerates all files in the resource directory that are XML files and
       within a values-* subdirectory. These types of files end up compiled
       in the resources.arsc table of an APK.
            print "deduped {0}".format(xml_path)
            return "".join(output_lines).encode("utf-8")
        return input

class Duplicate:
    """Plain record holding the location of one resource definition.

    Stores the resource name, its product qualifier, the element depth at
    which it was opened, and its start/end positions.
    """
    def __init__(self, name, product, depth, start, end):
        self.name, self.product = name, product
        self.depth = depth
        self.start, self.end = start, end

class ResourceDefinitionLocator:
    """Callback class for xml.parsers.expat which records resource definitions and their
    locations.
    """
    values_directories = os.listdir(res_path)
    values_directories = filter(lambda f: f.startswith('values'), values_directories)
    values_directories = map(lambda f: os.path.join(res_path, f), values_directories)
    all_files = []
    for dir in values_directories:
        files = os.listdir(dir)
        files = filter(lambda f: f.endswith('.xml'), files)
        files = map(lambda f: os.path.join(dir, f), files)
        all_files += files
    return all_files

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print >> sys.stderr, "please specify a path to a resource directory"
        sys.exit(1)

    res_path = os.path.abspath(sys.argv[1])
    print "looking in {0} ...".format(res_path)

    for f in enumerate_files(res_path):
        print "checking {0} ...".format(f)
        remove_duplicates(f)
    def __init__(self, parser):
        """Set up empty bookkeeping around the given expat parser.

        The parser is kept so the element callbacks can read its current
        line and column numbers.
        """
        # Maps "<type>/<name>:<product>" to the list of recorded definitions.
        self.resource_definitions = {}
        self._parser = parser
        # Current element nesting depth; 0 means no element is open.
        self._depth = 0
        # The definition currently being recorded, or None.
        self._current_resource = None

    def start_element(self, tag_name, attrs):
        """Start-element callback: begin recording a definition at depth 2.

        Elements at any other depth, and the listed non-resource tags, only
        affect the depth counter. Raises KeyError if a recorded element has no
        "name" attribute (or an <item> has no "type" attribute).
        """
        self._depth += 1
        if self._depth == 2 and tag_name not in ["public", "java-symbol", "eat-comment", "skip"]:
            resource_name = None
            # The product qualifier is optional and defaults to "".
            product = ""
            try:
                product = attrs["product"]
            except KeyError:
                pass

            # <item> carries its resource type in the "type" attribute; for
            # every other tag the tag name itself is used as the type.
            if tag_name == "item":
                resource_name = "{0}/{1}".format(attrs["type"], attrs["name"])
            else:
                resource_name = "{0}/{1}".format(tag_name, attrs["name"])
            # Positions are stored with a zero-based line number; the end
            # position is filled in by end_element.
            self._current_resource = Duplicate(
                    resource_name,
                    product,
                    self._depth,
                    (self._parser.CurrentLineNumber - 1, self._parser.CurrentColumnNumber),
                    None)

    def end_element(self, tag_name):
        """End-element callback: finish the open definition and store it.

        The record is appended to resource_definitions under the key
        "<name>:<product>" when the closing tag is at the depth where the
        record was opened. The depth counter is always decremented.
        """
        if self._current_resource and self._depth == self._current_resource.depth:
            # Record the end position of the element, which is the length of the name
            # plus the </> symbols (len("</>") == 3).
            # NOTE(review): an empty-element tag (<foo/>) has no separate
            # closing tag, so this arithmetic may not apply there -- confirm.
            self._current_resource.end = (self._parser.CurrentLineNumber - 1,
                    self._parser.CurrentColumnNumber + 3 + len(tag_name))
            key_name = "{0}:{1}".format(self._current_resource.name,
                    self._current_resource.product)
            # Append to the existing list for this key, creating it on first use.
            try:
                self.resource_definitions[key_name] += [self._current_resource]
            except KeyError:
                self.resource_definitions[key_name] = [self._current_resource]
            self._current_resource = None
        self._depth -= 1
+77 −0
Original line number Diff line number Diff line
#!/usr/bin/env python

"""
Looks for strings with multiple substitution arguments (%d, %s, etc)
and replaces them with positional arguments (%1$d, %2$s).
"""

import os.path
import re
import xml.parsers.expat

class PositionalArgumentFixer:
    """Consumer that rewrites substitution arguments into positional form.

    Every argument reported by SubstitutionArgumentLocator has its leading
    '%' replaced by '%N$', where N is the argument's number.
    """
    def matches(self, file_path):
        """Return True for *.xml files whose parent directory starts with "values"."""
        parent_dir = os.path.basename(os.path.dirname(file_path))
        file_name = os.path.basename(file_path)
        return parent_dir.startswith("values") and file_name.endswith(".xml")

    def consume(self, xml_path, input):
        """Return input with located arguments made positional.

        xml_path is used only for the progress message. When nothing needs
        fixing, the input object is returned unchanged.
        """
        parser = xml.parsers.expat.ParserCreate("utf-8")
        locator = SubstitutionArgumentLocator(parser)
        parser.returns_unicode = True
        parser.StartElementHandler = locator.start_element
        parser.EndElementHandler = locator.end_element
        parser.CharacterDataHandler = locator.character_data
        parser.Parse(input)

        if not locator.arguments:
            return input

        pieces = []
        cursor = 0
        for arg in locator.arguments:
            # Copy everything up to the '%', then emit the positional prefix
            # in its place; the conversion that followed the '%' is picked up
            # by the next slice.
            pieces.append(input[cursor:arg.start])
            pieces.append("%{0}$".format(arg.number))
            cursor = arg.start + 1
        pieces.append(input[cursor:])
        print("fixed {0}".format(xml_path))
        return "".join(pieces)

class Argument:
    """Location of one substitution argument inside the parsed input.

    ``start`` is the byte offset of the argument's leading '%' in the input;
    ``number`` is the argument's 1-based position within its <string>.
    """
    def __init__(self, start, number):
        self.start = start
        self.number = number

class SubstitutionArgumentLocator:
    """Callback class for xml.parsers.expat which records locations of
    substitution arguments in strings when there is more than one of
    them in a single <string> tag (and they are not positional).

    After parsing, ``arguments`` holds one Argument per occurrence, in
    document order.
    """

    # Non-positional format specifier: '%', at most one flag character, an
    # optional width, then the conversion character. Compiled once here
    # rather than on every character-data callback.
    _ARGUMENT_PATTERN = re.compile(r"%[-#+ 0,(]?\d*[bBhHsScCdoxXeEfgGaAtTn]")

    def __init__(self, parser):
        self.arguments = []
        self._parser = parser
        self._depth = 0
        self._within_string = False
        self._current_arguments = []
        self._next_number = 1

    def start_element(self, tag_name, attrs):
        """Start scanning when a depth-2 <string> element opens."""
        self._depth += 1
        # NOTE(review): "translateable" is the spelling this check has always
        # used; confirm whether "translatable" was intended before changing it.
        if self._depth == 2 and tag_name == "string" and "translateable" not in attrs:
            self._within_string = True

    def character_data(self, data):
        """Record every substitution argument found in this chunk of text."""
        if not self._within_string:
            return
        chunk_start = self._parser.CurrentByteIndex
        for m in self._ARGUMENT_PATTERN.finditer(data):
            # CurrentByteIndex counts bytes of the input, while m.start()
            # counts characters of the decoded chunk. Convert the text before
            # the match to its UTF-8 byte length so multi-byte characters
            # before the '%' do not shift the offset.
            # Assumes the parser input is UTF-8 -- TODO confirm for callers
            # that create the parser with another encoding.
            preceding = data[:m.start()]
            if not isinstance(preceding, bytes):
                preceding = preceding.encode("utf-8")
            self._current_arguments.append(
                Argument(chunk_start + len(preceding), self._next_number))
            self._next_number += 1

    def end_element(self, tag_name):
        """On leaving a <string>, keep its arguments only if there are several."""
        if self._within_string and self._depth == 2:
            if len(self._current_arguments) > 1:
                self.arguments += self._current_arguments
            self._current_arguments = []
            self._within_string = False
            self._next_number = 1
        self._depth -= 1
+63 −0
Original line number Diff line number Diff line
#!/usr/bin/env python

"""
Scans each resource file in res/ applying various transformations
to fix invalid resource files.
"""

import os
import os.path
import sys
import tempfile

from consumers.duplicates import DuplicateRemover
from consumers.positional_arguments import PositionalArgumentFixer

def do_it(res_path, consumers):
    """Run every applicable consumer over each file under res_path.

    A file is read once, passed through its matching consumers in order, and
    written back only if the final contents differ from what was read.
    """
    for file_path in enumerate_files(res_path):
        eligible_consumers = [c for c in consumers if c.matches(file_path)]
        if not eligible_consumers:
            continue

        print("checking {0} ...".format(file_path))

        original_contents = read_contents(file_path)
        contents = original_contents
        for consumer in eligible_consumers:
            contents = consumer.consume(file_path, contents)

        if original_contents != contents:
            write_contents(file_path, contents)

def enumerate_files(res_path):
    """Yield the path of every entry one level below res_path.

    Each subdirectory of res_path is listed and its entries are yielded as
    paths joined onto res_path. Entries of res_path that are not directories
    are skipped, because listing them would raise OSError.
    """
    for entry in os.listdir(res_path):
        subdir = os.path.join(res_path, entry)
        if not os.path.isdir(subdir):
            continue
        for name in os.listdir(subdir):
            yield os.path.join(subdir, name)

def read_contents(file_path):
    """Return the raw bytes of file_path, without decoding.

    The file is opened in binary mode so that no newline translation or text
    decoding takes place; write_contents writes the bytes back in binary too.
    """
    with open(file_path, "rb") as fin:
        return fin.read()

def write_contents(file_path, contents):
    """Writes the bytes in contents to file_path by first writing to a temporary, then
    renaming the temporary to file_path, ensuring a consistent write.

    The temporary is created next to file_path. If writing or renaming fails,
    the temporary is removed and the exception is re-raised, so no partial
    file is left behind.
    """
    dirname, basename = os.path.split(file_path)
    temp = tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False)
    try:
        with temp:
            temp.write(contents)
        os.rename(temp.name, file_path)
    except Exception:
        # Check before removing so a failed cleanup cannot replace the
        # exception that is about to be re-raised.
        if os.path.exists(temp.name):
            os.remove(temp.name)
        raise

if __name__ == '__main__':
    # Exactly one thing is required on the command line: the res/ directory.
    if not sys.argv[1:]:
        sys.stderr.write("please specify a path to a resource directory\n")
        sys.exit(1)

    res_path = os.path.abspath(sys.argv[1])
    print("looking in {0} ...".format(res_path))

    # Consumers run in this order on every file they match.
    do_it(res_path, [DuplicateRemover(), PositionalArgumentFixer()])