Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ba541c7e authored by Treehugger Robot's avatar Treehugger Robot Committed by Gerrit Code Review
Browse files

Merge "Rewrite the build system benchmarks to be much simpler and not require bazel." into main

parents 2538c865 88ede358
Loading
Loading
Loading
Loading

tools/perf/benchmarks

0 → 100755
+550 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
if __name__ == "__main__":
    sys.dont_write_bytecode = True

import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import shutil
import subprocess
import time

import pretty
import utils


class FatalError(Exception):
    def __init__(self):
        pass


class OptionsError(Exception):
    def __init__(self, message):
        self.message = message


@dataclasses.dataclass(frozen=True)
class Lunch:
    "Lunch combination"

    target_product: str
    "TARGET_PRODUCT"

    target_release: str
    "TARGET_RELEASE"

    target_build_variant: str
    "TARGET_BUILD_VARIANT"

    def ToDict(self):
        return {
            "TARGET_PRODUCT": self.target_product,
            "TARGET_RELEASE": self.target_release,
            "TARGET_BUILD_VARIANT": self.target_build_variant,
        }

    def Combine(self):
        return f"{self.target_product}-{self.target_release}-{self.target_build_variant}"


@dataclasses.dataclass(frozen=True)
class Change:
    "A change that we make to the tree, and how to undo it"
    label: str
    "String to print in the log when the change is made"

    change: callable
    "Function to change the source tree"

    undo: callable
    "Function to revert the source tree to its previous condition in the most minimal way possible."


@dataclasses.dataclass(frozen=True)
class Benchmark:
    "Something we measure"

    id: str
    "Short ID for the benchmark, for the command line"

    title: str
    "Title for reports"

    change: Change
    "Source tree modification for the benchmark that will be measured"

    modules: list[str]
    "Build modules to build on soong command line"

    preroll: int
    "Number of times to run the build command to stabilize"

    postroll: int
    "Number of times to run the build command after reverting the action to stabilize"


@dataclasses.dataclass(frozen=True)
class FileSnapshot:
    "Snapshot of a file's contents."

    filename: str
    "The file that was snapshottened"

    contents: str
    "The contents of the file"

    def write(self):
        "Write the contents back to the file"
        with open(self.filename, "w") as f:
            f.write(self.contents)


def Snapshot(filename):
    """Return a FileSnapshot with the file's current contents."""
    with open(filename) as f:
        contents = f.read()
    return FileSnapshot(filename, contents)


def Clean():
    """Remove the out directory."""
    def remove_out():
        if os.path.exists("out"):
            shutil.rmtree("out")
    return Change(label="Remove out", change=remove_out, undo=lambda: None)


def NoChange():
    """No change to the source tree."""
    return Change(label="No change", change=lambda: None, undo=lambda: None)


def Modify(filename, contents, before=None):
    """Create an action to modify `filename` by appending `contents` before the last instances
    of `before` in the file.

    Raises an error if `before` doesn't appear in the file.
    """
    orig = Snapshot(filename)
    if before:
        index = orig.contents.rfind(before)
        if index < 0:
            report_error(f"{filename}: Unable to find string '{before}' for modify operation.")
            raise FatalError()
    else:
        index = len(orig.contents)
    modified = FileSnapshot(filename, orig.contents[:index] + contents + orig.contents[index:])
    return Change(
            label="Modify " + filename,
            change=lambda: modified.write(),
            undo=lambda: orig.write()
        )


class BenchmarkReport():
    "Information about a run of the benchmark"

    lunch: Lunch
    "lunch combo"

    benchmark: Benchmark
    "The benchmark object."

    iteration: int
    "Which iteration of the benchmark"

    log_dir: str
    "Path the the log directory, relative to the root of the reports directory"

    preroll_duration_ns: [int]
    "Durations of the in nanoseconds."

    duration_ns: int
    "Duration of the measured portion of the benchmark in nanoseconds."

    postroll_duration_ns: [int]
    "Durations of the postrolls in nanoseconds."

    complete: bool
    "Whether the benchmark made it all the way through the postrolls."

    def __init__(self, lunch, benchmark, iteration, log_dir):
        self.lunch = lunch
        self.benchmark = benchmark
        self.iteration = iteration
        self.log_dir = log_dir
        self.preroll_duration_ns = []
        self.duration_ns = -1
        self.postroll_duration_ns = []
        self.complete = False

    def ToDict(self):
        return {
            "lunch": self.lunch.ToDict(),
            "id": self.benchmark.id,
            "title": self.benchmark.title,
            "modules": self.benchmark.modules,
            "change": self.benchmark.change.label,
            "iteration": self.iteration,
            "log_dir": self.log_dir,
            "preroll_duration_ns": self.preroll_duration_ns,
            "duration_ns": self.duration_ns,
            "postroll_duration_ns": self.postroll_duration_ns,
            "complete": self.complete,
        }

class Runner():
    """Runs the benchmarks."""

    def __init__(self, options):
        self._options = options
        self._reports = []
        self._complete = False

    def Run(self):
        """Run all of the user-selected benchmarks."""
        # Clean out the log dir or create it if necessary
        prepare_log_dir(self._options.LogDir())

        try:
            for lunch in self._options.Lunches():
                print(lunch)
                for benchmark in self._options.Benchmarks():
                    for iteration in range(self._options.Iterations()):
                        self._run_benchmark(lunch, benchmark, iteration)
            self._complete = True
        finally:
            self._write_summary()


    def _run_benchmark(self, lunch, benchmark, iteration):
        """Run a single benchmark."""
        benchmark_log_subdir = self._log_dir(lunch, benchmark, iteration)
        benchmark_log_dir = self._options.LogDir().joinpath(benchmark_log_subdir)

        sys.stderr.write(f"STARTING BENCHMARK: {benchmark.id}\n")
        sys.stderr.write(f"             lunch: {lunch.Combine()}\n")
        sys.stderr.write(f"         iteration: {iteration}\n")
        sys.stderr.write(f" benchmark_log_dir: {benchmark_log_dir}\n")

        report = BenchmarkReport(lunch, benchmark, iteration, benchmark_log_subdir)
        self._reports.append(report)

        # Preroll builds
        for i in range(benchmark.preroll):
            ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"pre_{i}"), benchmark.modules)
            report.preroll_duration_ns.append(ns)

        sys.stderr.write(f"PERFORMING CHANGE: {benchmark.change.label}\n")
        if not self._options.DryRun():
            benchmark.change.change()
        try:

            # Measured build
            ns = self._run_build(lunch, benchmark_log_dir.joinpath("measured"), benchmark.modules)
            report.duration_ns = ns

            # Postroll builds
            for i in range(benchmark.preroll):
                ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"post_{i}"),
                                     benchmark.modules)
                report.postroll_duration_ns.append(ns)

        finally:
            # Always undo, even if we crashed or the build failed and we stopped.
            sys.stderr.write(f"UNDOING CHANGE: {benchmark.change.label}\n")
            if not self._options.DryRun():
                benchmark.change.undo()

        self._write_summary()
        sys.stderr.write(f"FINISHED BENCHMARK: {benchmark.id}\n")

    def _log_dir(self, lunch, benchmark, iteration):
        """Construct the log directory fir a benchmark run."""
        path = f"{lunch.Combine()}/{benchmark.id}"
        # Zero pad to the correct length for correct alpha sorting
        path += ("/%0" + str(len(str(self._options.Iterations()))) + "d") % iteration
        return path

    def _run_build(self, lunch, build_log_dir, modules):
        """Builds the modules.  Saves interesting log files to log_dir.  Raises FatalError
        if the build fails.
        """
        sys.stderr.write(f"STARTING BUILD {modules}\n")

        before_ns = time.perf_counter_ns()
        if not self._options.DryRun():
            cmd = [
                "build/soong/soong_ui.bash",
                "--build-mode",
                "--all-modules",
                f"--dir={self._options.root}",
            ] + modules
            env = dict(os.environ)
            env["TARGET_PRODUCT"] = lunch.target_product
            env["TARGET_RELEASE"] = lunch.target_release
            env["TARGET_BUILD_VARIANT"] = lunch.target_build_variant
            returncode = subprocess.call(cmd, env=env)
            if returncode != 0:
                report_error(f"Build failed: {' '.join(cmd)}")
                raise FatalError()

        after_ns = time.perf_counter_ns()

        # TODO: Copy some log files.

        sys.stderr.write(f"FINISHED BUILD {modules}\n")

        return after_ns - before_ns

    def _write_summary(self):
        # Write the results, even if the build failed or we crashed, including
        # whether we finished all of the benchmarks.
        data = {
            "start_time": self._options.Timestamp().isoformat(),
            "branch": self._options.Branch(),
            "tag": self._options.Tag(),
            "benchmarks": [report.ToDict() for report in self._reports],
            "complete": self._complete,
        }
        with open(self._options.LogDir().joinpath("summary.json"), "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)


def benchmark_table(benchmarks):
    rows = [("ID", "DESCRIPTION", "REBUILD"),]
    rows += [(benchmark.id, benchmark.title, " ".join(benchmark.modules)) for benchmark in
             benchmarks]
    return rows


def prepare_log_dir(directory):
    if os.path.exists(directory):
        # If it exists and isn't a directory, fail.
        if not os.path.isdir(directory):
            report_error(f"Log directory already exists but isn't a directory: {directory}")
            raise FatalError()
        # Make sure the directory is empty. Do this rather than deleting it to handle
        # symlinks cleanly.
        for filename in os.listdir(directory):
            entry = os.path.join(directory, filename)
            if os.path.isdir(entry):
                shutil.rmtree(entry)
            else:
                os.unlink(entry)
    else:
        # Create it
        os.makedirs(directory)


class Options():
    def __init__(self):
        self._had_error = False

        # Wall time clock when we started
        self._timestamp = datetime.datetime.now(datetime.timezone.utc)

        # Move to the root of the tree right away. Everything must happen from there.
        self.root = utils.get_root()
        if not self.root:
            report_error("Unable to find root of tree from cwd.")
            raise FatalError()
        os.chdir(self.root)

        # Initialize the Benchmarks. Note that this pre-loads all of the files, etc.
        # Doing all that here forces us to fail fast if one of them can't load a required
        # file, at the cost of a small startup speed. Don't make this do something slow
        # like scan the whole tree.
        self._init_benchmarks()

        # Argument parsing
        epilog = f"""
benchmarks:
{pretty.FormatTable(benchmark_table(self._benchmarks), prefix="  ")}
"""

        parser = argparse.ArgumentParser(
                prog="benchmarks",
                allow_abbrev=False, # Don't let people write unsupportable scripts.
                formatter_class=argparse.RawDescriptionHelpFormatter,
                epilog=epilog,
                description="Run build system performance benchmarks.")
        self.parser = parser

        parser.add_argument("--log-dir",
                            help="Directory for logs. Default is $TOP/../benchmarks/.")
        parser.add_argument("--dated-logs", action="store_true",
                            help="Append timestamp to log dir.")
        parser.add_argument("-n", action="store_true", dest="dry_run",
                            help="Dry run. Don't run the build commands but do everything else.")
        parser.add_argument("--tag",
                            help="Variant of the run, for when there are multiple perf runs.")
        parser.add_argument("--lunch", nargs="*",
                            help="Lunch combos to test")
        parser.add_argument("--iterations", type=int, default=1,
                            help="Number of iterations of each test to run.")
        parser.add_argument("--branch", type=str,
                            help="Specify branch. Otherwise a guess will be made based on repo.")
        parser.add_argument("--benchmark", nargs="*", default=[b.id for b in self._benchmarks],
                            metavar="BENCHMARKS",
                            help="Benchmarks to run.  Default suite will be run if omitted.")

        self._args = parser.parse_args()

        self._branch = self._branch()
        self._log_dir = self._log_dir()
        self._lunches = self._lunches()

        # Validate the benchmark ids
        all_ids = [benchmark.id for benchmark in self._benchmarks]
        bad_ids = [id for id in self._args.benchmark if id not in all_ids]
        if bad_ids:
            for id in bad_ids:
                self._error(f"Invalid benchmark: {id}")

        if self._had_error:
            raise FatalError()

    def Timestamp(self):
        return self._timestamp

    def _branch(self):
        """Return the branch, either from the command line or by guessing from repo."""
        if self._args.branch:
            return self._args.branch
        try:
            branch = subprocess.check_output(f"cd {self.root}/.repo/manifests"
                        + " && git rev-parse --abbrev-ref --symbolic-full-name @{u}",
                    shell=True, encoding="utf-8")
            return branch.strip().split("/")[-1]
        except subprocess.CalledProcessError as ex:
            report_error("Can't get branch from .repo dir. Specify --branch argument")
            report_error(str(ex))
            raise FatalError()

    def Branch(self):
        return self._branch

    def _log_dir(self):
        "The log directory to use, based on the current options"
        if self._args.log_dir:
            d = pathlib.Path(self._args.log_dir).resolve().absolute()
        else:
            d = self.root.joinpath("..", utils.DEFAULT_REPORT_DIR)
        if self._args.dated_logs:
            d = d.joinpath(self._timestamp.strftime('%Y-%m-%d'))
        d = d.joinpath(self._branch)
        if self._args.tag:
            d = d.joinpath(self._args.tag)
        return d.resolve().absolute()

    def LogDir(self):
        return self._log_dir

    def Benchmarks(self):
        return [b for b in self._benchmarks if b.id in self._args.benchmark]

    def Tag(self):
        return self._args.tag

    def DryRun(self):
        return self._args.dry_run

    def _lunches(self):
        def parse_lunch(lunch):
            parts = lunch.split("-")
            if len(parts) != 3:
                raise OptionsError(f"Invalid lunch combo: {lunch}")
            return Lunch(parts[0], parts[1], parts[2])
        # If they gave lunch targets on the command line use that
        if self._args.lunch:
            result = []
            # Split into Lunch objects
            for lunch in self._args.lunch:
                try:
                    result.append(parse_lunch(lunch))
                except OptionsError as ex:
                    self._error(ex.message)
            return result
        # Use whats in the environment
        product = os.getenv("TARGET_PRODUCT")
        release = os.getenv("TARGET_RELEASE")
        variant = os.getenv("TARGET_BUILD_VARIANT")
        if (not product) or (not release) or (not variant):
            # If they didn't give us anything, fail rather than guessing. There's no good
            # default for AOSP.
            self._error("No lunch combo specified. Either pass --lunch argument or run lunch.")
            return []
        return [Lunch(product, release, variant),]

    def Lunches(self):
        return self._lunches

    def Iterations(self):
        return self._args.iterations

    def _init_benchmarks(self):
        """Initialize the list of benchmarks."""
        # Assumes that we've already chdired to the root of the tree.
        self._benchmarks = [
            Benchmark(id="full",
                title="Full build",
                change=Clean(),
                modules=["droid"],
                preroll=0,
                postroll=3
            ),
            Benchmark(id="nochange",
                title="No change",
                change=NoChange(),
                modules=["droid"],
                preroll=2,
                postroll=3
            ),
            Benchmark(id="modify_bp",
                title="Modify Android.bp",
                change=Modify("bionic/libc/Android.bp", "// Comment"),
                modules=["droid"],
                preroll=1,
                postroll=3
            ),
        ]

    def _error(self, message):
        report_error(message)
        self._had_error = True


def report_error(message):
    sys.stderr.write(f"error: {message}\n")


def main(argv):
    try:
        options = Options()
        runner = Runner(options)
        runner.Run()
    except FatalError:
        sys.stderr.write(f"FAILED\n")


if __name__ == "__main__":
    main(sys.argv)
+185 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
if __name__ == "__main__":
    sys.dont_write_bytecode = True

import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import statistics
import zoneinfo

import pretty
import utils

# TODO:
# - Flag if the last postroll build was more than 15 seconds or something. That's
#   an indicator that something is amiss.
# - Add a mode to print all of the values for multi-iteration runs
# - Add a flag to reorder the tags
# - Add a flag to reorder the headers in order to show grouping more clearly.


def FindSummaries(args):
    def find_summaries(directory):
        return [str(p.resolve()) for p in pathlib.Path(directory).glob("**/summary.json")]
    if not args:
        # If they didn't give an argument, use the default dir
        root = utils.get_root()
        if not root:
            return []
        return find_summaries(root.joinpath("..", utils.DEFAULT_REPORT_DIR))
    results = list()
    for arg in args:
        if os.path.isfile(arg):
            # If it's a file add that
            results.append(arg)
        elif os.path.isdir(arg):
            # If it's a directory, find all of the files there
            results += find_summaries(arg)
        else:
            sys.stderr.write(f"Invalid summary argument: {arg}\n")
            sys.exit(1)
    return sorted(list(results))


def LoadSummary(filename):
    with open(filename) as f:
        return json.load(f)

# Columns:
#   Date
#   Branch
#   Tag
#   --
#   Lunch
# Rows:
#   Benchmark

@dataclasses.dataclass(frozen=True)
class Key():
    pass

class Column():
    def __init__(self):
        pass

def lunch_str(d):
    "Convert a lunch dict to a string"
    return f"{d['TARGET_PRODUCT']}-{d['TARGET_RELEASE']}-{d['TARGET_BUILD_VARIANT']}"

def group_by(l, key):
    "Return a list of tuples, grouped by key, sorted by key"
    result = {}
    for item in l:
        result.setdefault(key(item), []).append(item)
    return [(k, v) for k, v in result.items()]


class Table:
    def __init__(self):
        self._data = {}
        self._rows = []
        self._cols = []

    def Set(self, column_key, row_key, data):
        self._data[(column_key, row_key)] = data
        if not column_key in self._cols:
            self._cols.append(column_key)
        if not row_key in self._rows:
            self._rows.append(row_key)

    def Write(self, out):
        table = []
        # Expand the column items
        for row in zip(*self._cols):
            if row.count(row[0]) == len(row):
                continue
            table.append([""] + [col for col in row])
        if table:
            table.append(pretty.SEPARATOR)
        # Populate the data
        for row in self._rows:
            table.append([str(row)] + [str(self._data.get((col, row), "")) for col in self._cols])
        out.write(pretty.FormatTable(table))


def format_duration_sec(ns):
    "Format a duration in ns to second precision"
    sec = round(ns / 1000000000)
    h, sec = divmod(sec, 60*60)
    m, sec = divmod(sec, 60)
    result = ""
    if h > 0:
        result += f"{h:2d}h "
    if h > 0 or m > 0:
        result += f"{m:2d}m "
    return result + f"{sec:2d}s"

def main(argv):
    parser = argparse.ArgumentParser(
            prog="format_benchmarks",
            allow_abbrev=False, # Don't let people write unsupportable scripts.
            description="Print analysis tables for benchmarks")

    parser.add_argument("summaries", nargs="*",
                        help="A summary.json file or a directory in which to look for summaries.")

    args = parser.parse_args()

    # Load the summaries
    summaries = [(s, LoadSummary(s)) for s in FindSummaries(args.summaries)]

    # Convert to MTV time
    for filename, s in summaries:
        dt = datetime.datetime.fromisoformat(s["start_time"])
        dt = dt.astimezone(zoneinfo.ZoneInfo("America/Los_Angeles"))
        s["datetime"] = dt
        s["date"] = datetime.date(dt.year, dt.month, dt.day)

    # Sort the summaries
    summaries.sort(key=lambda s: (s[1]["date"], s[1]["branch"], s[1]["tag"]))

    # group the benchmarks by column and iteration
    def bm_key(b):
        return (
            lunch_str(b["lunch"]),
        )
    for filename, summary in summaries:
        summary["columns"] = [(key, group_by(bms, lambda b: b["id"])) for key, bms
                              in group_by(summary["benchmarks"], bm_key)]

    # Build the table
    table = Table()
    for filename, summary in summaries:
        for key, column in summary["columns"]:
            for id, cell in column:
                duration_ns = statistics.median([b["duration_ns"] for b in cell])
                table.Set(tuple([summary["date"].strftime("YYYY-MM-DD"),
                                 summary["branch"],
                                 summary["tag"]]
                                + list(key)),
                          cell[0]["title"], format_duration_sec(duration_ns))

    table.Write(sys.stdout)

if __name__ == "__main__":
    main(sys.argv)

tools/perf/pretty.py

0 → 100644
+52 −0

File added.

Preview size limit exceeded, changes collapsed.

tools/perf/utils.py

0 → 100644
+30 −0

File added.

Preview size limit exceeded, changes collapsed.