Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 88ede358 authored by Joe Onorato's avatar Joe Onorato
Browse files

Rewrite the build system benchmarks to be much simpler and not require bazel.

Test: ./benchmarks && ./format_benchmarks
Change-Id: I907421ed0c85e961d78342a3e58b2d4ab4aaf6ac
parent 35ce86d6
Loading
Loading
Loading
Loading

tools/perf/benchmarks

0 → 100755
+550 −0
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
if __name__ == "__main__":
    sys.dont_write_bytecode = True

import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import shutil
import subprocess
import time

import pretty
import utils


class FatalError(Exception):
    """Raised after an error has already been reported; tells callers to stop."""

    def __init__(self):
        super().__init__()


class OptionsError(Exception):
    """Raised for an invalid command line option; carries a human-readable message."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message


@dataclasses.dataclass(frozen=True)
class Lunch:
    """A lunch combination: product, release and build variant."""

    # Value for the TARGET_PRODUCT environment variable.
    target_product: str
    # Value for the TARGET_RELEASE environment variable.
    target_release: str
    # Value for the TARGET_BUILD_VARIANT environment variable.
    target_build_variant: str

    def ToDict(self):
        """Return the combo as a dict keyed by the build environment variable names."""
        return {
            "TARGET_PRODUCT": self.target_product,
            "TARGET_RELEASE": self.target_release,
            "TARGET_BUILD_VARIANT": self.target_build_variant,
        }

    def Combine(self):
        """Return the combo formatted as the dash-joined lunch string."""
        return "-".join(
            (self.target_product, self.target_release, self.target_build_variant))


@dataclasses.dataclass(frozen=True)
class Change:
    """A modification applied to the source tree, paired with its inverse."""

    # Text printed to the log when the change is applied.
    label: str
    # Zero-argument function that applies the change to the tree.
    change: callable
    # Zero-argument function that restores the tree to its previous condition
    # in the most minimal way possible.
    undo: callable


@dataclasses.dataclass(frozen=True)
class Benchmark:
    """One measurable scenario: a tree change plus the builds that time it."""

    # Short ID for the benchmark, used on the command line.
    id: str
    # Human-readable title used in reports.
    title: str
    # Source tree modification whose rebuild cost is being measured.
    change: Change
    # Build modules passed on the soong command line.
    modules: list[str]
    # Number of stabilizing builds to run before the measured one.
    preroll: int
    # Number of stabilizing builds to run after the change is reverted.
    postroll: int


@dataclasses.dataclass(frozen=True)
class FileSnapshot:
    """A saved copy of one file's contents, restorable via write()."""

    # Path of the file that was snapshotted.
    filename: str
    # The file's contents at snapshot time.
    contents: str

    def write(self):
        """Overwrite the file on disk with the saved contents."""
        with open(self.filename, "w") as f:
            f.write(self.contents)


def Snapshot(filename):
    """Read `filename` now and return a FileSnapshot of its contents."""
    with open(filename) as f:
        return FileSnapshot(filename, f.read())


def Clean():
    """Return a Change that deletes the out directory, forcing a clean build.

    The change has no undo: a subsequent build simply recreates out/.
    """
    def remove_out():
        out = "out"
        if os.path.exists(out):
            shutil.rmtree(out)
    return Change(label="Remove out", change=remove_out, undo=lambda: None)


def NoChange():
    """Return a Change that leaves the tree untouched (baseline measurement)."""
    do_nothing = lambda: None
    return Change(label="No change", change=do_nothing, undo=do_nothing)


def Modify(filename, contents, before=None):
    """Create an action to modify `filename` by appending `contents` before the last
    instance of `before` in the file.

    If `before` is None, `contents` is appended at the end of the file.

    Reports an error and raises FatalError if `before` doesn't appear in the file.
    """
    orig = Snapshot(filename)
    if before:
        index = orig.contents.rfind(before)
        if index < 0:
            # Name the file in the message so the user can tell which modify failed.
            report_error(f"{filename}: Unable to find string '{before}' for modify operation.")
            raise FatalError()
    else:
        index = len(orig.contents)
    modified = FileSnapshot(filename, orig.contents[:index] + contents + orig.contents[index:])
    return Change(
            label="Modify " + filename,
            change=lambda: modified.write(),
            undo=lambda: orig.write()
        )


class BenchmarkReport():
    "Information about a run of the benchmark"

    lunch: Lunch
    "lunch combo"

    benchmark: Benchmark
    "The benchmark object."

    iteration: int
    "Which iteration of the benchmark"

    log_dir: str
    "Path the the log directory, relative to the root of the reports directory"

    preroll_duration_ns: [int]
    "Durations of the in nanoseconds."

    duration_ns: int
    "Duration of the measured portion of the benchmark in nanoseconds."

    postroll_duration_ns: [int]
    "Durations of the postrolls in nanoseconds."

    complete: bool
    "Whether the benchmark made it all the way through the postrolls."

    def __init__(self, lunch, benchmark, iteration, log_dir):
        self.lunch = lunch
        self.benchmark = benchmark
        self.iteration = iteration
        self.log_dir = log_dir
        self.preroll_duration_ns = []
        self.duration_ns = -1
        self.postroll_duration_ns = []
        self.complete = False

    def ToDict(self):
        return {
            "lunch": self.lunch.ToDict(),
            "id": self.benchmark.id,
            "title": self.benchmark.title,
            "modules": self.benchmark.modules,
            "change": self.benchmark.change.label,
            "iteration": self.iteration,
            "log_dir": self.log_dir,
            "preroll_duration_ns": self.preroll_duration_ns,
            "duration_ns": self.duration_ns,
            "postroll_duration_ns": self.postroll_duration_ns,
            "complete": self.complete,
        }

class Runner():
    """Runs the benchmarks and records the results as they accumulate."""

    def __init__(self, options):
        self._options = options
        self._reports = []
        self._complete = False

    def Run(self):
        """Run all of the user-selected benchmarks."""
        # Clean out the log dir or create it if necessary
        prepare_log_dir(self._options.LogDir())

        try:
            for lunch in self._options.Lunches():
                print(lunch)
                for benchmark in self._options.Benchmarks():
                    for iteration in range(self._options.Iterations()):
                        self._run_benchmark(lunch, benchmark, iteration)
            self._complete = True
        finally:
            # Always write the summary, even on failure, so partial results survive.
            self._write_summary()


    def _run_benchmark(self, lunch, benchmark, iteration):
        """Run a single benchmark: prerolls, the measured build, then postrolls."""
        benchmark_log_subdir = self._log_dir(lunch, benchmark, iteration)
        benchmark_log_dir = self._options.LogDir().joinpath(benchmark_log_subdir)

        sys.stderr.write(f"STARTING BENCHMARK: {benchmark.id}\n")
        sys.stderr.write(f"             lunch: {lunch.Combine()}\n")
        sys.stderr.write(f"         iteration: {iteration}\n")
        sys.stderr.write(f" benchmark_log_dir: {benchmark_log_dir}\n")

        report = BenchmarkReport(lunch, benchmark, iteration, benchmark_log_subdir)
        self._reports.append(report)

        # Preroll builds
        for i in range(benchmark.preroll):
            ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"pre_{i}"), benchmark.modules)
            report.preroll_duration_ns.append(ns)

        sys.stderr.write(f"PERFORMING CHANGE: {benchmark.change.label}\n")
        if not self._options.DryRun():
            benchmark.change.change()
        try:

            # Measured build
            ns = self._run_build(lunch, benchmark_log_dir.joinpath("measured"), benchmark.modules)
            report.duration_ns = ns

            # Postroll builds
            # Bug fix: this loop previously iterated benchmark.preroll times,
            # so the configured postroll count was never honored.
            for i in range(benchmark.postroll):
                ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"post_{i}"),
                                     benchmark.modules)
                report.postroll_duration_ns.append(ns)

            # Bug fix: mark the report complete once all postrolls have finished;
            # previously nothing ever set this, so summary.json always said False.
            report.complete = True

        finally:
            # Always undo, even if we crashed or the build failed and we stopped.
            sys.stderr.write(f"UNDOING CHANGE: {benchmark.change.label}\n")
            if not self._options.DryRun():
                benchmark.change.undo()

        self._write_summary()
        sys.stderr.write(f"FINISHED BENCHMARK: {benchmark.id}\n")

    def _log_dir(self, lunch, benchmark, iteration):
        """Construct the log directory for a benchmark run."""
        path = f"{lunch.Combine()}/{benchmark.id}"
        # Zero pad to the correct length for correct alpha sorting
        path += ("/%0" + str(len(str(self._options.Iterations()))) + "d") % iteration
        return path

    def _run_build(self, lunch, build_log_dir, modules):
        """Builds the modules.  Saves interesting log files to log_dir.  Raises FatalError
        if the build fails.

        Returns the wall-clock duration of the build in nanoseconds.
        """
        sys.stderr.write(f"STARTING BUILD {modules}\n")

        before_ns = time.perf_counter_ns()
        if not self._options.DryRun():
            cmd = [
                "build/soong/soong_ui.bash",
                "--build-mode",
                "--all-modules",
                f"--dir={self._options.root}",
            ] + modules
            # Inherit the environment but pin the lunch combo for this run.
            env = dict(os.environ)
            env["TARGET_PRODUCT"] = lunch.target_product
            env["TARGET_RELEASE"] = lunch.target_release
            env["TARGET_BUILD_VARIANT"] = lunch.target_build_variant
            returncode = subprocess.call(cmd, env=env)
            if returncode != 0:
                report_error(f"Build failed: {' '.join(cmd)}")
                raise FatalError()

        after_ns = time.perf_counter_ns()

        # TODO: Copy some log files.

        sys.stderr.write(f"FINISHED BUILD {modules}\n")

        return after_ns - before_ns

    def _write_summary(self):
        """Write summary.json with the results so far, even if incomplete."""
        # Write the results, even if the build failed or we crashed, including
        # whether we finished all of the benchmarks.
        data = {
            "start_time": self._options.Timestamp().isoformat(),
            "branch": self._options.Branch(),
            "tag": self._options.Tag(),
            "benchmarks": [report.ToDict() for report in self._reports],
            "complete": self._complete,
        }
        with open(self._options.LogDir().joinpath("summary.json"), "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)


def benchmark_table(benchmarks):
    """Build rows (header plus one row per benchmark) describing the suite."""
    header = ("ID", "DESCRIPTION", "REBUILD")
    body = [(b.id, b.title, " ".join(b.modules)) for b in benchmarks]
    return [header] + body


def prepare_log_dir(directory):
    """Ensure `directory` exists and is empty, without replacing the directory itself."""
    if not os.path.exists(directory):
        # Nothing there yet: just create it.
        os.makedirs(directory)
        return
    # If it exists and isn't a directory, fail.
    if not os.path.isdir(directory):
        report_error(f"Log directory already exists but isn't a directory: {directory}")
        raise FatalError()
    # Empty it entry by entry rather than deleting the directory itself, so a
    # symlinked log directory keeps working.
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.unlink(path)


class Options():
    """Parsed command line options plus the derived configuration for a run.

    Construction has side effects: it chdirs to the root of the tree and parses
    sys.argv, raising FatalError if anything is invalid.
    """

    def __init__(self):
        self._had_error = False

        # Wall time clock when we started
        self._timestamp = datetime.datetime.now(datetime.timezone.utc)

        # Move to the root of the tree right away. Everything must happen from there.
        self.root = utils.get_root()
        if not self.root:
            report_error("Unable to find root of tree from cwd.")
            raise FatalError()
        os.chdir(self.root)

        # Initialize the Benchmarks. Note that this pre-loads all of the files, etc.
        # Doing all that here forces us to fail fast if one of them can't load a required
        # file, at the cost of a small startup speed. Don't make this do something slow
        # like scan the whole tree.
        self._init_benchmarks()

        # Argument parsing
        epilog = f"""
benchmarks:
{pretty.FormatTable(benchmark_table(self._benchmarks), prefix="  ")}
"""

        parser = argparse.ArgumentParser(
                prog="benchmarks",
                allow_abbrev=False, # Don't let people write unsupportable scripts.
                formatter_class=argparse.RawDescriptionHelpFormatter,
                epilog=epilog,
                description="Run build system performance benchmarks.")
        self.parser = parser

        parser.add_argument("--log-dir",
                            help="Directory for logs. Default is $TOP/../benchmarks/.")
        parser.add_argument("--dated-logs", action="store_true",
                            help="Append timestamp to log dir.")
        parser.add_argument("-n", action="store_true", dest="dry_run",
                            help="Dry run. Don't run the build commands but do everything else.")
        parser.add_argument("--tag",
                            help="Variant of the run, for when there are multiple perf runs.")
        parser.add_argument("--lunch", nargs="*",
                            help="Lunch combos to test")
        parser.add_argument("--iterations", type=int, default=1,
                            help="Number of iterations of each test to run.")
        parser.add_argument("--branch", type=str,
                            help="Specify branch. Otherwise a guess will be made based on repo.")
        parser.add_argument("--benchmark", nargs="*", default=[b.id for b in self._benchmarks],
                            metavar="BENCHMARKS",
                            help="Benchmarks to run.  Default suite will be run if omitted.")

        self._args = parser.parse_args()

        # NOTE: these assignments deliberately replace the bound helper methods
        # of the same name with their computed results, so each helper can only
        # run once (during __init__) and the accessors below return cached values.
        self._branch = self._branch()
        self._log_dir = self._log_dir()
        self._lunches = self._lunches()

        # Validate the benchmark ids
        all_ids = [benchmark.id for benchmark in self._benchmarks]
        bad_ids = [id for id in self._args.benchmark if id not in all_ids]
        if bad_ids:
            for id in bad_ids:
                self._error(f"Invalid benchmark: {id}")

        # Fail only after reporting every problem, so the user sees them all at once.
        if self._had_error:
            raise FatalError()

    def Timestamp(self):
        "UTC datetime at which this run started."
        return self._timestamp

    def _branch(self):
        """Return the branch, either from the command line or by guessing from repo."""
        if self._args.branch:
            return self._args.branch
        try:
            # Ask git for the upstream of the repo manifest checkout and use its
            # final path component as the branch name.
            branch = subprocess.check_output(f"cd {self.root}/.repo/manifests"
                        + " && git rev-parse --abbrev-ref --symbolic-full-name @{u}",
                    shell=True, encoding="utf-8")
            return branch.strip().split("/")[-1]
        except subprocess.CalledProcessError as ex:
            report_error("Can't get branch from .repo dir. Specify --branch argument")
            report_error(str(ex))
            raise FatalError()

    def Branch(self):
        "The branch name for this run (cached at construction time)."
        return self._branch

    def _log_dir(self):
        "The log directory to use, based on the current options"
        if self._args.log_dir:
            d = pathlib.Path(self._args.log_dir).resolve().absolute()
        else:
            d = self.root.joinpath("..", utils.DEFAULT_REPORT_DIR)
        if self._args.dated_logs:
            d = d.joinpath(self._timestamp.strftime('%Y-%m-%d'))
        d = d.joinpath(self._branch)
        if self._args.tag:
            d = d.joinpath(self._args.tag)
        return d.resolve().absolute()

    def LogDir(self):
        "The resolved log directory (cached at construction time)."
        return self._log_dir

    def Benchmarks(self):
        "The Benchmark objects selected on the command line, in suite order."
        return [b for b in self._benchmarks if b.id in self._args.benchmark]

    def Tag(self):
        "The --tag argument, or None."
        return self._args.tag

    def DryRun(self):
        "Whether -n was given (skip build commands but do everything else)."
        return self._args.dry_run

    def _lunches(self):
        """Return the list of Lunch combos to test, from --lunch or the environment."""
        def parse_lunch(lunch):
            parts = lunch.split("-")
            if len(parts) != 3:
                raise OptionsError(f"Invalid lunch combo: {lunch}")
            return Lunch(parts[0], parts[1], parts[2])
        # If they gave lunch targets on the command line use that
        if self._args.lunch:
            result = []
            # Split into Lunch objects
            for lunch in self._args.lunch:
                try:
                    result.append(parse_lunch(lunch))
                except OptionsError as ex:
                    self._error(ex.message)
            return result
        # Use whats in the environment
        product = os.getenv("TARGET_PRODUCT")
        release = os.getenv("TARGET_RELEASE")
        variant = os.getenv("TARGET_BUILD_VARIANT")
        if (not product) or (not release) or (not variant):
            # If they didn't give us anything, fail rather than guessing. There's no good
            # default for AOSP.
            self._error("No lunch combo specified. Either pass --lunch argument or run lunch.")
            return []
        return [Lunch(product, release, variant),]

    def Lunches(self):
        "The Lunch combos to test (cached at construction time)."
        return self._lunches

    def Iterations(self):
        "Number of iterations of each benchmark to run."
        return self._args.iterations

    def _init_benchmarks(self):
        """Initialize the list of benchmarks."""
        # Assumes that we've already chdired to the root of the tree.
        self._benchmarks = [
            Benchmark(id="full",
                title="Full build",
                change=Clean(),
                modules=["droid"],
                preroll=0,
                postroll=3
            ),
            Benchmark(id="nochange",
                title="No change",
                change=NoChange(),
                modules=["droid"],
                preroll=2,
                postroll=3
            ),
            Benchmark(id="modify_bp",
                title="Modify Android.bp",
                change=Modify("bionic/libc/Android.bp", "// Comment"),
                modules=["droid"],
                preroll=1,
                postroll=3
            ),
        ]

    def _error(self, message):
        """Report an error but keep going, so all problems are shown before failing."""
        report_error(message)
        self._had_error = True


def report_error(message):
    """Print an error message, prefixed with 'error:', to stderr."""
    print(f"error: {message}", file=sys.stderr)


def main(argv):
    """Entry point: parse options and run the selected benchmarks."""
    try:
        runner = Runner(Options())
        runner.Run()
    except FatalError:
        # The error has already been reported; just note the overall failure.
        sys.stderr.write("FAILED\n")


if __name__ == "__main__":
    main(sys.argv)
+185 −0
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3
# Copyright (C) 2023 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
if __name__ == "__main__":
    sys.dont_write_bytecode = True

import argparse
import dataclasses
import datetime
import json
import os
import pathlib
import statistics
import zoneinfo

import pretty
import utils

# TODO:
# - Flag if the last postroll build was more than 15 seconds or something. That's
#   an indicator that something is amiss.
# - Add a mode to print all of the values for multi-iteration runs
# - Add a flag to reorder the tags
# - Add a flag to reorder the headers in order to show grouping more clearly.


def FindSummaries(args):
    """Expand command line args (files and directories) into a sorted list of
    summary.json paths.

    With no args, searches the default report directory next to the tree root.
    Exits with status 1 on an argument that is neither a file nor a directory.
    """
    def summaries_under(directory):
        return [str(path.resolve())
                for path in pathlib.Path(directory).glob("**/summary.json")]

    if not args:
        # No arguments: use the default report directory, if a root can be found.
        root = utils.get_root()
        if not root:
            return []
        return summaries_under(root.joinpath("..", utils.DEFAULT_REPORT_DIR))

    results = []
    for arg in args:
        if os.path.isfile(arg):
            # An explicit file: take it as-is.
            results.append(arg)
        elif os.path.isdir(arg):
            # A directory: search it recursively.
            results += summaries_under(arg)
        else:
            sys.stderr.write(f"Invalid summary argument: {arg}\n")
            sys.exit(1)
    return sorted(results)


def LoadSummary(filename):
    """Read `filename` and return its parsed JSON contents."""
    with open(filename) as f:
        return json.loads(f.read())

# Columns:
#   Date
#   Branch
#   Tag
#   --
#   Lunch
# Rows:
#   Benchmark

@dataclasses.dataclass(frozen=True)
class Key():
    # Placeholder for a table key type; no fields are defined yet.
    pass

class Column():
    # Placeholder for a table column type; currently unused.
    def __init__(self):
        pass

def lunch_str(d):
    """Format a lunch dict as the 'product-release-variant' string."""
    return "-".join(
        [d["TARGET_PRODUCT"], d["TARGET_RELEASE"], d["TARGET_BUILD_VARIANT"]])

def group_by(l, key):
    """Return (key, [items]) tuples grouping `l` by `key`.

    Groups appear in the order their key is first seen, and items keep their
    original relative order. Note: the result is NOT sorted by key (the old
    docstring claimed it was).
    """
    result = {}
    for item in l:
        result.setdefault(key(item), []).append(item)
    return list(result.items())


class Table:
    """Sparse 2-D table keyed by (column_key, row_key), rendered via pretty.FormatTable."""

    def __init__(self):
        self._data = {}  # (column_key, row_key) -> cell text
        self._rows = []  # row keys in insertion order
        self._cols = []  # column keys (tuples of header parts) in insertion order

    def Set(self, column_key, row_key, data):
        """Store `data` at (column_key, row_key), registering new keys in order."""
        self._data[(column_key, row_key)] = data
        if not column_key in self._cols:
            self._cols.append(column_key)
        if not row_key in self._rows:
            self._rows.append(row_key)

    def Write(self, out):
        """Format the table (header lines, separator, then data rows) to `out`."""
        table = []
        # Expand the column items
        # zip(*cols) transposes the column-key tuples so each header line holds one
        # element of every column key; a line whose values are all identical carries
        # no distinguishing information and is skipped.
        for row in zip(*self._cols):
            if row.count(row[0]) == len(row):
                continue
            table.append([""] + [col for col in row])
        if table:
            table.append(pretty.SEPARATOR)
        # Populate the data
        # Missing cells render as the empty string.
        for row in self._rows:
            table.append([str(row)] + [str(self._data.get((col, row), "")) for col in self._cols])
        out.write(pretty.FormatTable(table))


def format_duration_sec(ns):
    """Render a nanosecond duration as '[ Hh] [ Mm]  Ss' with second precision.

    Hours are shown only when nonzero; minutes whenever hours or minutes are
    nonzero; seconds always. Each field is right-aligned to two characters.
    """
    total = round(ns / 1000000000)
    minutes, seconds = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    parts = []
    if hours > 0:
        parts.append(f"{hours:2d}h")
    if hours > 0 or minutes > 0:
        parts.append(f"{minutes:2d}m")
    parts.append(f"{seconds:2d}s")
    return " ".join(parts)

def main(argv):
    """Load benchmark summaries, aggregate them, and print an analysis table.

    Columns are (date, branch, tag, lunch combo); rows are benchmark titles;
    cells are the median measured duration across iterations.
    """
    parser = argparse.ArgumentParser(
            prog="format_benchmarks",
            allow_abbrev=False, # Don't let people write unsupportable scripts.
            description="Print analysis tables for benchmarks")

    parser.add_argument("summaries", nargs="*",
                        help="A summary.json file or a directory in which to look for summaries.")

    args = parser.parse_args()

    # Load the summaries
    summaries = [(s, LoadSummary(s)) for s in FindSummaries(args.summaries)]

    # Convert to MTV time
    for filename, s in summaries:
        dt = datetime.datetime.fromisoformat(s["start_time"])
        dt = dt.astimezone(zoneinfo.ZoneInfo("America/Los_Angeles"))
        s["datetime"] = dt
        s["date"] = datetime.date(dt.year, dt.month, dt.day)

    # Sort the summaries
    summaries.sort(key=lambda s: (s[1]["date"], s[1]["branch"], s[1]["tag"]))

    # group the benchmarks by column and iteration
    def bm_key(b):
        return (
            lunch_str(b["lunch"]),
        )
    for filename, summary in summaries:
        summary["columns"] = [(key, group_by(bms, lambda b: b["id"])) for key, bms
                              in group_by(summary["benchmarks"], bm_key)]

    # Build the table
    table = Table()
    for filename, summary in summaries:
        for key, column in summary["columns"]:
            for id, cell in column:
                duration_ns = statistics.median([b["duration_ns"] for b in cell])
                # Bug fix: use a real strftime format. The previous "YYYY-MM-DD"
                # contains no % directives, so every date column literally
                # printed the string "YYYY-MM-DD".
                table.Set(tuple([summary["date"].strftime("%Y-%m-%d"),
                                 summary["branch"],
                                 summary["tag"]]
                                + list(key)),
                          cell[0]["title"], format_duration_sec(duration_ns))

    table.Write(sys.stdout)

if __name__ == "__main__":
    main(sys.argv)

tools/perf/pretty.py

0 → 100644
+52 −0

File added.

Preview size limit exceeded, changes collapsed.

tools/perf/utils.py

0 → 100644
+30 −0

File added.

Preview size limit exceeded, changes collapsed.