Merge "Rewrite the build system benchmarks to be much simpler and not require bazel." into main (ba541c7e) · Commits · e / os / android_build

tools/perf/benchmarks

0 → 100755

+550 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		# Copyright (C) 2023 The Android Open Source Project
		#
		# Licensed under the Apache License, Version 2.0 (the "License");
		# you may not use this file except in compliance with the License.
		# You may obtain a copy of the License at
		#
		# http://www.apache.org/licenses/LICENSE-2.0
		#
		# Unless required by applicable law or agreed to in writing, software
		# distributed under the License is distributed on an "AS IS" BASIS,
		# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		# See the License for the specific language governing permissions and
		# limitations under the License.

		import sys
		# When run as a script, turn off bytecode writing before the local modules
		# further down are imported.
		if __name__ == "__main__":
		sys.dont_write_bytecode = True

		import argparse
		import dataclasses
		import datetime
		import json
		import os
		import pathlib
		import shutil
		import subprocess
		import time

		import pretty
		import utils


		class FatalError(Exception):
		    """Raised to abort the run; the problem is reported before this is raised."""

		    def __init__(self):
		        super().__init__()


		class OptionsError(Exception):
		    """An invalid command line option; `message` holds the text to show the user."""

		    def __init__(self, message):
		        super().__init__(message)
		        self.message = message


		@dataclasses.dataclass(frozen=True)
		class Lunch:
		    """One lunch combination: a product, a release and a build variant."""

		    # Value of TARGET_PRODUCT
		    target_product: str

		    # Value of TARGET_RELEASE
		    target_release: str

		    # Value of TARGET_BUILD_VARIANT
		    target_build_variant: str

		    def ToDict(self):
		        """Return the combo as a dict keyed by the build environment variable names."""
		        names = ("TARGET_PRODUCT", "TARGET_RELEASE", "TARGET_BUILD_VARIANT")
		        values = (self.target_product, self.target_release, self.target_build_variant)
		        return dict(zip(names, values))

		    def Combine(self):
		        """Return the combo as a single `product-release-variant` string."""
		        return "{}-{}-{}".format(
		            self.target_product, self.target_release, self.target_build_variant)


		@dataclasses.dataclass(frozen=True)
		class Change:
		    """A modification we make to the tree, paired with the way to undo it."""

		    # Text printed in the log when the change is made
		    label: str

		    # Zero-argument function that modifies the source tree
		    change: callable

		    # Zero-argument function that puts the source tree back the way it was,
		    # in the most minimal way possible
		    undo: callable


		@dataclasses.dataclass(frozen=True)
		class Benchmark:
		    """The definition of one thing we measure."""

		    # Short ID for the benchmark, used on the command line
		    id: str

		    # Title shown in reports
		    title: str

		    # Source tree modification whose rebuild is the measured build
		    change: Change

		    # Modules to pass on the soong command line
		    modules: list[str]

		    # Number of builds to run before the change, to stabilize
		    preroll: int

		    # Number of builds to run after the measured build, to stabilize
		    postroll: int


		@dataclasses.dataclass(frozen=True)
		class FileSnapshot:
		    """The contents of a file as captured at some point in time."""

		    # Path of the file the snapshot was taken from
		    filename: str

		    # The text that was in the file
		    contents: str

		    def write(self):
		        """Overwrite the file with the captured contents."""
		        with open(self.filename, "w") as outfile:
		            outfile.write(self.contents)


		def Snapshot(filename):
		    """Read `filename` and return a FileSnapshot holding its current contents."""
		    with open(filename) as infile:
		        text = infile.read()
		    snapshot = FileSnapshot(filename, text)
		    return snapshot


		def Clean():
		    """Return a Change that deletes the `out` directory. Its undo does nothing."""
		    def remove_out():
		        # Nothing to delete if there is no out directory.
		        if not os.path.exists("out"):
		            return
		        shutil.rmtree("out")

		    def nothing_to_undo():
		        return None

		    return Change(label="Remove out", change=remove_out, undo=nothing_to_undo)


		def NoChange():
		    """Return a Change whose change and undo steps both leave the tree untouched."""
		    def do_nothing():
		        return None

		    return Change(label="No change", change=do_nothing, undo=do_nothing)


		def Modify(filename, contents, before=None):
		    """Return a Change that inserts `contents` into `filename`.

		    The text goes immediately before the last occurrence of `before`, or at the
		    end of the file when `before` is not given. The file is read when this
		    function is called, not when the change is applied; the undo step writes
		    back exactly what was read.

		    Reports an error and raises FatalError if `before` is given but doesn't
		    appear in the file.
		    """
		    orig = Snapshot(filename)
		    text = orig.contents
		    if not before:
		        split_at = len(text)
		    else:
		        split_at = text.rfind(before)
		        if split_at < 0:
		            report_error(f"{filename}: Unable to find string '{before}' for modify operation.")
		            raise FatalError()
		    modified = FileSnapshot(filename, text[:split_at] + contents + text[split_at:])
		    return Change(
		        label="Modify " + filename,
		        change=modified.write,
		        undo=orig.write,
		    )


		class BenchmarkReport():
		    """Information about one run of a benchmark.

		    The duration fields start out empty (or -1 for the measured build) and are
		    filled in as the builds run.
		    """

		    # The lunch combo
		    lunch: Lunch

		    # The benchmark object
		    benchmark: Benchmark

		    # Which iteration of the benchmark
		    iteration: int

		    # Path to the log directory, relative to the root of the reports directory
		    log_dir: str

		    # Durations of the prerolls in nanoseconds
		    preroll_duration_ns: list[int]

		    # Duration of the measured portion of the benchmark in nanoseconds,
		    # or -1 if it hasn't been recorded
		    duration_ns: int

		    # Durations of the postrolls in nanoseconds
		    postroll_duration_ns: list[int]

		    # Whether the benchmark made it all the way through the postrolls
		    complete: bool

		    def __init__(self, lunch, benchmark, iteration, log_dir):
		        self.lunch = lunch
		        self.benchmark = benchmark
		        self.iteration = iteration
		        self.log_dir = log_dir
		        self.preroll_duration_ns = []
		        self.duration_ns = -1
		        self.postroll_duration_ns = []
		        self.complete = False

		    def ToDict(self):
		        """Return the report as a dict of plain values, as written to summary.json."""
		        return {
		            "lunch": self.lunch.ToDict(),
		            "id": self.benchmark.id,
		            "title": self.benchmark.title,
		            "modules": self.benchmark.modules,
		            "change": self.benchmark.change.label,
		            "iteration": self.iteration,
		            "log_dir": self.log_dir,
		            "preroll_duration_ns": self.preroll_duration_ns,
		            "duration_ns": self.duration_ns,
		            "postroll_duration_ns": self.postroll_duration_ns,
		            "complete": self.complete,
		        }

		class Runner():
		    """Runs the benchmarks and records a report for every run that is started."""

		    def __init__(self, options):
		        self._options = options
		        # One BenchmarkReport per benchmark run started so far.
		        self._reports = []
		        # Set once every requested benchmark run has finished.
		        self._complete = False

		    def Run(self):
		        """Run all of the user-selected benchmarks.

		        The summary file is written even if a build fails or we crash.
		        """
		        # Clean out the log dir or create it if necessary
		        prepare_log_dir(self._options.LogDir())

		        try:
		            for lunch in self._options.Lunches():
		                print(lunch)
		                for benchmark in self._options.Benchmarks():
		                    for iteration in range(self._options.Iterations()):
		                        self._run_benchmark(lunch, benchmark, iteration)
		            self._complete = True
		        finally:
		            self._write_summary()

		    def _run_benchmark(self, lunch, benchmark, iteration):
		        """Run a single benchmark: prerolls, the change, the measured build, postrolls.

		        The change is always undone, even if a build fails. Raises FatalError if
		        any of the builds fails.
		        """
		        benchmark_log_subdir = self._log_dir(lunch, benchmark, iteration)
		        benchmark_log_dir = self._options.LogDir().joinpath(benchmark_log_subdir)

		        sys.stderr.write(f"STARTING BENCHMARK: {benchmark.id}\n")
		        sys.stderr.write(f" lunch: {lunch.Combine()}\n")
		        sys.stderr.write(f" iteration: {iteration}\n")
		        sys.stderr.write(f" benchmark_log_dir: {benchmark_log_dir}\n")

		        # Register the report before building so a failed run still shows up
		        # (as incomplete) in the summary.
		        report = BenchmarkReport(lunch, benchmark, iteration, benchmark_log_subdir)
		        self._reports.append(report)

		        # Preroll builds
		        for i in range(benchmark.preroll):
		            ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"pre_{i}"), benchmark.modules)
		            report.preroll_duration_ns.append(ns)

		        sys.stderr.write(f"PERFORMING CHANGE: {benchmark.change.label}\n")
		        if not self._options.DryRun():
		            benchmark.change.change()
		        try:
		            # Measured build
		            ns = self._run_build(lunch, benchmark_log_dir.joinpath("measured"), benchmark.modules)
		            report.duration_ns = ns

		            # Postroll builds. The count comes from benchmark.postroll; using the
		            # preroll count here would skip them when preroll is 0.
		            for i in range(benchmark.postroll):
		                ns = self._run_build(lunch, benchmark_log_dir.joinpath(f"post_{i}"),
		                                     benchmark.modules)
		                report.postroll_duration_ns.append(ns)

		            # We made it all the way through the postrolls.
		            report.complete = True
		        finally:
		            # Always undo, even if we crashed or the build failed and we stopped.
		            sys.stderr.write(f"UNDOING CHANGE: {benchmark.change.label}\n")
		            if not self._options.DryRun():
		                benchmark.change.undo()

		        self._write_summary()
		        sys.stderr.write(f"FINISHED BENCHMARK: {benchmark.id}\n")

		    def _log_dir(self, lunch, benchmark, iteration):
		        """Construct the log directory for a benchmark run, relative to the log dir."""
		        # Zero pad to the correct length for correct alpha sorting
		        width = len(str(self._options.Iterations()))
		        return f"{lunch.Combine()}/{benchmark.id}/{iteration:0{width}d}"

		    def _run_build(self, lunch, build_log_dir, modules):
		        """Build the modules and return how long it took, in nanoseconds.

		        In a dry run the build command is skipped but the time is still measured.
		        `build_log_dir` is not used yet (see the TODO below). Raises FatalError if
		        the build fails.
		        """
		        sys.stderr.write(f"STARTING BUILD {modules}\n")

		        before_ns = time.perf_counter_ns()
		        if not self._options.DryRun():
		            cmd = [
		                "build/soong/soong_ui.bash",
		                "--build-mode",
		                "--all-modules",
		                f"--dir={self._options.root}",
		            ] + modules
		            # The lunch combo is passed to the build through the environment.
		            env = dict(os.environ)
		            env["TARGET_PRODUCT"] = lunch.target_product
		            env["TARGET_RELEASE"] = lunch.target_release
		            env["TARGET_BUILD_VARIANT"] = lunch.target_build_variant
		            returncode = subprocess.call(cmd, env=env)
		            if returncode != 0:
		                report_error(f"Build failed: {' '.join(cmd)}")
		                raise FatalError()

		        after_ns = time.perf_counter_ns()

		        # TODO: Copy some log files.

		        sys.stderr.write(f"FINISHED BUILD {modules}\n")

		        return after_ns - before_ns

		    def _write_summary(self):
		        """Write summary.json into the log directory."""
		        # Write the results, even if the build failed or we crashed, including
		        # whether we finished all of the benchmarks.
		        data = {
		            "start_time": self._options.Timestamp().isoformat(),
		            "branch": self._options.Branch(),
		            "tag": self._options.Tag(),
		            "benchmarks": [report.ToDict() for report in self._reports],
		            "complete": self._complete,
		        }
		        with open(self._options.LogDir().joinpath("summary.json"), "w", encoding="utf-8") as f:
		            json.dump(data, f, indent=2, sort_keys=True)


		def benchmark_table(benchmarks):
		    """Return table rows listing `benchmarks`.

		    The first row is the header; each following row is the id, the title and
		    the space-separated modules of one benchmark.
		    """
		    table = [("ID", "DESCRIPTION", "REBUILD")]
		    for bm in benchmarks:
		        table.append((bm.id, bm.title, " ".join(bm.modules)))
		    return table


		def prepare_log_dir(directory):
		    """Make sure `directory` exists and is empty.

		    Creates the directory if it doesn't exist, otherwise deletes everything in
		    it. Reports an error and raises FatalError if `directory` exists but isn't
		    a directory.
		    """
		    if not os.path.exists(directory):
		        # Create it
		        os.makedirs(directory)
		        return
		    # If it exists and isn't a directory, fail.
		    if not os.path.isdir(directory):
		        report_error(f"Log directory already exists but isn't a directory: {directory}")
		        raise FatalError()
		    # Make sure the directory is empty. Do this rather than deleting it to handle
		    # symlinks cleanly.
		    for filename in os.listdir(directory):
		        entry = os.path.join(directory, filename)
		        # A symlink to a directory must be unlinked: shutil.rmtree refuses to
		        # operate on symlinks, and the link's target isn't ours to delete.
		        if os.path.isdir(entry) and not os.path.islink(entry):
		            shutil.rmtree(entry)
		        else:
		            os.unlink(entry)


		class Options():
		    """Command line options and the settings derived from them.

		    Constructing an Options changes the working directory to the root of the
		    tree, loads the benchmark definitions and parses the command line. Problems
		    are reported on stderr and then FatalError is raised.
		    """

		    def __init__(self):
		        self._had_error = False

		        # Wall time clock when we started
		        self._timestamp = datetime.datetime.now(datetime.timezone.utc)

		        # Move to the root of the tree right away. Everything must happen from there.
		        self.root = utils.get_root()
		        if not self.root:
		            report_error("Unable to find root of tree from cwd.")
		            raise FatalError()
		        os.chdir(self.root)

		        # Initialize the Benchmarks. Note that this pre-loads all of the files, etc.
		        # Doing all that here forces us to fail fast if one of them can't load a required
		        # file, at the cost of a small startup speed. Don't make this do something slow
		        # like scan the whole tree.
		        self._init_benchmarks()

		        # Argument parsing
		        epilog = f"""
		benchmarks:
		{pretty.FormatTable(benchmark_table(self._benchmarks), prefix=" ")}
		"""

		        parser = argparse.ArgumentParser(
		                prog="benchmarks",
		                allow_abbrev=False, # Don't let people write unsupportable scripts.
		                formatter_class=argparse.RawDescriptionHelpFormatter,
		                epilog=epilog,
		                description="Run build system performance benchmarks.")
		        self.parser = parser

		        parser.add_argument("--log-dir",
		                            help="Directory for logs. Default is $TOP/../benchmarks/.")
		        parser.add_argument("--dated-logs", action="store_true",
		                            help="Append timestamp to log dir.")
		        parser.add_argument("-n", action="store_true", dest="dry_run",
		                            help="Dry run. Don't run the build commands but do everything else.")
		        parser.add_argument("--tag",
		                            help="Variant of the run, for when there are multiple perf runs.")
		        parser.add_argument("--lunch", nargs="*",
		                            help="Lunch combos to test")
		        parser.add_argument("--iterations", type=int, default=1,
		                            help="Number of iterations of each test to run.")
		        parser.add_argument("--branch", type=str,
		                            help="Specify branch. Otherwise a guess will be made based on repo.")
		        parser.add_argument("--benchmark", nargs="*", default=[b.id for b in self._benchmarks],
		                            metavar="BENCHMARKS",
		                            help="Benchmarks to run. Default suite will be run if omitted.")

		        self._args = parser.parse_args()

		        # Each of these replaces the method of the same name on this instance with
		        # the value it computed. The order matters: _log_dir() reads the computed
		        # branch.
		        self._branch = self._branch()
		        self._log_dir = self._log_dir()
		        self._lunches = self._lunches()

		        # Validate the benchmark ids
		        known_ids = {benchmark.id for benchmark in self._benchmarks}
		        for benchmark_id in self._args.benchmark:
		            if benchmark_id not in known_ids:
		                self._error(f"Invalid benchmark: {benchmark_id}")

		        if self._had_error:
		            raise FatalError()

		    def Timestamp(self):
		        """Return the UTC time at which this object was created."""
		        return self._timestamp

		    def _branch(self):
		        """Return the branch, either from the command line or by guessing from repo.

		        Reports an error and raises FatalError if the branch has to be guessed and
		        git can't provide it.
		        """
		        if self._args.branch:
		            return self._args.branch
		        try:
		            # Use an argument list and cwd rather than a shell command line, so that
		            # a root path containing spaces or shell metacharacters works.
		            branch = subprocess.check_output(
		                ["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"],
		                cwd=os.path.join(self.root, ".repo", "manifests"),
		                encoding="utf-8")
		            return branch.strip().split("/")[-1]
		        except (subprocess.CalledProcessError, OSError) as ex:
		            # OSError covers a missing git binary or a missing manifests directory,
		            # which used to surface as a failure of the shell command.
		            report_error("Can't get branch from .repo dir. Specify --branch argument")
		            report_error(str(ex))
		            raise FatalError() from ex

		    def Branch(self):
		        """Return the branch name computed during construction."""
		        return self._branch

		    def _log_dir(self):
		        "The log directory to use, based on the current options"
		        if self._args.log_dir:
		            d = pathlib.Path(self._args.log_dir).resolve().absolute()
		        else:
		            d = self.root.joinpath("..", utils.DEFAULT_REPORT_DIR)
		        if self._args.dated_logs:
		            d = d.joinpath(self._timestamp.strftime('%Y-%m-%d'))
		        d = d.joinpath(self._branch)
		        if self._args.tag:
		            d = d.joinpath(self._args.tag)
		        return d.resolve().absolute()

		    def LogDir(self):
		        """Return the log directory computed during construction."""
		        return self._log_dir

		    def Benchmarks(self):
		        """Return the selected benchmarks, in the order they are defined."""
		        return [b for b in self._benchmarks if b.id in self._args.benchmark]

		    def Tag(self):
		        """Return the --tag value, or None if it wasn't given."""
		        return self._args.tag

		    def DryRun(self):
		        """Return whether -n (dry run) was given."""
		        return self._args.dry_run

		    def _lunches(self):
		        """Return the Lunch combos to test.

		        They come from --lunch if it was given, otherwise from the environment.
		        Problems are recorded with _error() rather than raised.
		        """
		        def parse_lunch(lunch):
		            parts = lunch.split("-")
		            if len(parts) != 3:
		                raise OptionsError(f"Invalid lunch combo: {lunch}")
		            return Lunch(parts[0], parts[1], parts[2])
		        # If they gave lunch targets on the command line use that
		        if self._args.lunch:
		            result = []
		            # Split into Lunch objects
		            for lunch in self._args.lunch:
		                try:
		                    result.append(parse_lunch(lunch))
		                except OptionsError as ex:
		                    self._error(ex.message)
		            return result
		        # Use whats in the environment
		        product = os.getenv("TARGET_PRODUCT")
		        release = os.getenv("TARGET_RELEASE")
		        variant = os.getenv("TARGET_BUILD_VARIANT")
		        if (not product) or (not release) or (not variant):
		            # If they didn't give us anything, fail rather than guessing. There's no good
		            # default for AOSP.
		            self._error("No lunch combo specified. Either pass --lunch argument or run lunch.")
		            return []
		        return [Lunch(product, release, variant),]

		    def Lunches(self):
		        """Return the lunch combos computed during construction."""
		        return self._lunches

		    def Iterations(self):
		        """Return the --iterations value."""
		        return self._args.iterations

		    def _init_benchmarks(self):
		        """Initialize the list of benchmarks."""
		        # Assumes that we've already chdired to the root of the tree.
		        self._benchmarks = [
		            Benchmark(id="full",
		                      title="Full build",
		                      change=Clean(),
		                      modules=["droid"],
		                      preroll=0,
		                      postroll=3
		                      ),
		            Benchmark(id="nochange",
		                      title="No change",
		                      change=NoChange(),
		                      modules=["droid"],
		                      preroll=2,
		                      postroll=3
		                      ),
		            Benchmark(id="modify_bp",
		                      title="Modify Android.bp",
		                      change=Modify("bionic/libc/Android.bp", "// Comment"),
		                      modules=["droid"],
		                      preroll=1,
		                      postroll=3
		                      ),
		        ]

		    def _error(self, message):
		        """Report `message` and remember that option processing has failed."""
		        report_error(message)
		        self._had_error = True


		def report_error(message):
		    """Print `message` on stderr as a line starting with `error: `."""
		    print(f"error: {message}", file=sys.stderr)


		def main(argv):
		    """Parse the options and run the selected benchmarks.

		    `argv` is accepted but not used; the options are read by Options itself.
		    If a FatalError is raised, prints FAILED and exits with status 1 so that
		    calling scripts can detect the failure.
		    """
		    try:
		        options = Options()
		        runner = Runner(options)
		        runner.Run()
		    except FatalError:
		        sys.stderr.write("FAILED\n")
		        # The details were already reported; a failed run must not exit with 0.
		        sys.exit(1)

		# Script entry point.
		if __name__ == "__main__":
		main(sys.argv)

tools/perf/format_benchmarks

0 → 100755

+185 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		# Copyright (C) 2023 The Android Open Source Project
		#
		# Licensed under the Apache License, Version 2.0 (the "License");
		# you may not use this file except in compliance with the License.
		# You may obtain a copy of the License at
		#
		# http://www.apache.org/licenses/LICENSE-2.0
		#
		# Unless required by applicable law or agreed to in writing, software
		# distributed under the License is distributed on an "AS IS" BASIS,
		# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		# See the License for the specific language governing permissions and
		# limitations under the License.

		import sys
		# When run as a script, turn off bytecode writing before the local modules
		# further down are imported.
		if __name__ == "__main__":
		sys.dont_write_bytecode = True

		import argparse
		import dataclasses
		import datetime
		import json
		import os
		import pathlib
		import statistics
		import zoneinfo

		import pretty
		import utils

		# TODO:
		# - Flag if the last postroll build was more than 15 seconds or something. That's
		# an indicator that something is amiss.
		# - Add a mode to print all of the values for multi-iteration runs
		# - Add a flag to reorder the tags
		# - Add a flag to reorder the headers in order to show grouping more clearly.


		def FindSummaries(args):
		    """Return the sorted list of summary.json files named by `args`.

		    Each argument is either a summary file, which is used as given, or a
		    directory, which is searched recursively for files named summary.json.
		    With no arguments the default report directory next to the root of the tree
		    is searched; if the root can't be found the result is empty.

		    Prints a message and exits with status 1 if an argument is neither a file
		    nor a directory.
		    """
		    def find_summaries(directory):
		        # Sorted, so the result doesn't depend on the order glob returns them in.
		        return sorted(str(p.resolve()) for p in pathlib.Path(directory).glob("**/summary.json"))
		    if not args:
		        # If they didn't give an argument, use the default dir
		        root = utils.get_root()
		        if not root:
		            return []
		        return find_summaries(root.joinpath("..", utils.DEFAULT_REPORT_DIR))
		    results = []
		    for arg in args:
		        if os.path.isfile(arg):
		            # If it's a file add that
		            results.append(arg)
		        elif os.path.isdir(arg):
		            # If it's a directory, find all of the files there
		            results += find_summaries(arg)
		        else:
		            sys.stderr.write(f"Invalid summary argument: {arg}\n")
		            sys.exit(1)
		    return sorted(results)


		def LoadSummary(filename):
		    """Return the parsed JSON contents of the summary file `filename`."""
		    # Read as UTF-8 rather than the locale's encoding; the benchmarks tool
		    # writes summary.json as UTF-8.
		    with open(filename, encoding="utf-8") as f:
		        return json.load(f)

		# Columns:
		# Date
		# Branch
		# Tag
		# --
		# Lunch
		# Rows:
		# Benchmark

		@dataclasses.dataclass(frozen=True)
		class Key:
		    """Placeholder frozen dataclass; it has no fields yet."""

		class Column:
		    """Placeholder class; instances carry no state yet."""

		    def __init__(self):
		        return None

		def lunch_str(d):
		    """Join a lunch dict's product, release and variant into one dash-separated string."""
		    return "{TARGET_PRODUCT}-{TARGET_RELEASE}-{TARGET_BUILD_VARIANT}".format_map(d)

		def group_by(l, key):
		    """Return a list of (key, items) tuples grouping the items of `l` by `key(item)`.

		    The groups are in the order in which each key is first seen, not sorted,
		    and the items within a group keep their original order.
		    """
		    result = {}
		    for item in l:
		        result.setdefault(key(item), []).append(item)
		    return list(result.items())


		class Table:
		    """A table of cells addressed by (column key, row key).

		    Rows and columns appear in the order in which their keys are first set.
		    """

		    def __init__(self):
		        self._data = {}
		        self._rows = []
		        self._cols = []

		    def Set(self, column_key, row_key, data):
		        """Store `data` in the cell at (`column_key`, `row_key`)."""
		        self._data[(column_key, row_key)] = data
		        for key, seen in ((column_key, self._cols), (row_key, self._rows)):
		            if key not in seen:
		                seen.append(key)

		    def Write(self, out):
		        """Format the table and write it to `out`."""
		        lines = []
		        # Expand the column items: one header line per position in the column
		        # keys, leaving out positions where every column has the same value.
		        for header in zip(*self._cols):
		            if header.count(header[0]) != len(header):
		                lines.append(["", *header])
		        if lines:
		            lines.append(pretty.SEPARATOR)
		        # Populate the data; cells that were never set are left empty.
		        for row_key in self._rows:
		            cells = [str(self._data.get((col_key, row_key), "")) for col_key in self._cols]
		            lines.append([str(row_key)] + cells)
		        out.write(pretty.FormatTable(lines))


		def format_duration_sec(ns):
		    """Format a duration given in nanoseconds as hours, minutes and seconds.

		    The duration is rounded to whole seconds. Hours are shown only when
		    non-zero, and minutes only when there are hours or minutes to show.
		    """
		    total_sec = round(ns / 1000000000)
		    total_min, seconds = divmod(total_sec, 60)
		    hours, minutes = divmod(total_min, 60)
		    parts = []
		    if hours > 0:
		        parts.append(f"{hours:2d}h ")
		    if hours > 0 or minutes > 0:
		        parts.append(f"{minutes:2d}m ")
		    parts.append(f"{seconds:2d}s")
		    return "".join(parts)

		def main(argv):
		    """Print a table of median benchmark durations for the given summaries.

		    `argv` is accepted but not used; the arguments are parsed by argparse. There
		    is one column per (date, branch, tag, lunch combo) and one row per benchmark
		    title.
		    """
		    parser = argparse.ArgumentParser(
		            prog="format_benchmarks",
		            allow_abbrev=False, # Don't let people write unsupportable scripts.
		            description="Print analysis tables for benchmarks")

		    parser.add_argument("summaries", nargs="*",
		                        help="A summary.json file or a directory in which to look for summaries.")

		    args = parser.parse_args()

		    # Load the summaries
		    summaries = [(s, LoadSummary(s)) for s in FindSummaries(args.summaries)]

		    # Convert to MTV time
		    for _, s in summaries:
		        dt = datetime.datetime.fromisoformat(s["start_time"])
		        dt = dt.astimezone(zoneinfo.ZoneInfo("America/Los_Angeles"))
		        s["datetime"] = dt
		        s["date"] = datetime.date(dt.year, dt.month, dt.day)

		    # Sort the summaries. The tag is null in summary.json when no --tag was
		    # given, and None can't be ordered against a string, so sort it as "".
		    summaries.sort(key=lambda s: (s[1]["date"], s[1]["branch"], s[1]["tag"] or ""))

		    # group the benchmarks by column and iteration
		    def bm_key(b):
		        return (
		            lunch_str(b["lunch"]),
		        )
		    for _, summary in summaries:
		        summary["columns"] = [(key, group_by(bms, lambda b: b["id"])) for key, bms
		                              in group_by(summary["benchmarks"], bm_key)]

		    # Build the table
		    table = Table()
		    for _, summary in summaries:
		        for key, column in summary["columns"]:
		            for _benchmark_id, cell in column:
		                # One value per iteration of the benchmark; show the median.
		                duration_ns = statistics.median([b["duration_ns"] for b in cell])
		                # strftime needs % directives; "YYYY-MM-DD" would be printed literally.
		                table.Set(tuple([summary["date"].strftime("%Y-%m-%d"),
		                                 summary["branch"],
		                                 summary["tag"]]
		                                + list(key)),
		                          cell[0]["title"], format_duration_sec(duration_ns))

		    table.Write(sys.stdout)

		# Script entry point.
		if __name__ == "__main__":
		main(sys.argv)

tools/perf/pretty.py

0 → 100644

+52 −0

File added.

Preview size limit exceeded, changes collapsed.

tools/perf/utils.py

0 → 100644

+30 −0

File added.

Preview size limit exceeded, changes collapsed.