From 7f5ac83513c55a79bb32ac1be05250d865ff01f4 Mon Sep 17 00:00:00 2001 From: Romain Hunault Date: Fri, 27 Mar 2026 15:33:20 +0100 Subject: [PATCH] feat(sync): add repo ref preflight and tunable hardening --- Dockerfile | 16 +++++ Dockerfile.community | 16 +++++ README.md | 10 +++ src/build.sh | 129 +++++++++++++++++++++++++++++++------- src/repo_ref_preflight.py | 116 ++++++++++++++++++++++++++++++++++ 5 files changed, 266 insertions(+), 21 deletions(-) create mode 100644 src/repo_ref_preflight.py diff --git a/Dockerfile b/Dockerfile index 1ccc8db..fc73d54 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,6 +47,22 @@ ENV CHANGE_REF='' # A size of 0 deactivates --depth N ENV REPO_INIT_DEPTH=0 +# Optional parameter to override repo sync parallelism +# An empty value keeps the default derived from available CPUs, capped in build.sh +ENV REPO_SYNC_JOBS='' + +# Stop repo sync as soon as possible when a project fails +ENV REPO_SYNC_FAIL_FAST=false + +# Disable clone.bundle bootstrap during repo sync +ENV REPO_SYNC_NO_CLONE_BUNDLE=false + +# Additional raw flags appended to repo sync +ENV REPO_SYNC_EXTRA_FLAGS='' + +# Validate manifest refs before the expensive sync phase +ENV REPO_SYNC_REF_PREFLIGHT=true + # User identity ENV USER_NAME='/e/ robot' ENV USER_MAIL='erobot@e.email' diff --git a/Dockerfile.community b/Dockerfile.community index 09efd64..62a1cc1 100644 --- a/Dockerfile.community +++ b/Dockerfile.community @@ -38,6 +38,22 @@ ENV RELEASE_TYPE='UNOFFICIAL' # Repo use for build ENV REPO='https://gitlab.e.foundation/e/os/android.git' +# Optional parameter to override repo sync parallelism +# An empty value keeps the default derived from available CPUs, capped in build.sh +ENV REPO_SYNC_JOBS='' + +# Stop repo sync as soon as possible when a project fails +ENV REPO_SYNC_FAIL_FAST=false + +# Disable clone.bundle bootstrap during repo sync +ENV REPO_SYNC_NO_CLONE_BUNDLE=false + +# Additional raw flags appended to repo sync +ENV REPO_SYNC_EXTRA_FLAGS='' + +# Validate 
manifest refs before the expensive sync phase +ENV REPO_SYNC_REF_PREFLIGHT=true + # Repo use for build ENV MIRROR='https://github.com/LineageOS/mirror' diff --git a/README.md b/README.md index 572c44f..bbc4a9d 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,16 @@ Other useful settings are: the tree is cleaned with `mka clean`. If you want to be sure that each build is isolated from the others, set `BUILD_OVERLAY` to `true` (longer build time). Requires `--cap-add=SYS_ADMIN`. + * `REPO_SYNC_JOBS`: overrides the `repo sync -j` value. By default the script + derives it from available CPUs and caps it at `10`. + * `REPO_SYNC_FAIL_FAST (false)`: when set to `true`, adds `--fail-fast` to + `repo sync` so large sync waves stop earlier on the first project failure. + * `REPO_SYNC_NO_CLONE_BUNDLE (false)`: when set to `true`, adds + `--no-clone-bundle` to `repo sync`. + * `REPO_SYNC_EXTRA_FLAGS`: appends raw flags to `repo sync` for temporary + operational tuning without rebuilding the image. + * `REPO_SYNC_REF_PREFLIGHT (true)`: validates the refs declared by the merged + manifest before the expensive `repo sync` phase starts. 
* `MIRROR (https://github.com/LineageOS/mirror)`: Repo used for mirror * `LOCAL_MIRROR (false)`: change this to `true` if you want to create a local mirror of the LineageOS source (> 200 GB)
diff --git a/src/build.sh b/src/build.sh
index d9b54d4..b692907 100755
--- a/src/build.sh
+++ b/src/build.sh
@@ -44,6 +44,125 @@ extract_images() { fi }
+# Fill the global REPO_SYNC_ARGS array with the arguments for `repo sync`.
+# Always starts from `-c -j<N> --force-sync`, where N is REPO_SYNC_JOBS or,
+# when that is unset or empty, JOBS; the REPO_SYNC_* switches append to it.
+build_repo_sync_args() {
+  local sync_jobs
+  sync_jobs="${REPO_SYNC_JOBS:-$JOBS}"
+
+  REPO_SYNC_ARGS=(-c -j"$sync_jobs" --force-sync)
+
+  if [ "${REPO_SYNC_FAIL_FAST}" = true ]; then
+    REPO_SYNC_ARGS+=(--fail-fast)
+  fi
+
+  if [ "${REPO_SYNC_NO_CLONE_BUNDLE}" = true ]; then
+    REPO_SYNC_ARGS+=(--no-clone-bundle)
+  fi
+
+  if [ -n "${REPO_SYNC_EXTRA_FLAGS}" ]; then
+    # Split on whitespace only: quotes inside the value are not interpreted
+    # and glob characters are not expanded.
+    local extra_flags=()
+    read -r -a extra_flags <<<"${REPO_SYNC_EXTRA_FLAGS}"
+    REPO_SYNC_ARGS+=("${extra_flags[@]}")
+  fi
+}
+
+# Run `repo sync` with the arguments from build_repo_sync_args and print its
+# combined stdout/stderr once the command has finished. If the sync fails and
+# /root/userscripts/clean.sh exists, run it with the failing projects parsed
+# from that output and retry once. Returns 0 on success, the retry's exit
+# status when a retry ran, and 1 otherwise.
+run_repo_sync() {
+  local sync_log repo_status repo_out list_line
+  build_repo_sync_args
+
+  sync_log=$(mktemp)
+  echo ">> [$(date)] Running: repo sync ${REPO_SYNC_ARGS[*]}"
+  repo sync "${REPO_SYNC_ARGS[@]}" >"$sync_log" 2>&1
+  repo_status=$?
+  repo_out=$(cat "$sync_log")
+  cat "$sync_log"
+  rm -f "$sync_log"
+
+  if [ "$repo_status" = "0" ]; then
+    return 0
+  fi
+
+  if [ -f /root/userscripts/clean.sh ]; then
+    if [[ "$repo_out" == *"Failing repos:"* ]]; then
+      # Join the output into a single line first, like the inline code this
+      # function replaces did with its unquoted `echo -e $repo_out`. repo is
+      # expected to list the projects on the lines after "Failing repos:",
+      # where a line-by-line sed would never see them.
+      list_line=$(printf '%s\n' "$repo_out" | tr '\n' ' ' | sed -n 's/.*Failing repos: *//p')
+    fi
+    if [[ "$repo_out" == *"Cannot remove project"* ]]; then
+      list_line=$(printf '%s\n' "$repo_out" | grep "Cannot remove project" | sed -e 's/.*error: \(.*\): Cannot.*/\1/' | tail -n 1)
+    fi
+    echo ">> [$(date)] Running clean.sh"
+    # Left unquoted so the list is word-split into separate arguments.
+    /root/userscripts/clean.sh $list_line
+    echo ">> [$(date)] Retrying: repo sync ${REPO_SYNC_ARGS[*]}"
+    repo sync "${REPO_SYNC_ARGS[@]}"
+    return $?
+  fi
+
+  return 1
+}
+
+# Check that ref $2 exists on remote $1; $3 labels the log lines. Returns 1
+# when `git ls-remote --exit-code` fails, and 0 when it succeeds or when the
+# remote or the ref is empty (the check is skipped in that case).
+preflight_git_ref() {
+  local remote_url="$1"
+  local ref="$2"
+  local label="$3"
+
+  if [ -z "$remote_url" ] || [ -z "$ref" ]; then
+    echo ">> [$(date)] Skipping ref preflight for ${label}: incomplete remote/ref"
+    return 0
+  fi
+
+  echo ">> [$(date)] Preflighting ${label}: git ls-remote --exit-code \"$remote_url\" \"$ref\""
+  if ! git ls-remote --exit-code "$remote_url" "$ref" >/dev/null 2>&1; then
+    echo ">> [$(date)] Missing expected ref '$ref' on '$remote_url' for ${label}"
+    return 1
+  fi
+
+  return 0
+}
+
+# Export the merged manifest with `repo manifest` and validate its refs with
+# /root/repo_ref_preflight.py. Returns 0 when REPO_SYNC_REF_PREFLIGHT is
+# `false` (check skipped) or the validation passes, and 1 when the export or
+# the validation fails.
+run_repo_manifest_ref_preflight() {
+  local manifest_file
+
+  if [ "${REPO_SYNC_REF_PREFLIGHT}" = false ]; then
+    echo ">> [$(date)] Skipping manifest ref preflight"
+    return 0
+  fi
+
+  manifest_file=$(mktemp)
+  if ! repo manifest -o "$manifest_file" >/dev/null 2>&1; then
+    echo ">> [$(date)] Unable to export merged manifest for preflight"
+    rm -f "$manifest_file"
+    return 1
+  fi
+
+  if ! /usr/bin/python3 /root/repo_ref_preflight.py --manifest "$manifest_file"; then
+    rm -f "$manifest_file"
+    return 1
+  fi
+
+  rm -f "$manifest_file"
+  return 0
+}
+
clean_up() { # Remove old zips and logs if [ "$DELETE_OLD_ZIPS" -gt "0" ]; then
@@ -209,6 +328,10 @@ if [ -n "${BRANCH_NAME}" ] && [ -n "${DEVICE}" ]; then INIT_BRANCH=$CHANGE_REF fi + if ! preflight_git_ref "$INIT_REPO" "$INIT_BRANCH" "repo init"; then + exit 1 + fi + # Repo init source echo ">> [$(date)] Running: repo init $REPO_INIT_PARAM -u \"$INIT_REPO\" -b \"${INIT_BRANCH}\" $REPO_GROUPS" yes | repo init $REPO_INIT_PARAM -u "$INIT_REPO" -b "${INIT_BRANCH}" $REPO_GROUPS
@@ -230,28 +353,14 @@ if [ -n "${BRANCH_NAME}" ] && [ -n "${DEVICE}" ]; then fi + + if ! run_repo_manifest_ref_preflight; then + exit 1 + fi + echo ">> [$(date)] Syncing branch repository" builddate=$(date +%Y%m%d) - repo_out=$(repo sync -c -j"$JOBS" --force-sync 2>&1 >/dev/null) - repo_status=$?
- echo -e $repo_out - - if [ "$repo_status" != "0" ]; then - if [ -f /root/userscripts/clean.sh ]; then - if [[ "$repo_out" == *"Failing repos:"* ]]; then - list_line=$(echo -e $repo_out | sed 's/.*Failing repos: //') - fi - if [[ "$repo_out" == *"Cannot remove project"* ]]; then - list_line=$(echo -e $repo_out | grep "Cannot remove project" | sed -e 's/.*error: \(.*\): Cannot.*/\1/') - fi - echo ">> [$(date)] Running clean.sh" - /root/userscripts/clean.sh $list_line - if ! repo sync -c --force-sync; then - sync_successful=false - fi - else - sync_successful=false - fi + if ! run_repo_sync; then + sync_successful=false fi if [ "$sync_successful" = true ]; then
@@ -303,7 +390,7 @@ if [ -n "${BRANCH_NAME}" ] && [ -n "${DEVICE}" ]; then echo ">> [$(date)] Syncing branch repository" cd "$SRC_DIR/$branch_dir" || return 1 - if ! repo sync -c -j"$JOBS" --force-sync; then + if ! run_repo_sync; then sync_successful=false build_device=false fi
diff --git a/src/repo_ref_preflight.py b/src/repo_ref_preflight.py
new file mode 100644
index 0000000..efc5149
--- /dev/null
+++ b/src/repo_ref_preflight.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""Verify that the refs named in a merged repo manifest exist on their remotes.
+
+Usage: repo_ref_preflight.py --manifest MERGED_MANIFEST_XML
+
+Exits 0 when every checked ref is found by ``git ls-remote`` and 1 otherwise.
+"""
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+
+
+# Revisions that are full 40-digit SHA-1 hashes are skipped by parse_manifest.
+SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}$")
+# Fetch prefixes given relative to the manifest repository: ".", "..", "../x".
+RELATIVE_FETCH_RE = re.compile(r"^\.\.?(/|$)")
+
+
+def strip_tag(tag):
+    """Return *tag* without its leading ``{namespace}`` part, if it has one."""
+    if "}" in tag:
+        return tag.split("}", 1)[1]
+    return tag
+
+
+def join_remote(fetch, project_name):
+    """Build the URL to query for *project_name* under the *fetch* prefix.
+
+    A *project_name* that is already a URL or an scp-style address is returned
+    unchanged. Returns "" when no usable URL can be built: *fetch* is empty, or
+    *fetch* is relative. The repo manifest format resolves a relative fetch
+    against the manifest repository URL, which is not known here; joining it
+    verbatim would hand ``git ls-remote`` a local path and make every ref of
+    that remote look missing.
+    """
+    if not fetch:
+        return ""
+    if "://" in project_name or re.match(r"^[^@]+@[^:]+:.+$", project_name):
+        return project_name
+    if RELATIVE_FETCH_RE.match(fetch):
+        return ""
+    separator = ""
+    if not fetch.endswith("/") and not fetch.endswith(":"):
+        separator = "/"
+    return f"{fetch}{separator}{project_name}"
+
+
+def parse_manifest(path):
+    """Collect the named refs declared by the manifest XML file at *path*.
+
+    Returns a dict mapping ``(remote_url, revision)`` to the list of project
+    names that use that pair. A project is skipped when its URL or revision
+    cannot be determined, or when its revision is a full SHA-1.
+
+    Raises ``xml.etree.ElementTree.ParseError`` or ``OSError`` when *path*
+    cannot be parsed or read.
+    """
+    root = ET.parse(path).getroot()
+
+    remotes = {}
+    default_remote = None
+    default_revision = None
+
+    # <remote> and <default> are looked up among the root's direct children only.
+    for element in root:
+        tag = strip_tag(element.tag)
+        if tag == "remote":
+            remote_name = element.attrib.get("name")
+            if not remote_name:
+                # No project can refer to a remote that has no name.
+                continue
+            remotes[remote_name] = {
+                "fetch": element.attrib.get("fetch", ""),
+                "revision": element.attrib.get("revision", ""),
+            }
+        elif tag == "default":
+            default_remote = element.attrib.get("remote")
+            default_revision = element.attrib.get("revision", "")
+
+    refs = {}
+    # iter() walks the whole tree, so nested <project> elements are included.
+    for element in root.iter():
+        if strip_tag(element.tag) != "project":
+            continue
+
+        name = element.attrib.get("name")
+        if not name:
+            continue
+
+        remote_name = element.attrib.get("remote", default_remote)
+        remote = remotes.get(remote_name or "", {})
+        remote_url = join_remote(remote.get("fetch", ""), name)
+        # Precedence: the project's revision, then its remote's, then <default>.
+        revision = (
+            element.attrib.get("revision")
+            or remote.get("revision")
+            or default_revision
+        )
+
+        if not remote_url or not revision or SHA1_RE.match(revision):
+            continue
+
+        refs.setdefault((remote_url, revision), []).append(name)
+
+    return refs
+
+
+def check_refs(refs):
+    """Return the entries of *refs* that ``git ls-remote`` could not confirm.
+
+    *refs* maps ``(remote_url, revision)`` to project names, as returned by
+    ``parse_manifest``. The result lists a ``(remote_url, revision, projects)``
+    tuple for each pair whose ``git ls-remote --exit-code`` call exited
+    non-zero, in sorted key order.
+    """
+    # GIT_TERMINAL_PROMPT=0 makes git fail instead of asking for credentials on
+    # the terminal, so an unattended build cannot block on a remote here.
+    env = dict(os.environ, GIT_TERMINAL_PROMPT="0")
+    failures = []
+    for (remote_url, revision), projects in sorted(refs.items()):
+        result = subprocess.run(
+            ["git", "ls-remote", "--exit-code", remote_url, revision],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            env=env,
+            check=False,
+        )
+        if result.returncode != 0:
+            failures.append((remote_url, revision, projects))
+    return failures
+
+
+def main():
+    """Run the preflight for the ``--manifest`` file and return the exit status."""
+    parser = argparse.ArgumentParser(
+        description="Validate that manifest refs exist before running repo sync."
+    )
+    parser.add_argument("--manifest", required=True, help="Path to merged manifest XML")
+    args = parser.parse_args()
+
+    try:
+        refs = parse_manifest(args.manifest)
+    except (ET.ParseError, OSError) as err:
+        # Report an unusable manifest as one log line rather than a traceback.
+        print(f">> ERROR: unable to read manifest '{args.manifest}': {err}", file=sys.stderr)
+        return 1
+    # The timestamp comes from `date` so it looks like build.sh's $(date) prefix.
+    print(f">> [{subprocess.getoutput('date')}] Preflighting {len(refs)} manifest refs")
+
+    failures = check_refs(refs)
+    if not failures:
+        return 0
+
+    for remote_url, revision, projects in failures:
+        # Name at most five projects per failing ref to keep the line short.
+        projects_list = ", ".join(projects[:5])
+        if len(projects) > 5:
+            projects_list += ", ..."
+        print(
+            ">> ERROR: missing manifest ref "
+            f"revision='{revision}' remote='{remote_url}' projects=[{projects_list}]",
+            file=sys.stderr,
+        )
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
-- GitLab