profile__test_8py_source.html

#!/usr/bin/env python3


"""Compare two builds with timing runs and Valgrind profiling.


Results are stored under ``$HOME/tmp`` by default; pass ``--result-dir`` to write them elsewhere.

The script prints the chosen results directory immediately on startup so a

long-running comparison can be monitored from another shell.


Typical usage:


    python3 mofem/tools/profile_test.py \

      --build-dir-a /path/to/build_a \

      --build-dir-b /path/to/build_b \

      --exe-relpath tutorials/vec-2_nonlinear_elasticity/nonlinear_elastic \

      --label-a develop \

      --label-b transpose \

      --commit-a fe611e716d7538c51b067559b95be15daded86ca \

      --commit-b e24058e5681cfdf56ca336e5cf708a1b0a59a575 \

      --workdir /abs/path/to/run/cwd \

      --petsc-event SNESFunctionEval \

      --petsc-event SNESJacobianEval \

      -- \

      -file_name /abs/path/to/beam_3D.cub \

      -order 3 -ts_max_time 0.1 -ts_dt 0.1 -log_no_color -test 1 -test_op

"""


from __future__ import annotations


import argparse

import csv

import hashlib

import json

import math

import os

import re

import shutil

import shlex

import statistics

import subprocess

import sys

import time

from collections import OrderedDict

from dataclasses import asdict, dataclass

from datetime import datetime, timezone

from html import escape

from pathlib import Path

from typing import Iterable


# One log row shaped like "[<digits>] <tag> [petsc] <name> <int> <token> <number> ...".
# Group 1 is the event name; group 2 is the first numeric column after the
# integer/token pair (presumably the time column of -log_view -- confirm).
PETSC_EVENT_RE = re.compile(
    r"^\s*\[\d+\]\s+<[^>]+>\s+\[petsc\]\s+(\S+)\s+\d+\s+\S+\s+([0-9.+\-eE]+)\b"
)
# One callgrind_annotate row: "<count with thousands separators> (<anything>) <rest>".
# Group 1 is the count, group 2 the remainder of the line.  The parentheses
# around the middle field are literal, so they must be escaped with a plain
# backslash (no invisible characters between the backslash and the bracket).
CALLGRIND_LINE_RE = re.compile(r"^\s*([\d,]+)\s+\([^)]+\)\s+(.*)$")
# CMakeCache.txt entry naming the source tree; the ":TYPE" suffix is optional.
CMAKE_HOME_RE = re.compile(r"^CMAKE_HOME_DIRECTORY(?::\w+)?=(.*)$")
# "(<id>)" optionally followed by whitespace and a name.
# Group 1 is the numeric id, group 2 the name (None when absent).
CALLGRIND_REF_RE = re.compile(r"^\((\d+)\)(?:\s+(.*))?$")


# Baseline options for `valgrind --tool=callgrind`.
DEFAULT_CALLGRIND_ARGS = [
    "--dump-instr=yes",
    "--collect-jumps=yes",
    "--cache-sim=yes",
    "--branch-sim=yes",
]
# perf events that are always requested; IPC is derived from these two.
DEFAULT_PERF_EVENTS = ["instructions", "cycles"]
DEFAULT_SVG_BACKGROUND = "#ffffff"
# Defaults for the --sim-* options of the Valgrind estimated-cycle model.
DEFAULT_SIM_BASE_CPI = 1.0
DEFAULT_SIM_L1_MISS_PENALTY = 5.0
DEFAULT_SIM_LL_MISS_PENALTY = 50.0
DEFAULT_SIM_BRANCH_MISS_PENALTY = 15.0
# Regexes for function rows that carry no useful per-function information.
# Parentheses that must match literally are escaped with a plain backslash.
DEFAULT_EXCLUDE_PATTERNS = [
    r"^PROGRAM TOTALS$",
    r"^0x[0-9a-fA-F]+$",
    r"^\(\d+\)$",
    r"\(below main\)",
    r"__libc_start_main",
    r"__libc_start_call_main",
    r"ld-linux",
]
# CMake cache variables copied from --seed-cache into a fresh configure.
DEFAULT_SEED_CACHE_KEYS = [
    "ADOL-C_DIR",
    "ADOL-C_HEADER",
    "ADOL-C_LIBRARY",
    "BLAS_DIR",
    "CBLAS_INCLUDE_DIR",
    "CMAKE_PREFIX_PATH",
    "MOAB_DIR",
    "MOAB_VARIABLES_FILE",
    "MPI_RUN",
    "MPI_RUN_FLAGS",
    "PETSC_ARCH",
    "PETSC_DIR",
    "PETSC_VARIABLES_FILE",
    "SLEPC_DIR",
]


@dataclass
class Case:
    """One side of the comparison: an executable plus where it came from."""

    # Identifier used as a prefix in error messages.
    name: str
    # Human-readable label; also used as the per-case results subdirectory.
    label: str
    # Build tree the executable belongs to; None when only an executable path was given.
    build_dir: str | None
    # Path of the executable that gets run.
    executable: str
    # Commit or revision label, when supplied or inferable from the source tree.
    commit: str | None
    # Source tree of the build, when supplied or read from CMakeCache.txt.
    source_dir: str | None
    # Per-case working directory; takes precedence over the global --workdir when set.
    run_workdir: str | None


def parse_args() -> argparse.Namespace:
    """Define the command-line interface, parse ``sys.argv`` and validate it.

    Everything after ``--`` lands in ``cmd_args`` and is meant for the
    profiled executable.  Invalid combinations are reported through
    ``parser.error``, which prints the usage text and exits.

    Returns:
        The parsed namespace, with a leading ``--`` removed from ``cmd_args``.
    """
    parser = argparse.ArgumentParser(
        description="Profile two builds with timings and Callgrind."
    )
    add = parser.add_argument

    # Where the two executables come from.
    add("--build-dir-a", help="Build directory for case A.")
    add("--build-dir-b", help="Build directory for case B.")
    add("--exe-a", help="Executable for case A.")
    add("--exe-b", help="Executable for case B.")
    add("--repo", help="Git repository to build from refs.")
    add("--ref-a", help="Git branch/tag/commit for case A.")
    add("--ref-b", help="Git branch/tag/commit for case B.")
    add(
        "--exe-relpath",
        default="tutorials/vec-2_nonlinear_elasticity/nonlinear_elastic",
        help="Executable path relative to each build dir.",
    )

    # Labelling and provenance.
    add("--label-a", default="case_a", help="Label for case A.")
    add("--label-b", default="case_b", help="Label for case B.")
    add("--commit-a", help="Commit or revision label for case A.")
    add("--commit-b", help="Commit or revision label for case B.")
    add("--source-dir-a", help="Source directory for case A.")
    add("--source-dir-b", help="Source directory for case B.")

    # Run location.
    add(
        "--workdir",
        help="Working directory used for runs. Defaults to the executable directory.",
    )
    add(
        "--workdir-relpath",
        help="Working directory relative to each build dir in source-build mode.",
    )

    # Source-build mode (configure + build from git refs).
    add(
        "--configure-arg",
        action="append",
        default=[],
        help="Extra argument passed to `cmake -S ... -B ...` in source-build mode.",
    )
    add(
        "--build-arg",
        action="append",
        default=[],
        help="Extra argument passed to `cmake --build` in source-build mode.",
    )
    add("--build-target", help="CMake build target in source-build mode.")
    add(
        "--build-jobs",
        type=int,
        default=max(1, os.cpu_count() or 1),
        help="Parallel jobs for `cmake --build` in source-build mode.",
    )
    add(
        "--seed-cache",
        help="Existing build directory or CMakeCache.txt used to seed configure variables in source-build mode.",
    )
    add(
        "--seed-cache-var",
        action="append",
        default=[],
        help="Extra CMake cache variable name to import from --seed-cache.",
    )

    # Timing and PETSc events.
    add(
        "--timing-runs",
        type=int,
        default=3,
        help="Number of normal timing runs per case.",
    )
    add(
        "--petsc-event",
        action="append",
        default=[],
        help="PETSc event name to extract from -log_view output. Can be repeated.",
    )

    # Callgrind and perf.
    add(
        "--callgrind-arg",
        action="append",
        default=[],
        help="Extra argument passed to valgrind --tool=callgrind. Can be repeated.",
    )
    add(
        "--perf-stat",
        action="store_true",
        help="Collect perf stat totals and compute IPC = instructions / cycles.",
    )
    add(
        "--perf-bin",
        default="perf",
        help="perf binary path. Useful when the perf wrapper does not match the running kernel.",
    )
    add(
        "--perf-runs",
        type=int,
        default=3,
        help="Number of perf stat runs per case.",
    )
    add(
        "--perf-event",
        action="append",
        default=[],
        help="Additional perf stat event. instructions and cycles are always collected.",
    )
    add(
        "--require-perf",
        action="store_true",
        help="Fail if perf stat collection fails.",
    )

    # Estimated-cycle model parameters.
    add(
        "--sim-base-cpi",
        type=float,
        default=DEFAULT_SIM_BASE_CPI,
        help="Base cycles-per-instruction used for the Valgrind estimated-cycle model.",
    )
    add(
        "--sim-l1-miss-penalty",
        type=float,
        default=DEFAULT_SIM_L1_MISS_PENALTY,
        help="Penalty for simulated L1 misses that are not also LL misses.",
    )
    add(
        "--sim-ll-miss-penalty",
        type=float,
        default=DEFAULT_SIM_LL_MISS_PENALTY,
        help="Penalty for simulated LL cache misses in the Valgrind estimated-cycle model.",
    )
    add(
        "--sim-branch-miss-penalty",
        type=float,
        default=DEFAULT_SIM_BRANCH_MISS_PENALTY,
        help="Penalty for simulated branch mispredictions in the Valgrind estimated-cycle model.",
    )

    # Reporting.
    add(
        "--top",
        type=int,
        default=10,
        help="Number of top branch differences to print and plot.",
    )
    add(
        "--result-dir",
        help="Directory for results. Defaults to $HOME/tmp/profile_test_<timestamp>.",
    )
    add(
        "--exclude-pattern",
        action="append",
        default=[],
        help="Regex for function rows to exclude from the diff report.",
    )
    add(
        "--include-pattern",
        action="append",
        default=[],
        help="Regex for function rows to keep. If omitted, all rows are considered.",
    )
    add("--skip-timing", action="store_true", help="Skip normal timing runs.")
    add("--skip-callgrind", action="store_true", help="Skip Callgrind runs.")
    add(
        "cmd_args",
        nargs=argparse.REMAINDER,
        help="Arguments for the profiled executable. Put them after --",
    )

    args = parser.parse_args()

    # REMAINDER may keep the separator itself as the first element.
    if args.cmd_args[:1] == ["--"]:
        args.cmd_args = args.cmd_args[1:]

    if args.repo:
        if not (args.ref_a and args.ref_b):
            parser.error("When --repo is used, provide both --ref-a and --ref-b.")
    else:
        if not (args.exe_a or args.build_dir_a):
            parser.error("Provide either --exe-a or --build-dir-a, or use --repo with refs.")
        if not (args.exe_b or args.build_dir_b):
            parser.error("Provide either --exe-b or --build-dir-b, or use --repo with refs.")

    positive_counts = (
        ("--timing-runs", args.timing_runs),
        ("--perf-runs", args.perf_runs),
        ("--build-jobs", args.build_jobs),
    )
    for flag, count in positive_counts:
        if count < 1:
            parser.error(f"{flag} must be >= 1.")

    if not args.cmd_args:
        parser.error("Pass the executable arguments after --.")

    return args


def sanitize_label(value: str) -> str:
    """Turn ``value`` into a file-name-safe token.

    Runs of characters outside ``A-Za-z0-9_.-`` collapse to one underscore and
    underscores at either end are dropped; ``"case"`` is returned if nothing
    is left.
    """
    cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", value).strip("_")
    if cleaned:
        return cleaned
    return "case"


def stable_cache_name(value: str) -> str:
    """Return a deterministic directory name for ``value``.

    The name is the sanitized text followed by the first ten hex digits of
    the SHA-1 of the original text, so distinct inputs that sanitize to the
    same token still get different names.
    """
    readable = sanitize_label(value)
    fingerprint = hashlib.sha1(value.encode()).hexdigest()
    return "_".join((readable, fingerprint[:10]))


def ensure_results_dir(requested: str | None, label_a: str, label_b: str) -> Path:
    """Pick the results directory, create it if needed, and return it.

    Args:
        requested: Explicit directory; used (expanded and resolved) when given.
        label_a: Label of case A, used in the generated directory name.
        label_b: Label of case B, used in the generated directory name.

    Returns:
        The existing-or-created results directory.  Without ``requested`` it
        is ``$HOME/tmp/profile_test_<UTC timestamp>_<a>_vs_<b>``.
    """
    if not requested:
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        pair = f"{sanitize_label(label_a)}_vs_{sanitize_label(label_b)}"
        target = Path.home() / "tmp" / f"profile_test_{stamp}_{pair}"
    else:
        target = Path(requested).expanduser().resolve()
    target.mkdir(parents=True, exist_ok=True)
    return target


def read_cmake_home_directory(build_dir: str | None) -> str | None:
    """Return the ``CMAKE_HOME_DIRECTORY`` value recorded in a build tree.

    Returns None when ``build_dir`` is empty, when it has no
    ``CMakeCache.txt``, or when the cache lacks the entry.
    """
    if not build_dir:
        return None
    cache = Path(build_dir, "CMakeCache.txt")
    if not cache.exists():
        return None
    hits = (
        CMAKE_HOME_RE.match(row)
        for row in cache.read_text(errors="ignore").splitlines()
    )
    # First matching row wins.
    return next((hit.group(1) for hit in hits if hit), None)


def resolve_cmake_cache_path(build_or_cache: str) -> Path:
    """Resolve a build directory or cache file argument to a ``CMakeCache.txt`` path.

    A directory is taken to contain ``CMakeCache.txt``; anything else is
    treated as the cache file itself.

    Raises:
        FileNotFoundError: If the resulting path does not exist.
    """
    candidate = Path(build_or_cache).expanduser().resolve()
    cache_file = candidate / "CMakeCache.txt" if candidate.is_dir() else candidate
    if cache_file.exists():
        return cache_file
    raise FileNotFoundError(f"CMake cache not found: {cache_file}")


def read_cmake_cache_entries(cache_path: Path) -> dict[str, tuple[str, str]]:
    """Parse ``KEY:TYPE=VALUE`` lines of a CMake cache file.

    Comment lines (``//`` or ``#``) and lines that are not of the
    ``KEY:TYPE=VALUE`` shape are skipped.  The value may itself contain
    ``=`` or ``:``; only the first ``=`` and the first ``:`` before it are
    treated as separators.

    Args:
        cache_path: Path of the cache file to read.

    Returns:
        Mapping from key to ``(type, value)``.  Later duplicates win.
    """
    entries: dict[str, tuple[str, str]] = {}
    for line in cache_path.read_text(errors="ignore").splitlines():
        if line.startswith(("//", "#")):
            continue
        key_type, equals, value = line.partition("=")
        if not equals:
            continue
        # The type separator must sit before the "="; a colon that only occurs
        # in the value (e.g. "FOO=a:b") does not make the line a typed entry.
        key, colon, value_type = key_type.partition(":")
        if not colon:
            continue
        entries[key] = (value_type, value)
    return entries


def make_seed_configure_args(seed_cache: str, extra_keys: list[str]) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Translate selected entries of an existing CMake cache into ``-D`` arguments.

    Args:
        seed_cache: Build directory or ``CMakeCache.txt`` to read from.
        extra_keys: Variable names to import in addition to the default set.

    Returns:
        ``(configure_args, imported)``: the ``-DKEY:TYPE=VALUE`` strings and a
        mapping ``key -> {"type": ..., "value": ...}`` of what was imported.
        Keys missing from the cache or whose value ends in ``-NOTFOUND`` are
        left out.
    """
    entries = read_cmake_cache_entries(resolve_cmake_cache_path(seed_cache))
    configure_args: list[str] = []
    imported: dict[str, dict[str, str]] = {}
    for key in ordered_unique([*DEFAULT_SEED_CACHE_KEYS, *extra_keys]):
        found = entries.get(key)
        if found and not found[1].endswith("-NOTFOUND"):
            cache_type, cache_value = found
            configure_args.append(f"-D{key}:{cache_type}={cache_value}")
            imported[key] = {"type": cache_type, "value": cache_value}
    return configure_args, imported


def infer_commit(source_dir: str | None) -> str | None:
    """Best-effort lookup of the ``HEAD`` commit of ``source_dir``.

    Returns None when no directory is given, when git is not installed, when
    the git command fails, or when it prints nothing.
    """
    if not source_dir:
        return None
    git_cmd = ["git", "-C", source_dir, "rev-parse", "HEAD"]
    try:
        head = subprocess.run(
            git_cmd,
            check=True,
            capture_output=True,
            text=True,
        ).stdout.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    return head if head else None


def resolve_git_commit(repo_path: Path, ref: str) -> str:
    """Resolve ``ref`` to a full commit id inside ``repo_path``.

    Raises:
        RuntimeError: If git cannot resolve the ref to a commit; git's stderr
            is included in the message.
    """
    git_cmd = [
        "git",
        "-C",
        str(repo_path),
        "rev-parse",
        "--verify",
        f"{ref}^{{commit}}",
    ]
    try:
        resolved = subprocess.run(
            git_cmd,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        details = exc.stderr.strip()
        raise RuntimeError(
            f"Failed to resolve ref {ref!r} in {repo_path}\n{details}"
        ) from exc
    return resolved.stdout.strip()


def reusable_repo_cache_root(repo_path: Path) -> Path:
    """Return the per-repository cache directory under ``$HOME/tmp/profile_test_cache``."""
    cache_base = Path.home().joinpath("tmp", "profile_test_cache")
    return cache_base / stable_cache_name(str(repo_path))


def _worktree_error(summary: str, log_path: Path) -> RuntimeError:
    """Build the error raised when a git worktree step fails, quoting its log tail."""
    return RuntimeError(
        f"{summary}\n"
        f"log: {log_path}\n"
        f"last lines:\n{tail_text(log_path)}"
    )


def _worktree_add_detached(
    repo_path: Path,
    worktree_dir: Path,
    target_commit: str,
    ref: str,
    label: str,
    case_log_dir: Path,
    action: str,
) -> None:
    """Run ``git worktree add --detach``; ``action`` ("create"/"recreate") words the error."""
    log_path = case_log_dir / "worktree_add.log"
    rc, _ = run_command(
        [
            "git",
            "-C",
            str(repo_path),
            "worktree",
            "add",
            "--detach",
            str(worktree_dir),
            target_commit,
        ],
        cwd=str(repo_path),
        log_path=log_path,
        announce_name=f"{label} worktree add",
        print_cmd=True,
    )
    if rc != 0:
        raise _worktree_error(
            f"failed to {action} reusable worktree for ref {ref}", log_path
        )


def _worktree_is_rooted_at(worktree_dir: Path) -> bool:
    """Return True only if git reports ``worktree_dir`` itself as the checkout root."""
    probe = subprocess.run(
        ["git", "-C", str(worktree_dir), "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if probe.returncode != 0:
        return False
    toplevel = probe.stdout.strip()
    if not toplevel:
        return False
    try:
        # samefile compares the underlying files, so symlinked prefixes of the
        # two spellings do not matter.
        return worktree_dir.samefile(toplevel)
    except OSError:
        return False


def ensure_reusable_worktree(
    repo_path: Path,
    ref: str,
    label: str,
    case_log_dir: Path,
) -> tuple[Path, str]:
    """Provide a detached worktree of ``ref`` in the per-repository cache.

    The worktree directory name is derived from ``ref``, so repeated runs
    reuse it.  Three situations are handled:

    * the directory does not exist: a new worktree is added;
    * the directory exists but is not the root of its own checkout: it is
      deleted, stale worktree records are pruned, and it is added again;
    * the directory is a valid worktree: it is checked out at the target
      commit, hard-reset and cleaned of untracked files.

    Args:
        repo_path: Repository the worktree belongs to.
        ref: Branch, tag or commit to check out.
        label: Case label used in progress messages.
        case_log_dir: Directory receiving the ``worktree_*.log`` files.

    Returns:
        ``(worktree_dir, target_commit)`` where ``target_commit`` is the
        commit id ``ref`` resolved to.

    Raises:
        RuntimeError: If the ref cannot be resolved or any git step fails.
    """
    worktree_root = reusable_repo_cache_root(repo_path) / "worktrees"
    worktree_dir = worktree_root / stable_cache_name(ref)
    worktree_root.mkdir(parents=True, exist_ok=True)

    target_commit = resolve_git_commit(repo_path, ref)
    print(f"{label} reusable worktree: {worktree_dir}", flush=True)

    if not worktree_dir.exists():
        _worktree_add_detached(
            repo_path, worktree_dir, target_commit, ref, label, case_log_dir, "create"
        )
        return worktree_dir, target_commit

    # A leftover directory nested inside some other checkout (for example a
    # git-tracked home directory) makes `rev-parse` succeed for the enclosing
    # repository.  Requiring the reported root to be this directory keeps the
    # checkout/reset/clean below from ever running against another repository.
    if not _worktree_is_rooted_at(worktree_dir):
        shutil.rmtree(worktree_dir)
        prune_log = case_log_dir / "worktree_prune.log"
        rc, _ = run_command(
            ["git", "-C", str(repo_path), "worktree", "prune"],
            cwd=str(repo_path),
            log_path=prune_log,
            announce_name=f"{label} worktree prune",
            print_cmd=True,
        )
        if rc != 0:
            raise _worktree_error(
                f"failed to prune stale worktrees for {ref}", prune_log
            )
        _worktree_add_detached(
            repo_path, worktree_dir, target_commit, ref, label, case_log_dir, "recreate"
        )
        return worktree_dir, target_commit

    refresh_steps = [
        (
            "worktree_checkout.log",
            f"{label} worktree checkout",
            ["git", "-C", str(worktree_dir), "checkout", "--detach", target_commit],
        ),
        (
            "worktree_reset.log",
            f"{label} worktree reset",
            ["git", "-C", str(worktree_dir), "reset", "--hard", target_commit],
        ),
        (
            "worktree_clean.log",
            f"{label} worktree clean",
            ["git", "-C", str(worktree_dir), "clean", "-fd"],
        ),
    ]
    for log_name, announce_name, cmd in refresh_steps:
        step_log = case_log_dir / log_name
        rc, _ = run_command(
            cmd,
            cwd=str(worktree_dir),
            log_path=step_log,
            announce_name=announce_name,
            print_cmd=True,
        )
        if rc != 0:
            raise _worktree_error(
                f"failed to refresh reusable worktree for ref {ref}", step_log
            )

    return worktree_dir, target_commit


def resolve_case(
    name: str,
    label: str,
    build_dir: str | None,
    exe: str | None,
    exe_relpath: str,
    commit: str | None,
    source_dir: str | None,
) -> Case:
    """Describe a prebuilt case from an executable path and/or a build directory.

    Args:
        name: Identifier used in error messages.
        label: Human-readable case label.
        build_dir: Build directory, or (as a convenience) the executable
            itself; ignored for locating the executable when ``exe`` is given.
        exe: Explicit executable path; takes precedence over ``build_dir``.
        exe_relpath: Executable location relative to the build directory.
        commit: Revision label; inferred from the source tree when None.
        source_dir: Source tree; read from ``CMakeCache.txt`` when None.

    Returns:
        A ``Case`` with ``run_workdir`` left unset.

    Raises:
        ValueError: If neither ``exe`` nor ``build_dir`` is given.
        FileNotFoundError: If the executable does not exist.
    """
    if exe:
        exe_path = Path(exe).expanduser().resolve()
    else:
        if not build_dir:
            raise ValueError(f"{name}: provide either --exe or --build-dir")
        build_path = Path(build_dir).expanduser().resolve()
        if build_path.is_file():
            # The "build dir" is actually the executable: recover the build
            # root by stripping exe_relpath when the path ends with it.  If it
            # does not, keep the historical guess of three levels up, which is
            # the depth of the default exe_relpath.
            exe_path = build_path
            rel = Path(exe_relpath)
            rel_parts = rel.parts
            if (
                rel_parts
                and not rel.is_absolute()
                and exe_path.parts[-len(rel_parts):] == rel_parts
            ):
                levels = len(rel_parts)
            else:
                levels = 3
            build_root = exe_path
            for _ in range(levels):
                build_root = build_root.parent
            build_dir = str(build_root)
        else:
            exe_path = build_path / exe_relpath
    if not exe_path.exists():
        raise FileNotFoundError(
            f"{name}: executable not found: {exe_path}\n"
            f"  build_dir={build_dir}\n"
            f"  exe_relpath={exe_relpath}\n"
            f"Use --exe-a/--exe-b for full executable paths, or pass the build "
            f"directory to --build-dir-a/--build-dir-b."
        )

    if source_dir is None:
        source_dir = read_cmake_home_directory(build_dir)
    if commit is None:
        commit = infer_commit(source_dir)

    return Case(
        name=name,
        label=label,
        build_dir=str(Path(build_dir).expanduser().resolve()) if build_dir else None,
        executable=str(exe_path),
        commit=commit,
        source_dir=source_dir,
        run_workdir=None,
    )


def resolve_run_cwd(case: Case, fallback_workdir: str | None) -> str:
    """Choose the working directory for running ``case``.

    Preference order: the case's own ``run_workdir``, then
    ``fallback_workdir``, then the directory containing the executable.
    """
    return case.run_workdir or fallback_workdir or str(Path(case.executable).parent)


def prepare_source_case(
    name: str,
    label: str,
    repo: str,
    ref: str,
    result_dir: Path,
    exe_relpath: str,
    workdir_relpath: str | None,
    configure_args: list[str],
    build_args: list[str],
    build_target: str | None,
    build_jobs: int,
) -> Case:
    """Check out ``ref``, configure and build it, and describe the result as a ``Case``.

    The worktree and the build directory both live in the per-repository
    cache and are named after ``ref``, so later runs reuse them.  Configure
    and build output goes to ``configure.log`` / ``build.log`` under
    ``result_dir/<sanitized label>``.

    Args:
        name: Identifier used in error messages.
        label: Human-readable case label.
        repo: Path of the git repository.
        ref: Branch, tag or commit to build.
        result_dir: Results directory receiving the per-case logs.
        exe_relpath: Executable location relative to the build directory.
        workdir_relpath: Optional run directory relative to the build directory.
        configure_args: Extra arguments appended to the configure command.
        build_args: Extra arguments appended to the build command.
        build_target: Optional ``--target`` for the build.
        build_jobs: Value passed to ``-j``.

    Raises:
        FileNotFoundError: If the repository, ``CMakeLists.txt``, the built
            executable or the run directory cannot be found.
        RuntimeError: If configure or build exits with a non-zero status.
    """
    repo_path = Path(repo).expanduser().resolve()
    if not repo_path.exists():
        raise FileNotFoundError(f"{name}: repository not found: {repo_path}")

    case_log_dir = result_dir / sanitize_label(label)
    case_log_dir.mkdir(parents=True, exist_ok=True)
    worktree_dir, target_commit = ensure_reusable_worktree(
        repo_path=repo_path,
        ref=ref,
        label=label,
        case_log_dir=case_log_dir,
    )

    build_root = reusable_repo_cache_root(repo_path) / "builds"
    build_dir = build_root / stable_cache_name(ref)
    build_root.mkdir(parents=True, exist_ok=True)
    print(f"{label} reusable build dir: {build_dir}", flush=True)

    # The CMake project is either the repository root or its "mofem" subdirectory.
    nested_source_root = worktree_dir / "mofem"
    if (worktree_dir / "CMakeLists.txt").exists():
        source_root = worktree_dir
    elif (nested_source_root / "CMakeLists.txt").exists():
        source_root = nested_source_root
    else:
        raise FileNotFoundError(
            f"{name}: could not find CMakeLists.txt in {worktree_dir} or {nested_source_root}"
        )

    configure_log = case_log_dir / "configure.log"
    rc, _ = run_command(
        ["cmake", "-S", str(source_root), "-B", str(build_dir), *configure_args],
        cwd=str(source_root),
        log_path=configure_log,
        announce_name=f"{label} configure",
        print_cmd=True,
    )
    if rc != 0:
        raise RuntimeError(
            f"{name}: configure failed for ref {ref}\n"
            f"log: {configure_log}\n"
            f"last lines:\n{tail_text(configure_log)}"
        )

    build_cmd = ["cmake", "--build", str(build_dir), "-j", str(build_jobs)]
    if build_target:
        build_cmd += ["--target", build_target]
    build_cmd += build_args
    build_log = case_log_dir / "build.log"
    rc, _ = run_command(
        build_cmd,
        cwd=str(build_dir),
        log_path=build_log,
        announce_name=f"{label} build",
        print_cmd=True,
    )
    if rc != 0:
        raise RuntimeError(
            f"{name}: build failed for ref {ref}\n"
            f"log: {build_log}\n"
            f"last lines:\n{tail_text(build_log)}"
        )

    exe_path = build_dir / exe_relpath
    if not exe_path.exists():
        raise FileNotFoundError(
            f"{name}: built executable not found: {exe_path}\n"
            f"Check --exe-relpath and --build-target."
        )

    commit = infer_commit(str(worktree_dir))

    run_workdir = None
    if workdir_relpath:
        run_workdir = str(build_dir / workdir_relpath)
        if not Path(run_workdir).exists():
            raise FileNotFoundError(
                f"{name}: run workdir not found: {run_workdir}\n"
                f"Check --workdir-relpath."
            )

    return Case(
        name=name,
        label=label,
        build_dir=str(build_dir),
        executable=str(exe_path),
        # Fall back to the resolved commit, then to the raw ref.
        commit=commit or target_commit or ref,
        source_dir=str(source_root),
        run_workdir=run_workdir,
    )


def run_command(
    cmd: list[str],
    cwd: str | None,
    log_path: Path,
    announce_name: str | None = None,
    print_cmd: bool = False,
) -> tuple[int, float]:
    """Run ``cmd`` and stream its combined stdout/stderr into ``log_path``.

    The log is written line by line so it can be followed with ``tail -f``
    while the command runs.

    Args:
        cmd: Program and arguments (no shell is involved).
        cwd: Working directory for the child, or None to inherit.
        log_path: Log file; its parent directory is created and the file is
            overwritten.
        announce_name: When given, the log location and a ``tail -f`` hint are
            printed before the command starts.
        print_cmd: When true, the quoted command line is printed before the
            run and the exit code after it.

    Returns:
        ``(returncode, elapsed_seconds)``; elapsed time is wall-clock time
        measured from function entry.
    """
    started = time.perf_counter()
    log_path.parent.mkdir(parents=True, exist_ok=True)
    if announce_name:
        print(f"{announce_name} log: {log_path}", flush=True)
        print(f"monitor: tail -f {shlex.quote(str(log_path))}", flush=True)
    if print_cmd:
        print(f"command: {' '.join(shlex.quote(c) for c in cmd)}", flush=True)

    # Child output is arbitrary bytes (compiler diagnostics, solver banners);
    # errors="replace" on both the read and the write side keeps one
    # undecodable or unencodable character from aborting the whole run.
    with log_path.open("w", buffering=1, errors="replace") as log_handle:
        with subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
            bufsize=1,
        ) as proc:
            assert proc.stdout is not None
            try:
                for line in proc.stdout:
                    log_handle.write(line)
            except BaseException:
                # Do not leave the child running if copying its output fails
                # or the user interrupts the script.
                proc.kill()
                raise
            returncode = proc.wait()

    elapsed = time.perf_counter() - started
    if print_cmd:
        print(f"{announce_name or 'command'} finished with exit code {returncode}", flush=True)
    return returncode, elapsed


def parse_petsc_events(log_text: str, event_names: Iterable[str]) -> dict[str, float]:
    """Extract the numeric column of the requested PETSc event rows.

    Args:
        log_text: Captured output of a run.
        event_names: Event names to keep.

    Returns:
        Mapping from event name to the parsed number.  When an event appears
        on several matching rows, the last row wins.
    """
    wanted = frozenset(event_names)
    matches = (PETSC_EVENT_RE.match(row) for row in log_text.splitlines())
    return {
        hit.group(1): float(hit.group(2))
        for hit in matches
        if hit and hit.group(1) in wanted
    }


def tail_text(path: Path, num_lines: int = 40) -> str:
    """Return the last ``num_lines`` lines of ``path`` joined with newlines.

    Undecodable bytes are dropped.  A non-positive ``num_lines`` yields an
    empty string (a plain ``lines[-0:]`` slice would return the whole file).
    """
    if num_lines <= 0:
        return ""
    lines = path.read_text(errors="ignore").splitlines()
    return "\n".join(lines[-num_lines:])


def ordered_unique(items: Iterable[str]) -> list[str]:
    """Drop duplicates from ``items`` while keeping first-seen order."""
    first_seen = dict.fromkeys(items)
    return [*first_seen]


def sanitize_perf_event_name(event_name: str) -> str:
    """Return the event name with any ``:modifier`` suffix and outer whitespace removed."""
    base, _, _ = event_name.partition(":")
    return base.strip()


def parse_perf_stat(log_text: str, event_names: Iterable[str]) -> dict[str, float]:
    """Extract counter values from ``perf stat -x,`` output.

    Each candidate row is split on commas; field 0 is the value and field 2
    the event name.  A row is attributed to a requested event when its name
    matches exactly, or, failing that, when its name without the
    ``:modifier`` suffix matches.  Matching the exact name first means an
    event requested as ``cycles:u`` is reported under that name, and does not
    overwrite a separately requested ``cycles``.

    Args:
        log_text: Captured output containing the perf CSV rows (possibly mixed
            with the profiled program's own output).
        event_names: Event names as they were requested.

    Returns:
        Mapping from requested event name to its value.  Rows that are blank,
        comments, too short, unrequested, or without a numeric value are
        ignored.
    """
    wanted = set(event_names)
    values: dict[str, float] = {}
    for raw_line in log_text.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        parts = [part.strip() for part in stripped.split(",")]
        if len(parts) < 3:
            continue
        raw_value = parts[0]
        reported = parts[2]
        if reported in wanted:
            key = reported
        else:
            key = sanitize_perf_event_name(reported)
            if key not in wanted:
                continue
        if raw_value in {"<not supported>", "<not counted>", "not counted"}:
            continue
        try:
            values[key] = float(raw_value)
        except ValueError:
            continue
    return values


def run_wall_time_case(
    case: Case,
    app_args: list[str],
    workdir: str | None,
    results_dir: Path,
    timing_runs: int,
) -> dict:
    """Run the executable ``timing_runs`` times and record wall-clock durations.

    Each run logs to ``wall_run_<n>.log`` under ``results_dir/<label>``.

    Returns:
        ``{"runs": [...], "mean_wall_seconds": ...}`` where every run entry
        holds the run number, its duration and its log path.

    Raises:
        RuntimeError: If a run exits with a non-zero status.
    """
    case_dir = results_dir / case.label
    case_dir.mkdir(parents=True, exist_ok=True)

    command = [case.executable, *app_args]
    run_cwd = resolve_run_cwd(case, workdir)

    runs = []
    for run_id in range(1, timing_runs + 1):
        log_path = case_dir / f"wall_run_{run_id}.log"
        rc, elapsed = run_command(
            command,
            cwd=run_cwd,
            log_path=log_path,
            announce_name=f"{case.label} wall run {run_id}",
        )
        if rc != 0:
            raise RuntimeError(
                f"{case.label}: wall run {run_id} failed with exit code {rc}\n"
                f"log: {log_path}\n"
                f"last lines:\n{tail_text(log_path)}"
            )
        runs.append(
            {
                "run": run_id,
                "wall_seconds": elapsed,
                "log_path": str(log_path),
            }
        )

    return {
        "runs": runs,
        "mean_wall_seconds": statistics.mean(entry["wall_seconds"] for entry in runs),
    }


def run_petsc_event_case(
    case: Case,
    app_args: list[str],
    workdir: str | None,
    results_dir: Path,
    timing_runs: int,
    petsc_events: list[str],
) -> dict:
    """Run the executable with ``-log_view`` and collect the requested PETSc events.

    ``-log_view`` is appended to the arguments unless already present.  Each
    run logs to ``petsc_run_<n>.log`` under ``results_dir/<label>``.

    Returns:
        A dict with the per-run rows (``runs``), ``mean_wall_seconds``, the
        per-event means (``mean_petsc_events``) and the mean of the per-run
        event sums (``mean_petsc_event_sum``).

    Raises:
        RuntimeError: If a run fails or any requested event is absent from
            its log.
    """
    case_dir = results_dir / case.label
    case_dir.mkdir(parents=True, exist_ok=True)

    if "-log_view" in app_args:
        petsc_args = [*app_args]
    else:
        petsc_args = [*app_args, "-log_view"]
    command = [case.executable, *petsc_args]
    run_cwd = resolve_run_cwd(case, workdir)

    runs = []
    for run_id in range(1, timing_runs + 1):
        log_path = case_dir / f"petsc_run_{run_id}.log"
        rc, elapsed = run_command(
            command,
            cwd=run_cwd,
            log_path=log_path,
            announce_name=f"{case.label} petsc run {run_id}",
        )
        if rc != 0:
            raise RuntimeError(
                f"{case.label}: petsc run {run_id} failed with exit code {rc}\n"
                f"log: {log_path}\n"
                f"last lines:\n{tail_text(log_path)}"
            )

        found = parse_petsc_events(log_path.read_text(), petsc_events)
        missing = [event for event in petsc_events if event not in found]
        if missing:
            raise RuntimeError(
                f"{case.label}: petsc run {run_id} missing PETSc events {missing}\n"
                f"log: {log_path}\n"
                f"last lines:\n{tail_text(log_path)}"
            )

        runs.append(
            {
                "run": run_id,
                "wall_seconds": elapsed,
                "log_path": str(log_path),
                "petsc_events": found,
                "petsc_event_sum": sum(found[event] for event in petsc_events),
            }
        )

    mean_by_event = {
        event: statistics.mean([entry["petsc_events"][event] for entry in runs])
        for event in petsc_events
    }

    return {
        "runs": runs,
        "mean_wall_seconds": statistics.mean(entry["wall_seconds"] for entry in runs),
        "mean_petsc_events": mean_by_event,
        "mean_petsc_event_sum": statistics.mean(entry["petsc_event_sum"] for entry in runs),
    }


def run_perf_case(
    case: Case,
    app_args: list[str],
    workdir: str | None,
    results_dir: Path,
    perf_runs: int,
    perf_bin: str,
    perf_events: list[str],
) -> dict:
    """Run the executable under ``perf stat`` and collect counter totals.

    ``instructions`` and ``cycles`` are always requested in addition to
    ``perf_events``; IPC is their ratio (NaN when cycles is zero).  Each run
    logs to ``perf_run_<n>.log`` under ``results_dir/<label>``.

    Returns:
        A dict with the per-run rows (``runs``), the requested ``event_names``,
        per-event means over the runs that reported them (``mean_events``),
        ``mean_wall_seconds`` and, when at least one run produced a finite
        IPC, ``mean_ipc``.

    Raises:
        RuntimeError: If a run fails or does not report both required events.
    """
    case_dir = results_dir / case.label
    case_dir.mkdir(parents=True, exist_ok=True)

    event_list = ordered_unique([*DEFAULT_PERF_EVENTS, *perf_events])
    command = [
        perf_bin,
        "stat",
        "-x,",
        "--no-big-num",
        "-e",
        ",".join(event_list),
        "--",
        case.executable,
        *app_args,
    ]
    run_cwd = resolve_run_cwd(case, workdir)

    runs = []
    for run_id in range(1, perf_runs + 1):
        log_path = case_dir / f"perf_run_{run_id}.log"
        rc, elapsed = run_command(
            command,
            cwd=run_cwd,
            log_path=log_path,
            announce_name=f"{case.label} perf run {run_id}",
        )
        log_text = log_path.read_text()
        if rc != 0:
            head = "\n".join(log_text.splitlines()[:8])
            raise RuntimeError(
                f"{case.label}: perf stat run {run_id} failed with exit code {rc}\n"
                f"  log: {log_path}\n"
                f"  first lines:\n{head}"
            )

        counters = parse_perf_stat(log_text, event_list)
        missing = [event for event in DEFAULT_PERF_EVENTS if event not in counters]
        if missing:
            head = "\n".join(log_text.splitlines()[:8])
            raise RuntimeError(
                f"{case.label}: perf stat run {run_id} did not report required events {missing}\n"
                f"  log: {log_path}\n"
                f"  first lines:\n{head}"
            )

        cycles = counters["cycles"]
        if cycles:
            ipc = counters["instructions"] / cycles
        else:
            ipc = math.nan
        runs.append(
            {
                "run": run_id,
                "wall_seconds": elapsed,
                "log_path": str(log_path),
                "events": counters,
                "ipc": ipc,
            }
        )

    mean_events = {}
    for event in event_list:
        samples = [entry["events"][event] for entry in runs if event in entry["events"]]
        if samples:
            mean_events[event] = statistics.mean(samples)

    summary = {
        "runs": runs,
        "event_names": event_list,
        "mean_events": mean_events,
        "mean_wall_seconds": statistics.mean(entry["wall_seconds"] for entry in runs),
    }
    finite_ipc = [entry["ipc"] for entry in runs if not math.isnan(entry["ipc"])]
    if finite_ipc:
        summary["mean_ipc"] = statistics.mean(finite_ipc)

    return summary


def run_callgrind_case(
    case: Case,
    app_args: list[str],
    workdir: str | None,
    results_dir: Path,
    callgrind_args: list[str],
) -> dict:
    """Run one case under Callgrind and store an inclusive annotation.

    The profile, the run log and the ``callgrind_annotate`` report are all
    written to ``results_dir / case.label``.

    Args:
        case: Build under test; ``label`` and ``executable`` are read here.
        app_args: Arguments appended after the executable.
        workdir: Working-directory override handed to ``resolve_run_cwd``.
        results_dir: Root directory that receives the per-case sub-directory.
        callgrind_args: Valgrind options placed before the executable.

    Returns:
        Dict holding the elapsed value returned by ``run_command`` and the
        paths (as strings) of the profile, the log and the annotation.

    Raises:
        RuntimeError: If the Valgrind run exits with a non-zero status.
        subprocess.CalledProcessError: If ``callgrind_annotate`` fails.
    """
    output_dir = results_dir / case.label
    output_dir.mkdir(parents=True, exist_ok=True)
    profile_path = output_dir / "callgrind.out"
    run_log = output_dir / "callgrind.log"
    report_path = output_dir / "callgrind.annotate.txt"

    valgrind_cmd = [
        "valgrind",
        "--tool=callgrind",
        f"--callgrind-out-file={profile_path}",
    ]
    valgrind_cmd.extend(callgrind_args)
    valgrind_cmd.append(case.executable)
    valgrind_cmd.extend(app_args)

    exit_code, wall_seconds = run_command(
        valgrind_cmd,
        cwd=resolve_run_cwd(case, workdir),
        log_path=run_log,
        announce_name=f"{case.label} callgrind",
    )
    if exit_code != 0:
        raise RuntimeError(
            f"{case.label}: callgrind run failed with exit code {exit_code}\n"
            f"log: {run_log}\n"
            f"last lines:\n{tail_text(run_log)}"
        )

    # stderr is folded into stdout so the saved report also keeps any warnings.
    annotate_cmd = [
        "callgrind_annotate",
        "--inclusive=yes",
        "--auto=no",
        "--threshold=0",
        str(profile_path),
    ]
    completed = subprocess.run(
        annotate_cmd,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    report_path.write_text(completed.stdout)

    return {
        "elapsed_wall_seconds": wall_seconds,
        "callgrind_out": str(profile_path),
        "callgrind_log": str(run_log),
        "annotate_txt": str(report_path),
    }


def parse_callgrind_annotate(annotate_path: Path) -> tuple[int | None, dict[str, int]]:
    """Extract the program total and per-function counts from an annotate report.

    Every line is matched against ``CALLGRIND_LINE_RE``.  The line containing
    ``PROGRAM TOTALS`` supplies the total; any other matching line maps its
    trailing text to its leading count.  When the same trailing text occurs
    more than once, the last occurrence wins.

    Returns:
        ``(total, counts)`` where ``total`` is ``None`` if no totals line
        could be parsed.
    """
    grand_total: int | None = None
    per_function: dict[str, int] = {}
    report = annotate_path.read_text(errors="ignore")
    for entry in report.splitlines():
        hit = CALLGRIND_LINE_RE.match(entry)
        if hit is None:
            continue
        # Counts are printed with thousands separators.
        count = int(hit.group(1).replace(",", ""))
        if "PROGRAM TOTALS" in entry:
            grand_total = count
        else:
            per_function[hit.group(2)] = count
    return grand_total, per_function


def resolve_callgrind_name(raw_value: str, table: dict[str, str]) -> str:
    """Expand a Callgrind ``(id) name`` / ``(id)`` reference using *table*.

    A value that does not match ``CALLGRIND_REF_RE`` is returned unchanged.
    A value carrying a name records ``id -> name`` in *table* (mutating it)
    and returns that name.  A bare ``(id)`` returns the recorded name, or the
    literal ``"(id)"`` when the id has not been seen.
    """
    parsed = CALLGRIND_REF_RE.match(raw_value)
    if parsed is None:
        return raw_value

    ref_id = parsed.group(1)
    name = parsed.group(2)
    if name is None:
        # Reference only: look the id up, falling back to the raw "(id)" form.
        return table.get(ref_id, f"({ref_id})")

    table[ref_id] = name
    return name


# One Callgrind sub-position: "*", or an optionally signed decimal/hex number.
_CALLGRIND_SUBPOSITION_RE = re.compile(r"^(?:\*|[+-]?(?:0[xX][0-9a-fA-F]+|\d+))$")


def parse_callgrind_costs_line(
    line: str,
    event_names: list[str],
    position_count: int = 1,
) -> dict[str, int] | None:
    """Parse one Callgrind cost line into an ``event -> cost`` mapping.

    A cost line consists of ``position_count`` sub-position columns followed
    by up to ``len(event_names)`` integer costs.  Missing trailing costs are
    treated as zero and surplus costs are ignored.

    Args:
        line: Raw line from the profile.
        event_names: Event names, in the column order of the costs.
        position_count: Number of leading sub-position columns (``1`` for
            ``positions: line``, ``2`` for ``positions: instr line``).

    Returns:
        The mapping, or ``None`` when *line* is not a cost line: too few
        columns, a leading token that is not a sub-position (for example
        ``calls=...`` or the ``totals:`` trailer), or a non-integer cost.
    """
    fields = line.split()
    if position_count < 1 or len(fields) <= position_count:
        return None

    # Rejecting anything that is not a sub-position keeps header-like lines
    # such as "totals: ..." from being mistaken for costs.
    for token in fields[:position_count]:
        if _CALLGRIND_SUBPOSITION_RE.match(token) is None:
            return None

    try:
        costs = [int(value) for value in fields[position_count:]]
    except ValueError:
        return None

    costs.extend([0] * max(0, len(event_names) - len(costs)))
    return {event_name: costs[idx] for idx, event_name in enumerate(event_names)}


def parse_callgrind_out(
    callgrind_path: Path,
) -> tuple[list[str], dict[str, int], dict[str, dict[str, int]]]:
    """Read event names, program totals and per-function costs from a Callgrind dump.

    The file is scanned twice: the first scan collects every ``(id) name``
    definition of a function name, the second one accumulates cost lines
    under the most recent ``fn=`` record.

    Notes:
        * Function names may be defined on ``cfn=`` lines as well as ``fn=``
          lines (both share one id namespace), so both are registered.
        * The ``positions:`` header decides how many leading columns of a
          cost line are positions; with ``--dump-instr=yes`` there are two.
        * Cost lines following a ``calls=`` line are accumulated too, so the
          per-function values include the recorded cost of its callees.
        * The closing ``totals:`` line is never attributed to a function; it
          is only used for the totals when no ``summary:`` line was parsed.

    Returns:
        ``(event_names, totals, functions)`` where ``totals`` maps event name
        to the program-wide count and ``functions`` maps function name to its
        own event-name -> count mapping.
    """
    event_names: list[str] = []
    totals: dict[str, int] = {}
    functions: dict[str, dict[str, int]] = {}
    fn_table: dict[str, str] = {}

    def header_costs(text: str) -> dict[str, int] | None:
        """Map the integers of a ``summary:``/``totals:`` payload to events."""
        try:
            values = [int(value) for value in text.split()]
        except ValueError:
            return None
        values.extend([0] * max(0, len(event_names) - len(values)))
        return {event_name: values[idx] for idx, event_name in enumerate(event_names)}

    lines = callgrind_path.read_text(errors="ignore").splitlines()

    # Pass 1: learn all compressed function names up front so that a bare
    # "(id)" reference resolves no matter where its definition appears.
    for raw_line in lines:
        stripped = raw_line.strip()
        if stripped.startswith(("fn=", "cfn=")):
            resolve_callgrind_name(stripped.split("=", 1)[1].strip(), fn_table)

    position_count = 1
    current_function: str | None = None

    # Pass 2: accumulate costs.
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("positions:"):
            position_count = max(1, len(line.split(":", 1)[1].split()))
            continue

        if line.startswith("events:"):
            event_names = line.split(":", 1)[1].strip().split()
            continue

        if line.startswith("summary:"):
            parsed = header_costs(line.split(":", 1)[1])
            if parsed is not None:
                totals = parsed
            continue

        if line.startswith("totals:"):
            if not totals:
                parsed = header_costs(line.split(":", 1)[1])
                if parsed is not None:
                    totals = parsed
            continue

        if line.startswith("fn="):
            current_function = resolve_callgrind_name(line[3:].strip(), fn_table)
            continue

        if line.startswith("cfn="):
            resolve_callgrind_name(line[4:].strip(), fn_table)
            continue

        # Cost lines start with a sub-position: a number, "+N", "-N" or "*".
        # Everything else (ob=, fl=, calls=, jump=, comments, headers) is not a cost.
        if line[0] not in "0123456789+-*":
            continue

        if current_function is None or not event_names:
            continue

        fields = line.split()
        if len(fields) <= position_count:
            continue
        # Collapse the position columns into one so the cost parser sees
        # "<position> <cost>..." regardless of the positions header.
        cost_line = " ".join([fields[0], *fields[position_count:]])
        event_costs = parse_callgrind_costs_line(cost_line, event_names)
        if event_costs is None:
            continue

        function_events = functions.setdefault(
            current_function, {event_name: 0 for event_name in event_names}
        )
        for event_name, value in event_costs.items():
            function_events[event_name] = function_events.get(event_name, 0) + value

    return event_names, totals, functions


def should_include_function(
    func: str, include_patterns: list[re.Pattern[str]], exclude_patterns: list[re.Pattern[str]]
) -> bool:
    """Decide whether *func* passes the include/exclude regex filters.

    With a non-empty *include_patterns* at least one of them must be found in
    *func*.  Independently of that, a hit from any of *exclude_patterns*
    rejects the name.
    """

    def found_in(patterns: list[re.Pattern[str]]) -> bool:
        return any(pattern.search(func) for pattern in patterns)

    included = not include_patterns or found_in(include_patterns)
    return included and not found_in(exclude_patterns)


def build_diff_rows(
    funcs_a: dict[str, int],
    funcs_b: dict[str, int],
    include_patterns: list[re.Pattern[str]],
    exclude_patterns: list[re.Pattern[str]],
) -> list[dict]:
    """Build one comparison row per function found in either mapping.

    Functions rejected by ``should_include_function`` and functions whose
    value is zero on both sides are left out.

    Rows are emitted in a deterministic order: the functions of *funcs_a*
    in their mapping order, followed by the functions that only occur in
    *funcs_b*.  (Iterating a ``set`` of strings instead would change the
    order from one interpreter run to the next.)

    Returns:
        List of dicts with the keys ``function``, ``value_a``, ``value_b``,
        ``delta`` (``b - a``), ``delta_percent_vs_a`` (``inf`` when the
        A-side value is zero), ``abs_delta`` and ``present_in_both``.
    """
    rows = []
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for func in dict.fromkeys([*funcs_a, *funcs_b]):
        if not should_include_function(func, include_patterns, exclude_patterns):
            continue
        a_val = funcs_a.get(func, 0)
        b_val = funcs_b.get(func, 0)
        if a_val == 0 and b_val == 0:
            continue
        delta = b_val - a_val
        delta_pct = 100.0 * delta / a_val if a_val else float("inf")
        rows.append(
            {
                "function": func,
                "value_a": a_val,
                "value_b": b_val,
                "delta": delta,
                "delta_percent_vs_a": delta_pct,
                "abs_delta": abs(delta),
                "present_in_both": bool(a_val and b_val),
            }
        )
    return rows


def project_rows_to_estimated_cycles(
    rows: list[dict],
    ipc_a: float | None,
    ipc_b: float | None,
) -> list[dict]:
    """Convert the values of diff rows into cycles by dividing by each side's IPC.

    ``value_a`` is divided by *ipc_a* and ``value_b`` by *ipc_b*; the delta,
    percentage and absolute delta are then recomputed from the converted
    values.  All other keys are copied through and the input rows are left
    untouched.

    Raises:
        ValueError: If either IPC is missing/zero, or not positive.
    """
    if not (ipc_a and ipc_b):
        raise ValueError("Both IPC values are required to estimate cycles.")
    if ipc_a <= 0 or ipc_b <= 0:
        raise ValueError("IPC values must be positive to estimate cycles.")

    def to_cycles(row: dict) -> dict:
        cycles_a = row["value_a"] / ipc_a
        cycles_b = row["value_b"] / ipc_b
        change = cycles_b - cycles_a
        # No meaningful percentage exists against a zero baseline.
        change_pct = 100.0 * change / cycles_a if cycles_a else float("inf")
        converted = dict(row)
        converted.update(
            value_a=cycles_a,
            value_b=cycles_b,
            delta=change,
            delta_percent_vs_a=change_pct,
            abs_delta=abs(change),
        )
        return converted

    return [to_cycles(row) for row in rows]


def event_value(event_map: dict[str, int], name: str) -> int:
    """Return the count stored for event *name*, or ``0`` when it is absent."""
    if name in event_map:
        return event_map[name]
    return 0


def total_l1_misses(event_map: dict[str, int]) -> int:
    """Add up the ``I1mr``, ``D1mr`` and ``D1mw`` counters of *event_map*.

    Counters that are not present contribute zero.
    """
    return sum(event_value(event_map, counter) for counter in ("I1mr", "D1mr", "D1mw"))


def total_ll_misses(event_map: dict[str, int]) -> int:
    """Add up the ``ILmr``, ``DLmr`` and ``DLmw`` counters of *event_map*.

    Counters that are not present contribute zero.
    """
    return sum(event_value(event_map, counter) for counter in ("ILmr", "DLmr", "DLmw"))


def total_branch_misses(event_map: dict[str, int]) -> int:
    """Add up the ``Bcm`` and ``Bim`` counters of *event_map*.

    Counters that are not present contribute zero.
    """
    return sum(event_value(event_map, counter) for counter in ("Bcm", "Bim"))


def estimate_cycles_from_events(
    event_map: dict[str, int],
    base_cpi: float,
    l1_miss_penalty: float,
    ll_miss_penalty: float,
    branch_miss_penalty: float,
) -> float:
    """Estimate cycles from simulated event counts with a linear cost model.

    The estimate is::

        Ir * base_cpi
        + max(0, L1 misses - LL misses) * l1_miss_penalty
        + LL misses * ll_miss_penalty
        + branch misses * branch_miss_penalty

    The LL misses are subtracted from the L1 misses so that a miss charged
    with the LL penalty is not charged with the L1 penalty as well.
    """
    last_level = total_ll_misses(event_map)
    first_level_only = max(0, total_l1_misses(event_map) - last_level)

    instruction_cost = event_value(event_map, "Ir") * base_cpi
    l1_cost = first_level_only * l1_miss_penalty
    ll_cost = last_level * ll_miss_penalty
    branch_cost = total_branch_misses(event_map) * branch_miss_penalty
    return instruction_cost + l1_cost + ll_cost + branch_cost


def metric_dict_from_event_maps(
    event_maps: dict[str, dict[str, int]],
    metric_name: str,
) -> dict[str, float]:
    """Pick a single event out of every per-function event map.

    Returns:
        ``function -> float(count)``; functions lacking *metric_name* map
        to ``0.0``.
    """
    selected: dict[str, float] = {}
    for func, events in event_maps.items():
        selected[func] = float(events.get(metric_name, 0))
    return selected


def estimated_cycle_dict_from_event_maps(
    event_maps: dict[str, dict[str, int]],
    base_cpi: float,
    l1_miss_penalty: float,
    ll_miss_penalty: float,
    branch_miss_penalty: float,
) -> dict[str, float]:
    """Apply ``estimate_cycles_from_events`` to every per-function event map.

    Returns:
        ``function -> estimated cycles`` computed with the given model
        parameters.
    """
    model = {
        "base_cpi": base_cpi,
        "l1_miss_penalty": l1_miss_penalty,
        "ll_miss_penalty": ll_miss_penalty,
        "branch_miss_penalty": branch_miss_penalty,
    }
    estimates: dict[str, float] = {}
    for func, events in event_maps.items():
        estimates[func] = estimate_cycles_from_events(events, **model)
    return estimates


def format_delta_percent(value: float) -> str:
    """Format a percentage with two decimals, spelling infinities as ``inf``/``-inf``."""
    for sentinel, text in ((float("inf"), "inf"), (float("-inf"), "-inf")):
        if value == sentinel:
            return text
    return format(value, ".2f")


def format_metric(value: float) -> str:
    """Render *value* compactly with a ``G``/``M``/``K`` suffix where it applies.

    Magnitudes of at least 1e9, 1e6 and 1e3 are scaled and suffixed with two
    decimals; magnitudes of at least 1 get two decimals, smaller ones four.
    """
    magnitude = abs(value)
    for unit, suffix in ((1_000_000_000, "G"), (1_000_000, "M"), (1_000, "K")):
        if magnitude >= unit:
            return f"{value / unit:.2f}{suffix}"
    precision = ".2f" if magnitude >= 1 else ".4f"
    return format(value, precision)


def shorten_label(text: str, limit: int = 100) -> str:
    """Return *text* unchanged if it fits in *limit* characters, else truncate it.

    A truncated label keeps the first ``limit - 3`` characters and ends in
    ``...``.
    """
    fits = len(text) <= limit
    return text if fits else f"{text[: limit - 3]}..."


def write_perf_csv(path: Path, label: str, summary: dict) -> None:
    """Write one CSV row per perf run of *summary* to *path*.

    Columns are ``label``, ``run``, ``wall_seconds``, ``ipc``, one column per
    entry of ``summary["event_names"]`` and finally ``log_path``.  An event
    that a run did not record is written as an empty cell.
    """
    events = summary.get("event_names", [])
    header = ["label", "run", "wall_seconds", "ipc", *events, "log_path"]

    def record_for(run: dict) -> dict:
        record = {"label": label}
        for key in ("run", "wall_seconds", "ipc", "log_path"):
            record[key] = run[key]
        # Event columns are filled last, one lookup per requested event.
        record.update((name, run["events"].get(name)) for name in events)
        return record

    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        for run in summary["runs"]:
            writer.writerow(record_for(run))


def select_top_difference_rows(rows: list[dict], limit: int) -> list[dict]:
    """Return up to *limit* rows with the largest ``abs_delta``, largest first.

    Rows flagged ``present_in_both`` are preferred: if at least one exists,
    only those rows are considered; otherwise all rows are.
    """

    def magnitude(row: dict):
        return row["abs_delta"]

    paired = [row for row in rows if row.get("present_in_both")]
    candidates = paired if paired else rows
    ranked = sorted(candidates, key=magnitude, reverse=True)
    return ranked[:limit]


def write_svg_diff_plot(
    path: Path,
    rows: list[dict],
    case_a: Case,
    case_b: Case,
    metric_label: str,
    title: str,
) -> None:
    """Write a diverging horizontal bar chart of per-function deltas as SVG.

    Each row gets one band: the (shortened) function name on the left and a
    bar growing right of the centre axis for ``delta >= 0`` (class ``pos``)
    or left of it otherwise (class ``neg``).  Bar lengths are proportional to
    ``abs_delta`` relative to the largest one, with a minimum of one pixel.
    With no rows a small placeholder image is written instead.
    """
    if not rows:
        placeholder = [
            "<svg xmlns='http://www.w3.org/2000/svg' width='800' height='120'>",
            f"<rect x='0' y='0' width='800' height='120' fill='{DEFAULT_SVG_BACKGROUND}' />",
            "<text x='20' y='60' font-family='sans-serif' font-size='20'>No rows to plot</text>",
            "</svg>",
        ]
        path.write_text("".join(placeholder))
        return

    # Canvas geometry in pixels.
    canvas_w = 1900
    band_h = 44
    margin_top, margin_bottom = 90, 40
    margin_left, margin_right = 760, 160
    plot_w = canvas_w - margin_left - margin_right
    axis_x = margin_left + plot_w / 2
    canvas_h = margin_top + margin_bottom + band_h * len(rows)

    # Fall back to 1 so an all-zero data set does not divide by zero.
    largest = max(row["abs_delta"] for row in rows) or 1
    px_per_unit = (plot_w / 2 - 20) / largest

    stylesheet = "".join(
        [
            "<style>",
            "text{font-family:Arial,Helvetica,sans-serif;fill:#222}",
            ".label{font-size:15px}.value{font-size:14px}.title{font-size:24px;font-weight:bold}",
            ".subtitle{font-size:15px;fill:#555}.axis{stroke:#888;stroke-width:1}",
            ".pos{fill:#cf3f3f}.neg{fill:#2d7d46}",
            "</style>",
        ]
    )
    svg = [
        f"<svg xmlns='http://www.w3.org/2000/svg' width='{canvas_w}' height='{canvas_h}'>",
        f"<rect x='0' y='0' width='{canvas_w}' height='{canvas_h}' fill='{DEFAULT_SVG_BACKGROUND}' />",
        stylesheet,
        f"<text class='title' x='20' y='36'>{escape(title)}</text>",
        f"<text class='subtitle' x='20' y='62'>Delta = {escape(case_b.label)} - {escape(case_a.label)} | metric: {escape(metric_label)}</text>",
        f"<line class='axis' x1='{axis_x:.1f}' y1='{margin_top - 18}' x2='{axis_x:.1f}' y2='{canvas_h - margin_bottom + 10}' />",
    ]

    band_top = margin_top
    for row in rows:
        bar_len = max(1.0, row["abs_delta"] * px_per_unit)
        grows_right = row["delta"] >= 0
        if grows_right:
            bar_x = axis_x
            bar_class, anchor = "pos", "start"
            text_x = bar_x + bar_len + 8
        else:
            bar_x = axis_x - bar_len
            bar_class, anchor = "neg", "end"
            text_x = bar_x - 8

        caption = shorten_label(row["function"], 110)
        svg.extend(
            [
                f"<text class='label' x='20' y='{band_top + 18}'>{escape(caption)}</text>",
                f"<rect class='{bar_class}' x='{bar_x:.1f}' y='{band_top + 4}' width='{bar_len:.1f}' height='22' rx='3' ry='3' />",
                f"<text class='value' x='{text_x:.1f}' y='{band_top + 20}' text-anchor='{anchor}'>"
                f"{escape(format_metric(row['delta']))}</text>",
            ]
        )
        band_top += band_h

    svg.append("</svg>")
    path.write_text("\n".join(svg))


def write_perf_svg(path: Path, case_a: Case, case_b: Case, perf_a: dict, perf_b: dict) -> None:
    """Write a grouped bar chart of mean instructions, cycles and IPC as SVG.

    Each metric gets one group with a bar for *case_a* and one for *case_b*,
    labelled with the formatted value.  Bars are scaled per metric: the
    larger value of a group spans the full bar width.  Instruction and cycle
    counts are many orders of magnitude larger than IPC, so a scale shared
    by all three metrics would shrink the IPC bars to the one-pixel minimum.
    Consequently bar lengths are comparable within a metric only.

    A value of ``None`` (metric not available) is skipped; a group with no
    values only shows its label.
    """
    metrics = [
        ("Instructions", perf_a["mean_events"].get("instructions"), perf_b["mean_events"].get("instructions")),
        ("Cycles", perf_a["mean_events"].get("cycles"), perf_b["mean_events"].get("cycles")),
        ("IPC", perf_a.get("mean_ipc"), perf_b.get("mean_ipc")),
    ]
    width = 1100
    height = 380
    left = 160
    top = 70
    plot_w = width - left - 80
    row_h = 90
    # Leave room to the right of the longest bar for its value label.
    max_bar_w = plot_w - 120
    parts = [
        f"<svg xmlns='http://www.w3.org/2000/svg' width='{width}' height='{height}'>",
        f"<rect x='0' y='0' width='{width}' height='{height}' fill='{DEFAULT_SVG_BACKGROUND}' />",
        "<style>"
        "text{font-family:Arial,Helvetica,sans-serif;fill:#222}"
        ".title{font-size:24px;font-weight:bold}.label{font-size:17px}.value{font-size:14px}"
        ".a{fill:#4c78a8}.b{fill:#f58518}"
        "</style>",
        "<text class='title' x='20' y='36'>perf stat comparison</text>",
    ]
    for idx, (name, a_val, b_val) in enumerate(metrics):
        base_y = top + idx * row_h
        parts.append(f"<text class='label' x='20' y='{base_y + 20}'>{escape(name)}</text>")

        present = [value for value in (a_val, b_val) if value is not None]
        # "or 1" avoids a division by zero when the group is empty or all zero.
        row_max = (max(present) if present else 0) or 1
        scale = max_bar_w / row_max

        for offset, value, css_class, label in [
            (0, a_val, "a", case_a.label),
            (30, b_val, "b", case_b.label),
        ]:
            if value is None:
                continue
            bar_w = max(1.0, value * scale)
            y = base_y + offset
            parts.append(
                f"<rect class='{css_class}' x='{left}' y='{y}' width='{bar_w:.1f}' height='20' rx='3' ry='3' />"
            )
            parts.append(
                f"<text class='value' x='{left - 10}' y='{y + 15}' text-anchor='end'>{escape(label)}</text>"
            )
            parts.append(
                f"<text class='value' x='{left + bar_w + 8:.1f}' y='{y + 15}'>{escape(format_metric(value))}</text>"
            )
    parts.append("</svg>")
    path.write_text("\n".join(parts))


def write_timing_csv(path: Path, label: str, summary: dict, petsc_events: list[str]) -> None:
    """Write one CSV row per timing run of *summary* to *path*.

    Columns are ``label``, ``run``, ``wall_seconds``, ``petsc_event_sum``,
    one column per entry of *petsc_events* and finally ``log_path``.  The
    event sum and individual events are optional per run; absent values are
    written as empty cells.
    """
    header = ["label", "run", "wall_seconds", "petsc_event_sum", *petsc_events, "log_path"]

    def record_for(run: dict) -> dict:
        fixed = {
            "label": label,
            "run": run["run"],
            "wall_seconds": run["wall_seconds"],
            "petsc_event_sum": run.get("petsc_event_sum"),
            "log_path": run["log_path"],
        }
        per_event = run.get("petsc_events", {})
        return {**fixed, **{name: per_event.get(name) for name in petsc_events}}

    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        for run in summary["runs"]:
            writer.writerow(record_for(run))


def write_diff_csv(path: Path, rows: list[dict]) -> None:
    """Write comparison rows to *path* as CSV with a header line.

    The columns are ``function``, ``value_a``, ``value_b``, ``delta``,
    ``delta_percent_vs_a``, ``abs_delta`` and ``present_in_both``, in that
    order.
    """
    columns = (
        "function",
        "value_a",
        "value_b",
        "delta",
        "delta_percent_vs_a",
        "abs_delta",
        "present_in_both",
    )
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(columns))
        writer.writeheader()
        for record in rows:
            writer.writerow(record)


def print_top_rows(
    title: str,
    rows: list[dict],
    case_a: Case,
    case_b: Case,
    limit: int,
    metric_label: str = "Ir",
) -> None:
    """Print *title* followed by the first *limit* comparison rows.

    Each row is shown as the signed delta with its metric label, the
    percentage change versus case A, both absolute values and the function
    name.  ``(no rows)`` is printed when *rows* is empty.
    """
    print(title)
    if not rows:
        print("  (no rows)")
        return

    for row in rows[:limit]:
        change = f"  {row['delta']:+,} {metric_label} "
        relative = f"({format_delta_percent(row['delta_percent_vs_a'])}% vs {case_a.label})"
        side_a = f"{case_a.label}={row['value_a']:,}"
        side_b = f"{case_b.label}={row['value_b']:,}"
        print(" | ".join([change + relative, side_a, side_b, f"{row['function']}"]))


def main() -> int:

    args = parse_args()

    perf_a: dict | None = None

    perf_b: dict | None = None


    result_dir = ensure_results_dir(args.result_dir, args.label_a, args.label_b)

    print(f"Results directory: {result_dir}", flush=True)


    effective_configure_args = list(args.configure_arg)

    imported_seed_cache_vars: dict[str, dict[str, str]] = {}

    if args.repo and args.seed_cache:

        seeded_args, imported_seed_cache_vars = make_seed_configure_args(

            args.seed_cache, args.seed_cache_var

        )

        effective_configure_args = [*seeded_args, *effective_configure_args]

        if imported_seed_cache_vars:

            print(

                "Seeded configure cache vars: "

                + ", ".join(imported_seed_cache_vars.keys()),

                flush=True,

            )


    if args.repo:

        print("Phase 0: prepare source builds from refs", flush=True)

        case_a = prepare_source_case(

            name="case_a",

            label=args.label_a,

            repo=args.repo,

            ref=args.ref_a,

            result_dir=result_dir,

            exe_relpath=args.exe_relpath,

            workdir_relpath=args.workdir_relpath,

            configure_args=effective_configure_args,

            build_args=args.build_arg,

            build_target=args.build_target,

            build_jobs=args.build_jobs,

        )

        case_b = prepare_source_case(

            name="case_b",

            label=args.label_b,

            repo=args.repo,

            ref=args.ref_b,

            result_dir=result_dir,

            exe_relpath=args.exe_relpath,

            workdir_relpath=args.workdir_relpath,

            configure_args=effective_configure_args,

            build_args=args.build_arg,

            build_target=args.build_target,

            build_jobs=args.build_jobs,

        )

    else:

        case_a = resolve_case(

            name="case_a",

            label=args.label_a,

            build_dir=args.build_dir_a,

            exe=args.exe_a,

            exe_relpath=args.exe_relpath,

            commit=args.commit_a,

            source_dir=args.source_dir_a,

        )

        case_b = resolve_case(

            name="case_b",

            label=args.label_b,

            build_dir=args.build_dir_b,

            exe=args.exe_b,

            exe_relpath=args.exe_relpath,

            commit=args.commit_b,

            source_dir=args.source_dir_b,

        )


    metadata = {

        "created_utc": datetime.now(timezone.utc).isoformat(),

        "workdir": args.workdir,

        "workdir_relpath": args.workdir_relpath,

        "cmd_args": args.cmd_args,

        "skip_timing": args.skip_timing,

        "skip_callgrind": args.skip_callgrind,

        "petsc_events": args.petsc_event,

        "callgrind_args": DEFAULT_CALLGRIND_ARGS + args.callgrind_arg,

        "perf_enabled": args.perf_stat,

        "perf_bin": args.perf_bin,

        "perf_runs": args.perf_runs,

        "perf_events": ordered_unique([*DEFAULT_PERF_EVENTS, *args.perf_event]),

        "sim_model": {

            "base_cpi": args.sim_base_cpi,

            "l1_miss_penalty": args.sim_l1_miss_penalty,

            "ll_miss_penalty": args.sim_ll_miss_penalty,

            "branch_miss_penalty": args.sim_branch_miss_penalty,

        },

        "repo": args.repo,

        "ref_a": args.ref_a,

        "ref_b": args.ref_b,

        "configure_args": effective_configure_args,

        "build_args": args.build_arg,

        "build_target": args.build_target,

        "build_jobs": args.build_jobs,

        "seed_cache": args.seed_cache,

        "seed_cache_vars": imported_seed_cache_vars,

        "cases": [asdict(case_a), asdict(case_b)],

    }

    (result_dir / "metadata.json").write_text(json.dumps(metadata, indent=2))

    (result_dir / "command.txt").write_text(

        " ".join(shlex.quote(part) for part in args.cmd_args) + "\n"

    )


    summary: dict[str, object] = {

        "result_dir": str(result_dir),

        "cases": [asdict(case_a), asdict(case_b)],

        "skip_timing": args.skip_timing,

        "skip_callgrind": args.skip_callgrind,

        "wall_timing": {},

        "petsc_timing": {},

        "perf": {},

        "callgrind": {},

        "sim_model": {

            "base_cpi": args.sim_base_cpi,

            "l1_miss_penalty": args.sim_l1_miss_penalty,

            "ll_miss_penalty": args.sim_ll_miss_penalty,

            "branch_miss_penalty": args.sim_branch_miss_penalty,

        },

    }


    if not args.skip_timing:

        print("Phase 1: wall timing (no profiling, no -log_view)", flush=True)

        timing_a = run_wall_time_case(

            case=case_a,

            app_args=args.cmd_args,

            workdir=args.workdir,

            results_dir=result_dir,

            timing_runs=args.timing_runs,

        )

        timing_b = run_wall_time_case(

            case=case_b,

            app_args=args.cmd_args,

            workdir=args.workdir,

            results_dir=result_dir,

            timing_runs=args.timing_runs,

        )

        summary["wall_timing"] = {case_a.label: timing_a, case_b.label: timing_b}


        write_timing_csv(result_dir / f"{case_a.label}_wall.csv", case_a.label, timing_a, [])

        write_timing_csv(result_dir / f"{case_b.label}_wall.csv", case_b.label, timing_b, [])


        print(

            f"{case_a.label}: mean wall {timing_a['mean_wall_seconds']:.6f} s",

            flush=True,

        )

        print(

            f"{case_b.label}: mean wall {timing_b['mean_wall_seconds']:.6f} s",

            flush=True,

        )

        delta_wall = timing_b["mean_wall_seconds"] - timing_a["mean_wall_seconds"]

        delta_wall_pct = (

            100.0 * delta_wall / timing_a["mean_wall_seconds"]

            if timing_a["mean_wall_seconds"]

            else float("inf")

        )

        print(

            f"Wall-time delta ({case_b.label} - {case_a.label}): "

            f"{delta_wall:+.6f} s ({format_delta_percent(delta_wall_pct)}%)",

            flush=True,

        )

        if args.petsc_event:

            print("Phase 2: PETSc event timing (-log_view)", flush=True)

            petsc_a = run_petsc_event_case(

                case=case_a,

                app_args=args.cmd_args,

                workdir=args.workdir,

                results_dir=result_dir,

                timing_runs=args.timing_runs,

                petsc_events=args.petsc_event,

            )

            petsc_b = run_petsc_event_case(

                case=case_b,

                app_args=args.cmd_args,

                workdir=args.workdir,

                results_dir=result_dir,

                timing_runs=args.timing_runs,

                petsc_events=args.petsc_event,

            )

            summary["petsc_timing"] = {case_a.label: petsc_a, case_b.label: petsc_b}


            write_timing_csv(

                result_dir / f"{case_a.label}_petsc.csv",

                case_a.label,

                petsc_a,

                args.petsc_event,

            )

            write_timing_csv(

                result_dir / f"{case_b.label}_petsc.csv",

                case_b.label,

                petsc_b,

                args.petsc_event,

            )


            print(

                f"{case_a.label}: mean PETSc event-sum wall {petsc_a['mean_wall_seconds']:.6f} s",

                flush=True,

            )

            print(

                f"{case_b.label}: mean PETSc event-sum wall {petsc_b['mean_wall_seconds']:.6f} s",

                flush=True,

            )

            print(

                f"{case_a.label}: mean PETSc event sum {petsc_a['mean_petsc_event_sum']:.6f} s",

                flush=True,

            )

            print(

                f"{case_b.label}: mean PETSc event sum {petsc_b['mean_petsc_event_sum']:.6f} s",

                flush=True,

            )

            delta_petsc = petsc_b["mean_petsc_event_sum"] - petsc_a["mean_petsc_event_sum"]

            delta_petsc_pct = (

                100.0 * delta_petsc / petsc_a["mean_petsc_event_sum"]

                if petsc_a["mean_petsc_event_sum"]

                else float("inf")

            )

            print(

                f"PETSc event-sum delta ({case_b.label} - {case_a.label}): "

                f"{delta_petsc:+.6f} s ({format_delta_percent(delta_petsc_pct)}%)",

                flush=True,

            )

            for event_name in args.petsc_event:

                delta_event = (

                    petsc_b["mean_petsc_events"][event_name]

                    - petsc_a["mean_petsc_events"][event_name]

                )

                delta_event_pct = (

                    100.0 * delta_event / petsc_a["mean_petsc_events"][event_name]

                    if petsc_a["mean_petsc_events"][event_name]

                    else float("inf")

                )

                print(

                    f"{event_name} delta ({case_b.label} - {case_a.label}): "

                    f"{delta_event:+.6f} s ({format_delta_percent(delta_event_pct)}%)",

                    flush=True,

                )


    if args.perf_stat:

        try:

            print("Phase 3: perf stat", flush=True)

            perf_a = run_perf_case(

                case=case_a,

                app_args=args.cmd_args,

                workdir=args.workdir,

                results_dir=result_dir,

                perf_runs=args.perf_runs,

                perf_bin=args.perf_bin,

                perf_events=args.perf_event,

            )

            perf_b = run_perf_case(

                case=case_b,

                app_args=args.cmd_args,

                workdir=args.workdir,

                results_dir=result_dir,

                perf_runs=args.perf_runs,

                perf_bin=args.perf_bin,

                perf_events=args.perf_event,

            )

            summary["perf"] = {case_a.label: perf_a, case_b.label: perf_b}

            write_perf_csv(result_dir / f"{case_a.label}_perf.csv", case_a.label, perf_a)

            write_perf_csv(result_dir / f"{case_b.label}_perf.csv", case_b.label, perf_b)

            print(

                f"{case_a.label}: mean perf instructions {perf_a['mean_events']['instructions']:.0f}",

                flush=True,

            )

            print(

                f"{case_a.label}: mean perf cycles {perf_a['mean_events']['cycles']:.0f}",

                flush=True,

            )

            print(

                f"{case_a.label}: mean IPC {perf_a.get('mean_ipc', math.nan):.6f}",

                flush=True,

            )

            print(

                f"{case_b.label}: mean perf instructions {perf_b['mean_events']['instructions']:.0f}",

                flush=True,

            )

            print(

                f"{case_b.label}: mean perf cycles {perf_b['mean_events']['cycles']:.0f}",

                flush=True,

            )

            print(

                f"{case_b.label}: mean IPC {perf_b.get('mean_ipc', math.nan):.6f}",

                flush=True,

            )

            delta_instr = (

                perf_b["mean_events"]["instructions"] - perf_a["mean_events"]["instructions"]

            )

            delta_instr_pct = (

                100.0 * delta_instr / perf_a["mean_events"]["instructions"]

                if perf_a["mean_events"]["instructions"]

                else float("inf")

            )

            print(

                f"Instructions delta ({case_b.label} - {case_a.label}): "

                f"{delta_instr:+.0f} ({format_delta_percent(delta_instr_pct)}%)",

                flush=True,

            )

            delta_cycles = (

                perf_b["mean_events"]["cycles"] - perf_a["mean_events"]["cycles"]

            )

            delta_cycles_pct = (

                100.0 * delta_cycles / perf_a["mean_events"]["cycles"]

                if perf_a["mean_events"]["cycles"]

                else float("inf")

            )

            print(

                f"Cycles delta ({case_b.label} - {case_a.label}): "

                f"{delta_cycles:+.0f} ({format_delta_percent(delta_cycles_pct)}%)",

                flush=True,

            )

            if "mean_ipc" in perf_a and "mean_ipc" in perf_b:

                delta_ipc = perf_b["mean_ipc"] - perf_a["mean_ipc"]

                delta_ipc_pct = (

                    100.0 * delta_ipc / perf_a["mean_ipc"]

                    if perf_a["mean_ipc"]

                    else float("inf")

                )

                print(

                    f"IPC delta ({case_b.label} - {case_a.label}): "

                    f"{delta_ipc:+.6f} ({format_delta_percent(delta_ipc_pct)}%)",

                    flush=True,

                )

            perf_plot = result_dir / "perf_ipc_comparison.svg"

            write_perf_svg(perf_plot, case_a, case_b, perf_a, perf_b)

            summary["perf_plot"] = str(perf_plot)

            print(f"perf plot: {perf_plot}", flush=True)

        except RuntimeError as exc:

            summary["perf_error"] = str(exc)

            print(f"perf stat unavailable: {exc}", flush=True)

            if args.require_perf:

                raise


    if not args.skip_callgrind:

        print("Phase 4: Callgrind", flush=True)

        callgrind_args = DEFAULT_CALLGRIND_ARGS + args.callgrind_arg

        cg_a = run_callgrind_case(

            case=case_a,

            app_args=args.cmd_args,

            workdir=args.workdir,

            results_dir=result_dir,

            callgrind_args=callgrind_args,

        )

        cg_b = run_callgrind_case(

            case=case_b,

            app_args=args.cmd_args,

            workdir=args.workdir,

            results_dir=result_dir,

            callgrind_args=callgrind_args,

        )

        summary["callgrind"] = {case_a.label: cg_a, case_b.label: cg_b}


        event_names_a, totals_a, funcs_a = parse_callgrind_out(Path(cg_a["callgrind_out"]))

        event_names_b, totals_b, funcs_b = parse_callgrind_out(Path(cg_b["callgrind_out"]))

        callgrind_event_names = ordered_unique([*event_names_a, *event_names_b])

        ir_total_a = totals_a.get("Ir")

        ir_total_b = totals_b.get("Ir")

        summary["callgrind_event_names"] = callgrind_event_names

        summary["callgrind_totals"] = {case_a.label: totals_a, case_b.label: totals_b}

        summary["callgrind_totals_ir"] = {

            case_a.label: ir_total_a,

            case_b.label: ir_total_b,

        }

        summary["callgrind_simulation_totals"] = {

            case_a.label: {

                "l1_misses": total_l1_misses(totals_a),

                "ll_misses": total_ll_misses(totals_a),

                "branch_misses": total_branch_misses(totals_a),

                "estimated_cycles": estimate_cycles_from_events(

                    totals_a,

                    base_cpi=args.sim_base_cpi,

                    l1_miss_penalty=args.sim_l1_miss_penalty,

                    ll_miss_penalty=args.sim_ll_miss_penalty,

                    branch_miss_penalty=args.sim_branch_miss_penalty,

                ),

            },

            case_b.label: {

                "l1_misses": total_l1_misses(totals_b),

                "ll_misses": total_ll_misses(totals_b),

                "branch_misses": total_branch_misses(totals_b),

                "estimated_cycles": estimate_cycles_from_events(

                    totals_b,

                    base_cpi=args.sim_base_cpi,

                    l1_miss_penalty=args.sim_l1_miss_penalty,

                    ll_miss_penalty=args.sim_ll_miss_penalty,

                    branch_miss_penalty=args.sim_branch_miss_penalty,

                ),

            },

        }

        for label in (case_a.label, case_b.label):

            totals_map = summary["callgrind_simulation_totals"][label]

            ir_total = summary["callgrind_totals_ir"][label]

            est_cycles = totals_map["estimated_cycles"]

            totals_map["ir_per_estimated_cycle"] = (

                ir_total / est_cycles if ir_total and est_cycles else math.nan

            )


        exclude_patterns = [re.compile(p) for p in (DEFAULT_EXCLUDE_PATTERNS + args.exclude_pattern)]

        include_patterns = [re.compile(p) for p in args.include_pattern]

        diff_rows = build_diff_rows(

            metric_dict_from_event_maps(funcs_a, "Ir"),

            metric_dict_from_event_maps(funcs_b, "Ir"),

            include_patterns,

            exclude_patterns,

        )

        diff_metric_label = "Ir"

        diff_title = "Top {count} Callgrind differences"

        diff_rows_est_cycles = build_diff_rows(

            estimated_cycle_dict_from_event_maps(

                funcs_a,

                base_cpi=args.sim_base_cpi,

                l1_miss_penalty=args.sim_l1_miss_penalty,

                ll_miss_penalty=args.sim_ll_miss_penalty,

                branch_miss_penalty=args.sim_branch_miss_penalty,

            ),

            estimated_cycle_dict_from_event_maps(

                funcs_b,

                base_cpi=args.sim_base_cpi,

                l1_miss_penalty=args.sim_l1_miss_penalty,

                ll_miss_penalty=args.sim_ll_miss_penalty,

                branch_miss_penalty=args.sim_branch_miss_penalty,

            ),

            include_patterns,

            exclude_patterns,

        )

        have_simulated_events = all(

            name in callgrind_event_names

            for name in ["Ir", "I1mr", "D1mr", "D1mw", "ILmr", "DLmr", "DLmw", "Bcm", "Bim"]

        )

        if have_simulated_events:

            diff_rows = diff_rows_est_cycles

            diff_metric_label = "est. cycles (Valgrind sim)"

            diff_title = "Top {count} estimated cycle differences"

            summary["callgrind_diff_metric"] = {

                "kind": "estimated_cycles_from_valgrind_sim",

                "metric_label": diff_metric_label,

                "base_cpi": args.sim_base_cpi,

                "l1_miss_penalty": args.sim_l1_miss_penalty,

                "ll_miss_penalty": args.sim_ll_miss_penalty,

                "branch_miss_penalty": args.sim_branch_miss_penalty,

            }

            print(

                "Per-function Callgrind differences shown as estimated cycles "

                "from Valgrind cache/branch simulation.",

                flush=True,

            )

        elif (

            perf_a

            and perf_b

            and "mean_ipc" in perf_a

            and "mean_ipc" in perf_b

            and perf_a["mean_ipc"]

            and perf_b["mean_ipc"]

        ):

            diff_rows = project_rows_to_estimated_cycles(

                diff_rows, perf_a["mean_ipc"], perf_b["mean_ipc"]

            )

            diff_metric_label = "est. cycles (Ir / IPC)"

            diff_title = "Top {count} estimated cycle differences"

            summary["callgrind_diff_metric"] = {

                "kind": "estimated_cycles_from_perf_ipc",

                "metric_label": diff_metric_label,

                "ipc_a": perf_a["mean_ipc"],

                "ipc_b": perf_b["mean_ipc"],

            }

            print(

                "Per-function Callgrind differences shown as estimated cycles "

                "(function Ir divided by whole-run IPC from perf stat).",

                flush=True,

            )

        else:

            if args.perf_stat:

                print(

                    "perf stat did not produce usable IPC, so per-function differences "

                    "remain in raw Ir.",

                    flush=True,

                )

            else:

                print(

                    "Valgrind simulated cache/branch events were unavailable, so "

                    "per-function differences remain in raw Ir.",

                    flush=True,

                )

            summary["callgrind_diff_metric"] = {

                "kind": "ir",

                "metric_label": diff_metric_label,

            }


        diff_rows.sort(key=lambda row: row["abs_delta"], reverse=True)


        write_diff_csv(result_dir / "callgrind_diff.csv", diff_rows)


        cg_diff_path = result_dir / "callgrind.cg_diff.out"

        try:

            cg_diff_proc = subprocess.run(

                ["cg_diff", cg_a["callgrind_out"], cg_b["callgrind_out"]],

                check=True,

                stdout=subprocess.PIPE,

                stderr=subprocess.STDOUT,

                text=True,

            )

            cg_diff_path.write_text(cg_diff_proc.stdout)

        except subprocess.CalledProcessError as exc:

            (result_dir / "callgrind.cg_diff.log").write_text(exc.stdout or "")


        print(

            f"{case_a.label}: callgrind total Ir {ir_total_a:,}" if ir_total_a is not None else f"{case_a.label}: callgrind total Ir unavailable",

            flush=True,

        )

        print(

            f"{case_b.label}: callgrind total Ir {ir_total_b:,}" if ir_total_b is not None else f"{case_b.label}: callgrind total Ir unavailable",

            flush=True,

        )

        print(

            f"{case_a.label}: simulated L1 misses {total_l1_misses(totals_a):,}",

            flush=True,

        )

        print(

            f"{case_b.label}: simulated L1 misses {total_l1_misses(totals_b):,}",

            flush=True,

        )

        print(

            f"{case_a.label}: simulated LL misses {total_ll_misses(totals_a):,}",

            flush=True,

        )

        print(

            f"{case_b.label}: simulated LL misses {total_ll_misses(totals_b):,}",

            flush=True,

        )

        print(

            f"{case_a.label}: simulated branch misses {total_branch_misses(totals_a):,}",

            flush=True,

        )

        print(

            f"{case_b.label}: simulated branch misses {total_branch_misses(totals_b):,}",

            flush=True,

        )

        est_cycles_a = summary["callgrind_simulation_totals"][case_a.label]["estimated_cycles"]

        est_cycles_b = summary["callgrind_simulation_totals"][case_b.label]["estimated_cycles"]

        print(

            f"{case_a.label}: estimated cycles {est_cycles_a:.0f}",

            flush=True,

        )

        print(

            f"{case_b.label}: estimated cycles {est_cycles_b:.0f}",

            flush=True,

        )

        sim_ipc_a = summary["callgrind_simulation_totals"][case_a.label]["ir_per_estimated_cycle"]

        sim_ipc_b = summary["callgrind_simulation_totals"][case_b.label]["ir_per_estimated_cycle"]

        print(

            f"{case_a.label}: Ir / est_cycles {sim_ipc_a:.6f}",

            flush=True,

        )

        print(

            f"{case_b.label}: Ir / est_cycles {sim_ipc_b:.6f}",

            flush=True,

        )

        if ir_total_a is not None and ir_total_b is not None:

            delta_total = ir_total_b - ir_total_a

            delta_pct = 100.0 * delta_total / ir_total_a if ir_total_a else float("inf")

            print(

                f"Callgrind total delta ({case_b.label} - {case_a.label}): "

                f"{delta_total:+,} Ir ({format_delta_percent(delta_pct)}%)",

                flush=True,

            )

        delta_est_cycles = est_cycles_b - est_cycles_a

        delta_est_cycles_pct = (

            100.0 * delta_est_cycles / est_cycles_a if est_cycles_a else float("inf")

        )

        print(

            f"Estimated cycles delta ({case_b.label} - {case_a.label}): "

            f"{delta_est_cycles:+.0f} ({format_delta_percent(delta_est_cycles_pct)}%)",

            flush=True,

        )

        if not math.isnan(sim_ipc_a) and not math.isnan(sim_ipc_b):

            delta_sim_ipc = sim_ipc_b - sim_ipc_a

            delta_sim_ipc_pct = 100.0 * delta_sim_ipc / sim_ipc_a if sim_ipc_a else float("inf")

            print(

                f"Ir / est_cycles delta ({case_b.label} - {case_a.label}): "

                f"{delta_sim_ipc:+.6f} ({format_delta_percent(delta_sim_ipc_pct)}%)",

                flush=True,

            )


        top_diff_rows = select_top_difference_rows(diff_rows, args.top)

        print_top_rows(

            f"Top branch differences ({case_b.label} - {case_a.label}) present in both branches:",

            top_diff_rows,

            case_a,

            case_b,

            args.top,

            metric_label=diff_metric_label,

        )

        diff_plot = result_dir / "callgrind_top_differences.svg"

        write_svg_diff_plot(

            diff_plot,

            top_diff_rows,

            case_a,

            case_b,

            metric_label=diff_metric_label,

            title=diff_title.format(count=len(top_diff_rows)),

        )

        summary["callgrind_diff_plot"] = str(diff_plot)

        print(f"callgrind diff plot: {diff_plot}", flush=True)

    else:

        print("Phase 4: Callgrind skipped (--skip-callgrind)", flush=True)


    summary_path = result_dir / "summary.json"

    summary_path.write_text(json.dumps(summary, indent=2))

    print(f"summary: {summary_path}", flush=True)

    return 0


if __name__ == "__main__":
    # Script entry point: run main() and map the failure modes handled here
    # onto process exit codes instead of letting a traceback escape.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        # Interrupted by the user; 130 is presumably the shell convention of
        # 128 + SIGINT -- confirm if the exact value matters to callers.
        sys.exit(130)
    except (FileNotFoundError, ValueError, RuntimeError) as exc:
        # Report only the message on stderr and exit with status 2.
        print(exc, file=sys.stderr)
        sys.exit(2)
    else:
        # Normal completion: propagate main()'s return value as the exit code.
        sys.exit(exit_status)

bool

profile_test.Case
Definition profile_test.py:97

profile_test.write_perf_csv
None write_perf_csv(Path path, str label, dict summary)
Definition profile_test.py:1338

profile_test.event_value
int event_value(dict[str, int] event_map, str name)
Definition profile_test.py:1238

profile_test.run_command
tuple[int, float] run_command(list[str] cmd, str|None cwd, Path log_path, str|None announce_name=None, bool print_cmd=False)
Definition profile_test.py:701

profile_test.make_seed_configure_args
tuple[list[str], dict[str, dict[str, str]]] make_seed_configure_args(str seed_cache, list[str] extra_keys)
Definition profile_test.py:356

profile_test.write_perf_svg
None write_perf_svg(Path path, Case case_a, Case case_b, dict perf_a, dict perf_b)
Definition profile_test.py:1439

profile_test.parse_args
argparse.Namespace parse_args()
Definition profile_test.py:107

profile_test.select_top_difference_rows
list[dict] select_top_difference_rows(list[dict] rows, int limit)
Definition profile_test.py:1357

profile_test.read_cmake_home_directory
str|None read_cmake_home_directory(str|None build_dir)
Definition profile_test.py:321

profile_test.resolve_callgrind_name
str resolve_callgrind_name(str raw_value, dict[str, str] table)
Definition profile_test.py:1048

profile_test.estimate_cycles_from_events
float estimate_cycles_from_events(dict[str, int] event_map, float base_cpi, float l1_miss_penalty, float ll_miss_penalty, float branch_miss_penalty)
Definition profile_test.py:1268

profile_test.infer_commit
str|None infer_commit(str|None source_dir)
Definition profile_test.py:373

profile_test.format_metric
str format_metric(float value)
Definition profile_test.py:1319

profile_test.total_branch_misses
int total_branch_misses(dict[str, int] event_map)
Definition profile_test.py:1258

profile_test.resolve_case
Case resolve_case(str name, str label, str|None build_dir, str|None exe, str exe_relpath, str|None commit, str|None source_dir)
Definition profile_test.py:536

profile_test.should_include_function
bool should_include_function(str func, list[re.Pattern[str]] include_patterns, list[re.Pattern[str]] exclude_patterns)
Definition profile_test.py:1165

profile_test.parse_perf_stat
dict[str, float] parse_perf_stat(str log_text, Iterable[str] event_names)
Definition profile_test.py:757

profile_test.resolve_git_commit
str resolve_git_commit(Path repo_path, str ref)
Definition profile_test.py:389

profile_test.estimated_cycle_dict_from_event_maps
dict[str, float] estimated_cycle_dict_from_event_maps(dict[str, dict[str, int]] event_maps, float base_cpi, float l1_miss_penalty, float ll_miss_penalty, float branch_miss_penalty)
Definition profile_test.py:1298

profile_test.reusable_repo_cache_root
Path reusable_repo_cache_root(Path repo_path)
Definition profile_test.py:406

profile_test.ensure_reusable_worktree
tuple[Path, str] ensure_reusable_worktree(Path repo_path, str ref, str label, Path case_log_dir)
Definition profile_test.py:415

profile_test.resolve_run_cwd
str resolve_run_cwd(Case case, str|None fallback_workdir)
Definition profile_test.py:573

profile_test.sanitize_perf_event_name
str sanitize_perf_event_name(str event_name)
Definition profile_test.py:753

profile_test.metric_dict_from_event_maps
dict[str, float] metric_dict_from_event_maps(dict[str, dict[str, int]] event_maps, str metric_name)
Definition profile_test.py:1285

profile_test.write_diff_csv
None write_diff_csv(Path path, list[dict] rows)
Definition profile_test.py:1505

profile_test.total_ll_misses
int total_ll_misses(dict[str, int] event_map)
Definition profile_test.py:1250

profile_test.run_perf_case
dict run_perf_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int perf_runs, str perf_bin, list[str] perf_events)
Definition profile_test.py:896

profile_test.stable_cache_name
str stable_cache_name(str value)
Definition profile_test.py:303

profile_test.write_svg_diff_plot
None write_svg_diff_plot(Path path, list[dict] rows, Case case_a, Case case_b, str metric_label, str title)
Definition profile_test.py:1371

profile_test.tail_text
str tail_text(Path path, int num_lines=40)
Definition profile_test.py:744

profile_test.build_diff_rows
list[dict] build_diff_rows(dict[str, int] funcs_a, dict[str, int] funcs_b, list[re.Pattern[str]] include_patterns, list[re.Pattern[str]] exclude_patterns)
Definition profile_test.py:1178

profile_test.parse_callgrind_annotate
tuple[int|None, dict[str, int]] parse_callgrind_annotate(Path annotate_path)
Definition profile_test.py:1030

profile_test.resolve_cmake_cache_path
Path resolve_cmake_cache_path(str build_or_cache)
Definition profile_test.py:334

profile_test.project_rows_to_estimated_cycles
list[dict] project_rows_to_estimated_cycles(list[dict] rows, float|None ipc_a, float|None ipc_b)
Definition profile_test.py:1210

profile_test.ensure_results_dir
Path ensure_results_dir(str|None requested, str label_a, str label_b)
Definition profile_test.py:308

profile_test.main
int main()
Definition profile_test.py:1544

profile_test.ordered_unique
list[str] ordered_unique(Iterable[str] items)
Definition profile_test.py:749

profile_test.write_timing_csv
None write_timing_csv(Path path, str label, dict summary, list[str] petsc_events)
Definition profile_test.py:1487

profile_test.sanitize_label
str sanitize_label(str value)
Definition profile_test.py:299

profile_test.run_wall_time_case
dict run_wall_time_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int timing_runs)
Definition profile_test.py:786

profile_test.prepare_source_case
Case prepare_source_case(str name, str label, str repo, str ref, Path result_dir, str exe_relpath, str|None workdir_relpath, list[str] configure_args, list[str] build_args, str|None build_target, int build_jobs)
Definition profile_test.py:593

profile_test.run_petsc_event_case
dict run_petsc_event_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int timing_runs, list[str] petsc_events)
Definition profile_test.py:829

profile_test.format_delta_percent
str format_delta_percent(float value)
Definition profile_test.py:1311

profile_test.print_top_rows
None print_top_rows(str title, list[dict] rows, Case case_a, Case case_b, int limit, str metric_label="Ir")
Definition profile_test.py:1529

profile_test.parse_callgrind_costs_line
dict[str, int]|None parse_callgrind_costs_line(str line, list[str] event_names)
Definition profile_test.py:1058

profile_test.run_callgrind_case
dict run_callgrind_case(Case case, list[str] app_args, str|None workdir, Path results_dir, list[str] callgrind_args)
Definition profile_test.py:978

profile_test.read_cmake_cache_entries
dict[str, tuple[str, str]] read_cmake_cache_entries(Path cache_path)
Definition profile_test.py:343

profile_test.parse_callgrind_out
tuple[list[str], dict[str, int], dict[str, dict[str, int]]] parse_callgrind_out(Path callgrind_path)
Definition profile_test.py:1083

profile_test.total_l1_misses
int total_l1_misses(dict[str, int] event_map)
Definition profile_test.py:1242

profile_test.parse_petsc_events
dict[str, float] parse_petsc_events(str log_text, Iterable[str] event_names)
Definition profile_test.py:731

profile_test.shorten_label
str shorten_label(str text, int limit=100)
Definition profile_test.py:1332