v0.16.0
Loading...
Searching...
No Matches
profile_test.py
Go to the documentation of this file.
1#!/usr/bin/env python3
2
3"""Compare two builds with timing runs and Valgrind profiling.
4
5Results are always stored under ``$HOME/tmp`` unless ``--result-dir`` is used.
6The script prints the chosen results directory immediately on startup so a
7long-running comparison can be monitored from another shell.
8
9Typical usage:
10
11 python3 mofem/tools/profile_test.py \
12 --build-dir-a /path/to/build_a \
13 --build-dir-b /path/to/build_b \
14 --exe-relpath tutorials/vec-2_nonlinear_elasticity/nonlinear_elastic \
15 --label-a develop \
16 --label-b transpose \
17 --commit-a fe611e716d7538c51b067559b95be15daded86ca \
18 --commit-b e24058e5681cfdf56ca336e5cf708a1b0a59a575 \
19 --workdir /abs/path/to/run/cwd \
20 --petsc-event SNESFunctionEval \
21 --petsc-event SNESJacobianEval \
22 -- \
23 -file_name /abs/path/to/beam_3D.cub \
24 -order 3 -ts_max_time 0.1 -ts_dt 0.1 -log_no_color -test 1 -test_op
25"""
26
27from __future__ import annotations
28
29import argparse
30import csv
31import hashlib
32import json
33import math
34import os
35import re
36import shutil
37import shlex
38import statistics
39import subprocess
40import sys
41import time
42from collections import OrderedDict
43from dataclasses import asdict, dataclass
44from datetime import datetime, timezone
45from html import escape
46from pathlib import Path
47from typing import Iterable
48
49
# One PETSc event row of the application log: "[rank] <...> [petsc] NAME N X VALUE".
# Group 1 is the event name, group 2 the numeric field that follows one integer
# column and one further whitespace-delimited column.
PETSC_EVENT_RE = re.compile(
    r"^\s*\[\d+\]\s+<[^>]+>\s+\[petsc\]\s+(\S+)\s+\d+\s+\S+\s+([0-9.+\-eE]+)\b"
)
# One row of callgrind_annotate output: "<count> (<parenthesised text>) <name>".
# The parentheses are literal, so the pattern has exactly two capture groups.
CALLGRIND_LINE_RE = re.compile(r"^\s*([\d,]+)\s+\([^)]+\)\s+(.*)$")
# The CMAKE_HOME_DIRECTORY entry of a CMakeCache.txt file (type suffix optional).
CMAKE_HOME_RE = re.compile(r"^CMAKE_HOME_DIRECTORY(?::\w+)?=(.*)$")
# A callgrind name reference "(id)" optionally followed by the name itself.
CALLGRIND_REF_RE = re.compile(r"^\((\d+)\)(?:\s+(.*))?$")

# Options for valgrind --tool=callgrind.
DEFAULT_CALLGRIND_ARGS = [
    "--dump-instr=yes",
    "--collect-jumps=yes",
    "--cache-sim=yes",
    "--branch-sim=yes",
]
# perf events that are always collected; IPC is instructions / cycles.
DEFAULT_PERF_EVENTS = ["instructions", "cycles"]
DEFAULT_SVG_BACKGROUND = "#ffffff"
# Defaults of the --sim-* options (Valgrind estimated-cycle model).
DEFAULT_SIM_BASE_CPI = 1.0
DEFAULT_SIM_L1_MISS_PENALTY = 5.0
DEFAULT_SIM_LL_MISS_PENALTY = 50.0
DEFAULT_SIM_BRANCH_MISS_PENALTY = 15.0
# Regexes for function rows that carry no useful name (totals, raw addresses,
# bare "(id)" references, C runtime start-up frames, the dynamic loader).
DEFAULT_EXCLUDE_PATTERNS = [
    r"^PROGRAM TOTALS$",
    r"^0x[0-9a-fA-F]+$",
    r"^\(\d+\)$",
    r"\(below main\)",
    r"__libc_start_main",
    r"__libc_start_call_main",
    r"ld-linux",
]
# CMake cache variables imported from --seed-cache by make_seed_configure_args.
DEFAULT_SEED_CACHE_KEYS = [
    "ADOL-C_DIR",
    "ADOL-C_HEADER",
    "ADOL-C_LIBRARY",
    "BLAS_DIR",
    "CBLAS_INCLUDE_DIR",
    "CMAKE_PREFIX_PATH",
    "MOAB_DIR",
    "MOAB_VARIABLES_FILE",
    "MPI_RUN",
    "MPI_RUN_FLAGS",
    "PETSC_ARCH",
    "PETSC_DIR",
    "PETSC_VARIABLES_FILE",
    "SLEPC_DIR",
]
94
95
@dataclass
class Case:
    """Description of one of the two builds being compared."""

    # Internal case identifier, used as a prefix in error messages.
    name: str
    # User-facing label; also used as the per-case results sub-directory name.
    label: str
    # Build directory, when known.
    build_dir: str | None
    # Path of the executable to run.
    executable: str
    # Commit or revision label, when known.
    commit: str | None
    # Source directory, when known.
    source_dir: str | None
    # Case-specific working directory; takes precedence in resolve_run_cwd.
    run_workdir: str | None
105
106
def parse_args() -> argparse.Namespace:
    """Parse and validate the command line.

    Everything after ``--`` is collected into ``cmd_args`` and later passed to
    the profiled executable. Validation failures are reported through
    ``parser.error``.

    Returns:
        The populated namespace, with a leading ``--`` removed from ``cmd_args``.
    """
    parser = argparse.ArgumentParser(
        description="Profile two builds with timings and Callgrind."
    )
    add = parser.add_argument

    # Case selection: existing builds/executables, or git refs to build.
    add("--build-dir-a", help="Build directory for case A.")
    add("--build-dir-b", help="Build directory for case B.")
    add("--exe-a", help="Executable for case A.")
    add("--exe-b", help="Executable for case B.")
    add("--repo", help="Git repository to build from refs.")
    add("--ref-a", help="Git branch/tag/commit for case A.")
    add("--ref-b", help="Git branch/tag/commit for case B.")
    add(
        "--exe-relpath",
        default="tutorials/vec-2_nonlinear_elasticity/nonlinear_elastic",
        help="Executable path relative to each build dir.",
    )
    add("--label-a", default="case_a", help="Label for case A.")
    add("--label-b", default="case_b", help="Label for case B.")
    add("--commit-a", help="Commit or revision label for case A.")
    add("--commit-b", help="Commit or revision label for case B.")
    add("--source-dir-a", help="Source directory for case A.")
    add("--source-dir-b", help="Source directory for case B.")
    add(
        "--workdir",
        help="Working directory used for runs. Defaults to the executable directory.",
    )
    add(
        "--workdir-relpath",
        help="Working directory relative to each build dir in source-build mode.",
    )

    # Source-build mode options.
    add(
        "--configure-arg",
        action="append",
        default=[],
        help="Extra argument passed to `cmake -S ... -B ...` in source-build mode.",
    )
    add(
        "--build-arg",
        action="append",
        default=[],
        help="Extra argument passed to `cmake --build` in source-build mode.",
    )
    add("--build-target", help="CMake build target in source-build mode.")
    add(
        "--build-jobs",
        type=int,
        default=max(1, os.cpu_count() or 1),
        help="Parallel jobs for `cmake --build` in source-build mode.",
    )
    add(
        "--seed-cache",
        help=(
            "Existing build directory or CMakeCache.txt used to seed configure "
            "variables in source-build mode."
        ),
    )
    add(
        "--seed-cache-var",
        action="append",
        default=[],
        help="Extra CMake cache variable name to import from --seed-cache.",
    )

    # Measurement options.
    add(
        "--timing-runs",
        type=int,
        default=3,
        help="Number of normal timing runs per case.",
    )
    add(
        "--petsc-event",
        action="append",
        default=[],
        help="PETSc event name to extract from -log_view output. Can be repeated.",
    )
    add(
        "--callgrind-arg",
        action="append",
        default=[],
        help="Extra argument passed to valgrind --tool=callgrind. Can be repeated.",
    )
    add(
        "--perf-stat",
        action="store_true",
        help="Collect perf stat totals and compute IPC = instructions / cycles.",
    )
    add(
        "--perf-bin",
        default="perf",
        help="perf binary path. Useful when the perf wrapper does not match the running kernel.",
    )
    add(
        "--perf-runs",
        type=int,
        default=3,
        help="Number of perf stat runs per case.",
    )
    add(
        "--perf-event",
        action="append",
        default=[],
        help="Additional perf stat event. instructions and cycles are always collected.",
    )
    add(
        "--require-perf",
        action="store_true",
        help="Fail if perf stat collection fails.",
    )

    # Parameters of the Valgrind estimated-cycle model.
    add(
        "--sim-base-cpi",
        type=float,
        default=DEFAULT_SIM_BASE_CPI,
        help="Base cycles-per-instruction used for the Valgrind estimated-cycle model.",
    )
    add(
        "--sim-l1-miss-penalty",
        type=float,
        default=DEFAULT_SIM_L1_MISS_PENALTY,
        help="Penalty for simulated L1 misses that are not also LL misses.",
    )
    add(
        "--sim-ll-miss-penalty",
        type=float,
        default=DEFAULT_SIM_LL_MISS_PENALTY,
        help="Penalty for simulated LL cache misses in the Valgrind estimated-cycle model.",
    )
    add(
        "--sim-branch-miss-penalty",
        type=float,
        default=DEFAULT_SIM_BRANCH_MISS_PENALTY,
        help="Penalty for simulated branch mispredictions in the Valgrind estimated-cycle model.",
    )

    # Reporting options.
    add(
        "--top",
        type=int,
        default=10,
        help="Number of top branch differences to print and plot.",
    )
    add(
        "--result-dir",
        help="Directory for results. Defaults to $HOME/tmp/profile_test_<timestamp>.",
    )
    add(
        "--exclude-pattern",
        action="append",
        default=[],
        help="Regex for function rows to exclude from the diff report.",
    )
    add(
        "--include-pattern",
        action="append",
        default=[],
        help="Regex for function rows to keep. If omitted, all rows are considered.",
    )
    add("--skip-timing", action="store_true", help="Skip normal timing runs.")
    add("--skip-callgrind", action="store_true", help="Skip Callgrind runs.")
    add(
        "cmd_args",
        nargs=argparse.REMAINDER,
        help="Arguments for the profiled executable. Put them after --",
    )
    args = parser.parse_args()

    # REMAINDER keeps the separator itself; drop it so only real arguments remain.
    if args.cmd_args[:1] == ["--"]:
        args.cmd_args = args.cmd_args[1:]

    if args.repo:
        if not (args.ref_a and args.ref_b):
            parser.error("When --repo is used, provide both --ref-a and --ref-b.")
    else:
        for suffix, exe, build_dir in (
            ("a", args.exe_a, args.build_dir_a),
            ("b", args.exe_b, args.build_dir_b),
        ):
            if not exe and not build_dir:
                parser.error(
                    f"Provide either --exe-{suffix} or --build-dir-{suffix}, "
                    "or use --repo with refs."
                )

    for option, count in (
        ("--timing-runs", args.timing_runs),
        ("--perf-runs", args.perf_runs),
        ("--build-jobs", args.build_jobs),
    ):
        if count < 1:
            parser.error(f"{option} must be >= 1.")

    if not args.cmd_args:
        parser.error("Pass the executable arguments after --.")

    return args
297
298
def sanitize_label(value: str) -> str:
    """Turn *value* into a string that is safe to use in a file name.

    Runs of characters other than ASCII letters, digits, ``_``, ``.`` and ``-``
    become a single ``_``; leading and trailing underscores are dropped. When
    nothing is left the result is ``"case"``.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_.-]+", "_", value)
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else "case"
301
302
def stable_cache_name(value: str) -> str:
    """Return a directory-friendly name for *value* that is stable across runs.

    The name is the sanitized value followed by ``_`` and the first ten hex
    digits of the SHA-1 of *value*, so values that sanitize to the same text
    still get different names.
    """
    short_hash = hashlib.sha1(value.encode()).hexdigest()[:10]
    return "_".join((sanitize_label(value), short_hash))
306
307
def ensure_results_dir(requested: str | None, label_a: str, label_b: str) -> Path:
    """Create the results directory (if needed) and return its path.

    When *requested* is given it is expanded and resolved. Otherwise a
    directory named after the current UTC time and both sanitized labels is
    used under ``~/tmp``.
    """
    if not requested:
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        dir_name = (
            f"profile_test_{stamp}_{sanitize_label(label_a)}_vs_{sanitize_label(label_b)}"
        )
        target = Path.home() / "tmp" / dir_name
    else:
        target = Path(requested).expanduser().resolve()
    target.mkdir(parents=True, exist_ok=True)
    return target
319
320
def read_cmake_home_directory(build_dir: str | None) -> str | None:
    """Return the CMAKE_HOME_DIRECTORY value recorded in a build directory.

    Returns ``None`` when *build_dir* is empty, when it has no
    ``CMakeCache.txt``, or when the cache has no matching entry.
    """
    if not build_dir:
        return None
    cache_file = Path(build_dir) / "CMakeCache.txt"
    if not cache_file.exists():
        return None
    hits = (
        CMAKE_HOME_RE.match(row)
        for row in cache_file.read_text(errors="ignore").splitlines()
    )
    # The first matching line wins.
    return next((hit.group(1) for hit in hits if hit), None)
332
333
def resolve_cmake_cache_path(build_or_cache: str) -> Path:
    """Resolve a build directory or cache file argument to a cache file path.

    A directory is taken to contain ``CMakeCache.txt``; anything else is used
    as the cache file itself.

    Raises:
        FileNotFoundError: if the resulting path does not exist.
    """
    candidate = Path(build_or_cache).expanduser().resolve()
    cache_file = candidate / "CMakeCache.txt" if candidate.is_dir() else candidate
    if cache_file.exists():
        return cache_file
    raise FileNotFoundError(f"CMake cache not found: {cache_file}")
341
342
def read_cmake_cache_entries(cache_path: Path) -> dict[str, tuple[str, str]]:
    """Parse ``KEY:TYPE=VALUE`` lines of a CMake cache file.

    Lines starting with ``//`` or ``#`` are ignored, as is any line that does
    not have a ``:`` inside the part before the first ``=``. In particular a
    line such as ``FOO=a:b`` is skipped rather than raising on unpacking.

    Returns:
        Mapping of variable name to ``(type, value)``; a later duplicate
        overrides an earlier one.
    """
    entries: dict[str, tuple[str, str]] = {}
    for line in cache_path.read_text(errors="ignore").splitlines():
        if line.startswith(("//", "#")):
            continue
        # Split on the first "=" only: values may themselves contain "=" or ":".
        key_type, equals, value = line.partition("=")
        key, colon, value_type = key_type.partition(":")
        if not equals or not colon:
            continue
        entries[key] = (value_type, value)
    return entries
354
355
def make_seed_configure_args(seed_cache: str, extra_keys: list[str]) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Build ``-D`` configure arguments from an existing CMake cache.

    The variables considered are ``DEFAULT_SEED_CACHE_KEYS`` followed by
    *extra_keys*, without duplicates. Variables missing from the cache, or
    whose value ends in ``-NOTFOUND``, are left out.

    Returns:
        ``(configure_args, imported)``: the ``-DKEY:TYPE=VALUE`` strings and,
        per imported variable, a dict with its ``"type"`` and ``"value"``.
    """
    cached = read_cmake_cache_entries(resolve_cmake_cache_path(seed_cache))
    flags: list[str] = []
    provenance: dict[str, dict[str, str]] = {}
    for name in ordered_unique([*DEFAULT_SEED_CACHE_KEYS, *extra_keys]):
        found = cached.get(name)
        if not found:
            continue
        kind, setting = found
        if setting.endswith("-NOTFOUND"):
            continue
        flags.append(f"-D{name}:{kind}={setting}")
        provenance[name] = {"type": kind, "value": setting}
    return flags, provenance
371
372
def infer_commit(source_dir: str | None) -> str | None:
    """Return the ``HEAD`` commit of *source_dir*, or ``None`` when unavailable.

    ``None`` is returned when *source_dir* is empty, when ``git rev-parse``
    fails, when the ``git`` executable cannot be found, or when git prints
    nothing.
    """
    if not source_dir:
        return None
    git_cmd = ["git", "-C", source_dir, "rev-parse", "HEAD"]
    try:
        completed = subprocess.run(
            git_cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    head = completed.stdout.strip()
    return head if head else None
387
388
def resolve_git_commit(repo_path: Path, ref: str) -> str:
    """Resolve *ref* to a commit hash inside *repo_path*.

    Raises:
        RuntimeError: if ``git rev-parse --verify`` fails; the message
            includes git's stderr.
    """
    rev_spec = f"{ref}^{{commit}}"
    git_cmd = ["git", "-C", str(repo_path), "rev-parse", "--verify", rev_spec]
    try:
        completed = subprocess.run(
            git_cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        stderr = exc.stderr.strip()
        raise RuntimeError(
            f"Failed to resolve ref {ref!r} in {repo_path}\n{stderr}"
        ) from exc
    else:
        return completed.stdout.strip()
404
405
def reusable_repo_cache_root(repo_path: Path) -> Path:
    """Return the per-repository cache directory under ``~/tmp/profile_test_cache``."""
    cache_home = Path.home() / "tmp" / "profile_test_cache"
    return cache_home / stable_cache_name(str(repo_path))
408
409
def _add_detached_worktree(
    repo_path: Path,
    worktree_dir: Path,
    target_commit: str,
    ref: str,
    label: str,
    case_log_dir: Path,
    action: str,
) -> None:
    """Run ``git worktree add --detach`` and raise ``RuntimeError`` on failure.

    *action* ("create" or "recreate") only affects the error message.
    """
    log_path = case_log_dir / "worktree_add.log"
    rc, _ = run_command(
        [
            "git",
            "-C",
            str(repo_path),
            "worktree",
            "add",
            "--detach",
            str(worktree_dir),
            target_commit,
        ],
        cwd=str(repo_path),
        log_path=log_path,
        announce_name=f"{label} worktree add",
        print_cmd=True,
    )
    if rc != 0:
        raise RuntimeError(
            f"failed to {action} reusable worktree for ref {ref}\n"
            f"log: {log_path}\n"
            f"last lines:\n{tail_text(log_path)}"
        )


def ensure_reusable_worktree(
    repo_path: Path,
    ref: str,
    label: str,
    case_log_dir: Path,
) -> tuple[Path, str]:
    """Provide a detached git worktree of *ref* that is reused between runs.

    The worktree lives under the repository's cache root, in a directory named
    after *ref*. Three situations are handled:

    * the directory does not exist: a new worktree is added;
    * the directory exists but git does not accept it as a work tree: it is
      deleted, stale worktree records are pruned and the worktree is re-added;
    * the directory is a valid worktree: it is checked out at the target
      commit, hard-reset and cleaned of untracked files (``git clean -fd``).

    Args:
        repo_path: Repository the worktree is attached to.
        ref: Branch, tag or commit to check out.
        label: Case label used in progress messages.
        case_log_dir: Directory receiving the git command logs.

    Returns:
        ``(worktree_dir, target_commit)``.

    Raises:
        RuntimeError: if *ref* cannot be resolved or a git command fails.
    """
    cache_root = reusable_repo_cache_root(repo_path)
    worktree_root = cache_root / "worktrees"
    worktree_dir = worktree_root / stable_cache_name(ref)
    worktree_root.mkdir(parents=True, exist_ok=True)

    target_commit = resolve_git_commit(repo_path, ref)
    print(f"{label} reusable worktree: {worktree_dir}", flush=True)

    if not worktree_dir.exists():
        _add_detached_worktree(
            repo_path, worktree_dir, target_commit, ref, label, case_log_dir, "create"
        )
        return worktree_dir, target_commit

    # A leftover directory that git no longer recognises must be rebuilt.
    show_top = subprocess.run(
        ["git", "-C", str(worktree_dir), "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if show_top.returncode != 0:
        shutil.rmtree(worktree_dir)
        prune_log = case_log_dir / "worktree_prune.log"
        rc, _ = run_command(
            ["git", "-C", str(repo_path), "worktree", "prune"],
            cwd=str(repo_path),
            log_path=prune_log,
            announce_name=f"{label} worktree prune",
            print_cmd=True,
        )
        if rc != 0:
            raise RuntimeError(
                f"failed to prune stale worktrees for {ref}\n"
                f"log: {prune_log}\n"
                f"last lines:\n{tail_text(prune_log)}"
            )
        _add_detached_worktree(
            repo_path, worktree_dir, target_commit, ref, label, case_log_dir, "recreate"
        )
        return worktree_dir, target_commit

    # Valid worktree: bring it to the target commit and drop local changes.
    refresh_steps = [
        (
            "worktree_checkout.log",
            f"{label} worktree checkout",
            ["git", "-C", str(worktree_dir), "checkout", "--detach", target_commit],
        ),
        (
            "worktree_reset.log",
            f"{label} worktree reset",
            ["git", "-C", str(worktree_dir), "reset", "--hard", target_commit],
        ),
        (
            "worktree_clean.log",
            f"{label} worktree clean",
            ["git", "-C", str(worktree_dir), "clean", "-fd"],
        ),
    ]
    for log_name, announce_name, cmd in refresh_steps:
        rc, _ = run_command(
            cmd,
            cwd=str(worktree_dir),
            log_path=case_log_dir / log_name,
            announce_name=announce_name,
            print_cmd=True,
        )
        if rc != 0:
            raise RuntimeError(
                f"failed to refresh reusable worktree for ref {ref}\n"
                f"log: {case_log_dir / log_name}\n"
                f"last lines:\n{tail_text(case_log_dir / log_name)}"
            )

    return worktree_dir, target_commit
526
527
529 name: str,
530 label: str,
531 build_dir: str | None,
532 exe: str | None,
533 exe_relpath: str,
534 commit: str | None,
535 source_dir: str | None,
536) -> Case:
537 if exe:
538 exe_path = Path(exe).expanduser().resolve()
539 else:
540 if not build_dir:
541 raise ValueError(f"{name}: provide either --exe or --build-dir")
542 build_path = Path(build_dir).expanduser().resolve()
543 if build_path.is_file():
544 exe_path = build_path
545 build_dir = str(build_path.parent.parent.parent)
546 else:
547 exe_path = build_path / exe_relpath
548 if not exe_path.exists():
549 raise FileNotFoundError(
550 f"{name}: executable not found: {exe_path}\n"
551 f" build_dir={build_dir}\n"
552 f" exe_relpath={exe_relpath}\n"
553 f"Use --exe-a/--exe-b for full executable paths, or pass the build "
554 f"directory to --build-dir-a/--build-dir-b."
555 )
556
557 if source_dir is None:
558 source_dir = read_cmake_home_directory(build_dir)
559 if commit is None:
560 commit = infer_commit(source_dir)
561
562 return Case(
563 name=name,
564 label=label,
565 build_dir=str(Path(build_dir).expanduser().resolve()) if build_dir else None,
566 executable=str(exe_path),
567 commit=commit,
568 source_dir=source_dir,
569 run_workdir=None,
570 )
571
572
def resolve_run_cwd(case: Case, fallback_workdir: str | None) -> str:
    """Pick the working directory for running *case*.

    Preference order: the case's own ``run_workdir``, then *fallback_workdir*,
    then the directory containing the executable.
    """
    return (
        case.run_workdir
        or fallback_workdir
        or str(Path(case.executable).parent)
    )
579
580
582 name: str,
583 label: str,
584 repo: str,
585 ref: str,
586 result_dir: Path,
587 exe_relpath: str,
588 workdir_relpath: str | None,
589 configure_args: list[str],
590 build_args: list[str],
591 build_target: str | None,
592 build_jobs: int,
593) -> Case:
594 repo_path = Path(repo).expanduser().resolve()
595 if not repo_path.exists():
596 raise FileNotFoundError(f"{name}: repository not found: {repo_path}")
597
598 case_log_dir = result_dir / sanitize_label(label)
599 case_log_dir.mkdir(parents=True, exist_ok=True)
600 worktree_dir, target_commit = ensure_reusable_worktree(
601 repo_path=repo_path,
602 ref=ref,
603 label=label,
604 case_log_dir=case_log_dir,
605 )
606
607 cache_root = reusable_repo_cache_root(repo_path)
608 build_root = cache_root / "builds"
609 build_dir = build_root / stable_cache_name(ref)
610 build_root.mkdir(parents=True, exist_ok=True)
611 print(f"{label} reusable build dir: {build_dir}", flush=True)
612
613 source_root = worktree_dir
614 if not (source_root / "CMakeLists.txt").exists():
615 nested_source_root = worktree_dir / "mofem"
616 if (nested_source_root / "CMakeLists.txt").exists():
617 source_root = nested_source_root
618 else:
619 raise FileNotFoundError(
620 f"{name}: could not find CMakeLists.txt in {worktree_dir} or {nested_source_root}"
621 )
622
623 configure_cmd = [
624 "cmake",
625 "-S",
626 str(source_root),
627 "-B",
628 str(build_dir),
629 *configure_args,
630 ]
631 rc, _ = run_command(
632 configure_cmd,
633 cwd=str(source_root),
634 log_path=case_log_dir / "configure.log",
635 announce_name=f"{label} configure",
636 print_cmd=True,
637 )
638 if rc != 0:
639 raise RuntimeError(
640 f"{name}: configure failed for ref {ref}\n"
641 f"log: {case_log_dir / 'configure.log'}\n"
642 f"last lines:\n{tail_text(case_log_dir / 'configure.log')}"
643 )
644
645 build_cmd = [
646 "cmake",
647 "--build",
648 str(build_dir),
649 "-j",
650 str(build_jobs),
651 ]
652 if build_target:
653 build_cmd.extend(["--target", build_target])
654 build_cmd.extend(build_args)
655 rc, _ = run_command(
656 build_cmd,
657 cwd=str(build_dir),
658 log_path=case_log_dir / "build.log",
659 announce_name=f"{label} build",
660 print_cmd=True,
661 )
662 if rc != 0:
663 raise RuntimeError(
664 f"{name}: build failed for ref {ref}\n"
665 f"log: {case_log_dir / 'build.log'}\n"
666 f"last lines:\n{tail_text(case_log_dir / 'build.log')}"
667 )
668
669 exe_path = build_dir / exe_relpath
670 if not exe_path.exists():
671 raise FileNotFoundError(
672 f"{name}: built executable not found: {exe_path}\n"
673 f"Check --exe-relpath and --build-target."
674 )
675
676 commit = infer_commit(str(worktree_dir))
677 run_workdir = str(build_dir / workdir_relpath) if workdir_relpath else None
678 if run_workdir and not Path(run_workdir).exists():
679 raise FileNotFoundError(
680 f"{name}: run workdir not found: {run_workdir}\n"
681 f"Check --workdir-relpath."
682 )
683
684 return Case(
685 name=name,
686 label=label,
687 build_dir=str(build_dir),
688 executable=str(exe_path),
689 commit=commit or target_commit or ref,
690 source_dir=str(source_root),
691 run_workdir=run_workdir,
692 )
693
694
def run_command(
    cmd: list[str],
    cwd: str | None,
    log_path: Path,
    announce_name: str | None = None,
    print_cmd: bool = False,
) -> tuple[int, float]:
    """Run *cmd*, streaming its combined stdout/stderr into *log_path*.

    Args:
        cmd: Command and arguments (no shell is involved).
        cwd: Working directory for the child, or ``None`` for the current one.
        log_path: Log file to (over)write; parent directories are created.
        announce_name: When given, the log location and a ``tail -f`` hint are
            printed before the command starts.
        print_cmd: When true, the shell-quoted command is printed before the
            run and the exit code after it.

    Returns:
        ``(exit_code, elapsed_seconds)``; the elapsed time covers the whole
        call, including log set-up.
    """
    started = time.perf_counter()
    log_path.parent.mkdir(parents=True, exist_ok=True)
    if announce_name:
        print(f"{announce_name} log: {log_path}", flush=True)
        print(f"monitor: tail -f {shlex.quote(str(log_path))}", flush=True)
    if print_cmd:
        print(f"command: {' '.join(shlex.quote(c) for c in cmd)}", flush=True)

    # errors="replace" on both sides: output that is not valid text in the
    # current encoding must not abort a long profiling run.
    with log_path.open("w", buffering=1, errors="replace") as log_handle:
        # The context manager closes the pipe and reaps the child even when
        # writing the log fails part-way through.
        with subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
            bufsize=1,
        ) as proc:
            if proc.stdout is not None:
                for line in proc.stdout:
                    log_handle.write(line)
            returncode = proc.wait()

    elapsed = time.perf_counter() - started
    if print_cmd:
        print(f"{announce_name or 'command'} finished with exit code {returncode}", flush=True)
    return returncode, elapsed
729
730
def parse_petsc_events(log_text: str, event_names: Iterable[str]) -> dict[str, float]:
    """Extract the values of selected PETSc events from log output.

    Only lines matching ``PETSC_EVENT_RE`` are considered. When an event
    occurs more than once, the last occurrence wins.
    """
    selected = set(event_names)
    found: dict[str, float] = {}
    for row in log_text.splitlines():
        hit = PETSC_EVENT_RE.match(row)
        if hit is None:
            continue
        event, number = hit.groups()
        if event in selected:
            found[event] = float(number)
    return found
742
743
def tail_text(path: Path, num_lines: int = 40) -> str:
    """Return the last *num_lines* lines of a text file, joined by newlines.

    Undecodable bytes are ignored. A non-positive *num_lines* yields an empty
    string; a plain ``lines[-num_lines:]`` would return the whole file for 0.
    """
    if num_lines <= 0:
        return ""
    lines = path.read_text(errors="ignore").splitlines()
    return "\n".join(lines[-num_lines:])
747
748
def ordered_unique(items: Iterable[str]) -> list[str]:
    """Return *items* without duplicates, keeping first-occurrence order."""
    # dict keys keep insertion order, which gives the same result as OrderedDict.
    return list(dict.fromkeys(items))
751
752
def sanitize_perf_event_name(event_name: str) -> str:
    """Return the event name without anything from the first ``:`` on, stripped."""
    base, _, _ = event_name.partition(":")
    return base.strip()
755
756
def parse_perf_stat(log_text: str, event_names: Iterable[str]) -> dict[str, float]:
    """Read counter values for *event_names* from comma-separated perf output.

    On each line the first field is taken as the counter value and the third
    as the event name (reduced by ``sanitize_perf_event_name``). Blank lines,
    ``#`` comments, lines with fewer than three fields, unwanted events and
    values that are not numbers are skipped.
    """
    wanted = set(event_names)
    placeholders = {"<not supported>", "<not counted>", "not counted"}
    counters: dict[str, float] = {}
    for raw_line in log_text.splitlines():
        text = raw_line.strip()
        if text == "" or text.startswith("#"):
            continue
        fields = [field.strip() for field in raw_line.split(",")]
        if len(fields) < 3:
            continue
        event = sanitize_perf_event_name(fields[2])
        count_text = fields[0]
        if event not in wanted or count_text in placeholders:
            continue
        try:
            counters[event] = float(count_text)
        except ValueError:
            # Anything else that is not a number is treated as "no value".
            continue
    return counters
778
779
781 case: Case,
782 app_args: list[str],
783 workdir: str | None,
784 results_dir: Path,
785 timing_runs: int,
786) -> dict:
787 case_dir = results_dir / case.label
788 case_dir.mkdir(parents=True, exist_ok=True)
789
790 runs = []
791 for run_id in range(1, timing_runs + 1):
792 log_path = case_dir / f"wall_run_{run_id}.log"
793 cmd = [case.executable, *app_args]
794 rc, elapsed = run_command(
795 cmd,
796 cwd=resolve_run_cwd(case, workdir),
797 log_path=log_path,
798 announce_name=f"{case.label} wall run {run_id}",
799 )
800 if rc != 0:
801 raise RuntimeError(
802 f"{case.label}: wall run {run_id} failed with exit code {rc}\n"
803 f"log: {log_path}\n"
804 f"last lines:\n{tail_text(log_path)}"
805 )
806
807 row = {
808 "run": run_id,
809 "wall_seconds": elapsed,
810 "log_path": str(log_path),
811 }
812 runs.append(row)
813
814 summary = {
815 "runs": runs,
816 "mean_wall_seconds": statistics.mean(row["wall_seconds"] for row in runs),
817 }
818
819 return summary
820
821
823 case: Case,
824 app_args: list[str],
825 workdir: str | None,
826 results_dir: Path,
827 timing_runs: int,
828 petsc_events: list[str],
829) -> dict:
830 case_dir = results_dir / case.label
831 case_dir.mkdir(parents=True, exist_ok=True)
832
833 petsc_args = list(app_args)
834 if "-log_view" not in petsc_args:
835 petsc_args.append("-log_view")
836
837 runs = []
838 for run_id in range(1, timing_runs + 1):
839 log_path = case_dir / f"petsc_run_{run_id}.log"
840 cmd = [case.executable, *petsc_args]
841 rc, elapsed = run_command(
842 cmd,
843 cwd=resolve_run_cwd(case, workdir),
844 log_path=log_path,
845 announce_name=f"{case.label} petsc run {run_id}",
846 )
847 if rc != 0:
848 raise RuntimeError(
849 f"{case.label}: petsc run {run_id} failed with exit code {rc}\n"
850 f"log: {log_path}\n"
851 f"last lines:\n{tail_text(log_path)}"
852 )
853
854 log_text = log_path.read_text()
855 event_values = parse_petsc_events(log_text, petsc_events)
856 missing = [name for name in petsc_events if name not in event_values]
857 if missing:
858 raise RuntimeError(
859 f"{case.label}: petsc run {run_id} missing PETSc events {missing}\n"
860 f"log: {log_path}\n"
861 f"last lines:\n{tail_text(log_path)}"
862 )
863
864 row = {
865 "run": run_id,
866 "wall_seconds": elapsed,
867 "log_path": str(log_path),
868 "petsc_events": event_values,
869 "petsc_event_sum": sum(event_values[name] for name in petsc_events),
870 }
871 runs.append(row)
872
873 events_summary = {}
874 for name in petsc_events:
875 values = [row["petsc_events"][name] for row in runs]
876 events_summary[name] = statistics.mean(values)
877
878 summary = {
879 "runs": runs,
880 "mean_wall_seconds": statistics.mean(row["wall_seconds"] for row in runs),
881 "mean_petsc_events": events_summary,
882 "mean_petsc_event_sum": statistics.mean(row["petsc_event_sum"] for row in runs),
883 }
884
885 return summary
886
887
889 case: Case,
890 app_args: list[str],
891 workdir: str | None,
892 results_dir: Path,
893 perf_runs: int,
894 perf_bin: str,
895 perf_events: list[str],
896) -> dict:
897 case_dir = results_dir / case.label
898 case_dir.mkdir(parents=True, exist_ok=True)
899
900 event_list = ordered_unique([*DEFAULT_PERF_EVENTS, *perf_events])
901 runs = []
902 for run_id in range(1, perf_runs + 1):
903 log_path = case_dir / f"perf_run_{run_id}.log"
904 cmd = [
905 perf_bin,
906 "stat",
907 "-x,",
908 "--no-big-num",
909 "-e",
910 ",".join(event_list),
911 "--",
912 case.executable,
913 *app_args,
914 ]
915 rc, elapsed = run_command(
916 cmd,
917 cwd=resolve_run_cwd(case, workdir),
918 log_path=log_path,
919 announce_name=f"{case.label} perf run {run_id}",
920 )
921 log_text = log_path.read_text()
922 if rc != 0:
923 raise RuntimeError(
924 f"{case.label}: perf stat run {run_id} failed with exit code {rc}\n"
925 f" log: {log_path}\n"
926 f" first lines:\n"
927 + "\n".join(log_text.splitlines()[:8])
928 )
929
930 event_values = parse_perf_stat(log_text, event_list)
931 missing = [event for event in DEFAULT_PERF_EVENTS if event not in event_values]
932 if missing:
933 raise RuntimeError(
934 f"{case.label}: perf stat run {run_id} did not report required events {missing}\n"
935 f" log: {log_path}\n"
936 f" first lines:\n"
937 + "\n".join(log_text.splitlines()[:8])
938 )
939
940 instructions = event_values["instructions"]
941 cycles = event_values["cycles"]
942 ipc = instructions / cycles if cycles else math.nan
943 runs.append(
944 {
945 "run": run_id,
946 "wall_seconds": elapsed,
947 "log_path": str(log_path),
948 "events": event_values,
949 "ipc": ipc,
950 }
951 )
952
953 mean_events = {}
954 for name in event_list:
955 values = [row["events"].get(name) for row in runs if name in row["events"]]
956 if values:
957 mean_events[name] = statistics.mean(values)
958
959 ipc_values = [row["ipc"] for row in runs if not math.isnan(row["ipc"])]
960 summary = {
961 "runs": runs,
962 "event_names": event_list,
963 "mean_events": mean_events,
964 "mean_wall_seconds": statistics.mean(row["wall_seconds"] for row in runs),
965 }
966 if ipc_values:
967 summary["mean_ipc"] = statistics.mean(ipc_values)
968
969 return summary
970
971
973 case: Case,
974 app_args: list[str],
975 workdir: str | None,
976 results_dir: Path,
977 callgrind_args: list[str],
978) -> dict:
979 case_dir = results_dir / case.label
980 case_dir.mkdir(parents=True, exist_ok=True)
981
982 callgrind_out = case_dir / "callgrind.out"
983 callgrind_log = case_dir / "callgrind.log"
984 annotate_txt = case_dir / "callgrind.annotate.txt"
985
986 cmd = [
987 "valgrind",
988 "--tool=callgrind",
989 f"--callgrind-out-file={callgrind_out}",
990 *callgrind_args,
991 case.executable,
992 *app_args,
993 ]
994 rc, elapsed = run_command(
995 cmd,
996 cwd=resolve_run_cwd(case, workdir),
997 log_path=callgrind_log,
998 announce_name=f"{case.label} callgrind",
999 )
1000 if rc != 0:
1001 raise RuntimeError(
1002 f"{case.label}: callgrind run failed with exit code {rc}\n"
1003 f"log: {callgrind_log}\n"
1004 f"last lines:\n{tail_text(callgrind_log)}"
1005 )
1006
1007 annotate_proc = subprocess.run(
1008 [
1009 "callgrind_annotate",
1010 "--inclusive=yes",
1011 "--auto=no",
1012 "--threshold=0",
1013 str(callgrind_out),
1014 ],
1015 check=True,
1016 stdout=subprocess.PIPE,
1017 stderr=subprocess.STDOUT,
1018 text=True,
1019 )
1020 annotate_txt.write_text(annotate_proc.stdout)
1021
1022 return {
1023 "elapsed_wall_seconds": elapsed,
1024 "callgrind_out": str(callgrind_out),
1025 "callgrind_log": str(callgrind_log),
1026 "annotate_txt": str(annotate_txt),
1027 }
1028
1029
def parse_callgrind_annotate(annotate_path: Path) -> tuple[int | None, dict[str, int]]:
    """Parse the text output of ``callgrind_annotate``.

    Returns:
        ``(totals, functions)``: the count on the ``PROGRAM TOTALS`` line
        (``None`` when absent or unparsable) and a mapping from the text after
        the count column to the count of that row.
    """
    totals: int | None = None
    functions: dict[str, int] = {}
    for row in annotate_path.read_text(errors="ignore").splitlines():
        hit = CALLGRIND_LINE_RE.match(row)
        if hit is None:
            continue
        if "PROGRAM TOTALS" in row:
            # Counts are printed with thousands separators.
            totals = int(hit.group(1).replace(",", ""))
            continue
        count_text, label = hit.groups()
        functions[label] = int(count_text.replace(",", ""))
    return totals, functions
1046
1047
def resolve_callgrind_name(raw_value: str, table: dict[str, str]) -> str:
    """Expand a callgrind name reference, updating *table* as a side effect.

    * ``"(id) name"`` records ``name`` under ``id`` and returns ``name``.
    * ``"(id)"`` returns the recorded name, or ``"(id)"`` itself when the id
      has not been recorded.
    * Anything else is returned unchanged.
    """
    parsed = CALLGRIND_REF_RE.match(raw_value)
    if parsed is None:
        return raw_value
    ref_id, payload = parsed.groups()
    if payload is None:
        return table.get(ref_id, f"({ref_id})")
    table[ref_id] = payload
    return payload
1056
1057
def parse_callgrind_costs_line(
    line: str, event_names: list[str], num_positions: int = 1
) -> dict[str, int] | None:
    """Parse one cost line of a callgrind output file.

    A cost line consists of *num_positions* position columns followed by one
    integer cost per event. Trailing costs may be omitted and then count as 0;
    costs beyond ``len(event_names)`` are ignored.

    Args:
        line: The line to parse.
        event_names: Event names in column order.
        num_positions: Number of leading position columns to skip. The default
            of 1 keeps the previous behaviour; pass 2 for files that carry both
            an instruction and a line position.

    Returns:
        Mapping of event name to cost, or ``None`` when the line is not a cost
        line: too few columns, a ``key=value`` first column, or a cost column
        that is not an integer.
    """
    parts = line.split()
    if len(parts) <= num_positions:
        return None
    if "=" in parts[0]:
        return None

    try:
        costs = [int(value) for value in parts[num_positions:]]
    except ValueError:
        return None

    # Pad omitted trailing costs with 0; zip() drops surplus columns.
    costs.extend([0] * (len(event_names) - len(costs)))
    return dict(zip(event_names, costs))
1079
1080
def parse_callgrind_out(
    callgrind_path: Path,
) -> tuple[list[str], dict[str, int], dict[str, dict[str, int]]]:
    """Read a Callgrind output file and accumulate costs per function.

    Args:
        callgrind_path: Path of the ``callgrind.out`` file to read.  The file
            is decoded with ``errors="ignore"``.

    Returns:
        A tuple ``(event_names, totals, functions)``:

        * ``event_names`` - names from the ``events:`` header line;
        * ``totals`` - event name -> value from the ``summary:`` line (empty
          when that line is absent or not numeric);
        * ``functions`` - function name -> event name -> summed cost of every
          cost line seen while that function was the current ``fn=``.
    """
    event_names: list[str] = []
    totals: dict[str, int] = {}
    functions: dict[str, dict[str, int]] = {}

    ob_table: dict[str, str] = {}
    fl_table: dict[str, str] = {}
    fn_table: dict[str, str] = {}
    # The format keeps ONE id space per kind: fn/cfn share it, ob/cob share
    # it, and fl/fi/fe/cfi/cfl share it.  A name is frequently defined in a
    # call-target line (e.g. "cfn=(7) foo") and only referenced as "fn=(7)"
    # afterwards, so every spec kind has to feed the matching table.
    spec_tables: dict[str, dict[str, str]] = {
        "ob": ob_table,
        "cob": ob_table,
        "fl": fl_table,
        "fi": fl_table,
        "fe": fl_table,
        "cfi": fl_table,
        "cfl": fl_table,
        "fn": fn_table,
        "cfn": fn_table,
    }

    lines = callgrind_path.read_text(errors="ignore").splitlines()

    # Pass 1: record every "(id) name" definition so pass 2 can resolve bare
    # "(id)" references.
    for raw_line in lines:
        spec, sep, value = raw_line.strip().partition("=")
        table = spec_tables.get(spec) if sep else None
        if table is not None:
            resolve_callgrind_name(value.strip(), table)

    # Number of position columns that precede the costs on each cost line;
    # a "positions:" header (e.g. "positions: instr line") overrides it.
    position_count = 1
    current_function: str | None = None

    # Pass 2: accumulate costs.
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("positions:"):
            position_count = max(1, len(line.split(":", 1)[1].split()))
            continue

        if line.startswith("events:"):
            event_names = line.split(":", 1)[1].strip().split()
            continue

        if line.startswith("summary:"):
            try:
                summary_values = [
                    int(value) for value in line.split(":", 1)[1].strip().split()
                ]
            except ValueError:
                # Keep whatever totals were already parsed.
                continue
            summary_values += [0] * max(0, len(event_names) - len(summary_values))
            totals = {
                event_name: summary_values[idx]
                for idx, event_name in enumerate(event_names)
            }
            continue

        if line.startswith("fn="):
            current_function = resolve_callgrind_name(line[3:].strip(), fn_table)
            continue

        if current_function is None or not event_names:
            continue

        tokens = line.split()
        first = tokens[0]
        # "key=value" spec lines (ob=, fl=, cfn=, calls=, jump=, ...) and
        # "name:" header lines (e.g. a trailing "totals:") are not cost lines.
        if "=" in first or first.endswith(":"):
            continue

        # parse_callgrind_costs_line() skips exactly one leading position
        # token, so drop any additional position columns before calling it.
        # Positions are never parsed as numbers, which also keeps compressed
        # forms such as "+3" or "*" out of the cost columns.
        cost_line = " ".join(tokens[position_count - 1:])
        event_costs = parse_callgrind_costs_line(cost_line, event_names)
        if event_costs is None:
            continue

        # NOTE(review): the cost line that follows a "calls=" line is
        # accumulated like any other cost line (unchanged from the previous
        # behaviour).  Per the Callgrind format that line carries the callee's
        # inclusive cost - confirm that including it is intended.
        function_events = functions.setdefault(
            current_function, dict.fromkeys(event_names, 0)
        )
        for event_name, value in event_costs.items():
            function_events[event_name] = function_events.get(event_name, 0) + value

    return event_names, totals, functions
1161
1162
def should_include_function(
    func: str, include_patterns: list[re.Pattern[str]], exclude_patterns: list[re.Pattern[str]]
) -> bool:
    """Decide whether ``func`` passes the include/exclude regex filters.

    An empty ``include_patterns`` list accepts every name; otherwise at least
    one include pattern must ``search``-match ``func``.  A ``search`` match of
    any exclude pattern rejects the name regardless of the include result.
    """
    if include_patterns:
        wanted = any(pattern.search(func) for pattern in include_patterns)
    else:
        wanted = True
    if not wanted:
        return False
    return not any(pattern.search(func) for pattern in exclude_patterns)
1171
1172
def build_diff_rows(
    funcs_a: dict[str, float],
    funcs_b: dict[str, float],
    include_patterns: list[re.Pattern[str]],
    exclude_patterns: list[re.Pattern[str]],
) -> list[dict]:
    """Build one comparison row per function that appears in either mapping.

    Args:
        funcs_a: Function name -> metric value for case A.
        funcs_b: Function name -> metric value for case B.
        include_patterns: Passed to ``should_include_function``.
        exclude_patterns: Passed to ``should_include_function``.

    Returns:
        A list of dicts with the keys ``function``, ``value_a``, ``value_b``,
        ``delta`` (B - A), ``delta_percent_vs_a``, ``abs_delta`` and
        ``present_in_both``.  A function missing from one side counts as ``0``
        there; functions that are ``0`` on both sides are dropped.
        ``delta_percent_vs_a`` is ``inf`` when the A value is ``0``.
        Rows are emitted in sorted function-name order.
    """
    rows = []
    # Iterate in sorted order: a bare set of str keys iterates in a
    # hash-seed-dependent order, which would make the position of rows with
    # equal abs_delta differ from run to run after a later stable sort.
    for func in sorted(funcs_a.keys() | funcs_b.keys()):
        if not should_include_function(func, include_patterns, exclude_patterns):
            continue
        a_val = funcs_a.get(func, 0)
        b_val = funcs_b.get(func, 0)
        if a_val == 0 and b_val == 0:
            continue
        delta = b_val - a_val
        delta_pct = 100.0 * delta / a_val if a_val else float("inf")
        rows.append(
            {
                "function": func,
                "value_a": a_val,
                "value_b": b_val,
                "delta": delta,
                "delta_percent_vs_a": delta_pct,
                "abs_delta": abs(delta),
                "present_in_both": bool(a_val and b_val),
            }
        )
    return rows
1204
1205
1207 rows: list[dict],
1208 ipc_a: float | None,
1209 ipc_b: float | None,
1210) -> list[dict]:
1211 if not ipc_a or not ipc_b:
1212 raise ValueError("Both IPC values are required to estimate cycles.")
1213 if ipc_a <= 0 or ipc_b <= 0:
1214 raise ValueError("IPC values must be positive to estimate cycles.")
1215
1216 projected_rows = []
1217 for row in rows:
1218 value_a = row["value_a"] / ipc_a
1219 value_b = row["value_b"] / ipc_b
1220 delta = value_b - value_a
1221 if value_a:
1222 delta_pct = 100.0 * delta / value_a
1223 else:
1224 delta_pct = float("inf")
1225 projected_rows.append(
1226 {
1227 **row,
1228 "value_a": value_a,
1229 "value_b": value_b,
1230 "delta": delta,
1231 "delta_percent_vs_a": delta_pct,
1232 "abs_delta": abs(delta),
1233 }
1234 )
1235 return projected_rows
1236
1237
def event_value(event_map: dict[str, int], name: str) -> int:
    """Return the value stored for ``name`` in ``event_map``, or ``0`` if absent."""
    return event_map[name] if name in event_map else 0
1240
1241
def total_l1_misses(event_map: dict[str, int]) -> int:
    """Return ``I1mr + D1mr + D1mw`` from ``event_map`` (missing events count as 0)."""
    return sum(event_map.get(name, 0) for name in ("I1mr", "D1mr", "D1mw"))
1248
1249
def total_ll_misses(event_map: dict[str, int]) -> int:
    """Return ``ILmr + DLmr + DLmw`` from ``event_map`` (missing events count as 0)."""
    return sum(event_map.get(name, 0) for name in ("ILmr", "DLmr", "DLmw"))
1256
1257
def total_branch_misses(event_map: dict[str, int]) -> int:
    """Return ``Bcm + Bim`` from ``event_map`` (missing events count as 0)."""
    return sum(event_map.get(name, 0) for name in ("Bcm", "Bim"))
1260
1261
def estimate_cycles_from_events(
    event_map: dict[str, int],
    base_cpi: float,
    l1_miss_penalty: float,
    ll_miss_penalty: float,
    branch_miss_penalty: float,
) -> float:
    """Estimate a cycle count from simulated event counters.

    The estimate is the weighted sum::

        Ir * base_cpi
        + max(0, L1 misses - LL misses) * l1_miss_penalty
        + LL misses * ll_miss_penalty
        + branch misses * branch_miss_penalty

    where L1 misses = ``I1mr + D1mr + D1mw``, LL misses =
    ``ILmr + DLmr + DLmw`` and branch misses = ``Bcm + Bim``.  Events missing
    from ``event_map`` count as 0.
    """

    def counted(*names: str) -> int:
        return sum(event_map.get(name, 0) for name in names)

    instructions = counted("Ir")
    last_level = counted("ILmr", "DLmr", "DLmw")
    first_level = counted("I1mr", "D1mr", "D1mw")
    mispredicts = counted("Bcm", "Bim")
    # L1 misses that were not also LL misses; clamped at 0 when the LL count
    # exceeds the L1 count.
    first_level_only = max(0, first_level - last_level)

    return (
        instructions * base_cpi
        + first_level_only * l1_miss_penalty
        + last_level * ll_miss_penalty
        + mispredicts * branch_miss_penalty
    )
1280
1281
def metric_dict_from_event_maps(
    event_maps: dict[str, dict[str, int]],
    metric_name: str,
) -> dict[str, float]:
    """Project per-function event maps onto a single metric.

    Returns a mapping function name -> ``float`` value of ``metric_name``;
    functions that do not have the metric get ``0.0``.
    """
    selected: dict[str, float] = {}
    for func, events in event_maps.items():
        selected[func] = float(events.get(metric_name, 0))
    return selected
1290
1291
def estimated_cycle_dict_from_event_maps(
    event_maps: dict[str, dict[str, int]],
    base_cpi: float,
    l1_miss_penalty: float,
    ll_miss_penalty: float,
    branch_miss_penalty: float,
) -> dict[str, float]:
    """Return function name -> estimated cycles for every entry of ``event_maps``.

    Each function's event map is passed to ``estimate_cycles_from_events``
    together with the given model parameters.
    """
    estimates: dict[str, float] = {}
    for func, events in event_maps.items():
        estimates[func] = estimate_cycles_from_events(
            events,
            base_cpi=base_cpi,
            l1_miss_penalty=l1_miss_penalty,
            ll_miss_penalty=ll_miss_penalty,
            branch_miss_penalty=branch_miss_penalty,
        )
    return estimates
1309
1310
def format_delta_percent(value: float) -> str:
    """Format a percentage with two decimals; infinities become ``inf`` / ``-inf``."""
    if value in (float("inf"), float("-inf")):
        return "inf" if value > 0 else "-inf"
    return f"{value:.2f}"
1317
1318
def format_metric(value: float) -> str:
    """Format ``value`` compactly for display.

    Magnitudes of at least 1e9 / 1e6 / 1e3 are divided down and suffixed with
    ``G`` / ``M`` / ``K`` (two decimals).  Magnitudes in ``[1, 1000)`` are
    printed with two decimals, anything smaller with four.
    """
    magnitude = abs(value)
    for threshold, suffix in (
        (1_000_000_000, "G"),
        (1_000_000, "M"),
        (1_000, "K"),
    ):
        if magnitude >= threshold:
            return f"{value / threshold:.2f}{suffix}"
    precision = 2 if magnitude >= 1 else 4
    return f"{value:.{precision}f}"
1330
1331
def shorten_label(text: str, limit: int = 100) -> str:
    """Truncate ``text`` to at most ``limit`` characters.

    Text that already fits is returned unchanged.  Longer text is cut and
    terminated with ``"..."`` so that the result is exactly ``limit``
    characters long.  When ``limit`` is 3 or less there is no room for both
    text and an ellipsis, so the text is simply cut to ``limit`` characters
    (an empty string for ``limit <= 0``).
    """
    if len(text) <= limit:
        return text
    if limit <= 3:
        # ``limit - 3`` would be zero or negative here; a negative slice end
        # counts from the END of the string and would return more than
        # ``limit`` characters.
        return text[: max(0, limit)]
    return text[: limit - 3] + "..."
1336
1337
def write_perf_csv(path: Path, label: str, summary: dict) -> None:
    """Write the per-run perf results of one case to a CSV file.

    Columns are ``label``, ``run``, ``wall_seconds``, ``ipc``, one column per
    name in ``summary["event_names"]`` (no event columns when that key is
    absent) and ``log_path``.  One data row is written per entry of
    ``summary["runs"]``; an event missing from a run's ``"events"`` mapping
    is written as an empty cell.
    """
    event_names = summary.get("event_names", [])
    header = ["label", "run", "wall_seconds", "ipc"]
    header.extend(event_names)
    header.append("log_path")

    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        for run in summary["runs"]:
            record = {
                "label": label,
                "run": run["run"],
                "wall_seconds": run["wall_seconds"],
                "ipc": run["ipc"],
                "log_path": run["log_path"],
            }
            # Evaluated lazily per event name, so run["events"] is only
            # touched when there is at least one event column.
            record.update((name, run["events"].get(name)) for name in event_names)
            writer.writerow(record)
1355
1356
def select_top_difference_rows(rows: list[dict], limit: int) -> list[dict]:
    """Return up to ``limit`` rows with the largest ``abs_delta``.

    Rows flagged ``present_in_both`` are preferred: when at least one such
    row exists only those rows are ranked, otherwise all rows are ranked.
    """
    paired = [row for row in rows if row.get("present_in_both")]
    pool = paired if paired else rows
    ranked = sorted(pool, key=lambda row: row["abs_delta"], reverse=True)
    return ranked[:limit]
1362
1363
1365 path: Path,
1366 rows: list[dict],
1367 case_a: Case,
1368 case_b: Case,
1369 metric_label: str,
1370 title: str,
1371) -> None:
1372 if not rows:
1373 path.write_text(
1374 "<svg xmlns='http://www.w3.org/2000/svg' width='800' height='120'>"
1375 f"<rect x='0' y='0' width='800' height='120' fill='{DEFAULT_SVG_BACKGROUND}' />"
1376 "<text x='20' y='60' font-family='sans-serif' font-size='20'>No rows to plot</text>"
1377 "</svg>"
1378 )
1379 return
1380
1381 width = 1900
1382 row_height = 44
1383 top_margin = 90
1384 bottom_margin = 40
1385 left_margin = 760
1386 right_margin = 160
1387 plot_width = width - left_margin - right_margin
1388 center_x = left_margin + plot_width / 2
1389 height = top_margin + bottom_margin + row_height * len(rows)
1390 max_abs = max(row["abs_delta"] for row in rows) or 1
1391 scale = (plot_width / 2 - 20) / max_abs
1392
1393 parts = [
1394 f"<svg xmlns='http://www.w3.org/2000/svg' width='{width}' height='{height}'>",
1395 f"<rect x='0' y='0' width='{width}' height='{height}' fill='{DEFAULT_SVG_BACKGROUND}' />",
1396 "<style>"
1397 "text{font-family:Arial,Helvetica,sans-serif;fill:#222}"
1398 ".label{font-size:15px}.value{font-size:14px}.title{font-size:24px;font-weight:bold}"
1399 ".subtitle{font-size:15px;fill:#555}.axis{stroke:#888;stroke-width:1}"
1400 ".pos{fill:#cf3f3f}.neg{fill:#2d7d46}"
1401 "</style>",
1402 f"<text class='title' x='20' y='36'>{escape(title)}</text>",
1403 (
1404 f"<text class='subtitle' x='20' y='62'>Delta = {escape(case_b.label)} - "
1405 f"{escape(case_a.label)} | metric: {escape(metric_label)}</text>"
1406 ),
1407 f"<line class='axis' x1='{center_x:.1f}' y1='{top_margin - 18}' x2='{center_x:.1f}' y2='{height - bottom_margin + 10}' />",
1408 ]
1409
1410 for idx, row in enumerate(rows):
1411 y = top_margin + idx * row_height
1412 bar_width = max(1.0, row["abs_delta"] * scale)
1413 if row["delta"] >= 0:
1414 x = center_x
1415 css_class = "pos"
1416 value_x = x + bar_width + 8
1417 value_anchor = "start"
1418 else:
1419 x = center_x - bar_width
1420 css_class = "neg"
1421 value_x = x - 8
1422 value_anchor = "end"
1423
1424 label = shorten_label(row["function"], 110)
1425 parts.append(
1426 f"<text class='label' x='20' y='{y + 18}'>{escape(label)}</text>"
1427 )
1428 parts.append(
1429 f"<rect class='{css_class}' x='{x:.1f}' y='{y + 4}' width='{bar_width:.1f}' height='22' rx='3' ry='3' />"
1430 )
1431 parts.append(
1432 f"<text class='value' x='{value_x:.1f}' y='{y + 20}' text-anchor='{value_anchor}'>"
1433 f"{escape(format_metric(row['delta']))}</text>"
1434 )
1435 parts.append("</svg>")
1436 path.write_text("\n".join(parts))
1437
1438
def write_perf_svg(path: Path, case_a: Case, case_b: Case, perf_a: dict, perf_b: dict) -> None:
    """Write an SVG bar chart comparing perf results of two cases.

    Three metric rows are drawn - mean instructions, mean cycles and mean
    IPC - each with one bar per case (labelled with the case label and the
    formatted value).  A value that is ``None`` is skipped.

    Each metric row is scaled to its own largest value.  The metrics differ by
    many orders of magnitude (instruction/cycle counts versus an IPC ratio),
    so a single shared scale would reduce the IPC bars to the 1px minimum.

    Args:
        path: Destination file; overwritten.
        case_a: First case; only ``label`` is read.
        case_b: Second case; only ``label`` is read.
        perf_a: Perf summary of case A; reads ``"mean_events"`` (required) and
            ``"mean_ipc"`` (optional).
        perf_b: Perf summary of case B; same keys as ``perf_a``.
    """
    mean_a = perf_a["mean_events"]
    mean_b = perf_b["mean_events"]
    metrics = [
        ("Instructions", mean_a.get("instructions"), mean_b.get("instructions")),
        ("Cycles", mean_a.get("cycles"), mean_b.get("cycles")),
        ("IPC", perf_a.get("mean_ipc"), perf_b.get("mean_ipc")),
    ]
    width = 1100
    height = 380
    left = 160
    top = 70
    plot_w = width - left - 80
    row_h = 90
    # Longest bar; leaves room to the right for the value text.
    max_bar_w = plot_w - 120
    parts = [
        f"<svg xmlns='http://www.w3.org/2000/svg' width='{width}' height='{height}'>",
        f"<rect x='0' y='0' width='{width}' height='{height}' fill='{DEFAULT_SVG_BACKGROUND}' />",
        "<style>"
        "text{font-family:Arial,Helvetica,sans-serif;fill:#222}"
        ".title{font-size:24px;font-weight:bold}.label{font-size:17px}.value{font-size:14px}"
        ".a{fill:#4c78a8}.b{fill:#f58518}"
        "</style>",
        "<text class='title' x='20' y='36'>perf stat comparison</text>",
    ]
    for idx, (name, a_val, b_val) in enumerate(metrics):
        base_y = top + idx * row_h
        parts.append(f"<text class='label' x='20' y='{base_y + 20}'>{escape(name)}</text>")
        present = [value for value in (a_val, b_val) if value is not None]
        # ``default=0`` keeps a metric with no values from raising; ``or 1``
        # avoids dividing by zero when the largest value is 0.
        scale = max_bar_w / (max(present, default=0) or 1)
        for offset, value, css_class, label in [
            (0, a_val, "a", case_a.label),
            (30, b_val, "b", case_b.label),
        ]:
            if value is None:
                continue
            bar_w = max(1.0, value * scale)
            y = base_y + offset
            parts.append(
                f"<rect class='{css_class}' x='{left}' y='{y}' width='{bar_w:.1f}' height='20' rx='3' ry='3' />"
            )
            parts.append(
                f"<text class='value' x='{left - 10}' y='{y + 15}' text-anchor='end'>{escape(label)}</text>"
            )
            parts.append(
                f"<text class='value' x='{left + bar_w + 8:.1f}' y='{y + 15}'>{escape(format_metric(value))}</text>"
            )
    parts.append("</svg>")
    path.write_text("\n".join(parts))
1485
1486
def write_timing_csv(path: Path, label: str, summary: dict, petsc_events: list[str]) -> None:
    """Write the per-run timing results of one case to a CSV file.

    Columns are ``label``, ``run``, ``wall_seconds``, ``petsc_event_sum``,
    one column per name in ``petsc_events`` and ``log_path``.  One data row
    is written per entry of ``summary["runs"]``.  ``petsc_event_sum`` and the
    per-event values are optional in a run and written as empty cells when
    missing.
    """
    header = ["label", "run", "wall_seconds", "petsc_event_sum"]
    header.extend(petsc_events)
    header.append("log_path")

    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        for run in summary["runs"]:
            record = {
                "label": label,
                "run": run["run"],
                "wall_seconds": run["wall_seconds"],
                "petsc_event_sum": run.get("petsc_event_sum"),
                "log_path": run["log_path"],
            }
            record.update(
                (name, run.get("petsc_events", {}).get(name)) for name in petsc_events
            )
            writer.writerow(record)
1503
1504
def write_diff_csv(path: Path, rows: list[dict]) -> None:
    """Write comparison rows to a CSV file.

    The columns are ``function``, ``value_a``, ``value_b``, ``delta``,
    ``delta_percent_vs_a``, ``abs_delta`` and ``present_in_both``; each row
    dict is written through ``csv.DictWriter`` with exactly those field names.
    """
    columns = (
        "function",
        "value_a",
        "value_b",
        "delta",
        "delta_percent_vs_a",
        "abs_delta",
        "present_in_both",
    )
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(columns))
        writer.writeheader()
        writer.writerows(rows)
1520
1521
1523 title: str,
1524 rows: list[dict],
1525 case_a: Case,
1526 case_b: Case,
1527 limit: int,
1528 metric_label: str = "Ir",
1529) -> None:
1530 print(title)
1531 if not rows:
1532 print(" (no rows)")
1533 return
1534 for row in rows[:limit]:
1535 print(
1536 " "
1537 f"{row['delta']:+,} {metric_label} "
1538 f"({format_delta_percent(row['delta_percent_vs_a'])}% vs {case_a.label}) | "
1539 f"{case_a.label}={row['value_a']:,} | {case_b.label}={row['value_b']:,} | "
1540 f"{row['function']}"
1541 )
1542
1543
1544def main() -> int:
1545 args = parse_args()
1546 perf_a: dict | None = None
1547 perf_b: dict | None = None
1548
1549 result_dir = ensure_results_dir(args.result_dir, args.label_a, args.label_b)
1550 print(f"Results directory: {result_dir}", flush=True)
1551
1552 effective_configure_args = list(args.configure_arg)
1553 imported_seed_cache_vars: dict[str, dict[str, str]] = {}
1554 if args.repo and args.seed_cache:
1555 seeded_args, imported_seed_cache_vars = make_seed_configure_args(
1556 args.seed_cache, args.seed_cache_var
1557 )
1558 effective_configure_args = [*seeded_args, *effective_configure_args]
1559 if imported_seed_cache_vars:
1560 print(
1561 "Seeded configure cache vars: "
1562 + ", ".join(imported_seed_cache_vars.keys()),
1563 flush=True,
1564 )
1565
1566 if args.repo:
1567 print("Phase 0: prepare source builds from refs", flush=True)
1568 case_a = prepare_source_case(
1569 name="case_a",
1570 label=args.label_a,
1571 repo=args.repo,
1572 ref=args.ref_a,
1573 result_dir=result_dir,
1574 exe_relpath=args.exe_relpath,
1575 workdir_relpath=args.workdir_relpath,
1576 configure_args=effective_configure_args,
1577 build_args=args.build_arg,
1578 build_target=args.build_target,
1579 build_jobs=args.build_jobs,
1580 )
1581 case_b = prepare_source_case(
1582 name="case_b",
1583 label=args.label_b,
1584 repo=args.repo,
1585 ref=args.ref_b,
1586 result_dir=result_dir,
1587 exe_relpath=args.exe_relpath,
1588 workdir_relpath=args.workdir_relpath,
1589 configure_args=effective_configure_args,
1590 build_args=args.build_arg,
1591 build_target=args.build_target,
1592 build_jobs=args.build_jobs,
1593 )
1594 else:
1595 case_a = resolve_case(
1596 name="case_a",
1597 label=args.label_a,
1598 build_dir=args.build_dir_a,
1599 exe=args.exe_a,
1600 exe_relpath=args.exe_relpath,
1601 commit=args.commit_a,
1602 source_dir=args.source_dir_a,
1603 )
1604 case_b = resolve_case(
1605 name="case_b",
1606 label=args.label_b,
1607 build_dir=args.build_dir_b,
1608 exe=args.exe_b,
1609 exe_relpath=args.exe_relpath,
1610 commit=args.commit_b,
1611 source_dir=args.source_dir_b,
1612 )
1613
1614 metadata = {
1615 "created_utc": datetime.now(timezone.utc).isoformat(),
1616 "workdir": args.workdir,
1617 "workdir_relpath": args.workdir_relpath,
1618 "cmd_args": args.cmd_args,
1619 "skip_timing": args.skip_timing,
1620 "skip_callgrind": args.skip_callgrind,
1621 "petsc_events": args.petsc_event,
1622 "callgrind_args": DEFAULT_CALLGRIND_ARGS + args.callgrind_arg,
1623 "perf_enabled": args.perf_stat,
1624 "perf_bin": args.perf_bin,
1625 "perf_runs": args.perf_runs,
1626 "perf_events": ordered_unique([*DEFAULT_PERF_EVENTS, *args.perf_event]),
1627 "sim_model": {
1628 "base_cpi": args.sim_base_cpi,
1629 "l1_miss_penalty": args.sim_l1_miss_penalty,
1630 "ll_miss_penalty": args.sim_ll_miss_penalty,
1631 "branch_miss_penalty": args.sim_branch_miss_penalty,
1632 },
1633 "repo": args.repo,
1634 "ref_a": args.ref_a,
1635 "ref_b": args.ref_b,
1636 "configure_args": effective_configure_args,
1637 "build_args": args.build_arg,
1638 "build_target": args.build_target,
1639 "build_jobs": args.build_jobs,
1640 "seed_cache": args.seed_cache,
1641 "seed_cache_vars": imported_seed_cache_vars,
1642 "cases": [asdict(case_a), asdict(case_b)],
1643 }
1644 (result_dir / "metadata.json").write_text(json.dumps(metadata, indent=2))
1645 (result_dir / "command.txt").write_text(
1646 " ".join(shlex.quote(part) for part in args.cmd_args) + "\n"
1647 )
1648
1649 summary: dict[str, object] = {
1650 "result_dir": str(result_dir),
1651 "cases": [asdict(case_a), asdict(case_b)],
1652 "skip_timing": args.skip_timing,
1653 "skip_callgrind": args.skip_callgrind,
1654 "wall_timing": {},
1655 "petsc_timing": {},
1656 "perf": {},
1657 "callgrind": {},
1658 "sim_model": {
1659 "base_cpi": args.sim_base_cpi,
1660 "l1_miss_penalty": args.sim_l1_miss_penalty,
1661 "ll_miss_penalty": args.sim_ll_miss_penalty,
1662 "branch_miss_penalty": args.sim_branch_miss_penalty,
1663 },
1664 }
1665
1666 if not args.skip_timing:
1667 print("Phase 1: wall timing (no profiling, no -log_view)", flush=True)
1668 timing_a = run_wall_time_case(
1669 case=case_a,
1670 app_args=args.cmd_args,
1671 workdir=args.workdir,
1672 results_dir=result_dir,
1673 timing_runs=args.timing_runs,
1674 )
1675 timing_b = run_wall_time_case(
1676 case=case_b,
1677 app_args=args.cmd_args,
1678 workdir=args.workdir,
1679 results_dir=result_dir,
1680 timing_runs=args.timing_runs,
1681 )
1682 summary["wall_timing"] = {case_a.label: timing_a, case_b.label: timing_b}
1683
1684 write_timing_csv(result_dir / f"{case_a.label}_wall.csv", case_a.label, timing_a, [])
1685 write_timing_csv(result_dir / f"{case_b.label}_wall.csv", case_b.label, timing_b, [])
1686
1687 print(
1688 f"{case_a.label}: mean wall {timing_a['mean_wall_seconds']:.6f} s",
1689 flush=True,
1690 )
1691 print(
1692 f"{case_b.label}: mean wall {timing_b['mean_wall_seconds']:.6f} s",
1693 flush=True,
1694 )
1695 delta_wall = timing_b["mean_wall_seconds"] - timing_a["mean_wall_seconds"]
1696 delta_wall_pct = (
1697 100.0 * delta_wall / timing_a["mean_wall_seconds"]
1698 if timing_a["mean_wall_seconds"]
1699 else float("inf")
1700 )
1701 print(
1702 f"Wall-time delta ({case_b.label} - {case_a.label}): "
1703 f"{delta_wall:+.6f} s ({format_delta_percent(delta_wall_pct)}%)",
1704 flush=True,
1705 )
1706 if args.petsc_event:
1707 print("Phase 2: PETSc event timing (-log_view)", flush=True)
1708 petsc_a = run_petsc_event_case(
1709 case=case_a,
1710 app_args=args.cmd_args,
1711 workdir=args.workdir,
1712 results_dir=result_dir,
1713 timing_runs=args.timing_runs,
1714 petsc_events=args.petsc_event,
1715 )
1716 petsc_b = run_petsc_event_case(
1717 case=case_b,
1718 app_args=args.cmd_args,
1719 workdir=args.workdir,
1720 results_dir=result_dir,
1721 timing_runs=args.timing_runs,
1722 petsc_events=args.petsc_event,
1723 )
1724 summary["petsc_timing"] = {case_a.label: petsc_a, case_b.label: petsc_b}
1725
1727 result_dir / f"{case_a.label}_petsc.csv",
1728 case_a.label,
1729 petsc_a,
1730 args.petsc_event,
1731 )
1733 result_dir / f"{case_b.label}_petsc.csv",
1734 case_b.label,
1735 petsc_b,
1736 args.petsc_event,
1737 )
1738
1739 print(
1740 f"{case_a.label}: mean PETSc event-sum wall {petsc_a['mean_wall_seconds']:.6f} s",
1741 flush=True,
1742 )
1743 print(
1744 f"{case_b.label}: mean PETSc event-sum wall {petsc_b['mean_wall_seconds']:.6f} s",
1745 flush=True,
1746 )
1747 print(
1748 f"{case_a.label}: mean PETSc event sum {petsc_a['mean_petsc_event_sum']:.6f} s",
1749 flush=True,
1750 )
1751 print(
1752 f"{case_b.label}: mean PETSc event sum {petsc_b['mean_petsc_event_sum']:.6f} s",
1753 flush=True,
1754 )
1755 delta_petsc = petsc_b["mean_petsc_event_sum"] - petsc_a["mean_petsc_event_sum"]
1756 delta_petsc_pct = (
1757 100.0 * delta_petsc / petsc_a["mean_petsc_event_sum"]
1758 if petsc_a["mean_petsc_event_sum"]
1759 else float("inf")
1760 )
1761 print(
1762 f"PETSc event-sum delta ({case_b.label} - {case_a.label}): "
1763 f"{delta_petsc:+.6f} s ({format_delta_percent(delta_petsc_pct)}%)",
1764 flush=True,
1765 )
1766 for event_name in args.petsc_event:
1767 delta_event = (
1768 petsc_b["mean_petsc_events"][event_name]
1769 - petsc_a["mean_petsc_events"][event_name]
1770 )
1771 delta_event_pct = (
1772 100.0 * delta_event / petsc_a["mean_petsc_events"][event_name]
1773 if petsc_a["mean_petsc_events"][event_name]
1774 else float("inf")
1775 )
1776 print(
1777 f"{event_name} delta ({case_b.label} - {case_a.label}): "
1778 f"{delta_event:+.6f} s ({format_delta_percent(delta_event_pct)}%)",
1779 flush=True,
1780 )
1781
1782 if args.perf_stat:
1783 try:
1784 print("Phase 3: perf stat", flush=True)
1785 perf_a = run_perf_case(
1786 case=case_a,
1787 app_args=args.cmd_args,
1788 workdir=args.workdir,
1789 results_dir=result_dir,
1790 perf_runs=args.perf_runs,
1791 perf_bin=args.perf_bin,
1792 perf_events=args.perf_event,
1793 )
1794 perf_b = run_perf_case(
1795 case=case_b,
1796 app_args=args.cmd_args,
1797 workdir=args.workdir,
1798 results_dir=result_dir,
1799 perf_runs=args.perf_runs,
1800 perf_bin=args.perf_bin,
1801 perf_events=args.perf_event,
1802 )
1803 summary["perf"] = {case_a.label: perf_a, case_b.label: perf_b}
1804 write_perf_csv(result_dir / f"{case_a.label}_perf.csv", case_a.label, perf_a)
1805 write_perf_csv(result_dir / f"{case_b.label}_perf.csv", case_b.label, perf_b)
1806 print(
1807 f"{case_a.label}: mean perf instructions {perf_a['mean_events']['instructions']:.0f}",
1808 flush=True,
1809 )
1810 print(
1811 f"{case_a.label}: mean perf cycles {perf_a['mean_events']['cycles']:.0f}",
1812 flush=True,
1813 )
1814 print(
1815 f"{case_a.label}: mean IPC {perf_a.get('mean_ipc', math.nan):.6f}",
1816 flush=True,
1817 )
1818 print(
1819 f"{case_b.label}: mean perf instructions {perf_b['mean_events']['instructions']:.0f}",
1820 flush=True,
1821 )
1822 print(
1823 f"{case_b.label}: mean perf cycles {perf_b['mean_events']['cycles']:.0f}",
1824 flush=True,
1825 )
1826 print(
1827 f"{case_b.label}: mean IPC {perf_b.get('mean_ipc', math.nan):.6f}",
1828 flush=True,
1829 )
1830 delta_instr = (
1831 perf_b["mean_events"]["instructions"] - perf_a["mean_events"]["instructions"]
1832 )
1833 delta_instr_pct = (
1834 100.0 * delta_instr / perf_a["mean_events"]["instructions"]
1835 if perf_a["mean_events"]["instructions"]
1836 else float("inf")
1837 )
1838 print(
1839 f"Instructions delta ({case_b.label} - {case_a.label}): "
1840 f"{delta_instr:+.0f} ({format_delta_percent(delta_instr_pct)}%)",
1841 flush=True,
1842 )
1843 delta_cycles = (
1844 perf_b["mean_events"]["cycles"] - perf_a["mean_events"]["cycles"]
1845 )
1846 delta_cycles_pct = (
1847 100.0 * delta_cycles / perf_a["mean_events"]["cycles"]
1848 if perf_a["mean_events"]["cycles"]
1849 else float("inf")
1850 )
1851 print(
1852 f"Cycles delta ({case_b.label} - {case_a.label}): "
1853 f"{delta_cycles:+.0f} ({format_delta_percent(delta_cycles_pct)}%)",
1854 flush=True,
1855 )
1856 if "mean_ipc" in perf_a and "mean_ipc" in perf_b:
1857 delta_ipc = perf_b["mean_ipc"] - perf_a["mean_ipc"]
1858 delta_ipc_pct = (
1859 100.0 * delta_ipc / perf_a["mean_ipc"]
1860 if perf_a["mean_ipc"]
1861 else float("inf")
1862 )
1863 print(
1864 f"IPC delta ({case_b.label} - {case_a.label}): "
1865 f"{delta_ipc:+.6f} ({format_delta_percent(delta_ipc_pct)}%)",
1866 flush=True,
1867 )
1868 perf_plot = result_dir / "perf_ipc_comparison.svg"
1869 write_perf_svg(perf_plot, case_a, case_b, perf_a, perf_b)
1870 summary["perf_plot"] = str(perf_plot)
1871 print(f"perf plot: {perf_plot}", flush=True)
1872 except RuntimeError as exc:
1873 summary["perf_error"] = str(exc)
1874 print(f"perf stat unavailable: {exc}", flush=True)
1875 if args.require_perf:
1876 raise
1877
1878 if not args.skip_callgrind:
1879 print("Phase 4: Callgrind", flush=True)
1880 callgrind_args = DEFAULT_CALLGRIND_ARGS + args.callgrind_arg
1881 cg_a = run_callgrind_case(
1882 case=case_a,
1883 app_args=args.cmd_args,
1884 workdir=args.workdir,
1885 results_dir=result_dir,
1886 callgrind_args=callgrind_args,
1887 )
1888 cg_b = run_callgrind_case(
1889 case=case_b,
1890 app_args=args.cmd_args,
1891 workdir=args.workdir,
1892 results_dir=result_dir,
1893 callgrind_args=callgrind_args,
1894 )
1895 summary["callgrind"] = {case_a.label: cg_a, case_b.label: cg_b}
1896
1897 event_names_a, totals_a, funcs_a = parse_callgrind_out(Path(cg_a["callgrind_out"]))
1898 event_names_b, totals_b, funcs_b = parse_callgrind_out(Path(cg_b["callgrind_out"]))
1899 callgrind_event_names = ordered_unique([*event_names_a, *event_names_b])
1900 ir_total_a = totals_a.get("Ir")
1901 ir_total_b = totals_b.get("Ir")
1902 summary["callgrind_event_names"] = callgrind_event_names
1903 summary["callgrind_totals"] = {case_a.label: totals_a, case_b.label: totals_b}
1904 summary["callgrind_totals_ir"] = {
1905 case_a.label: ir_total_a,
1906 case_b.label: ir_total_b,
1907 }
1908 summary["callgrind_simulation_totals"] = {
1909 case_a.label: {
1910 "l1_misses": total_l1_misses(totals_a),
1911 "ll_misses": total_ll_misses(totals_a),
1912 "branch_misses": total_branch_misses(totals_a),
1913 "estimated_cycles": estimate_cycles_from_events(
1914 totals_a,
1915 base_cpi=args.sim_base_cpi,
1916 l1_miss_penalty=args.sim_l1_miss_penalty,
1917 ll_miss_penalty=args.sim_ll_miss_penalty,
1918 branch_miss_penalty=args.sim_branch_miss_penalty,
1919 ),
1920 },
1921 case_b.label: {
1922 "l1_misses": total_l1_misses(totals_b),
1923 "ll_misses": total_ll_misses(totals_b),
1924 "branch_misses": total_branch_misses(totals_b),
1925 "estimated_cycles": estimate_cycles_from_events(
1926 totals_b,
1927 base_cpi=args.sim_base_cpi,
1928 l1_miss_penalty=args.sim_l1_miss_penalty,
1929 ll_miss_penalty=args.sim_ll_miss_penalty,
1930 branch_miss_penalty=args.sim_branch_miss_penalty,
1931 ),
1932 },
1933 }
1934 for label in (case_a.label, case_b.label):
1935 totals_map = summary["callgrind_simulation_totals"][label]
1936 ir_total = summary["callgrind_totals_ir"][label]
1937 est_cycles = totals_map["estimated_cycles"]
1938 totals_map["ir_per_estimated_cycle"] = (
1939 ir_total / est_cycles if ir_total and est_cycles else math.nan
1940 )
1941
1942 exclude_patterns = [re.compile(p) for p in (DEFAULT_EXCLUDE_PATTERNS + args.exclude_pattern)]
1943 include_patterns = [re.compile(p) for p in args.include_pattern]
1944 diff_rows = build_diff_rows(
1945 metric_dict_from_event_maps(funcs_a, "Ir"),
1946 metric_dict_from_event_maps(funcs_b, "Ir"),
1947 include_patterns,
1948 exclude_patterns,
1949 )
1950 diff_metric_label = "Ir"
1951 diff_title = "Top {count} Callgrind differences"
1952 diff_rows_est_cycles = build_diff_rows(
1954 funcs_a,
1955 base_cpi=args.sim_base_cpi,
1956 l1_miss_penalty=args.sim_l1_miss_penalty,
1957 ll_miss_penalty=args.sim_ll_miss_penalty,
1958 branch_miss_penalty=args.sim_branch_miss_penalty,
1959 ),
1961 funcs_b,
1962 base_cpi=args.sim_base_cpi,
1963 l1_miss_penalty=args.sim_l1_miss_penalty,
1964 ll_miss_penalty=args.sim_ll_miss_penalty,
1965 branch_miss_penalty=args.sim_branch_miss_penalty,
1966 ),
1967 include_patterns,
1968 exclude_patterns,
1969 )
1970 have_simulated_events = all(
1971 name in callgrind_event_names
1972 for name in ["Ir", "I1mr", "D1mr", "D1mw", "ILmr", "DLmr", "DLmw", "Bcm", "Bim"]
1973 )
1974 if have_simulated_events:
1975 diff_rows = diff_rows_est_cycles
1976 diff_metric_label = "est. cycles (Valgrind sim)"
1977 diff_title = "Top {count} estimated cycle differences"
1978 summary["callgrind_diff_metric"] = {
1979 "kind": "estimated_cycles_from_valgrind_sim",
1980 "metric_label": diff_metric_label,
1981 "base_cpi": args.sim_base_cpi,
1982 "l1_miss_penalty": args.sim_l1_miss_penalty,
1983 "ll_miss_penalty": args.sim_ll_miss_penalty,
1984 "branch_miss_penalty": args.sim_branch_miss_penalty,
1985 }
1986 print(
1987 "Per-function Callgrind differences shown as estimated cycles "
1988 "from Valgrind cache/branch simulation.",
1989 flush=True,
1990 )
1991 elif (
1992 perf_a
1993 and perf_b
1994 and "mean_ipc" in perf_a
1995 and "mean_ipc" in perf_b
1996 and perf_a["mean_ipc"]
1997 and perf_b["mean_ipc"]
1998 ):
2000 diff_rows, perf_a["mean_ipc"], perf_b["mean_ipc"]
2001 )
2002 diff_metric_label = "est. cycles (Ir / IPC)"
2003 diff_title = "Top {count} estimated cycle differences"
2004 summary["callgrind_diff_metric"] = {
2005 "kind": "estimated_cycles_from_perf_ipc",
2006 "metric_label": diff_metric_label,
2007 "ipc_a": perf_a["mean_ipc"],
2008 "ipc_b": perf_b["mean_ipc"],
2009 }
2010 print(
2011 "Per-function Callgrind differences shown as estimated cycles "
2012 "(function Ir divided by whole-run IPC from perf stat).",
2013 flush=True,
2014 )
2015 else:
2016 if args.perf_stat:
2017 print(
2018 "perf stat did not produce usable IPC, so per-function differences "
2019 "remain in raw Ir.",
2020 flush=True,
2021 )
2022 else:
2023 print(
2024 "Valgrind simulated cache/branch events were unavailable, so "
2025 "per-function differences remain in raw Ir.",
2026 flush=True,
2027 )
2028 summary["callgrind_diff_metric"] = {
2029 "kind": "ir",
2030 "metric_label": diff_metric_label,
2031 }
2032
2033 diff_rows.sort(key=lambda row: row["abs_delta"], reverse=True)
2034
2035 write_diff_csv(result_dir / "callgrind_diff.csv", diff_rows)
2036
2037 cg_diff_path = result_dir / "callgrind.cg_diff.out"
2038 try:
2039 cg_diff_proc = subprocess.run(
2040 ["cg_diff", cg_a["callgrind_out"], cg_b["callgrind_out"]],
2041 check=True,
2042 stdout=subprocess.PIPE,
2043 stderr=subprocess.STDOUT,
2044 text=True,
2045 )
2046 cg_diff_path.write_text(cg_diff_proc.stdout)
2047 except subprocess.CalledProcessError as exc:
2048 (result_dir / "callgrind.cg_diff.log").write_text(exc.stdout or "")
2049
2050 print(
2051 f"{case_a.label}: callgrind total Ir {ir_total_a:,}" if ir_total_a is not None else f"{case_a.label}: callgrind total Ir unavailable",
2052 flush=True,
2053 )
2054 print(
2055 f"{case_b.label}: callgrind total Ir {ir_total_b:,}" if ir_total_b is not None else f"{case_b.label}: callgrind total Ir unavailable",
2056 flush=True,
2057 )
2058 print(
2059 f"{case_a.label}: simulated L1 misses {total_l1_misses(totals_a):,}",
2060 flush=True,
2061 )
2062 print(
2063 f"{case_b.label}: simulated L1 misses {total_l1_misses(totals_b):,}",
2064 flush=True,
2065 )
2066 print(
2067 f"{case_a.label}: simulated LL misses {total_ll_misses(totals_a):,}",
2068 flush=True,
2069 )
2070 print(
2071 f"{case_b.label}: simulated LL misses {total_ll_misses(totals_b):,}",
2072 flush=True,
2073 )
2074 print(
2075 f"{case_a.label}: simulated branch misses {total_branch_misses(totals_a):,}",
2076 flush=True,
2077 )
2078 print(
2079 f"{case_b.label}: simulated branch misses {total_branch_misses(totals_b):,}",
2080 flush=True,
2081 )
2082 est_cycles_a = summary["callgrind_simulation_totals"][case_a.label]["estimated_cycles"]
2083 est_cycles_b = summary["callgrind_simulation_totals"][case_b.label]["estimated_cycles"]
2084 print(
2085 f"{case_a.label}: estimated cycles {est_cycles_a:.0f}",
2086 flush=True,
2087 )
2088 print(
2089 f"{case_b.label}: estimated cycles {est_cycles_b:.0f}",
2090 flush=True,
2091 )
2092 sim_ipc_a = summary["callgrind_simulation_totals"][case_a.label]["ir_per_estimated_cycle"]
2093 sim_ipc_b = summary["callgrind_simulation_totals"][case_b.label]["ir_per_estimated_cycle"]
2094 print(
2095 f"{case_a.label}: Ir / est_cycles {sim_ipc_a:.6f}",
2096 flush=True,
2097 )
2098 print(
2099 f"{case_b.label}: Ir / est_cycles {sim_ipc_b:.6f}",
2100 flush=True,
2101 )
2102 if ir_total_a is not None and ir_total_b is not None:
2103 delta_total = ir_total_b - ir_total_a
2104 delta_pct = 100.0 * delta_total / ir_total_a if ir_total_a else float("inf")
2105 print(
2106 f"Callgrind total delta ({case_b.label} - {case_a.label}): "
2107 f"{delta_total:+,} Ir ({format_delta_percent(delta_pct)}%)",
2108 flush=True,
2109 )
2110 delta_est_cycles = est_cycles_b - est_cycles_a
2111 delta_est_cycles_pct = (
2112 100.0 * delta_est_cycles / est_cycles_a if est_cycles_a else float("inf")
2113 )
2114 print(
2115 f"Estimated cycles delta ({case_b.label} - {case_a.label}): "
2116 f"{delta_est_cycles:+.0f} ({format_delta_percent(delta_est_cycles_pct)}%)",
2117 flush=True,
2118 )
2119 if not math.isnan(sim_ipc_a) and not math.isnan(sim_ipc_b):
2120 delta_sim_ipc = sim_ipc_b - sim_ipc_a
2121 delta_sim_ipc_pct = 100.0 * delta_sim_ipc / sim_ipc_a if sim_ipc_a else float("inf")
2122 print(
2123 f"Ir / est_cycles delta ({case_b.label} - {case_a.label}): "
2124 f"{delta_sim_ipc:+.6f} ({format_delta_percent(delta_sim_ipc_pct)}%)",
2125 flush=True,
2126 )
2127
2128 top_diff_rows = select_top_difference_rows(diff_rows, args.top)
2130 f"Top branch differences ({case_b.label} - {case_a.label}) present in both branches:",
2131 top_diff_rows,
2132 case_a,
2133 case_b,
2134 args.top,
2135 metric_label=diff_metric_label,
2136 )
2137 diff_plot = result_dir / "callgrind_top_differences.svg"
2139 diff_plot,
2140 top_diff_rows,
2141 case_a,
2142 case_b,
2143 metric_label=diff_metric_label,
2144 title=diff_title.format(count=len(top_diff_rows)),
2145 )
2146 summary["callgrind_diff_plot"] = str(diff_plot)
2147 print(f"callgrind diff plot: {diff_plot}", flush=True)
2148 else:
2149 print("Phase 4: Callgrind skipped (--skip-callgrind)", flush=True)
2150
2151 summary_path = result_dir / "summary.json"
2152 summary_path.write_text(json.dumps(summary, indent=2))
2153 print(f"summary: {summary_path}", flush=True)
2154 return 0
2155
2156
if __name__ == "__main__":
    # Map the outcome of main() onto a process exit status:
    #   main()'s return value on success, 130 on Ctrl-C, 2 (after printing
    #   the message to stderr) for the listed error types.
    try:
        status = main()
    except KeyboardInterrupt:
        status = 130
    except (FileNotFoundError, ValueError, RuntimeError) as failure:
        print(failure, file=sys.stderr)
        status = 2
    raise SystemExit(status)
None write_perf_csv(Path path, str label, dict summary)
int event_value(dict[str, int] event_map, str name)
tuple[int, float] run_command(list[str] cmd, str|None cwd, Path log_path, str|None announce_name=None, bool print_cmd=False)
tuple[list[str], dict[str, dict[str, str]]] make_seed_configure_args(str seed_cache, list[str] extra_keys)
None write_perf_svg(Path path, Case case_a, Case case_b, dict perf_a, dict perf_b)
argparse.Namespace parse_args()
list[dict] select_top_difference_rows(list[dict] rows, int limit)
str|None read_cmake_home_directory(str|None build_dir)
str resolve_callgrind_name(str raw_value, dict[str, str] table)
float estimate_cycles_from_events(dict[str, int] event_map, float base_cpi, float l1_miss_penalty, float ll_miss_penalty, float branch_miss_penalty)
str|None infer_commit(str|None source_dir)
str format_metric(float value)
int total_branch_misses(dict[str, int] event_map)
Case resolve_case(str name, str label, str|None build_dir, str|None exe, str exe_relpath, str|None commit, str|None source_dir)
bool should_include_function(str func, list[re.Pattern[str]] include_patterns, list[re.Pattern[str]] exclude_patterns)
dict[str, float] parse_perf_stat(str log_text, Iterable[str] event_names)
str resolve_git_commit(Path repo_path, str ref)
dict[str, float] estimated_cycle_dict_from_event_maps(dict[str, dict[str, int]] event_maps, float base_cpi, float l1_miss_penalty, float ll_miss_penalty, float branch_miss_penalty)
Path reusable_repo_cache_root(Path repo_path)
tuple[Path, str] ensure_reusable_worktree(Path repo_path, str ref, str label, Path case_log_dir)
str resolve_run_cwd(Case case, str|None fallback_workdir)
str sanitize_perf_event_name(str event_name)
dict[str, float] metric_dict_from_event_maps(dict[str, dict[str, int]] event_maps, str metric_name)
None write_diff_csv(Path path, list[dict] rows)
int total_ll_misses(dict[str, int] event_map)
dict run_perf_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int perf_runs, str perf_bin, list[str] perf_events)
str stable_cache_name(str value)
None write_svg_diff_plot(Path path, list[dict] rows, Case case_a, Case case_b, str metric_label, str title)
str tail_text(Path path, int num_lines=40)
list[dict] build_diff_rows(dict[str, int] funcs_a, dict[str, int] funcs_b, list[re.Pattern[str]] include_patterns, list[re.Pattern[str]] exclude_patterns)
tuple[int|None, dict[str, int]] parse_callgrind_annotate(Path annotate_path)
Path resolve_cmake_cache_path(str build_or_cache)
list[dict] project_rows_to_estimated_cycles(list[dict] rows, float|None ipc_a, float|None ipc_b)
Path ensure_results_dir(str|None requested, str label_a, str label_b)
list[str] ordered_unique(Iterable[str] items)
None write_timing_csv(Path path, str label, dict summary, list[str] petsc_events)
str sanitize_label(str value)
dict run_wall_time_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int timing_runs)
Case prepare_source_case(str name, str label, str repo, str ref, Path result_dir, str exe_relpath, str|None workdir_relpath, list[str] configure_args, list[str] build_args, str|None build_target, int build_jobs)
dict run_petsc_event_case(Case case, list[str] app_args, str|None workdir, Path results_dir, int timing_runs, list[str] petsc_events)
str format_delta_percent(float value)
None print_top_rows(str title, list[dict] rows, Case case_a, Case case_b, int limit, str metric_label="Ir")
dict[str, int]|None parse_callgrind_costs_line(str line, list[str] event_names)
dict run_callgrind_case(Case case, list[str] app_args, str|None workdir, Path results_dir, list[str] callgrind_args)
dict[str, tuple[str, str]] read_cmake_cache_entries(Path cache_path)
tuple[list[str], dict[str, int], dict[str, dict[str, int]]] parse_callgrind_out(Path callgrind_path)
int total_l1_misses(dict[str, int] event_map)
dict[str, float] parse_petsc_events(str log_text, Iterable[str] event_names)
str shorten_label(str text, int limit=100)