From 2d467246fd3af7988f0213767995f38796d0e730 Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz <github@mhxnet.de>
Date: Thu, 17 Apr 2025 07:42:04 +0200
Subject: [PATCH] chore(benchmark): add initial version of query.py

---
 .benchmark/query.py | 184 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100755 .benchmark/query.py

diff --git a/.benchmark/query.py b/.benchmark/query.py
new file mode 100755
index 00000000..f2d6f999
--- /dev/null
+++ b/.benchmark/query.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import json
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.container as mplc
+
+from packaging.version import Version
+from tinydb import TinyDB, Query
+from tqdm import tqdm
+
+
+TABLE_NAME = "benchmarks"
+
+
+def import_data(db_path, *args):
+    """
+    Import all JSON files in 'json_dir' into the TinyDB database at 'db_path'.
+    """
+    db = TinyDB(db_path)
+    table = db.table(TABLE_NAME)
+
+    files = []
+
+    for directory in args:
+        files.extend(
+            [
+                os.path.join(directory, f)
+                for f in os.listdir(directory)
+                if f.endswith(".json")
+            ]
+        )
+
+    for path in tqdm(files, desc="Importing JSON files", unit="file"):
+        with open(path, "r") as f:
+            data = json.load(f)
+            if "exit_codes" in data:
+                del data["times"]
+                del data["exit_codes"]
+            table.insert(data)
+
+
+def query_and_bar_chart(db_path, arch, version, binary_type):
+    """
+    Example query that:
+      - filters records by arch and version
+      - groups them by (name, config)
+      - plots a grouped bar chart of 'mean' times (with error bars using 'stddev').
+    """
+    db = TinyDB(db_path)
+    table = db.table(TABLE_NAME)
+    Q = Query()
+
+    release_configs = {
+        None,
+        "clang",
+        "clang-minsize-musl-lto",
+    }
+
+    # Fetch rows that match the arch, version, binary_type etc.
+    rows = table.search(
+        (Q.arch == arch)
+        &
+        # (Q.version == version) &
+        (Q.config.test(lambda x: x in release_configs))
+        & (Q.type == binary_type)
+        &
+        # (Q.name.test(lambda x: not x.endswith("_mmap"))) &
+        (Q.mean.exists())
+        & (Q.stddev.exists())
+    )
+
+    # Sort and create a DataFrame with the relevant columns.
+    # rows.sort(key=lambda r: (r.get("name"), r.get("config")))
+    # df = pd.DataFrame(rows, columns=["name", "config", "mean", "stddev"])
+    # rows.sort(key=lambda r: (r.get("name"), r.get("commit_time")))
+    df = pd.DataFrame(rows, columns=["name", "version", "mean", "stddev"])
+    df["version_object"] = df["version"].apply(Version)
+
+    # Pivot the DataFrame so that "name" becomes the row index and different "config"
+    # values become separate columns for the "mean" and "stddev" values.
+    df_pivot = df.pivot(
+        index="name", columns=["version_object"], values=["mean", "stddev"]
+    )
+
+    # Extract the DataFrame for means and stddevs.
+    mean_df = df_pivot["mean"]
+    stddev_df = df_pivot["stddev"]
+
+    # === Plotting ===
+
+    # Plot the normalized horizontal grouped bar chart.
+    ax = mean_df.plot(kind="bar", yerr=stddev_df, capsize=3, figsize=(10, 10), log=True, width=0.8)
+
+    for container in ax.containers:
+        if isinstance(container, mplc.BarContainer):
+            pass
+        if isinstance(container, mplc.ErrorbarContainer):
+            # Set the error bar color to match the bar color.
+            for errbar in container.lines[1]:
+                errbar.set_alpha(0.25)
+            for errbar in container.lines[2]:
+                errbar.set_alpha(0.25)
+
+    # Customize titles, labels, and legend.
+    ax.set_title(f"Benchmark results for arch={arch}, type={binary_type}")
+    # Rotate x-axis labels for better readability.
+    ax.set_xticklabels(ax.get_xticklabels(), rotation=20, ha="right")
+    ax.set_ylabel("Benchmark Time")
+    ax.set_xlabel("Benchmark Name")
+    # Add grid lines in the background
+    ax.set_axisbelow(True)
+    ax.grid(axis="y", linestyle="-", alpha=0.5, which="both")
+    # Set y-axis range (1ms to 100s)
+    ax.set_ylim(0.001, 100.0)
+
+    # Put the legend outside the plot area.
+    ax.legend(
+        title="Version", bbox_to_anchor=(1.005, 1), loc="upper left", frameon=False
+    )
+
+    for boundary in np.arange(len(df)) - 0.5:
+        plt.axvline(x=boundary, color='grey', linestyle='-', linewidth=0.5)
+
+    # Maximize the space for the figure.
+    plt.subplots_adjust(left=0.06, right=0.94, top=0.97, bottom=0.12)
+
+    # ax.legend(title="Version")
+
+    # plt.tight_layout()
+    plt.show()
+
+
+# Interesting queries:
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Import JSON benchmarks into TinyDB and generate charts."
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # Sub-command: import
+    import_parser = subparsers.add_parser(
+        "import", help="Import JSON files into TinyDB."
+    )
+    import_parser.add_argument("db_path", help="TinyDB database file.")
+    import_parser.add_argument(
+        "json_dirs", nargs="+", help="Directories containing JSON files to import."
+    )
+
+    # Sub-command: bar
+    bar_parser = subparsers.add_parser("bar", help="Generate a grouped bar chart.")
+    bar_parser.add_argument("db_path", help="TinyDB database file.")
+    bar_parser.add_argument("--arch", required=True, help="Architecture to filter by.")
+    bar_parser.add_argument("--version", required=True, help="Version to filter by.")
+    bar_parser.add_argument(
+        "--binary", default="standalone", help="Version to filter by."
+    )
+
+    # Sub-command: line
+    line_parser = subparsers.add_parser("line", help="Generate a line chart over time.")
+    line_parser.add_argument("db_path", help="TinyDB database file.")
+    line_parser.add_argument("--arch", required=True, help="Architecture to filter by.")
+    line_parser.add_argument("--config", required=True, help="Config to filter by.")
+    line_parser.add_argument(
+        "--binary", default="standalone", help="Version to filter by."
+    )
+
+    args = parser.parse_args()
+
+    if args.command == "import":
+        import_data(args.db_path, *args.json_dirs)
+    elif args.command == "bar":
+        query_and_bar_chart(args.db_path, args.arch, args.version, args.binary)
+    elif args.command == "line":
+        query_and_line_chart(args.db_path, args.arch, args.config, args.binary)
+
+
+if __name__ == "__main__":
+    main()