We evaluate the performance of a NAR-based, a file-based (uncompressed and xz-compressed) and a casync-based substitution mechanism through 3 scenarios: a Nixpkgs mass rebuild, a Nixpkgs stable -> unstable channel jump, and a single-derivation version bump.

For each of these scenarios, we compare how much data each substitution technique needs to transfer against how much data the NAR-based substitution transfers.

Unsurprisingly, the mass rebuild scenario shows the biggest improvement: nix-casync cuts the amount of downloaded data by 48.5%, while the xz-compressed file-based substitution cuts it by 38.5%.

Surprisingly, we also see an improvement for the Nixpkgs stable (21.11) -> Nixpkgs unstable (22.05 pre-release) jump: xz-compressed file-based substitution cuts the amount of downloaded data by 18%, Casync by 17.2%.

We see almost no improvement for the derivation bump scenario: about 1% for both the xz-compressed file-based substitution and Casync.

For file-based substitution, compression is crucial for overall performance: uncompressed file-based substitution consistently downloads almost twice as much data as the NAR-based substitution.

We can conclude that reducing the substitution granularity, either via Casync or xz-compressed file-based substitution, consistently reduces (by 1% to 48.5%) the amount of transferred data in 3 different common scenarios.
%config InlineBackend.figure_formats = ['svg']
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('dark_background')
def toMb(b):
    return b * (9.537e-7)
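As a side note (the conversion helper is the notebook's own; the check below is purely illustrative), the `9.537e-7` factor is approximately `1 / 2**20`, so the "MB" figures throughout this article are binary megabytes (MiB):

```python
def toMb(b):
    # Convert bytes to (binary) megabytes: 9.537e-7 ≈ 1 / 2**20.
    return b * (9.537e-7)

print(toMb(1_048_576))  # 2**20 bytes -> approximately 1.0 MB
```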
First, let's import the data generated by the ../companeSubsEfficiency benchmark.
results_dir='bench-results'
def importBenchmarkCSVs(contentDir):
    return {
        "casync": pd.read_csv(f"{contentDir}/casync.csv", sep=";"),
        "file": pd.read_csv(f"{contentDir}/file.csv", sep=";"),
        "compressed-file": pd.read_csv(f"{contentDir}/file-xz-compressed.csv", sep=";"),
        "nar": pd.read_csv(f"{contentDir}/nar.csv", sep=";"),
    }
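To make the expected input format concrete, here is a hypothetical excerpt of one of these semicolon-separated CSVs (the column names are the ones the analysis code below relies on; the actual benchmark files may contain more columns):

```python
import io
import pandas as pd

# Hypothetical two-row excerpt of a nar.csv benchmark file: one row per
# NAR archive, with its content-derived name and its size in bytes.
sample_nar_csv = """Nar Name;Nar Size
abc123.nar.xz;1024
def456.nar.xz;2048
"""

df = pd.read_csv(io.StringIO(sample_nar_csv), sep=";")
print(df["Nar Size"].sum())  # total size in bytes: 3072
```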
b = {
    "massRebuild": {
        "before": importBenchmarkCSVs(f"{results_dir}/before-mass-rebuild"),
        "after": importBenchmarkCSVs(f"{results_dir}/after-mass-rebuild"),
    },
    "channelJump": {
        "before": importBenchmarkCSVs(f"{results_dir}/nixpkgs-stable-channel"),
        "after": importBenchmarkCSVs(f"{results_dir}/nixpkgs-unstable-channel"),
    },
    "firefoxBump": {
        "before": importBenchmarkCSVs(f"{results_dir}/before-firefox-bump"),
        "after": importBenchmarkCSVs(f"{results_dir}/after-firefox-bump"),
    },
    "gimpBump": {
        "before": importBenchmarkCSVs(f"{results_dir}/before-gimp-bump"),
        "after": importBenchmarkCSVs(f"{results_dir}/after-gimp-bump"),
    },
    "emacsBump": {
        "before": importBenchmarkCSVs(f"{results_dir}/before-emacs-bump"),
        "after": importBenchmarkCSVs(f"{results_dir}/after-emacs-bump"),
    },
    "openmpiBump": {
        "before": importBenchmarkCSVs(f"{results_dir}/before-openmpi-bump"),
        "after": importBenchmarkCSVs(f"{results_dir}/after-openmpi-bump"),
    },
}
For each of these benchmarks, we're going to evaluate different store path substitution techniques and compare their efficiencies.
The following benchmarks consist of building a NixOS machine configuration against 2 Nixpkgs commits. We'll simulate a NixOS machine update from the first commit to the second one.
We're going to evaluate the 3 following substitution techniques:
- **NAR-based substitution**: `.tar.xz`-ing a full store path. In this benchmark, we identify each NAR by its filename, which is derived from the sha256 sum of its content.
- **File-based substitution** (uncompressed and xz-compressed): substituting each file of a store path individually. Each file is identified by the sha256 sum of its content.
- **Casync-based substitution**: splitting the store paths into content-defined chunks. Each chunk is identified by the sha256 sum of its content.

Note: we're using NixOS/Nixpkgs for all these benchmarks. However, since Guix currently uses the same substitution mechanism, you can safely assume the same conclusions hold true for it as well.
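All three techniques rely on the same content-addressing idea: a substitution atom (NAR, file or chunk) is named after the sha256 sum of its content, so identical atoms get identical names and can be re-used instead of re-downloaded. A minimal sketch of that naming scheme (hypothetical helper, not part of the benchmark code):

```python
import hashlib

def atom_name(content: bytes) -> str:
    # Content-addressed naming: the same bytes always yield the same
    # name, regardless of which store path they belong to.
    return hashlib.sha256(content).hexdigest()

x = atom_name(b"some store path content")
y = atom_name(b"some store path content")
print(x == y)  # identical content -> identical atom name -> re-usable
```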
def analyse_benchmark_results(i, file):
    """
    Analyse the results of one benchmark.

    :param i: benchmark dataframes. Expects a "before" and an "after" entry.
    :param file: if True, include the uncompressed file-based results in the output.

    Each benchmark simulates the substitutions triggered by a transition between
    two Nix closures, a "before" and an "after" one.

    For each substitution mechanism, we then simulate what we can re-use and what
    we have to download by diff-ing the substitution atoms (file, chunk or NAR).
    """
    _a_nar = i["after"]["nar"]
    _b_nar = i["before"]["nar"]
    _a_casync = i["after"]["casync"]
    _b_casync = i["before"]["casync"]
    _a_file = i["after"]["file"]
    _b_file = i["before"]["file"]
    _a_compressed_file = i["after"]["compressed-file"]
    _b_compressed_file = i["before"]["compressed-file"]

    nar_closure_size = _a_nar["Nar Size"].sum()
    casync_closure_size = _a_casync["Chunk Size"].sum()
    file_closure_size = _a_file["Size"].sum()
    compressed_file_closure_size = _a_compressed_file["Size"].sum()

    # NAR is the baseline: its savings compared to itself are 0 by definition.
    _nar_merged = _a_nar.merge(_b_nar, how="left", on="Nar Name", indicator=True, suffixes=("_after", "_before"))
    nar_dl_size = _nar_merged.loc[_nar_merged["_merge"] == "left_only"]["Nar Size_after"].sum()
    nar_reused_size = _nar_merged.loc[_nar_merged["_merge"] == "both"]["Nar Size_after"].sum()
    nar_nar_savings = 0

    _casync_merged = _a_casync.merge(_b_casync, how="left", on="Chunk Name", indicator=True, suffixes=("_after", "_before"))
    casync_dl_size = _casync_merged.loc[_casync_merged["_merge"] == "left_only"]["Chunk Size_after"].sum()
    casync_reused_size = _casync_merged.loc[_casync_merged["_merge"] == "both"]["Chunk Size_after"].sum()
    casync_nar_savings = (nar_dl_size - casync_dl_size) / nar_dl_size

    _file_merged = _a_file.merge(_b_file, how="left", on="Sha256", indicator=True, suffixes=("_after", "_before"))
    file_dl_size = _file_merged.loc[_file_merged["_merge"] == "left_only"]["Size_after"].sum()
    file_reused_size = _file_merged.loc[_file_merged["_merge"] == "both"]["Size_after"].sum()
    file_nar_savings = (nar_dl_size - file_dl_size) / nar_dl_size

    _compressed_file_merged = _a_compressed_file.merge(_b_compressed_file, how="left", on="Sha256", indicator=True, suffixes=("_after", "_before"))
    compressed_file_dl_size = _compressed_file_merged.loc[_compressed_file_merged["_merge"] == "left_only"]["Size_after"].sum()
    compressed_file_reused_size = _compressed_file_merged.loc[_compressed_file_merged["_merge"] == "both"]["Size_after"].sum()
    compressed_file_nar_savings = (nar_dl_size - compressed_file_dl_size) / nar_dl_size

    if file:
        return pd.DataFrame(data={
            "Name": ["NAR", "Casync", "File", "Compressed File"],
            "Closure Size (MB)": [toMb(nar_closure_size), toMb(casync_closure_size), toMb(file_closure_size), toMb(compressed_file_closure_size)],
            "Downloaded Size (MB)": [toMb(nar_dl_size), toMb(casync_dl_size), toMb(file_dl_size), toMb(compressed_file_dl_size)],
            "Re-used Size (MB)": [toMb(nar_reused_size), toMb(casync_reused_size), toMb(file_reused_size), toMb(compressed_file_reused_size)],
            "DL Savings Compared to NAR (%)": [nar_nar_savings * 100, casync_nar_savings * 100, file_nar_savings * 100, compressed_file_nar_savings * 100],
        })
    else:
        return pd.DataFrame(data={
            "Name": ["NAR", "Casync", "Compressed File"],
            "Closure Size (MB)": [toMb(nar_closure_size), toMb(casync_closure_size), toMb(compressed_file_closure_size)],
            "Downloaded Size (MB)": [toMb(nar_dl_size), toMb(casync_dl_size), toMb(compressed_file_dl_size)],
            "Re-used Size (MB)": [toMb(nar_reused_size), toMb(casync_reused_size), toMb(compressed_file_reused_size)],
            "DL Savings Compared to NAR (%)": [nar_nar_savings * 100, casync_nar_savings * 100, compressed_file_nar_savings * 100],
        })
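The diffing at the core of this analysis uses pandas' merge indicator: atoms tagged "left_only" exist only in the "after" closure and must be downloaded, while atoms tagged "both" already exist locally and can be re-used. A toy illustration of that logic (made-up chunk data):

```python
import pandas as pd

# The "after" closure needs chunks a, b, c; the "before" closure had b, c, d.
after = pd.DataFrame({"Chunk Name": ["a", "b", "c"], "Chunk Size": [10, 20, 30]})
before = pd.DataFrame({"Chunk Name": ["b", "c", "d"], "Chunk Size": [20, 30, 40]})

merged = after.merge(before, how="left", on="Chunk Name",
                     indicator=True, suffixes=("_after", "_before"))
# Chunks only present after the update have to be downloaded...
dl = merged.loc[merged["_merge"] == "left_only"]["Chunk Size_after"].sum()
# ...while chunks present in both closures are re-used for free.
reused = merged.loc[merged["_merge"] == "both"]["Chunk Size_after"].sum()
print(dl, reused)  # only chunk a (10) is downloaded; b and c (50) are re-used
```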
def gen_perf_pie(dataframe, key):
    idx = dataframe.query(f'Name == "{key}"').index[0]
    pd.DataFrame(data={"data": [dataframe["Downloaded Size (MB)"][idx],
                                dataframe["Re-used Size (MB)"][idx]]},
                 index=["Downloaded", "Re-Used"])\
        .plot.pie(figsize=(6,6), y="data", ylabel="", title=f"{key} Downloaded/Re-Used Data")
Let's build the same NixOS machine description using two Nixpkgs commits: one before and one after the staging-next 2021-12-03 iteration was merged to master. This staging iteration contains, among other things, a curl version bump. That curl version bump triggers an almost complete Nixpkgs mass rebuild: both nix and stdenv depend on it.
This mass-rebuild scenario represents a long-standing issue in terms of substitution performance.
mass_rebuild_results = analyse_benchmark_results(b["massRebuild"], True)
mass_rebuild_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 386.060194 | 372.989991 | 13.070203 | 0.000000 |
| 1 | Casync | 608.652010 | 192.194724 | 416.457286 | 48.471882 |
| 2 | File | 1652.194177 | 705.665216 | 973.347687 | -89.191462 |
| 3 | Compressed File | 476.522900 | 229.488728 | 247.034172 | 38.473221 |
_ = mass_rebuild_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the Mass Rebuild Update (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
_ = mass_rebuild_results.plot.bar(figsize=(12,5), x="Name",y="DL Savings Compared to NAR (%)",title="DL Savings Compared to NAR (more is better)", xlabel="", ylabel="Savings in %", color="#ff6a00")
We can see a massive performance gain for both Casync (48.5%) and xz-compressed files (38.5%). We can also see that compression plays a massive role in substitution performance: the uncompressed files do almost 90% worse than the plain NAR substitution.
In this scenario, we're going to simulate a Firefox update. We take the Firefox 97.0 -> 97.0.1 bump (commit 7e23a7fb8268f16e83ef60bbd2708e1d57fd49ef) as a test case.
firefox_bump_results = analyse_benchmark_results(b["firefoxBump"], False)
firefox_bump_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 219.351904 | 56.431531 | 162.920373 | 0.000000 |
| 1 | Casync | 342.924949 | 55.870973 | 287.053976 | 0.993342 |
| 2 | Compressed File | 260.424006 | 55.829510 | 204.594495 | 1.066817 |
_ = firefox_bump_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the Firefox Version Bump (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
_ = firefox_bump_results.plot.bar(figsize=(12,5), x="Name",y="DL Savings Compared to NAR (%)",title="DL Savings Compared to NAR (more is better)", xlabel="", ylabel="Savings in %", color="#ff6a00")
As expected, we see barely any gains (~1%) here.
In this scenario, we're going to simulate a stable -> unstable jump for the same NixOS machine configuration we used in the mass rebuild simulation.
channel_jump_results = analyse_benchmark_results(b["channelJump"], False)
channel_jump_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 387.457979 | 375.167475 | 12.290504 | 0.000000 |
| 1 | Casync | 610.885612 | 310.540811 | 300.344801 | 17.226084 |
| 2 | Compressed File | 478.120248 | 307.478671 | 170.641577 | 18.042290 |
_ = channel_jump_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the Channel Jump (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
_ = channel_jump_results.plot.bar(figsize=(12,5), x="Name",y="DL Savings Compared to NAR (%)",title="DL Savings Compared to NAR (more is better)", xlabel="", ylabel="Savings in %", color="#ff6a00")
I'm surprised by this one! I did not expect any gains, yet here we are, looking at an 18% improvement for compressed files and a 17.2% improvement for Casync. While not amazing, it's big enough to be noticeable.
gimp_bump_results = analyse_benchmark_results(b["gimpBump"], False)
gimp_bump_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the Gimp Bump (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
gimp_bump_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 247.506391 | 20.270771 | 227.235619 | 0.000000 |
| 1 | Casync | 363.372208 | 13.057223 | 350.314985 | 35.585959 |
| 2 | Compressed File | 300.300404 | 10.187397 | 290.113007 | 49.743419 |
emacs_bump_results = analyse_benchmark_results(b["emacsBump"], False)
emacs_bump_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the Emacs Bump (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
emacs_bump_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 110.330516 | 39.744307 | 70.586210 | 0.000000 |
| 1 | Casync | 177.733558 | 44.726448 | 133.007110 | -12.535484 |
| 2 | Compressed File | 136.599691 | 40.735590 | 95.864101 | -2.494152 |
openmpi_bump_results = analyse_benchmark_results(b["openmpiBump"], False)
openmpi_bump_results.plot.bar(figsize=(12,5), x="Name",y="Downloaded Size (MB)",title="Volume to Download for the OpenMPI Bump (less is better)", xlabel="", ylabel="Size in MB", color="#ff6a00")
openmpi_bump_results
| | Name | Closure Size (MB) | Downloaded Size (MB) | Re-used Size (MB) | DL Savings Compared to NAR (%) |
|---|---|---|---|---|---|
| 0 | NAR | 139.540192 | 3.335962 | 136.204230 | 0.000000 |
| 1 | Casync | 214.708487 | 4.380815 | 210.327671 | -31.320878 |
| 2 | Compressed File | 167.518237 | 3.260960 | 164.257277 | 2.248310 |