Commit 5f4e50dc authored by Christoph Alt

reformatting

parent a4ee1cd1
Part of merge request !7 "Add a new benchmark case"
@@ -7,30 +7,35 @@ from cbutil.likwid_parser import parse_likwid_json
 from cbutil.ncu_parser import extract_from_csv
 import logging

 logger = logging.getLogger(__file__)


 def mesa_pd(text_file: str, *, perf_group: str):
     line_wise = cb.read_file_line_wise(text_file)
-    fields = {key: value for key, value in
-              cb.process_linewise(cb.mesa_pd_text, line_wise)}
+    fields = {
+        key: value for key, value in cb.process_linewise(cb.mesa_pd_text, line_wise)
+    }
     time_stamp = cb.util.get_time_stamp_from_env(
         fallback=lambda: cb.util.file_time_to_sec(text_file)
     )
     yield cb.DataPoint(
-        "MESA_PD_KernelBenchmark",
-        time=time_stamp,
-        fields=fields, tags=dict()).asdict()
-
-
-def mesa_pd_sqlite(db_file: str, *,
-                   perf_group: str,
-                   tag_key={"mpi_num_processes",
-                            "omp_max_threads", "walberla_git", }
-                   ):
+        "MESA_PD_KernelBenchmark", time=time_stamp, fields=fields, tags=dict()
+    ).asdict()
+
+
+def mesa_pd_sqlite(
+    db_file: str,
+    *,
+    perf_group: str,
+    tag_key={
+        "mpi_num_processes",
+        "omp_max_threads",
+        "walberla_git",
+    },
+):
     with cb.sqlite_context(db_file) as connection:
         runs = cb.query_complete_table(connection, "runs")
         for run in runs.fetchall():
@@ -41,27 +46,26 @@ def mesa_pd_sqlite(db_file: str, *,
             runid = run["runId"]
             tags = {k: run[k] for k in run.keys() if k in tag_key}
             fields = dict()
-            for timing in cb.build_iterate_query(connection,
-                                                 from_table="timingPool",
-                                                 where_args=["runId", runid]):
-                fields.update(
-                    {timing["sweep"]: timing["average"]*timing["count"]})
+            for timing in cb.build_iterate_query(
+                connection, from_table="timingPool", where_args=["runId", runid]
+            ):
+                fields.update({timing["sweep"]: timing["average"] * timing["count"]})
-            yield cb.DataPoint("MESA_PD_KernelBenchmark",
-                               time=time,
-                               fields=fields,
-                               tags=tags).asdict()
+            yield cb.DataPoint(
+                "MESA_PD_KernelBenchmark", time=time, fields=fields, tags=tags
+            ).asdict()


 def find_profile_file(search_pattern: str, path="."):
     profile_files = list(Path(path).glob(search_pattern))
     if len(profile_files) == 0:
         raise FileNotFoundError(
-            f"Did not found a profile file in {path} with {search_pattern}")
+            f"Did not found a profile file in {path} with {search_pattern}"
+        )
     if len(profile_files) != 1:
         logger.info(
-            f"Found more then one profile file in {path},"
-            "picking the first one")
+            f"Found more then one profile file in {path}," "picking the first one"
+        )
     return profile_files[0]
@@ -77,33 +81,41 @@ def add_likwid_fields(search_pattern: str, perf_group: str) -> dict:
 def add_ncu_profile(search_pattern: str):
     try:
         ncu_file = find_profile_file(f"ncuprofile_*{search_pattern}*.csv")
-    except (FileNotFoundError, ) as e:
+    except (FileNotFoundError,) as e:
         logger.info(e)
         return {}, {}
     return extract_from_csv(ncu_file)


 def _iter_runs(runs, *, measurement_name, perf_group: str, gpu_name=None):
-    no_tag_keys = {'vtkWriteFrequency',
-                   'remainingTimeLoggerFrequency',
-                   'executable',
-                   'compile_flags',
-                   'id', }
+    no_tag_keys = {
+        "vtkWriteFrequency",
+        "remainingTimeLoggerFrequency",
+        "executable",
+        "compile_flags",
+        "id",
+    }
     field_names = {"mlupsPerProcess"}
     time_key = "timestamp"
     for run in runs:
-        dp = cb.data_point_factory(run,
-                                   time_key=time_key,
-                                   measurement_name=measurement_name,
-                                   field_keys=field_names,
-                                   no_tag_keys=no_tag_keys)
+        dp = cb.data_point_factory(
+            run,
+            time_key=time_key,
+            measurement_name=measurement_name,
+            field_keys=field_names,
+            no_tag_keys=no_tag_keys,
+        )
         pattern = "_".join(
-            [str(dp.tags[key]) for key in [
-                'stencil',
-                'streamingPattern',
-                'collisionSetup',
-            ]])
+            [
+                str(dp.tags[key])
+                for key in [
+                    "stencil",
+                    "streamingPattern",
+                    "collisionSetup",
+                ]
+            ]
+        )
         if gpu_name:
             pattern += f"_{gpu_name}"
         ncu_fields, ncu_tags = add_ncu_profile(pattern)
@@ -114,9 +126,11 @@ def _iter_runs(runs, *, measurement_name, perf_group: str, gpu_name=None):
         dp.fields.update(add_likwid_fields(pattern, perf_group))
         dp.time = cb.util.get_time_stamp_from_env(fallback=lambda: dp.time)
         dp.tags.update(
-            {"mpi_num_processes": prod(
-                [int(run[key])
-                 for key in run.keys() if key.startswith("block")])}
+            {
+                "mpi_num_processes": prod(
+                    [int(run[key]) for key in run.keys() if key.startswith("block")]
+                )
+            }
         )
         yield dp.asdict()
@@ -133,81 +147,96 @@ def uniformgrid_sqlite(db_file: str, *, perf_group: str):
     if db_file.startswith("gpu_profile"):
         gpu_name = Path(db_file).stem.split("_")[-1]
         measurement_name += "_profile"
-    yield from _iter_runs(cb.iterate_all_tables(db_file),
-                          measurement_name=measurement_name,
-                          perf_group=perf_group,
-                          gpu_name=gpu_name)
+    yield from _iter_runs(
+        cb.iterate_all_tables(db_file),
+        measurement_name=measurement_name,
+        perf_group=perf_group,
+        gpu_name=gpu_name,
+    )


 def pfac_csv(csv_file: str, *, perf_group: str):
     measurement_name = "PhaseFieldAllenCahn"
     if (arch := _get_arch(csv_file)) != "CPU":
         measurement_name += arch
-    yield from _iter_runs(cb.iterate_csv(csv_file),
-                          measurement_name=measurement_name,
-                          perf_group=perf_group)
+    yield from _iter_runs(
+        cb.iterate_csv(csv_file),
+        measurement_name=measurement_name,
+        perf_group=perf_group,
+    )


-def energy_json(json_file: str, *, perf_group='ENERGY'):
+def energy_json(json_file: str, *, perf_group="ENERGY"):
     measurement_name = os.path.splitext(os.path.basename(json_file))[0]
     result_dict = cb.json2dict(json_file)
-    base_keys = [perf_group, perf_group, 'Metric']
+    base_keys = [perf_group, perf_group, "Metric"]
     metric_dict = cb.get_from_nested_dict(result_dict, base_keys)
-    inner_dict = 'Values'
+    inner_dict = "Values"

     def search_for_metric(metric: str, unit: str):
         def get_metric_key():
-            return f'{metric} PKG [{unit}]'
+            return f"{metric} PKG [{unit}]"

         key = get_metric_key()
         try:
             ret = metric_dict[key]
         except KeyError:
-            ret = metric_dict[f'{metric} [{unit}]']
+            ret = metric_dict[f"{metric} [{unit}]"]
         return {key: ret[inner_dict][0]}

-    energy = search_for_metric('Energy', 'J')
-    power = search_for_metric('Power', 'W')
+    energy = search_for_metric("Energy", "J")
+    power = search_for_metric("Power", "W")
     time = cb.util.get_time_stamp_from_env(
         fallback=lambda: cb.file_time_to_sec(json_file)
     )
-    yield cb.DataPoint(measurement=measurement_name,
-                       time=time,
-                       fields={**energy, **power},
-                       tags=dict(),)
+    yield cb.DataPoint(
+        measurement=measurement_name,
+        time=time,
+        fields={**energy, **power},
+        tags=dict(),
+    )


 def process_likwid_files(file: str, perf_group: str) -> dict:
-    sum_keys = ['AVX DP [MFLOP/s] STAT',
-                'CPI STAT',
-                'DP [MFLOP/s] STAT',
-                'Energy DRAM [J] STAT',
-                'Energy [J] STAT',
-                'Memory bandwidth [MBytes/s] STAT',
-                'Memory data volume [GBytes] STAT',
-                'Memory read bandwidth [MBytes/s] STAT',
-                'Memory read data volume [GBytes] STAT',
-                'Memory write bandwidth [MBytes/s] STAT',
-                'Memory write data volume [GBytes] STAT',
-                'Operational intensity STAT',
-                'Packed [MUOPS/s] STAT',
-                'Power DRAM [W] STAT',
-                'Power [W] STAT',
-                'Scalar [MUOPS/s] STAT']
-    avg_keys = ['Runtime (RDTSC) [s] STAT',
-                'Clock [MHz] STAT',
-                'DP [MFLOP/s] STAT', ]
-    return parse_likwid_json(file, perf_group,
-                             sum_keys=sum_keys,
-                             avg_keys=avg_keys,
-                             min_keys=avg_keys,
-                             max_keys=avg_keys)
-
-
-def gravitywave_sqlite(db_file: str, *,
-                       perf_group: str,
-                       ):
+    sum_keys = [
+        "AVX DP [MFLOP/s] STAT",
+        "CPI STAT",
+        "DP [MFLOP/s] STAT",
+        "Energy DRAM [J] STAT",
+        "Energy [J] STAT",
+        "Memory bandwidth [MBytes/s] STAT",
+        "Memory data volume [GBytes] STAT",
+        "Memory read bandwidth [MBytes/s] STAT",
+        "Memory read data volume [GBytes] STAT",
+        "Memory write bandwidth [MBytes/s] STAT",
+        "Memory write data volume [GBytes] STAT",
+        "Operational intensity STAT",
+        "Packed [MUOPS/s] STAT",
+        "Power DRAM [W] STAT",
+        "Power [W] STAT",
+        "Scalar [MUOPS/s] STAT",
+    ]
+    avg_keys = [
+        "Runtime (RDTSC) [s] STAT",
+        "Clock [MHz] STAT",
+        "DP [MFLOP/s] STAT",
+    ]
+    return parse_likwid_json(
+        file,
+        perf_group,
+        sum_keys=sum_keys,
+        avg_keys=avg_keys,
+        min_keys=avg_keys,
+        max_keys=avg_keys,
+    )
+
+
+def gravitywave_sqlite(
+    db_file: str,
+    *,
+    perf_group: str,
+):
     with cb.sqlite_context(db_file) as connection:
         runs = cb.query_complete_table(connection, "runs")
         for run in runs.fetchall():
@@ -218,13 +247,48 @@ def gravitywave_sqlite(db_file: str, *,
             runid = run["runId"]
             tags = {k: run[k] for k in run.keys()}
             fields = {"simulationTime": run["simulationTime"]}
-            for timing in cb.build_iterate_query(connection,
-                                                 from_table="timingPool",
-                                                 where_args=["runId", runid]):
+            for timing in cb.build_iterate_query(
+                connection, from_table="timingPool", where_args=["runId", runid]
+            ):
                 for col in ["average", "min", "max", "count", "variance", "percentage"]:
                     fields.update({f'{timing["sweep"]}_{col}': timing[col]})
+            yield cb.DataPoint(
+                "FSLBM_gravitywave", time=time, fields=fields, tags=tags
+            ).asdict()
+
+
+def percolation_sqlite(
+    db_file: str,
+    *,
+    perf_group: str,
+):
+    with cb.sqlite_context(db_file) as connection:
+        runs = cb.query_complete_table(connection, "runs")
+        field_keys = [
+            "MFLUPS",
+            "MFLUPS_core",
+            "MFLUPS_process",
+            "MLUPS",
+            "MLUPS_core",
+            "MLUPS_process",
+            "time",
+            "timeStepsPerSecond",
+        ]
+        for run in runs.fetchall():
+            time = cb.util.get_time_stamp_from_env(
+                fallback=lambda: cb.util.time_conversion(run["timestamp"])
+            )
+            runid = run["runId"]
+            tags = {k: run[k] for k in run.keys() if k not in field_keys}
+            fields = {f: run[f] for f in field_keys}
+            for timing in cb.build_iterate_query(
+                connection, from_table="timingPool", where_args=["runId", runid]
+            ):
+                for col in ["average", "min", "max", "count", "variance", "percentage"]:
+                    fields.update({f'{timing["sweep"]}_{col}': timing[col]})
-            yield cb.DataPoint("FSLBM_gravitywave",
-                               time=time,
-                               fields=fields,
-                               tags=tags).asdict()
+            yield cb.DataPoint(
+                "PercolationGPU", time=time, fields=fields, tags=tags
+            ).asdict()
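
For orientation, each parser in this file is a generator that yields plain dicts built via cb.DataPoint(...).asdict(). A minimal usage sketch of the new percolation parser follows; it is not part of the commit, and the module name, database path, and performance group are placeholders, not values taken from the repository.

    # Hypothetical consumer of the new percolation parser; the import path,
    # file name and perf group are assumptions for illustration only.
    from parsers import percolation_sqlite  # assumed module name

    for point in percolation_sqlite("percolation_results.sqlite3", perf_group="MEM_DP"):
        # each point is the dict form of a cb.DataPoint
        # (measurement name, timestamp, fields, tags)
        print(point)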