Commit 50d02331 authored by Jan Eitzinger's avatar Jan Eitzinger
Browse files

Cleanup. Move benchmarking scripts in Wiki.

parent 9b18a7c9
# Single file teaching version
bwBench.c contains a single file version of The Bandwidth Benchmark that is tailored for usage in Tutorials or Courses.
It should compile with any C99 compiler.
# Benchmarking skripts
## bench.pl to determine the absolute highest main memory bandwidth
A wrapper scripts in perl (bench.pl) and python (bench.py) are also provided to scan ranges of thread counts and determine the absolute highest sustained main memory bandwidth. In order to use it `likwid-pin` has to be in your path. The script has three required and one optional command line arguments:
```
$./bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
```
Example usage:
```
$./bench.pl ./bwbench-GCC 2-8 6
```
The script will always use physical cores only, where two SMT threads is the default. For different SMT thread counts use the 4th command line argument. Example for a processor without SMT:
```
$./bench.pl ./bwbench-GCC 14-24 10 1
```
## extractResults.pl to generate a plottable output files from multiple scaling runs
Please see how to use it in the toplevel [README](https://github.com/RRZE-HPC/TheBandwidthBenchmark#scaling-runs).
## benchmarkSystem.pl to benchmark a system and generate plots and markdown for the result wiki
**Please use with care!**
The script is designed to be used from the root of TheBandwidthBenchmark.
This script cleans and builds the currently configured toolchain. It expects that all Likwid tools are in the path!
Desired frequency settings must be already in place.
Usage:
```
perl ./benchmarkSystem.pl <DATA-DIR> <EXECUTABLE> <PREFIX>
```
where ```<DATA-DIR>``` is the directory where you want to store all results and generated output.
```<EXECUTABLE>``` is the bwBench executable name, this must be in accordance to the configured tool chain in ```config.mk```. E.g. ```./bwBench-CLANG```.
```<PREFIX>``` is the file prefix for all generated output, e.g. Intel-Haswell .
#!/usr/bin/env perl
# =======================================================================================
#
# Author: Jan Eitzinger (je), jan.eitzinger@fau.de
# Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# =======================================================================================
use strict;
use warnings;
use utf8;
my $CMD = $ARGV[0];
my @N = split /-/, $ARGV[1];
my $R = $ARGV[2];
my $MAX = 0; my $CORES = 0; my $BENCH = '';
my $SMT = $ARGV[3] ? $ARGV[3] : 2;
foreach my $numcores ( $N[0] ... $N[1] ) {
foreach ( 1 ... $R ) {
foreach my $ln ( split /\n/, `likwid-pin -c E:S0:$numcores:1:$SMT $CMD` ){
if ( $ln =~ /^([A-Za-z]+):[ ]+([0-9.]+) /) {
if ( $MAX < $2 ){
$MAX = $2; $CORES = $numcores; $BENCH = $1;
}
}
}
}
}
print "$BENCH was best using $CORES threads: $MAX\n";
This diff is collapsed.
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <limits.h>
#include <float.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <likwid-marker.h>
#define SIZE 120000000ull
#define NTIMES 5
#define ARRAY_ALIGNMENT 64
#define HLINE "----------------------------------------------------------------------------\n"
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
#define LIKWID_PROFILE(tag,call) \
_Pragma ("omp parallel") \
{LIKWID_MARKER_START(#tag);} \
times[tag][k] = call; \
_Pragma ("omp parallel") \
{LIKWID_MARKER_STOP(#tag);}
typedef enum benchmark {
INIT = 0,
COPY,
UPDATE,
TRIAD,
DAXPY,
STRIAD,
SDAXPY,
NUMBENCH
} benchmark;
typedef struct {
char* label;
int words;
int flops;
} benchmarkType;
extern double init(double*, double, int);
extern double copy(double*, double*, int);
extern double update(double*, double, int);
extern double triad(double*, double*, double*, double, int);
extern double daxpy(double*, double*, double, int);
extern double striad(double*, double*, double*, double*, int);
extern double sdaxpy(double*, double*, double*, int);
extern void check(double*, double*, double*, double*, int);
extern double getTimeStamp();
int main (int argc, char** argv)
{
size_t bytesPerWord = sizeof(double);
size_t N = SIZE;
double *a, *b, *c, *d;
double scalar, tmp;
double E, S;
double avgtime[NUMBENCH],
maxtime[NUMBENCH],
mintime[NUMBENCH];
double times[NUMBENCH][NTIMES];
benchmarkType benchmarks[NUMBENCH] = {
{"Init: ", 1, 0},
{"Copy: ", 2, 0},
{"Update: ", 2, 1},
{"Triad: ", 3, 2},
{"Daxpy: ", 3, 2},
{"STriad: ", 4, 2},
{"SDaxpy: ", 4, 2}
};
LIKWID_MARKER_INIT;
#pragma omp parallel
{
LIKWID_MARKER_REGISTER("INIT");
LIKWID_MARKER_REGISTER("COPY");
LIKWID_MARKER_REGISTER("UPDATE");
LIKWID_MARKER_REGISTER("TRIAD");
LIKWID_MARKER_REGISTER("DAXPY");
LIKWID_MARKER_REGISTER("STRIAD");
LIKWID_MARKER_REGISTER("SDAXPY");
}
posix_memalign((void**) &a, ARRAY_ALIGNMENT, N * bytesPerWord );
posix_memalign((void**) &b, ARRAY_ALIGNMENT, N * bytesPerWord );
posix_memalign((void**) &c, ARRAY_ALIGNMENT, N * bytesPerWord );
posix_memalign((void**) &d, ARRAY_ALIGNMENT, N * bytesPerWord );
for (int i=0; i<NUMBENCH; i++) {
avgtime[i] = 0;
maxtime[i] = 0;
mintime[i] = FLT_MAX;
}
#ifdef _OPENMP
printf(HLINE);
#pragma omp parallel
{
int k = omp_get_num_threads();
int i = omp_get_thread_num();
#pragma omp single
printf ("OpenMP enabled, running with %d threads\n", k);
}
#endif
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = 2.0;
b[i] = 2.0;
c[i] = 0.5;
d[i] = 1.0;
}
scalar = 3.0;
for ( int k=0; k < NTIMES; k++) {
LIKWID_PROFILE(INIT,init(b, scalar, N));
LIKWID_PROFILE(COPY,copy(c, a, N));
LIKWID_PROFILE(UPDATE,update(a, scalar, N));
LIKWID_PROFILE(TRIAD,triad(a, b, c, scalar, N));
LIKWID_PROFILE(DAXPY,daxpy(a, b, scalar, N));
LIKWID_PROFILE(STRIAD,striad(a, b, c, d, N));
LIKWID_PROFILE(SDAXPY,sdaxpy(a, b, c, N));
}
for (int j=0; j<NUMBENCH; j++) {
for (int k=1; k<NTIMES; k++) {
avgtime[j] = avgtime[j] + times[j][k];
mintime[j] = MIN(mintime[j], times[j][k]);
maxtime[j] = MAX(maxtime[j], times[j][k]);
}
}
printf(HLINE);
printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
for (int j=0; j<NUMBENCH; j++) {
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
double flops = (double) benchmarks[j].flops * N;
if (flops > 0){
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
1.0E-06 * bytes/mintime[j],
1.0E-06 * flops/mintime[j],
avgtime[j],
mintime[j],
maxtime[j]);
} else {
printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
1.0E-06 * bytes/mintime[j],
avgtime[j],
mintime[j],
maxtime[j]);
}
}
printf(HLINE);
check(a, b, c, d, N);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}
void check(
double * a,
double * b,
double * c,
double * d,
int N
)
{
double aj, bj, cj, dj, scalar;
double asum, bsum, csum, dsum;
double epsilon;
/* reproduce initialization */
aj = 2.0;
bj = 2.0;
cj = 0.5;
dj = 1.0;
/* now execute timing loop */
scalar = 3.0;
for (int k=0; k<NTIMES; k++) {
bj = scalar;
cj = aj;
aj = aj * scalar;
aj = bj + scalar * cj;
aj = aj + scalar * bj;
aj = bj + cj * dj;
aj = aj + bj * cj;
}
aj = aj * (double) (N);
bj = bj * (double) (N);
cj = cj * (double) (N);
dj = dj * (double) (N);
asum = 0.0; bsum = 0.0; csum = 0.0; dsum = 0.0;
for (int i=0; i<N; i++) {
asum += a[i];
bsum += b[i];
csum += c[i];
dsum += d[i];
}
#ifdef VERBOSE
printf ("Results Comparison: \n");
printf (" Expected : %f %f %f \n",aj,bj,cj);
printf (" Observed : %f %f %f \n",asum,bsum,csum);
#endif
epsilon = 1.e-8;
if (ABS(aj-asum)/asum > epsilon) {
printf ("Failed Validation on array a[]\n");
printf (" Expected : %f \n",aj);
printf (" Observed : %f \n",asum);
}
else if (ABS(bj-bsum)/bsum > epsilon) {
printf ("Failed Validation on array b[]\n");
printf (" Expected : %f \n",bj);
printf (" Observed : %f \n",bsum);
}
else if (ABS(cj-csum)/csum > epsilon) {
printf ("Failed Validation on array c[]\n");
printf (" Expected : %f \n",cj);
printf (" Observed : %f \n",csum);
}
else if (ABS(dj-dsum)/dsum > epsilon) {
printf ("Failed Validation on array d[]\n");
printf (" Expected : %f \n",dj);
printf (" Observed : %f \n",dsum);
}
else {
printf ("Solution Validates\n");
}
}
double getTimeStamp()
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}
double init(
double * restrict a,
double scalar,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = scalar;
}
E = getTimeStamp();
return E-S;
}
double copy(
double * restrict a,
double * restrict b,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = b[i];
}
E = getTimeStamp();
return E-S;
}
double update(
double * restrict a,
double scalar,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = a[i] * scalar;
}
E = getTimeStamp();
return E-S;
}
double triad(
double * restrict a,
double * restrict b,
double * restrict c,
double scalar,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = b[i] + scalar * c[i];
}
E = getTimeStamp();
return E-S;
}
double daxpy(
double * restrict a,
double * restrict b,
double scalar,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = a[i] + scalar * b[i];
}
E = getTimeStamp();
return E-S;
}
double striad(
double * restrict a,
double * restrict b,
double * restrict c,
double * restrict d,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = b[i] + d[i] * c[i];
}
E = getTimeStamp();
return E-S;
}
double sdaxpy(
double * restrict a,
double * restrict b,
double * restrict c,
int N
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel for schedule(static)
for (int i=0; i<N; i++) {
a[i] = a[i] + b[i] * c[i];
}
E = getTimeStamp();
return E-S;
}
!=======================================================================================
!
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.
!
!=======================================================================================
module timer
use iso_fortran_env, only: int32, int64, real64
implicit none
public :: getTimeStamp
contains
function getTimeStamp() result(ts)
implicit none
integer(int64) :: counter, count_step
real(real64) :: ts
call system_clock(counter, count_step)
ts = counter / real(count_step,real64)
end function getTimeStamp
end module timer
module constants
implicit none
integer, parameter :: n = 20000000
integer, parameter :: ntimes = 10
integer, parameter :: sp = kind(0.0e0)
integer, parameter :: dp = kind(0.0d0)
end module constants
module benchmarks
use timer
use likwid
use constants
contains
function init (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("INIT")
!$omp do
do i = 1, n
a(i) = scalar
end do
call likwid_markerStopRegion("INIT")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function init
function copy (a, b) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("COPY")
!$omp do
do i = 1, n
a(i) = b(i)
end do
call likwid_markerStopRegion("COPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function copy