Commit c70223d2 authored by Jan Eitzinger's avatar Jan Eitzinger
Browse files

Merge branch 'master' of github.com:RRZE-HPC/TheBandwidthBenchmark

parents 73fa437f 32a69ec7
!=======================================================================================
!
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.
!
!=======================================================================================
module timer
use iso_fortran_env, only: int32, int64, real64
implicit none
public :: getTimeStamp
contains
function getTimeStamp() result(ts)
implicit none
integer(int64) :: counter, count_step
real(real64) :: ts
call system_clock(counter, count_step)
ts = counter / real(count_step,real64)
end function getTimeStamp
end module timer
module constants
implicit none
integer, parameter :: n = 20000000
integer, parameter :: ntimes = 10
integer, parameter :: sp = kind(0.0e0)
integer, parameter :: dp = kind(0.0d0)
end module constants
module benchmarks
use timer
use likwid
use constants
contains
function init (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("INIT")
!$omp do
do i = 1, n
a(i) = scalar
end do
call likwid_markerStopRegion("INIT")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function init
function copy (a, b) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("COPY")
!$omp do
do i = 1, n
a(i) = b(i)
end do
call likwid_markerStopRegion("COPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function copy
function update (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("UPDATE")
!$omp do
do i = 1, n
a(i) = a(i) * scalar
end do
call likwid_markerStopRegion("UPDATE")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function update
function triad (a, b, c, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("TRIAD")
!$omp do
do i = 1, n
a(i) = b(i) + scalar * c(i)
end do
call likwid_markerStopRegion("TRIAD")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function triad
function daxpy (a, b, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("DAXPY")
!$omp do
do i = 1, n
a(i) = a(i) + scalar * b(i)
end do
call likwid_markerStopRegion("DAXPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function daxpy
function striad (a, b, c, d) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), allocatable, intent(in) :: d(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("STRIAD")
!$omp do
do i = 1, n
a(i) = b(i) + c(i) * d(i)
end do
call likwid_markerStopRegion("STRIAD")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function striad
function sdaxpy (a, b, c) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("SDAXPY")
!$omp do
do i = 1, n
a(i) = a(i) + b(i) * c(i)
end do
call likwid_markerStopRegion("SDAXPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function sdaxpy
end module benchmarks
program bwBench
use constants
use benchmarks
use likwid
implicit none
integer, parameter :: numbench = 7
real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
real(kind=dp) :: scalar, tmp
real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
times(numbench,ntimes)
integer :: i, k
integer :: bytes(numbench)
integer :: bytesPerWord
character :: label(numbench)*11
!$ INTEGER omp_get_num_threads
!$ EXTERNAL omp_get_num_threads
bytesPerWord = 8
bytes(1) = 1 * bytesPerWord * n ! init
bytes(2) = 2 * bytesPerWord * n ! copy
bytes(3) = 2 * bytesPerWord * n ! update
bytes(4) = 3 * bytesPerWord * n ! triad
bytes(5) = 3 * bytesPerWord * n ! daxpy
bytes(6) = 4 * bytesPerWord * n ! striad
bytes(7) = 4 * bytesPerWord * n ! sdaxpy
label(1) = " Init: "
label(2) = " Copy: "
label(3) = " Update: "
label(4) = " Triad: "
label(5) = " Daxpy: "
label(6) = " STriad: "
label(7) = " SDaxpy: "
do i = 1, numbench
avgtime(i) = 0.0D0
mintime(i) = 1.0D+36
maxtime(i) = 0.0D0
end do
allocate(a(n))
allocate(b(n))
allocate(c(n))
allocate(d(n))
call likwid_markerInit()
!$omp parallel
call likwid_markerRegisterRegion("INIT")
call likwid_markerRegisterRegion("COPY")
call likwid_markerRegisterRegion("UPDATE")
call likwid_markerRegisterRegion("TRIAD")
call likwid_markerRegisterRegion("DAXPY")
call likwid_markerRegisterRegion("STRIAD")
call likwid_markerRegisterRegion("SDAXPY")
!$omp master
print *,'----------------------------------------------'
!$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
!$omp end master
!$omp end parallel
PRINT *,'----------------------------------------------'
!$OMP PARALLEL DO
do i = 1, n
a(i) = 2.0d0
b(i) = 2.0d0
c(i) = 0.5d0
d(i) = 1.0d0
end do
scalar = 3.0d0
do k = 1, ntimes
times(1, k) = init(b, scalar)
times(2, k) = copy(c, a)
times(3, k) = update(a, scalar)
times(4, k) = triad(a, b, c, scalar)
times(5, k) = daxpy(a, b, scalar)
times(6, k) = striad(a, b, c, d)
times(7, k) = sdaxpy(a, b, c)
end do
do k = 1, ntimes
do i = 1, numbench
avgtime(i) = avgtime(i) + times(i, k)
mintime(i) = MIN(mintime(i), times(i, k))
maxtime(i) = MAX(mintime(i), times(i, k))
end do
end do
print *,"-------------------------------------------------------------"
print *,"Function Rate (MB/s) Avg time Min time Max time"
do i = 1, numbench
avgtime(i) = avgtime(i)/dble(ntimes-1)
print "(a,f12.2, 2x, 3 (f10.4,3x))", label(i), bytes(i)/mintime(i)/1.0D6, &
avgtime(i), mintime(i), maxtime(i)
end do
print *,"-------------------------------------------------------------"
call likwid_markerClose()
end program bwBench
!=======================================================================================
!
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.
!
!=======================================================================================
module timer
use iso_fortran_env, only: int32, int64, real64
implicit none
public :: getTimeStamp
contains
function getTimeStamp() result(ts)
implicit none
integer(int64) :: counter, count_step
real(real64) :: ts
call system_clock(counter, count_step)
ts = counter / real(count_step,real64)
end function getTimeStamp
end module timer
module constants
implicit none
integer, parameter :: n = 20000000
integer, parameter :: ntimes = 10
integer, parameter :: sp = kind(0.0e0)
integer, parameter :: dp = kind(0.0d0)
end module constants
module benchmarks
use timer
use constants
contains
function init (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = scalar
end do
E = getTimeStamp()
seconds = E-S
end function init
function copy (a, b) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = b(i)
end do
E = getTimeStamp()
seconds = E-S
end function copy
function update (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = a(i) * scalar
end do
E = getTimeStamp()
seconds = E-S
end function update
function triad (a, b, c, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = b(i) + scalar * c(i)
end do
E = getTimeStamp()
seconds = E-S
end function triad
function daxpy (a, b, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = a(i) + scalar * b(i)
end do
E = getTimeStamp()
seconds = E-S
end function daxpy
function striad (a, b, c, d) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), allocatable, intent(in) :: d(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = b(i) + c(i) * d(i)
end do
E = getTimeStamp()
seconds = E-S
end function striad
function sdaxpy (a, b, c) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$OMP PARALLEL DO
do i = 1, n
a(i) = a(i) + b(i) * c(i)
end do
E = getTimeStamp()
seconds = E-S
end function sdaxpy
end module benchmarks
program bwBench
use constants
use benchmarks
implicit none
integer, parameter :: numbench = 7
real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
real(kind=dp) :: scalar, tmp
real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
times(numbench,ntimes)
integer :: i, k
integer :: bytes(numbench)
integer :: bytesPerWord
character :: label(numbench)*11
!$ INTEGER omp_get_num_threads
!$ EXTERNAL omp_get_num_threads
bytesPerWord = 8
bytes(1) = 1 * bytesPerWord * n ! init
bytes(2) = 2 * bytesPerWord * n ! copy
bytes(3) = 2 * bytesPerWord * n ! update
bytes(4) = 3 * bytesPerWord * n ! triad
bytes(5) = 3 * bytesPerWord * n ! daxpy
bytes(6) = 4 * bytesPerWord * n ! striad
bytes(7) = 4 * bytesPerWord * n ! sdaxpy
label(1) = " Init: "
label(2) = " Copy: "
label(3) = " Update: "
label(4) = " Triad: "
label(5) = " Daxpy: "
label(6) = " STriad: "
label(7) = " SDaxpy: "
do i = 1, numbench
avgtime(i) = 0.0D0
mintime(i) = 1.0D+36
maxtime(i) = 0.0D0
end do
allocate(a(n))
allocate(b(n))
allocate(c(n))
allocate(d(n))
!$omp parallel
!$omp master
print *,'----------------------------------------------'
!$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
!$omp end master
!$omp end parallel
PRINT *,'----------------------------------------------'
!$OMP PARALLEL DO
do i = 1, n
a(i) = 2.0d0
b(i) = 2.0d0
c(i) = 0.5d0
d(i) = 1.0d0
end do
scalar = 3.0d0
do k = 1, ntimes
times(1, k) = init(b, scalar)
times(2, k) = copy(c, a)
times(3, k) = update(a, scalar)
times(4, k) = triad(a, b, c, scalar)
times(5, k) = daxpy(a, b, scalar)
times(6, k) = striad(a, b, c, d)
times(7, k) = sdaxpy(a, b, c)
end do
do k = 1, ntimes