Skip to content
Snippets Groups Projects
Commit e5486c97 authored by Johannes Mey's avatar Johannes Mey
Browse files

huge update of tests

parent f13485cd
No related merge requests found
Showing
with 14430 additions and 0 deletions
!$acc loop private(#PVT#) collapse(#NEST#)
do i = 0,10
#inner#
end do
!$acc end loop
end
\ No newline at end of file
do a = b,c
#inner#
end do
#inner#
#inner:ExecutableConstruct#
!-------------------------------------------------------------------------!
! !
! N A S P A R A L L E L B E N C H M A R K S 3.3 !
! !
! O p e n M P V E R S I O N !
! !
! F T !
! !
!-------------------------------------------------------------------------!
! !
! This benchmark is an OpenMP version of the NPB FT code. !
! It is described in NAS Technical Report 99-011. !
! !
! Permission to use, copy, distribute and modify this software !
! for any purpose with or without fee is hereby granted. We !
! request, however, that all derived work reference the NAS !
! Parallel Benchmarks 3.3. This software is provided "as is" !
! without express or implied warranty. !
! !
! Information on NPB 3.3, including the technical report, the !
! original specifications, source code, results and information !
! on how to submit new results, is available at: !
! !
! http://www.nas.nasa.gov/Software/NPB/ !
! !
! Send comments or suggestions to npb@nas.nasa.gov !
! !
! NAS Parallel Benchmarks Group !
! NASA Ames Research Center !
! Mail Stop: T27A-1 !
! Moffett Field, CA 94035-1000 !
! !
! E-mail: npb@nas.nasa.gov !
! Fax: (650) 604-3957 !
! !
!-------------------------------------------------------------------------!
!---------------------------------------------------------------------
! Authors: D. Bailey
! W. Saphir
! H. Jin
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! FT benchmark
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
program ft
!---------------------------------------------------------------------
! Main driver of the FT benchmark.
!
! Sequence performed by this program unit:
!   1. clear all timers, call setup, and run one untimed pass
!      (init_ui, index map, initial conditions, fft_init, forward fft)
!   2. clear the timers again and start the total timer
!   3. timed setup: index map, initial conditions, fft_init
!   4. timed forward fft of u1 into u0
!   5. niter iterations of evolve / inverse fft / checksum
!   6. verify, stop the total timer, compute mflops, print results
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
! CLASS = A
!
!
! This file is generated automatically by the setparams utility.
! It sets the number of processors and the class of the NPB
! in this directory. Do not modify it by hand.
!
integer nx, ny, nz, maxdim, niter_default
integer ntotal, nxp, nyp, ntotalp
parameter (nx=256, ny=256, nz=128, maxdim=256)
parameter (niter_default=6)
! nxp pads the first dimension by one element; nyp is left unpadded.
parameter (nxp=nx+1, nyp=ny)
! ntotal is the unpadded point count, ntotalp the padded array length.
parameter (ntotal=nx*nyp*nz)
parameter (ntotalp=nxp*nyp*nz)
logical convertdouble
parameter (convertdouble = .false.)
! Build information strings; they are only passed on to print_results.
character compiletime*11
parameter (compiletime='18 Oct 2016')
character npbversion*5
parameter (npbversion='3.3.1')
character cs1*8
parameter (cs1='gfortran')
character cs2*6
parameter (cs2='$(F77)')
character cs3*6
parameter (cs3='(none)')
character cs4*6
parameter (cs4='(none)')
character cs5*40
parameter (cs5='-ffree-form -O3 -fopenmp -mcmodel=medium')
character cs6*28
parameter (cs6='-O3 -fopenmp -mcmodel=medium')
character cs7*6
parameter (cs7='randi8')
! If processor array is 1x1 -> 0D grid decomposition
! Cache blocking params. These values are good for most
! RISC processors.
! FFT parameters:
! fftblock controls how many ffts are done at a time.
! The default is appropriate for most cache-based machines
! On vector machines, the FFT can be vectorized with vector
! length equal to the block size, so the block size should
! be as large as possible. This is the size of the smallest
! dimension of the problem: 128 for class A, 256 for class B and
! 512 for class C.
integer :: fftblock_default, fftblockpad_default
! parameter (fftblock_default=16, fftblockpad_default=18)
parameter (fftblock_default=32, fftblockpad_default=33)
! fftblock and fftblockpad are not assigned in this program unit;
! presumably setup() fills them in -- TODO confirm.
integer :: fftblock, fftblockpad
common /blockinfo/ fftblock, fftblockpad
! we need a bunch of logic to keep track of how
! arrays are laid out.
! Note: this serial version is derived from the parallel 0D case
! of the ft NPB.
! The computation proceeds logically as
! set up initial conditions
! fftx(1)
! transpose (1->2)
! ffty(2)
! transpose (2->3)
! fftz(3)
! time evolution
! fftz(3)
! transpose (3->2)
! ffty(2)
! transpose (2->1)
! fftx(1)
! compute residual(1)
! for the 0D, 1D, 2D strategies, the layouts look like xxx
! 0D 1D 2D
! 1: xyz xyz xyz
! the array dimensions are stored in dims(coord, phase)
! NOTE(review): in this version dims is declared rank-1 (dims(3)), so
! there is no phase index here; the line above looks inherited from
! the multi-layout code.
integer :: dims(3)
common /layout/ dims
! Timer slot numbers. The timer routines are called with these
! indices; T_max is the number of slots cleared by the loops below.
integer :: T_total, T_setup, T_fft, T_evolve, T_checksum, &
T_fftx, T_ffty, &
T_fftz, T_max
parameter (T_total = 1, T_setup = 2, T_fft = 3, &
T_evolve = 4, T_checksum = 5, &
T_fftx = 6, &
T_ffty = 7, &
T_fftz = 8, T_max = 8)
logical :: timers_enabled
! External functions need explicit result types under implicit none.
external timer_read
double precision :: timer_read
external ilog2
integer :: ilog2
external randlc
double precision :: randlc
! other stuff
! timers_enabled is shared through /dbg/ and is not assigned in this
! program unit; presumably setup() sets it -- TODO confirm.
logical :: debug, debugsynch
common /dbg/ debug, debugsynch, timers_enabled
double precision :: seed, a, pi, alpha
parameter (seed = 314159265.d0, a = 1220703125.d0, &
pi = 3.141592653589793238d0, alpha=1.0d-6)
! roots of unity array
! relies on x being largest dimension?
double complex u(nxp)
common /ucomm/ u
! for checksum data
double complex sums(0:niter_default)
common /sumcomm/ sums
! number of iterations
! niter is read from common /iter/; it is not assigned in this unit.
integer :: niter
common /iter/ niter
integer :: i
!---------------------------------------------------------------------
! u0, u1, u2 are the main arrays in the problem.
! Depending on the decomposition, these arrays will have different
! dimensions. To accommodate all possibilities, we allocate them as
! one-dimensional arrays and pass them to subroutines for different
! views
! - u0 contains the (transformed) initial condition
! - u1 and u2 are working arrays
! - twiddle contains exponents for the time evolution operator.
! Note: u2 is commented out in this version; only u0 and u1 are
! declared.
!---------------------------------------------------------------------
double complex u0(ntotalp), &
u1(ntotalp)
! > u2(ntotalp)
double precision :: twiddle(ntotalp)
!---------------------------------------------------------------------
! Large arrays are in common so that they are allocated on the
! heap rather than the stack. This common block is not
! referenced directly anywhere else. Padding is to avoid accidental
! cache problems, since all array sizes are powers of two.
!---------------------------------------------------------------------
! double complex pad1(3), pad2(3), pad3(3)
! common /bigarrays/ u0, pad1, u1, pad2, u2, pad3, twiddle
double complex pad1(3), pad2(3)
common /bigarrays/ u0, pad1, u1, pad2, twiddle
integer :: iter
double precision :: total_time, mflops
logical :: verified
! class is passed to verify and then to print_results; it is never
! assigned here, so verify presumably sets it -- TODO confirm.
character class
!---------------------------------------------------------------------
! Run the entire problem once to make sure all data is touched.
! This reduces variable startup costs, which is important for such a
! short benchmark. The other NPB 2 implementations are similar.
!---------------------------------------------------------------------
do i = 1, t_max
call timer_clear(i)
end do
call setup()
call init_ui(u0, u1, twiddle, dims(1), dims(2), dims(3))
call compute_indexmap(twiddle, dims(1), dims(2), dims(3))
call compute_initial_conditions(u1, dims(1), dims(2), dims(3))
call fft_init (dims(1))
call fft(1, u1, u0)
!---------------------------------------------------------------------
! Start over from the beginning. Note that all operations must
! be timed, in contrast to other benchmarks.
!---------------------------------------------------------------------
do i = 1, t_max
call timer_clear(i)
end do
! T_total is always timed; the per-phase timers only run when
! timers_enabled is true.
call timer_start(T_total)
if (timers_enabled) call timer_start(T_setup)
call compute_indexmap(twiddle, dims(1), dims(2), dims(3))
call compute_initial_conditions(u1, dims(1), dims(2), dims(3))
call fft_init (dims(1))
if (timers_enabled) call timer_stop(T_setup)
if (timers_enabled) call timer_start(T_fft)
call fft(1, u1, u0)
if (timers_enabled) call timer_stop(T_fft)
do iter = 1, niter
if (timers_enabled) call timer_start(T_evolve)
call evolve(u0, u1, twiddle, dims(1), dims(2), dims(3))
if (timers_enabled) call timer_stop(T_evolve)
if (timers_enabled) call timer_start(T_fft)
! The same array u1 is passed as both array arguments here; the
! variant writing into u2 is kept below as a comment.
! call fft(-1, u1, u2)
call fft(-1, u1, u1)
if (timers_enabled) call timer_stop(T_fft)
if (timers_enabled) call timer_start(T_checksum)
! call checksum(iter, u2, dims(1), dims(2), dims(3))
call checksum(iter, u1, dims(1), dims(2), dims(3))
if (timers_enabled) call timer_stop(T_checksum)
end do
! verify is called before T_total is stopped, so it runs while the
! total timer is still active.
call verify(nx, ny, nz, niter, verified, class)
call timer_stop(t_total)
total_time = timer_read(t_total)
! Guard against division by zero when no time was measured.
! NOTE(review): the parenthesised operation-count term below uses
! default-real literals and float(), so it is evaluated in default
! real kind before being combined with the double precision factors.
if( total_time /= 0. ) then
mflops = 1.0d-6*float(ntotal) * &
(14.8157+7.19641*log(float(ntotal)) &
+ (5.23518+7.21113*log(float(ntotal)))*niter) &
/total_time
else
mflops = 0.0
endif
call print_results('FT', class, nx, ny, nz, niter, &
total_time, mflops, ' floating point', verified, &
npbversion, compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
if (timers_enabled) call print_timers()
END PROGRAM
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
real(RDP), intent(in) :: d(:)
real(RDP), intent(in) :: d(0:)
real(RDP), intent(in) :: d(:,0:)
real(RDP), intent(in) :: u_v( :,:)
real(RDP), intent(inout) :: r_e( :,:,:)
real(RDP), intent(inout) :: r_v( :,:)
REAL, DIMENSION (N, 10) :: W
REAL A (:), B (0:)
REAL, POINTER :: D (:, :)
REAL, DIMENSION (:), POINTER :: P
REAL, ALLOCATABLE, DIMENSION (:) :: E
REAL, PARAMETER :: V(0:*) = [0.1, 1.1]
end
!$acc parallel async(2+2)
x = #test#
!$acc end parallel
end
! BlockSlot
do x = 0,p
#test#
end do
end program
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
!> \file eigenspace_condensed_part.f
!> \brief Condensed part for condensed face eigenspace solver
!> \author Immo Huismann
!> \date 2015/05/23
!> \copyright Institute of Fluid Mechanics, TU Dresden, 01062 Dresden, Germany
!>
!> \details
!> This module provides the primary part for the condensed system eigenspace
!> solver.
!===============================================================================
module Transformed_Condensed_Part
! RDP is the real kind used for every real declaration in this module.
use Kind_Parameters, only: RDP
! NOTE(review): HALF is imported but not referenced in this module.
use Constants, only: HALF
! Queue identifier used in the async clause of the parallel region.
use ACC_Parameters, only: ACC_EXEC_QUEUE
use Condensed_SEM_Operators, only: CondensedOperators1D
! NOTE(review): GetLaplaceCoefficients is imported but not referenced
! in this module.
use Geometry_Coefficients, only: GetLaplaceCoefficients
! N_FACES is the extent of the third dimension of the face arrays.
use Element_Boundary_Parameters, only: N_FACES
implicit none
private
! Only the wrapper is exported; Operator stays module-private.
public :: SubtractTransformedCondensedPart
contains
!-------------------------------------------------------------------------------
!> \brief Subtracts the condensed part of the eigenspace system from r.
!> \author Immo Huismann
!>
!> \details
!> Serves as a wrapper to the real routine, but with far fewer arguments.
!> The sizes n and n_element are taken from the first and fourth extent of
!> D_inv and passed on, together with the components S_T_L_I0 and S_T_L_Ip
!> of cop, to the explicit-size routine Operator.
subroutine SubtractTransformedCondensedPart(cop,d,D_inv,u_f,r_f)
class(CondensedOperators1D), intent(in) :: cop !< condensed operators
real(RDP), intent(in) :: d(0:,:) !< Helmholtz parameters
real(RDP), intent(in) :: D_inv(:,:,:,:) !< eigenvalues of iHii
real(RDP), intent(in) :: u_f(:,:,:,:) !< variable u on faces
real(RDP), intent(inout) :: r_f(:,:,:,:) !< result on faces
integer :: n, n_element ! points per edge, number of elements
! Prologue ...................................................................
! Sizes are derived from D_inv only; the extents of u_f, r_f and d are not
! checked against them here.
n = size(D_inv,1)
n_element = size(D_inv,4)
! computation ................................................................
! map from faces to inner element eigenspace.
! This is done with the transpose of the transformation matrix and the
! Helmholtz suboperator corresponding to the specific face
call Operator(n,n_element,cop%S_T_L_I0,cop%S_T_L_Ip,d,u_f,D_inv,r_f)
end subroutine SubtractTransformedCondensedPart
!-------------------------------------------------------------------------------
!> \brief Explicit size implementation of the transformed condensed part
!> \author Immo Huismann
!>
!> \details
!> This implementation uses a small working set (one temporary), leading to few
!> load and store operations from memory.
!>
!> For every element e the routine works in two stages:
!> - gather: the temporary v(i,j,k) is assembled from the six face slices
!>   u_f(:,:,1:6,e), weighted with d(1:3,e) and the two coefficient vectors,
!>   and then multiplied by D_inv(i,j,k,e);
!> - scatter: v is weighted with d(1:3,e) and the coefficient vectors and
!>   subtracted from the six face slices r_f(:,:,1:6,e).
!>
!> Only d(1,e), d(2,e) and d(3,e) are read; d(0,e) is not used here.
!> The face index is written as the literals 1..6, so the routine presumably
!> expects N_FACES to be at least 6 -- TODO confirm.
subroutine Operator(n,n_element,S_T_L_I0,S_T_L_Ip,d,u_f,D_inv,r_f)
integer, intent(in) :: n !< points per edge
integer, intent(in) :: n_element !< number of elements
real(RDP), intent(in) :: S_T_L_I0(n) !< \f$S^{T} L_{I0}\f$
real(RDP), intent(in) :: S_T_L_Ip(n) !< \f$S^{T} L_{Ip}\f$
real(RDP), intent(in) :: d(0:3,n_element) !< Helmholtz coefficients
real(RDP), intent(in) :: u_f ( n,n,N_FACES,n_element) !< u face values
real(RDP), intent(in) :: D_inv(n,n,n,n_element) !< D^{-1}
real(RDP), intent(inout) :: r_f ( n,n,N_FACES,n_element) !< r face values
! Per-element temporary; listed as private in both the OpenACC and the
! OpenMP loop directives below.
real(RDP) :: v(n,n,n)
! Local copies of the two coefficient vectors, used in the scatter stage.
real(RDP) :: L_0I_S(n), L_pI_S(n)
integer :: i,j,k,e
! Small structure exploitation
! The scatter coefficients are plain copies of the gather coefficients.
L_0I_S = S_T_L_I0
L_pI_S = S_T_L_Ip
! Loop over all elements
! The element loop carries both an OpenACC loop directive and an OpenMP do
! directive. The present clause lists d, D_inv, u_f and r_f only; the
! coefficient vectors are not named in any data clause.
!$acc parallel async(ACC_EXEC_QUEUE) present(d,D_inv,u_f,r_f)
!$acc loop private(v)
!$omp do private(i,j,k,e,v)
do e = 1, n_element
! Gather contributions .....................................................
! All three loops are collapsed; every (i,j,k) writes its own v(i,j,k).
!$acc loop collapse(3)
do k = 1, n
do j = 1, n
do i = 1, n
v(i,j,k) = d(1,e) * S_T_L_I0(i) * u_f( j,k,1,e) &
+ d(1,e) * S_T_L_Ip(i) * u_f( j,k,2,e) &
+ d(2,e) * S_T_L_I0(j) * u_f(i, k,3,e) &
+ d(2,e) * S_T_L_Ip(j) * u_f(i, k,4,e) &
+ d(3,e) * S_T_L_I0(k) * u_f(i,j, 5,e) &
+ d(3,e) * S_T_L_Ip(k) * u_f(i,j, 6,e)
v(i,j,k) = D_inv(i,j,k,e) * v(i,j,k)
end do
end do
end do
!$acc end loop
! Scatter contributions ....................................................
! In each direction two loops are covered by collapse(2); the remaining
! loop runs over the index that is summed into r_f, so every collapsed
! iteration updates its own pair of r_f entries.
! NOTE(review): the first direction has no "!$acc end loop" after its loop
! nest, whereas the gather stage and the second and third directions do --
! confirm whether this asymmetry is intended.
! First direction
!$acc loop collapse(2)
do k = 1, n
do j = 1, n
do i = 1, n
r_f( j,k,1,e) = r_f( j,k,1,e) - d(1,e) * L_0I_S(i) * v(i,j,k)
r_f( j,k,2,e) = r_f( j,k,2,e) - d(1,e) * L_pI_S(i) * v(i,j,k)
end do
end do
end do
! Second direction
! Loop order is k, i, j here so that the summed index j is innermost.
!$acc loop collapse(2)
do k = 1, n
do i = 1, n
do j = 1, n
r_f(i, k,3,e) = r_f(i, k,3,e) - d(2,e) * L_0I_S(j) * v(i,j,k)
r_f(i, k,4,e) = r_f(i, k,4,e) - d(2,e) * L_pI_S(j) * v(i,j,k)
end do
end do
end do
!$acc end loop
! Third direction
! Here the summed index k is the outer loop, which carries no directive;
! the loop directive is applied to the (j,i) nest inside it.
do k = 1, n
!$acc loop collapse(2)
do j = 1, n
do i = 1, n
r_f(i,j, 5,e) = r_f(i,j, 5,e) - d(3,e) * L_0I_S(k) * v(i,j,k)
r_f(i,j, 6,e) = r_f(i,j, 6,e) - d(3,e) * L_pI_S(k) * v(i,j,k)
end do
end do
!$acc end loop
end do
end do
!$omp end do
!$acc end loop
!$acc end parallel
end subroutine Operator
!===============================================================================
end module Transformed_Condensed_Part
This diff is collapsed.
...@@ -2,6 +2,8 @@ package org.tud.forty.test; ...@@ -2,6 +2,8 @@ package org.tud.forty.test;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.tud.forty.ast.ExecutableConstruct;
import org.tud.forty.ast.ExecutionPartConstruct;
import org.tud.forty.ast.Expr; import org.tud.forty.ast.Expr;
import java.io.File; import java.io.File;
...@@ -14,9 +16,24 @@ public class FragmentTest extends TestBase { ...@@ -14,9 +16,24 @@ public class FragmentTest extends TestBase {
return ruleProvider("test-data/fragments/Expr"); return ruleProvider("test-data/fragments/Expr");
} }
@DataProvider(name = "executableconstruct")
public static Iterator<Object[]> fortranExecutableConstructProvider() {
return ruleProvider("test-data/fragments/ExecutableConstruct");
}
@Test(dataProvider = "exprs") @Test(dataProvider = "exprs")
public void testFragmentExprParser(File f) throws Exception { public void testFragmentExprParser(File f) throws Exception {
testParse(f, false, true, false, true, Expr.class); testParse(f, false, true, false, true, Expr.class);
} }
@Test(dataProvider = "executableconstruct")
public void testFragmentExecutableConstructParser(File f) throws Exception {
testParse(f, false, true, false, true, ExecutableConstruct.class, true);
}
@Test(dataProvider = "executableconstruct")
public void testFragmentExecutionPartConstructParser(File f) throws Exception {
testParse(f, false, true, false, true, ExecutionPartConstruct.class, true);
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment