huge update of tests

e5486c97 · Johannes Mey · f13485cd · e5486c97 · e5486c97 · e5486c97
Commit e5486c97 authored 8 years ago by Johannes Mey
--- a/Parser/test-data/OpenAcc/slots.f
+++ b/Parser/test-data/OpenAcc/slots.f
+!$acc loop private(#PVT#) collapse(#NEST#)
+do i = 0,10
+  #inner#
+end do
+!$acc end loop
+end
\ No newline at end of file
--- a/Parser/test-data/fragments/ExecutableConstruct/ExecutableConstruct1.f
+++ b/Parser/test-data/fragments/ExecutableConstruct/ExecutableConstruct1.f
+    do a = b,c
+        #inner#
+    end do
--- a/Parser/test-data/fragments/ExecutableConstruct/Slot.f
+++ b/Parser/test-data/fragments/ExecutableConstruct/Slot.f
+#inner#
--- a/Parser/test-data/fragments/ExecutableConstruct/TypedSlot.f
+++ b/Parser/test-data/fragments/ExecutableConstruct/TypedSlot.f
+#inner:ExecutableConstruct#
--- a/Parser/test-data/nas/ft-min.f
+++ b/Parser/test-data/nas/ft-min.f
+!-------------------------------------------------------------------------!
+!                                                                         !
+!        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
+!                                                                         !
+!                       O p e n M P     V E R S I O N                     !
+!                                                                         !
+!                                   F T                                   !
+!                                                                         !
+!-------------------------------------------------------------------------!
+!                                                                         !
+!    This benchmark is an OpenMP version of the NPB FT code.              !
+!    It is described in NAS Technical Report 99-011.                      !
+!                                                                         !
+!    Permission to use, copy, distribute and modify this software         !
+!    for any purpose with or without fee is hereby granted.  We           !
+!    request, however, that all derived work reference the NAS            !
+!    Parallel Benchmarks 3.3. This software is provided "as is"           !
+!    without express or implied warranty.                                 !
+!                                                                         !
+!    Information on NPB 3.3, including the technical report, the          !
+!    original specifications, source code, results and information        !
+!    on how to submit new results, is available at:                       !
+!                                                                         !
+!           http://www.nas.nasa.gov/Software/NPB/                         !
+!                                                                         !
+!    Send comments or suggestions to  npb@nas.nasa.gov                    !
+!                                                                         !
+!          NAS Parallel Benchmarks Group                                  !
+!          NASA Ames Research Center                                      !
+!          Mail Stop: T27A-1                                              !
+!          Moffett Field, CA   94035-1000                                 !
+!                                                                         !
+!          E-mail:  npb@nas.nasa.gov                                      !
+!          Fax:     (650) 604-3957                                        !
+!                                                                         !
+!-------------------------------------------------------------------------!
+!---------------------------------------------------------------------
+! Authors: D. Bailey
+!          W. Saphir
+!          H. Jin
+!---------------------------------------------------------------------
+!---------------------------------------------------------------------
+!---------------------------------------------------------------------
+! FT benchmark
+!---------------------------------------------------------------------
+!---------------------------------------------------------------------
+!---------------------------------------------------------------------
+    program ft
+!---------------------------------------------------------------------
+!---------------------------------------------------------------------
+    implicit none
+! CLASS = A
+!  
+!  
+!  This file is generated automatically by the setparams utility.
+!  It sets the number of processors and the class of the NPB
+!  in this directory. Do not modify it by hand.
+!  
+        integer nx, ny, nz, maxdim, niter_default
+        integer ntotal, nxp, nyp, ntotalp
+        parameter (nx=256, ny=256, nz=128, maxdim=256)
+        parameter (niter_default=6)
+        parameter (nxp=nx+1, nyp=ny)
+        parameter (ntotal=nx*nyp*nz)
+        parameter (ntotalp=nxp*nyp*nz)
+        logical  convertdouble
+        parameter (convertdouble = .false.)
+        character compiletime*11
+        parameter (compiletime='18 Oct 2016')
+        character npbversion*5
+        parameter (npbversion='3.3.1')
+        character cs1*8
+        parameter (cs1='gfortran')
+        character cs2*6
+        parameter (cs2='$(F77)')
+        character cs3*6
+        parameter (cs3='(none)')
+        character cs4*6
+        parameter (cs4='(none)')
+        character cs5*40
+        parameter (cs5='-ffree-form -O3 -fopenmp -mcmodel=medium')
+        character cs6*28
+        parameter (cs6='-O3 -fopenmp -mcmodel=medium')
+        character cs7*6
+        parameter (cs7='randi8')
+! If processor array is 1x1 -> 0D grid decomposition
+! Cache blocking params. These values are good for most
+! RISC processors.
+! FFT parameters:
+!  fftblock controls how many ffts are done at a time.
+!  The default is appropriate for most cache-based machines
+!  On vector machines, the FFT can be vectorized with vector
+!  length equal to the block size, so the block size should
+!  be as large as possible. This is the size of the smallest
+!  dimension of the problem: 128 for class A, 256 for class B and
+!  512 for class C.
+    integer :: fftblock_default, fftblockpad_default
+!      parameter (fftblock_default=16, fftblockpad_default=18)
+    parameter (fftblock_default=32, fftblockpad_default=33)
+    integer :: fftblock, fftblockpad
+    common /blockinfo/ fftblock, fftblockpad
+! we need a bunch of logic to keep track of how
+! arrays are laid out.
+! Note: this serial version is derived from the parallel 0D case
+! of the ft NPB.
+! The computation proceeds logically as
+! set up initial conditions
+! fftx(1)
+! transpose (1->2)
+! ffty(2)
+! transpose (2->3)
+! fftz(3)
+! time evolution
+! fftz(3)
+! transpose (3->2)
+! ffty(2)
+! transpose (2->1)
+! fftx(1)
+! compute residual(1)
+! for the 0D, 1D, 2D strategies, the layouts look like xxx
+!            0D        1D        2D
+! 1:        xyz       xyz       xyz
+! the array dimensions are stored in dims(coord, phase)
+    integer :: dims(3)
+    common /layout/ dims
+    integer :: T_total, T_setup, T_fft, T_evolve, T_checksum, &
+    T_fftx, T_ffty, &
+    T_fftz, T_max
+    parameter (T_total = 1, T_setup = 2, T_fft = 3, &
+    T_evolve = 4, T_checksum = 5, &
+    T_fftx = 6, &
+    T_ffty = 7, &
+    T_fftz = 8, T_max = 8)
+    logical :: timers_enabled
+    external timer_read
+    double precision :: timer_read
+    external ilog2
+    integer :: ilog2
+    external randlc
+    double precision :: randlc
+! other stuff
+    logical :: debug, debugsynch
+    common /dbg/ debug, debugsynch, timers_enabled
+    double precision :: seed, a, pi, alpha
+    parameter (seed = 314159265.d0, a = 1220703125.d0, &
+    pi = 3.141592653589793238d0, alpha=1.0d-6)
+! roots of unity array
+! relies on x being largest dimension?
+    double complex u(nxp)
+    common /ucomm/ u
+! for checksum data
+    double complex sums(0:niter_default)
+    common /sumcomm/ sums
+! number of iterations
+    integer :: niter
+    common /iter/ niter
+    integer :: i
+!---------------------------------------------------------------------
+! u0, u1, u2 are the main arrays in the problem.
+! Depending on the decomposition, these arrays will have different
+! dimensions. To accommodate all possibilities, we allocate them as
+! one-dimensional arrays and pass them to subroutines for different
+! views
+!  - u0 contains the (transformed) initial condition
+!  - u1 and u2 are working arrays
+!  - twiddle contains exponents for the time evolution operator.
+!---------------------------------------------------------------------
+    double complex   u0(ntotalp), &
+    u1(ntotalp)
+!     >                 u2(ntotalp)
+    double precision :: twiddle(ntotalp)
+!---------------------------------------------------------------------
+! Large arrays are in common so that they are allocated on the
+! heap rather than the stack. This common block is not
+! referenced directly anywhere else. Padding is to avoid accidental
+! cache problems, since all array sizes are powers of two.
+!---------------------------------------------------------------------
+!      double complex pad1(3), pad2(3), pad3(3)
+!      common /bigarrays/ u0, pad1, u1, pad2, u2, pad3, twiddle
+    double complex pad1(3), pad2(3)
+    common /bigarrays/ u0, pad1, u1, pad2, twiddle
+    integer :: iter
+    double precision :: total_time, mflops
+    logical :: verified
+    character class
+!---------------------------------------------------------------------
+! Run the entire problem once to make sure all data is touched.
+! This reduces variable startup costs, which is important for such a
+! short benchmark. The other NPB 2 implementations are similar.
+!---------------------------------------------------------------------
+    do i = 1, t_max
+        call timer_clear(i)
+    end do
+    call setup()
+    call init_ui(u0, u1, twiddle, dims(1), dims(2), dims(3))
+    call compute_indexmap(twiddle, dims(1), dims(2), dims(3))
+    call compute_initial_conditions(u1, dims(1), dims(2), dims(3))
+    call fft_init (dims(1))
+    call fft(1, u1, u0)
+!---------------------------------------------------------------------
+! Start over from the beginning. Note that all operations must
+! be timed, in contrast to other benchmarks.
+!---------------------------------------------------------------------
+    do i = 1, t_max
+        call timer_clear(i)
+    end do
+    call timer_start(T_total)
+    if (timers_enabled) call timer_start(T_setup)
+    call compute_indexmap(twiddle, dims(1), dims(2), dims(3))
+    call compute_initial_conditions(u1, dims(1), dims(2), dims(3))
+    call fft_init (dims(1))
+    if (timers_enabled) call timer_stop(T_setup)
+    if (timers_enabled) call timer_start(T_fft)
+    call fft(1, u1, u0)
+    if (timers_enabled) call timer_stop(T_fft)
+    do iter = 1, niter
+        if (timers_enabled) call timer_start(T_evolve)
+        call evolve(u0, u1, twiddle, dims(1), dims(2), dims(3))
+        if (timers_enabled) call timer_stop(T_evolve)
+        if (timers_enabled) call timer_start(T_fft)
+    !         call fft(-1, u1, u2)
+        call fft(-1, u1, u1)
+        if (timers_enabled) call timer_stop(T_fft)
+        if (timers_enabled) call timer_start(T_checksum)
+    !         call checksum(iter, u2, dims(1), dims(2), dims(3))
+        call checksum(iter, u1, dims(1), dims(2), dims(3))
+        if (timers_enabled) call timer_stop(T_checksum)
+    end do
+    call verify(nx, ny, nz, niter, verified, class)
+    call timer_stop(t_total)
+    total_time = timer_read(t_total)
+    if( total_time /= 0. ) then
+        mflops = 1.0d-6*float(ntotal) * &
+        (14.8157+7.19641*log(float(ntotal)) &
+        +  (5.23518+7.21113*log(float(ntotal)))*niter) &
+        /total_time
+    else
+        mflops = 0.0
+    endif
+    call print_results('FT', class, nx, ny, nz, niter, &
+    total_time, mflops, '          floating point', verified, &
+    npbversion, compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
+    if (timers_enabled) call print_timers()
+    END PROGRAM
--- a/Parser/test-data/nas/ft.f
+++ b/Parser/test-data/nas/ft.f
--- a/Parser/test-data/nas/ft2.f
+++ b/Parser/test-data/nas/ft2.f
--- a/Parser/test-data/nas/ft3.f
+++ b/Parser/test-data/nas/ft3.f
--- a/Parser/test-data/nas/ft4.f
+++ b/Parser/test-data/nas/ft4.f
--- a/Parser/test-data/nas/ft5.f
+++ b/Parser/test-data/nas/ft5.f
--- a/Parser/test-data/rules/R503a.f90
+++ b/Parser/test-data/rules/R503a.f90
+real(RDP), intent(in)    :: d(:)
+real(RDP), intent(in)    :: d(0:)
+real(RDP), intent(in)    :: d(:,0:)
+real(RDP), intent(in)    :: u_v(    :,:)
+real(RDP), intent(inout) :: r_e(  :,:,:)
+real(RDP), intent(inout) :: r_v(    :,:)
+REAL, DIMENSION (N, 10) :: W
+REAL A (:), B (0:)
+REAL, POINTER :: D (:, :)
+REAL, DIMENSION (:), POINTER :: P
+REAL, ALLOCATABLE, DIMENSION (:) :: E
+REAL, PARAMETER :: V(0:*) = [0.1, 1.1]
+end
--- a/Parser/test-data/rules/tmp.f90
+++ b/Parser/test-data/rules/tmp.f90
+!$acc parallel async(2+2)
+x = #test#
+!$acc end parallel
+end
--- a/Parser/test-data/slots/block.f
+++ b/Parser/test-data/slots/block.f
+! BlockSlot
+  do x = 0,p
+    #test#
+  end do
+end program
--- a/Parser/test-data/specht/array_operations.f
+++ b/Parser/test-data/specht/array_operations.f
--- a/Parser/test-data/specht/condensed_primary_part.f
+++ b/Parser/test-data/specht/condensed_primary_part.f
--- a/Parser/test-data/specht/matrix_operations.f
+++ b/Parser/test-data/specht/matrix_operations.f
--- a/Parser/test-data/specht/tiny_matrix_products_explicit.f
+++ b/Parser/test-data/specht/tiny_matrix_products_explicit.f
--- a/Parser/test-data/specht/transformed_condensed_part.f
+++ b/Parser/test-data/specht/transformed_condensed_part.f
+!> \file      transformed_condensed_part.f
+!> \brief     Condensed part for condensed face eigenspace solver
+!> \author    Immo Huismann
+!> \date      2015/05/23
+!> \copyright Institute of Fluid Mechanics, TU Dresden, 01062 Dresden, Germany
+!>
+!> \details
+!> This module provides the condensed part for the condensed system eigenspace
+!> solver.
+!===============================================================================
+module Transformed_Condensed_Part
+  use Kind_Parameters,             only: RDP
+  use Constants,                   only: HALF
+  use ACC_Parameters,              only: ACC_EXEC_QUEUE
+  use Condensed_SEM_Operators,     only: CondensedOperators1D
+  use Geometry_Coefficients,       only: GetLaplaceCoefficients
+  use Element_Boundary_Parameters, only: N_FACES
+  implicit none
+  private
+  public :: SubtractTransformedCondensedPart
+contains
+!-------------------------------------------------------------------------------
+!> \brief   Subtracts the condensed part of the eigenspace system from r.
+!> \author  Immo Huismann
+!>
+!> \details
+!> Serves as a wrapper to the real routine, but with far fewer arguments.
+subroutine SubtractTransformedCondensedPart(cop,d,D_inv,u_f,r_f)
+  class(CondensedOperators1D), intent(in) :: cop !< condensed operators
+  real(RDP), intent(in)    :: d(0:,:)            !< Helmholtz parameters
+  real(RDP), intent(in)    :: D_inv(:,:,:,:)     !< eigenvalues of iHii
+  real(RDP), intent(in)    :: u_f(:,:,:,:)       !< variable u on faces
+  real(RDP), intent(inout) :: r_f(:,:,:,:)       !< result on faces
+  integer :: n, n_element         ! points per edge, number of elements
+  ! Prologue: derive the explicit sizes handed to Operator from the shape of D_inv
+  n         = size(D_inv,1)  ! extent of the first dimension of D_inv
+  n_element = size(D_inv,4)  ! extent of the fourth dimension of D_inv
+  ! Computation ................................................................
+  ! Map from faces to inner element eigenspace.
+  ! This is done with the transpose of the transformation matrix and the
+  ! Helmholtz suboperator corresponding to the specific face; only cop%S_T_L_I0 and cop%S_T_L_Ip are passed on.
+  call Operator(n,n_element,cop%S_T_L_I0,cop%S_T_L_Ip,d,u_f,D_inv,r_f)
+end subroutine SubtractTransformedCondensedPart
+!-------------------------------------------------------------------------------
+!> \brief   Explicit size implementation of the transformed condensed part
+!> \author  Immo Huismann
+!>
+!> \details
+!> This implementation uses a small working set (one temporary), leading to few
+!> load and store operations from memory.
+subroutine Operator(n,n_element,S_T_L_I0,S_T_L_Ip,d,u_f,D_inv,r_f)
+  integer,   intent(in)    :: n                !< points per edge
+  integer,   intent(in)    :: n_element        !< number of elements
+  real(RDP), intent(in)    :: S_T_L_I0(n)      !< \f$S^{T} L_{I0}\f$
+  real(RDP), intent(in)    :: S_T_L_Ip(n)      !< \f$S^{T} L_{Ip}\f$
+  real(RDP), intent(in)    :: d(0:3,n_element) !< Helmholtz coefficients; only d(1:3,e) are read in this routine
+  real(RDP), intent(in)    :: u_f  (  n,n,N_FACES,n_element) !< u face values
+  real(RDP), intent(in)    :: D_inv(n,n,n,n_element)         !< D^{-1}
+  real(RDP), intent(inout) :: r_f  (  n,n,N_FACES,n_element) !< r face values
+  real(RDP) :: v(n,n,n)              !< work array for one element; listed as private on the e loop
+  real(RDP) :: L_0I_S(n), L_pI_S(n)  !< local copies of S_T_L_I0 and S_T_L_Ip
+  integer   :: i,j,k,e               !< point indices i,j,k and element index e
+  ! Small structure exploitation: copy the operator vectors into locals used by the scatter loops
+  L_0I_S = S_T_L_I0
+  L_pI_S = S_T_L_Ip
+  ! Loop over all elements; each iteration fills v and then updates r_f for element e
+  !$acc parallel async(ACC_EXEC_QUEUE) present(d,D_inv,u_f,r_f)
+  !$acc loop private(v)
+  !$omp do private(i,j,k,e,v)
+  do e = 1, n_element
+    ! Gather contributions: sum the six face terms into v, then scale v by D_inv
+    !$acc loop collapse(3)
+    do k = 1, n
+    do j = 1, n
+    do i = 1, n
+      v(i,j,k) = d(1,e) * S_T_L_I0(i) * u_f(  j,k,1,e)                         &
+               + d(1,e) * S_T_L_Ip(i) * u_f(  j,k,2,e)                         &
+               + d(2,e) * S_T_L_I0(j) * u_f(i,  k,3,e)                         &
+               + d(2,e) * S_T_L_Ip(j) * u_f(i,  k,4,e)                         &
+               + d(3,e) * S_T_L_I0(k) * u_f(i,j,  5,e)                         &
+               + d(3,e) * S_T_L_Ip(k) * u_f(i,j,  6,e)
+      v(i,j,k) = D_inv(i,j,k,e) * v(i,j,k)
+    end do
+    end do
+    end do
+    !$acc end loop
+    ! Scatter contributions: subtract v, weighted by d and L_0I_S / L_pI_S, from r_f
+    ! First direction: sum over i, updating faces 1 and 2. NOTE(review): no acc 'end loop' follows this nest.
+    !$acc loop collapse(2)
+    do k = 1, n
+    do j = 1, n
+      do i = 1, n
+        r_f(  j,k,1,e) = r_f(  j,k,1,e) - d(1,e) * L_0I_S(i) * v(i,j,k)
+        r_f(  j,k,2,e) = r_f(  j,k,2,e) - d(1,e) * L_pI_S(i) * v(i,j,k)
+      end do
+    end do
+    end do
+    ! Second direction: sum over j, updating faces 3 and 4
+    !$acc loop collapse(2)
+    do k = 1, n
+    do i = 1, n
+      do j = 1, n
+        r_f(i,  k,3,e) = r_f(i,  k,3,e) - d(2,e) * L_0I_S(j) * v(i,j,k)
+        r_f(i,  k,4,e) = r_f(i,  k,4,e) - d(2,e) * L_pI_S(j) * v(i,j,k)
+      end do
+    end do
+    end do
+    !$acc end loop
+    ! Third direction: k is the outer loop here; each k adds into faces 5 and 6
+    do k = 1, n
+      !$acc loop collapse(2)
+      do j = 1, n
+      do i = 1, n
+        r_f(i,j,  5,e) = r_f(i,j,  5,e) - d(3,e) * L_0I_S(k) * v(i,j,k)
+        r_f(i,j,  6,e) = r_f(i,j,  6,e) - d(3,e) * L_pI_S(k) * v(i,j,k)
+      end do
+      end do
+      !$acc end loop
+    end do
+  end do
+  !$omp end do
+  !$acc end loop
+  !$acc end parallel
+end subroutine Operator
+!===============================================================================
+end module Transformed_Condensed_Part
--- a/Parser/test-data/specht/transformed_primary_part.f
+++ b/Parser/test-data/specht/transformed_primary_part.f
--- a/Parser/test/org/tud/forty/test/FragmentTest.java
+++ b/Parser/test/org/tud/forty/test/FragmentTest.java
@@ -2,6 +2,8 @@ package org.tud.forty.test;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
+import org.tud.forty.ast.ExecutableConstruct;
+import org.tud.forty.ast.ExecutionPartConstruct;
 import org.tud.forty.ast.Expr;
 import java.io.File;
@@ -14,9 +16,24 @@ public class FragmentTest extends TestBase {
        return ruleProvider("test-data/fragments/Expr");
    }
+    @DataProvider(name = "executableconstruct") // name referenced by the two ExecutableConstruct-directory tests in this class
+    public static Iterator<Object[]> fortranExecutableConstructProvider() {
+        return ruleProvider("test-data/fragments/ExecutableConstruct"); // ruleProvider is defined outside this hunk
+    }
    @Test(dataProvider = "exprs")
    public void testFragmentExprParser(File f) throws Exception {
        testParse(f, false, true, false, true, Expr.class); // six-argument call; the flag meanings are set by testParse, not shown here
    }
+    @Test(dataProvider = "executableconstruct") // same provider as testFragmentExecutionPartConstructParser
+    public void testFragmentExecutableConstructParser(File f) throws Exception {
+        testParse(f, false, true, false, true, ExecutableConstruct.class, true); // NOTE(review): extra trailing 'true' vs. the Expr test; its meaning is not visible here - confirm
+    }
+    @Test(dataProvider = "executableconstruct") // same provider as testFragmentExecutableConstructParser
+    public void testFragmentExecutionPartConstructParser(File f) throws Exception {
+        testParse(f, false, true, false, true, ExecutionPartConstruct.class, true); // same arguments as the ExecutableConstruct test except for the class passed
+    }
 }