mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-23 20:58:55 -05:00
More testing of FFTW performance...
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4908 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
bba2757545
commit
ec3fd43b8e
1246
lib/fftw3.f03
Normal file
1246
lib/fftw3.f03
Normal file
File diff suppressed because it is too large
Load Diff
4
lib/fftw3mod.f90
Normal file
4
lib/fftw3mod.f90
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
module FFTW3
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
include 'fftw3.f03'
|
||||||
|
end module FFTW3
|
3
lib/g4.bat
Normal file
3
lib/g4.bat
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
gcc -c gran.c
|
||||||
|
gfortran -c fftw3mod.f90
|
||||||
|
gfortran -o timefft timefft.f90 timefft_opts.f90 gran.o libfftw3f-3.dll
|
137
lib/timefft.f90
Normal file
137
lib/timefft.f90
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
program timefft
|
||||||
|
|
||||||
|
! Tests and times one-dimensional FFTs computed by FFTW3
|
||||||
|
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
use FFTW3
|
||||||
|
|
||||||
|
complex(C_FLOAT_COMPLEX),pointer :: a(:),b(:),c(:)
|
||||||
|
real(C_FLOAT),pointer :: ar(:),br(:)
|
||||||
|
type(C_PTR) :: plan1,plan2 !Pointers to FFTW plans
|
||||||
|
type(C_PTR) :: pa,pb,pc
|
||||||
|
integer(C_INT) iret
|
||||||
|
integer*8 count0,count1,clkfreq
|
||||||
|
character infile*12,arg*8,problem*9
|
||||||
|
logical linplace,lcomplex
|
||||||
|
|
||||||
|
! Get command-line parameters
|
||||||
|
call timefft_opts(npatience,maxthreads,linplace,lcomplex,nfft,problem,nflags)
|
||||||
|
|
||||||
|
! Allocate data arrays
|
||||||
|
pa=fftwf_alloc_complex(int(nfft,C_SIZE_T))
|
||||||
|
call c_f_pointer(pa,a,[nfft])
|
||||||
|
call c_f_pointer(pa,ar,[nfft])
|
||||||
|
|
||||||
|
pb=fftwf_alloc_complex(int(nfft,C_SIZE_T))
|
||||||
|
call c_f_pointer(pb,b,[nfft])
|
||||||
|
call c_f_pointer(pb,br,[nfft])
|
||||||
|
|
||||||
|
pc=fftwf_alloc_complex(int(nfft,C_SIZE_T))
|
||||||
|
call c_f_pointer(pc,c,[nfft])
|
||||||
|
|
||||||
|
! Initialize FFTW threading
|
||||||
|
iret=fftwf_init_threads()
|
||||||
|
|
||||||
|
! Import FFTW wisdom, if available
|
||||||
|
iret=fftwf_import_wisdom_from_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
|
||||||
|
|
||||||
|
do i=1,nfft !Generate random data
|
||||||
|
x=gran()
|
||||||
|
y=gran()
|
||||||
|
b(i)=cmplx(x,y)
|
||||||
|
enddo
|
||||||
|
iters=100
|
||||||
|
|
||||||
|
print*,nflags
|
||||||
|
|
||||||
|
write(*,1000)
|
||||||
|
1000 format('Problem Threads Plan Time Gflops RMS'/ &
|
||||||
|
'--------------------------------------------------')
|
||||||
|
|
||||||
|
! Try nthreads = 1,maxthreads
|
||||||
|
do nthreads=1,maxthreads
|
||||||
|
a(1:nfft)=b(1:nfft) !Copy test data into a()
|
||||||
|
call system_clock(count0,clkfreq)
|
||||||
|
! Make the plans
|
||||||
|
call fftwf_plan_with_nthreads(nthreads)
|
||||||
|
if(lcomplex) then
|
||||||
|
if(linplace) then
|
||||||
|
plan1=fftwf_plan_dft_1d(nfft,a,a,-1,nflags)
|
||||||
|
plan2=fftwf_plan_dft_1d(nfft,a,a,+1,nflags)
|
||||||
|
else
|
||||||
|
plan1=fftwf_plan_dft_1d(nfft,a,c,-1,nflags)
|
||||||
|
plan2=fftwf_plan_dft_1d(nfft,c,a,+1,nflags)
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
if(linplace) then
|
||||||
|
plan1=fftwf_plan_dft_r2c_1d(nfft,ar,a,nflags)
|
||||||
|
plan2=fftwf_plan_dft_c2r_1d(nfft,a,ar,nflags)
|
||||||
|
else
|
||||||
|
plan1=fftwf_plan_dft_r2c_1d(nfft,ar,c,nflags)
|
||||||
|
plan2=fftwf_plan_dft_c2r_1d(nfft,c,ar,nflags)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
call system_clock(count1,clkfreq)
|
||||||
|
tplan=0.5*(count1-count0)/float(clkfreq) !Plan time for one transform
|
||||||
|
|
||||||
|
total=0.
|
||||||
|
do iter=1,iters !Do many iterations
|
||||||
|
a=b !Copy test data into a()
|
||||||
|
call system_clock(count0,clkfreq)
|
||||||
|
! Compute the transforms
|
||||||
|
if(lcomplex) then
|
||||||
|
if(linplace) then
|
||||||
|
call fftwf_execute_dft(plan1,a,a)
|
||||||
|
call fftwf_execute_dft(plan2,a,a)
|
||||||
|
else
|
||||||
|
call fftwf_execute_dft(plan1,a,c)
|
||||||
|
call fftwf_execute_dft(plan2,c,a)
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
if(linplace) then
|
||||||
|
call fftwf_execute_dft_r2c(plan1,ar,a)
|
||||||
|
call fftwf_execute_dft_c2r(plan2,a,ar)
|
||||||
|
else
|
||||||
|
call fftwf_execute_dft_r2c(plan1,ar,c)
|
||||||
|
call fftwf_execute_dft_c2r(plan2,c,ar)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
call system_clock(count1,clkfreq)
|
||||||
|
total=total + (count1-count0)/float(clkfreq)
|
||||||
|
if(total.ge.1.0) go to 40 !Cut iterations short if t>1 s
|
||||||
|
enddo
|
||||||
|
iter=iters
|
||||||
|
|
||||||
|
40 time=0.5*total/iter !Time for one FFT
|
||||||
|
gflops=5.0/(1.e9*time/(nfft*log(float(nfft))/log(2.0)))
|
||||||
|
a(1:nfft)=a(1:nfft)/nfft !Normalize the back-transformed data
|
||||||
|
|
||||||
|
! Compute RMS difference between original data and back-transformed data.
|
||||||
|
sq=0.
|
||||||
|
if(lcomplex) then
|
||||||
|
do i=1,nfft
|
||||||
|
sq=sq + real(a(i)-b(i))**2 + aimag(a(i)-b(i))**2
|
||||||
|
enddo
|
||||||
|
else
|
||||||
|
do i=1,nfft
|
||||||
|
sq=sq + (ar(i)-br(i))**2
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
rms=sqrt(sq/nfft)
|
||||||
|
|
||||||
|
! Display results
|
||||||
|
write(*,1050) problem,nthreads,tplan,time,gflops,rms
|
||||||
|
1050 format(a9,i4,f8.3,f10.6,f7.2,f11.7)
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Export accumulated FFTW wisdom
|
||||||
|
iret=fftwf_export_wisdom_to_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
|
||||||
|
|
||||||
|
! Clean up
|
||||||
|
call fftwf_destroy_plan(plan1)
|
||||||
|
call fftwf_destroy_plan(plan2)
|
||||||
|
call fftwf_free(pc)
|
||||||
|
call fftwf_cleanup()
|
||||||
|
call fftwf_cleanup_threads()
|
||||||
|
|
||||||
|
end program timefft
|
46
lib/timefft_opts.f90
Normal file
46
lib/timefft_opts.f90
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
subroutine timefft_opts(npatience,nthreads,linplace,lcomplex,nfft, &
|
||||||
|
problem,nflags)
|
||||||
|
|
||||||
|
use FFTW3
|
||||||
|
|
||||||
|
logical linplace,lcomplex
|
||||||
|
character problem*9,arg*12
|
||||||
|
|
||||||
|
nargs=iargc()
|
||||||
|
if(nargs.lt.3) then
|
||||||
|
print*,'Usage: timefft npatience maxthreads [[o|i][r|c]]nfft'
|
||||||
|
print*,' npatience - 0 to 4'
|
||||||
|
print*,' maxthreads - suggest number of CPUs minus 1'
|
||||||
|
print*,' o,i - out-of-place or in-place (default=in-place)'
|
||||||
|
print*,' r,c - real or complex (default=complex)'
|
||||||
|
print*,'Examples:'
|
||||||
|
print*,' timefft 2 1 32768 (1 thread in-place, complex)'
|
||||||
|
print*,' timefft 2 3 or32768 (3 threads, out-of-place, real)'
|
||||||
|
stop
|
||||||
|
endif
|
||||||
|
|
||||||
|
call getarg(1,arg)
|
||||||
|
read(arg,*) npatience
|
||||||
|
call getarg(2,arg)
|
||||||
|
read(arg,*) nthreads
|
||||||
|
call getarg(3,arg)
|
||||||
|
linplace=arg(1:1).ne.'o' .and. arg(2:2).ne.'o'
|
||||||
|
lcomplex=arg(1:1).ne.'r' .and. arg(2:2).ne.'r'
|
||||||
|
k=3
|
||||||
|
if(ichar(arg(2:2)).ge.48 .and. ichar(arg(2:2)).le.57) k=2
|
||||||
|
if(ichar(arg(1:1)).ge.48 .and. ichar(arg(1:1)).le.57) k=1
|
||||||
|
read(arg(k:),*) nfft
|
||||||
|
|
||||||
|
write(problem,'(i9)') nfft
|
||||||
|
problem='ic'//adjustl(problem)
|
||||||
|
if(.not.linplace) problem(1:1)='o'
|
||||||
|
if(.not.lcomplex) problem(2:2)='r'
|
||||||
|
|
||||||
|
nflags=FFTW_ESTIMATE
|
||||||
|
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
|
||||||
|
if(npatience.eq.2) nflags=FFTW_MEASURE
|
||||||
|
if(npatience.eq.3) nflags=FFTW_PATIENT
|
||||||
|
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
|
||||||
|
|
||||||
|
return
|
||||||
|
end subroutine timefft_opts
|
Loading…
Reference in New Issue
Block a user