WSJT-X/lib/timefft.f90

142 lines
4.3 KiB
Fortran

program timefft
! Tests and times one-dimensional FFTs computed by FFTW3
use, intrinsic :: iso_c_binding
use FFTW3
complex(C_FLOAT_COMPLEX),pointer :: a(:),b(:),c(:)
real(C_FLOAT),pointer :: ar(:),br(:)
type(C_PTR) :: plan1,plan2 !Pointers to FFTW plans
type(C_PTR) :: pa,pb,pc
integer(C_INT) iret
integer*8 count0,count1,clkfreq
character problem*9
logical linplace,lcomplex,lthreading
! Get command-line parameters
call timefft_opts(npatience,maxthreads,linplace,lcomplex,nfft,problem,nflags)
lthreading=maxthreads.ge.1
maxthreads=max(1,maxthreads)
call sgran() ! see C rand generator (used in gran)
! Allocate data arrays
pa=fftwf_alloc_complex(int(nfft,C_SIZE_T))
call c_f_pointer(pa,a,[nfft])
call c_f_pointer(pa,ar,[nfft])
pb=fftwf_alloc_complex(int(nfft,C_SIZE_T))
call c_f_pointer(pb,b,[nfft])
call c_f_pointer(pb,br,[nfft])
pc=fftwf_alloc_complex(int(nfft,C_SIZE_T))
call c_f_pointer(pc,c,[nfft])
! Initialize FFTW threading
if(lthreading) iret=fftwf_init_threads()
! Import FFTW wisdom, if available
iret=fftwf_import_wisdom_from_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
do i=1,nfft !Generate random data
x=gran()
y=gran()
b(i)=cmplx(x,y)
enddo
iters=100
write(*,1000)
1000 format(/'Problem Threads Plan Time Gflops RMS iters'/ &
'--------------------------------------------------------')
! Try nthreads = 1,maxthreads
do nthreads=1,maxthreads
a(1:nfft)=b(1:nfft) !Copy test data into a()
call system_clock(count0,clkfreq)
! Make the plans
if(lthreading) call fftwf_plan_with_nthreads(nthreads)
if(lcomplex) then
if(linplace) then
plan1=fftwf_plan_dft_1d(nfft,a,a,-1,nflags)
plan2=fftwf_plan_dft_1d(nfft,a,a,+1,nflags)
else
plan1=fftwf_plan_dft_1d(nfft,a,c,-1,nflags)
plan2=fftwf_plan_dft_1d(nfft,c,a,+1,nflags)
endif
else
if(linplace) then
plan1=fftwf_plan_dft_r2c_1d(nfft,ar,a,nflags)
plan2=fftwf_plan_dft_c2r_1d(nfft,a,ar,nflags)
else
plan1=fftwf_plan_dft_r2c_1d(nfft,ar,c,nflags)
plan2=fftwf_plan_dft_c2r_1d(nfft,c,ar,nflags)
endif
endif
call system_clock(count1,clkfreq)
tplan=0.5*float(count1-count0)/float(clkfreq) !Plan time for one transform
total=0.
do iter=1,iters !Do many iterations
a=b !Copy test data into a()
call system_clock(count0,clkfreq)
! Compute the transforms
if(lcomplex) then
if(linplace) then
call fftwf_execute_dft(plan1,a,a)
call fftwf_execute_dft(plan2,a,a)
else
call fftwf_execute_dft(plan1,a,c)
call fftwf_execute_dft(plan2,c,a)
endif
else
if(linplace) then
call fftwf_execute_dft_r2c(plan1,ar,a)
call fftwf_execute_dft_c2r(plan2,a,ar)
else
call fftwf_execute_dft_r2c(plan1,ar,c)
call fftwf_execute_dft_c2r(plan2,c,ar)
endif
endif
call system_clock(count1,clkfreq)
total=total + float(count1-count0)/float(clkfreq)
if(total>=1.0 .and. iter>=10) go to 40 !Cut iterations short ?
enddo
iter=iters
40 time=0.5*total/iter !Time for one FFT
gflops=5.0/(1.e9*time/(nfft*log(float(nfft))/log(2.0)))
a(1:nfft)=a(1:nfft)/nfft !Normalize the back-transformed data
! Compute RMS difference between original data and back-transformed data.
sq=0.
if(lcomplex) then
do i=1,nfft
sq=sq + real(a(i)-b(i))**2 + aimag(a(i)-b(i))**2
enddo
else
do i=1,nfft
sq=sq + (ar(i)-br(i))**2
enddo
endif
rms=sqrt(sq/nfft)
! Display results
write(*,1050) problem,nthreads,tplan,time,gflops,rms,iter
1050 format(a9,i4,f8.3,f10.6,f7.2,f11.7,i5)
enddo
! Export accumulated FFTW wisdom
iret=fftwf_export_wisdom_to_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
! Clean up
call fftwf_destroy_plan(plan1)
call fftwf_destroy_plan(plan2)
call fftwf_free(pa)
call fftwf_free(pb)
call fftwf_free(pc)
call fftwf_cleanup_threads()
call fftwf_cleanup()
end program timefft