mirror of
				https://github.com/saitohirga/WSJT-X.git
				synced 2025-11-03 13:30:52 -05:00 
			
		
		
		
	More testing of FFTW performance...
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4908 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
		
							parent
							
								
									cbeed6b053
								
							
						
					
					
						commit
						104f001590
					
				
							
								
								
									
										1246
									
								
								lib/fftw3.f03
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1246
									
								
								lib/fftw3.f03
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										4
									
								
								lib/fftw3mod.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								lib/fftw3mod.f90
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,4 @@
 | 
			
		||||
module FFTW3
 | 
			
		||||
  use, intrinsic :: iso_c_binding
 | 
			
		||||
  include 'fftw3.f03'
 | 
			
		||||
end module FFTW3
 | 
			
		||||
							
								
								
									
										3
									
								
								lib/g4.bat
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								lib/g4.bat
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,3 @@
 | 
			
		||||
gcc -c gran.c
 | 
			
		||||
gfortran -c fftw3mod.f90
 | 
			
		||||
gfortran -o timefft timefft.f90 timefft_opts.f90 gran.o libfftw3f-3.dll
 | 
			
		||||
							
								
								
									
										137
									
								
								lib/timefft.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								lib/timefft.f90
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,137 @@
 | 
			
		||||
program timefft
 | 
			
		||||
 | 
			
		||||
! Tests and times one-dimensional FFTs computed by FFTW3
 | 
			
		||||
 | 
			
		||||
  use, intrinsic :: iso_c_binding
 | 
			
		||||
  use FFTW3
 | 
			
		||||
 | 
			
		||||
  complex(C_FLOAT_COMPLEX),pointer :: a(:),b(:),c(:)
 | 
			
		||||
  real(C_FLOAT),pointer :: ar(:),br(:)
 | 
			
		||||
  type(C_PTR) :: plan1,plan2              !Pointers to FFTW plans
 | 
			
		||||
  type(C_PTR) :: pa,pb,pc
 | 
			
		||||
  integer(C_INT) iret
 | 
			
		||||
  integer*8 count0,count1,clkfreq
 | 
			
		||||
  character infile*12,arg*8,problem*9
 | 
			
		||||
  logical linplace,lcomplex
 | 
			
		||||
 | 
			
		||||
! Get command-line parameters
 | 
			
		||||
  call timefft_opts(npatience,maxthreads,linplace,lcomplex,nfft,problem,nflags)
 | 
			
		||||
 | 
			
		||||
! Allocate data arrays
 | 
			
		||||
  pa=fftwf_alloc_complex(int(nfft,C_SIZE_T))
 | 
			
		||||
  call c_f_pointer(pa,a,[nfft])
 | 
			
		||||
  call c_f_pointer(pa,ar,[nfft])
 | 
			
		||||
 | 
			
		||||
  pb=fftwf_alloc_complex(int(nfft,C_SIZE_T))
 | 
			
		||||
  call c_f_pointer(pb,b,[nfft])
 | 
			
		||||
  call c_f_pointer(pb,br,[nfft])
 | 
			
		||||
 | 
			
		||||
  pc=fftwf_alloc_complex(int(nfft,C_SIZE_T))
 | 
			
		||||
  call c_f_pointer(pc,c,[nfft])
 | 
			
		||||
 | 
			
		||||
! Initialize FFTW threading
 | 
			
		||||
  iret=fftwf_init_threads()
 | 
			
		||||
 | 
			
		||||
! Import FFTW wisdom, if available
 | 
			
		||||
  iret=fftwf_import_wisdom_from_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
 | 
			
		||||
 | 
			
		||||
  do i=1,nfft                           !Generate random data
 | 
			
		||||
     x=gran()
 | 
			
		||||
     y=gran()
 | 
			
		||||
     b(i)=cmplx(x,y)
 | 
			
		||||
  enddo
 | 
			
		||||
  iters=100
 | 
			
		||||
 | 
			
		||||
  print*,nflags
 | 
			
		||||
 | 
			
		||||
  write(*,1000) 
 | 
			
		||||
1000 format('Problem  Threads Plan    Time    Gflops     RMS'/    &
 | 
			
		||||
            '--------------------------------------------------')
 | 
			
		||||
 | 
			
		||||
! Try nthreads = 1,maxthreads
 | 
			
		||||
  do nthreads=1,maxthreads
 | 
			
		||||
     a(1:nfft)=b(1:nfft)                             !Copy test data into a()
 | 
			
		||||
     call system_clock(count0,clkfreq)
 | 
			
		||||
! Make the plans
 | 
			
		||||
     call fftwf_plan_with_nthreads(nthreads)
 | 
			
		||||
     if(lcomplex) then
 | 
			
		||||
        if(linplace) then
 | 
			
		||||
           plan1=fftwf_plan_dft_1d(nfft,a,a,-1,nflags)
 | 
			
		||||
           plan2=fftwf_plan_dft_1d(nfft,a,a,+1,nflags)
 | 
			
		||||
        else
 | 
			
		||||
           plan1=fftwf_plan_dft_1d(nfft,a,c,-1,nflags)
 | 
			
		||||
           plan2=fftwf_plan_dft_1d(nfft,c,a,+1,nflags)
 | 
			
		||||
        endif
 | 
			
		||||
     else
 | 
			
		||||
        if(linplace) then
 | 
			
		||||
           plan1=fftwf_plan_dft_r2c_1d(nfft,ar,a,nflags)
 | 
			
		||||
           plan2=fftwf_plan_dft_c2r_1d(nfft,a,ar,nflags)
 | 
			
		||||
        else
 | 
			
		||||
           plan1=fftwf_plan_dft_r2c_1d(nfft,ar,c,nflags)
 | 
			
		||||
           plan2=fftwf_plan_dft_c2r_1d(nfft,c,ar,nflags)
 | 
			
		||||
        endif
 | 
			
		||||
     endif
 | 
			
		||||
     call system_clock(count1,clkfreq)
 | 
			
		||||
     tplan=0.5*(count1-count0)/float(clkfreq)    !Plan time for one transform
 | 
			
		||||
 | 
			
		||||
     total=0.
 | 
			
		||||
     do iter=1,iters                             !Do many iterations
 | 
			
		||||
        a=b                                      !Copy test data into a()
 | 
			
		||||
        call system_clock(count0,clkfreq)
 | 
			
		||||
! Compute the transforms
 | 
			
		||||
        if(lcomplex) then
 | 
			
		||||
           if(linplace) then
 | 
			
		||||
              call fftwf_execute_dft(plan1,a,a)
 | 
			
		||||
              call fftwf_execute_dft(plan2,a,a)
 | 
			
		||||
           else
 | 
			
		||||
              call fftwf_execute_dft(plan1,a,c)
 | 
			
		||||
              call fftwf_execute_dft(plan2,c,a)
 | 
			
		||||
           endif
 | 
			
		||||
        else
 | 
			
		||||
           if(linplace) then
 | 
			
		||||
              call fftwf_execute_dft_r2c(plan1,ar,a)
 | 
			
		||||
              call fftwf_execute_dft_c2r(plan2,a,ar)
 | 
			
		||||
           else
 | 
			
		||||
              call fftwf_execute_dft_r2c(plan1,ar,c)
 | 
			
		||||
              call fftwf_execute_dft_c2r(plan2,c,ar)
 | 
			
		||||
           endif
 | 
			
		||||
        endif
 | 
			
		||||
        call system_clock(count1,clkfreq)
 | 
			
		||||
        total=total + (count1-count0)/float(clkfreq) 
 | 
			
		||||
        if(total.ge.1.0) go to 40                !Cut iterations short if t>1 s
 | 
			
		||||
     enddo
 | 
			
		||||
     iter=iters
 | 
			
		||||
 | 
			
		||||
40   time=0.5*total/iter                         !Time for one FFT 
 | 
			
		||||
     gflops=5.0/(1.e9*time/(nfft*log(float(nfft))/log(2.0)))
 | 
			
		||||
     a(1:nfft)=a(1:nfft)/nfft              !Normalize the back-transformed data
 | 
			
		||||
 | 
			
		||||
! Compute RMS difference between original data and back-transformed data.
 | 
			
		||||
     sq=0.
 | 
			
		||||
     if(lcomplex) then
 | 
			
		||||
        do i=1,nfft
 | 
			
		||||
           sq=sq + real(a(i)-b(i))**2 + aimag(a(i)-b(i))**2
 | 
			
		||||
        enddo
 | 
			
		||||
     else
 | 
			
		||||
        do i=1,nfft
 | 
			
		||||
           sq=sq + (ar(i)-br(i))**2
 | 
			
		||||
        enddo
 | 
			
		||||
     endif
 | 
			
		||||
     rms=sqrt(sq/nfft)
 | 
			
		||||
 | 
			
		||||
! Display results
 | 
			
		||||
     write(*,1050) problem,nthreads,tplan,time,gflops,rms
 | 
			
		||||
1050 format(a9,i4,f8.3,f10.6,f7.2,f11.7)
 | 
			
		||||
  enddo
 | 
			
		||||
 | 
			
		||||
! Export accumulated FFTW wisdom
 | 
			
		||||
  iret=fftwf_export_wisdom_to_filename(C_CHAR_'wis.dat' // C_NULL_CHAR)
 | 
			
		||||
 | 
			
		||||
! Clean up
 | 
			
		||||
  call fftwf_destroy_plan(plan1)
 | 
			
		||||
  call fftwf_destroy_plan(plan2)
 | 
			
		||||
  call fftwf_free(pc)
 | 
			
		||||
  call fftwf_cleanup()
 | 
			
		||||
  call fftwf_cleanup_threads()
 | 
			
		||||
 | 
			
		||||
end program timefft
 | 
			
		||||
							
								
								
									
										46
									
								
								lib/timefft_opts.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								lib/timefft_opts.f90
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,46 @@
 | 
			
		||||
subroutine timefft_opts(npatience,nthreads,linplace,lcomplex,nfft,    &
 | 
			
		||||
     problem,nflags)
 | 
			
		||||
 | 
			
		||||
  use FFTW3
 | 
			
		||||
 | 
			
		||||
  logical linplace,lcomplex
 | 
			
		||||
  character problem*9,arg*12
 | 
			
		||||
 | 
			
		||||
  nargs=iargc()
 | 
			
		||||
  if(nargs.lt.3) then
 | 
			
		||||
     print*,'Usage: timefft npatience maxthreads [[o|i][r|c]]nfft'
 | 
			
		||||
     print*,'       npatience  - 0 to 4'
 | 
			
		||||
     print*,'       maxthreads - suggest number of CPUs minus 1'
 | 
			
		||||
     print*,'       o,i        - out-of-place or in-place (default=in-place)'
 | 
			
		||||
     print*,'       r,c        - real or complex (default=complex)'
 | 
			
		||||
     print*,'Examples:'
 | 
			
		||||
     print*,'       timefft 2 1 32768     (1 thread in-place, complex)'
 | 
			
		||||
     print*,'       timefft 2 3 or32768   (3 threads, out-of-place, real)'
 | 
			
		||||
     stop
 | 
			
		||||
  endif
 | 
			
		||||
 | 
			
		||||
  call getarg(1,arg)
 | 
			
		||||
  read(arg,*) npatience
 | 
			
		||||
  call getarg(2,arg)
 | 
			
		||||
  read(arg,*) nthreads
 | 
			
		||||
  call getarg(3,arg)
 | 
			
		||||
  linplace=arg(1:1).ne.'o' .and. arg(2:2).ne.'o'
 | 
			
		||||
  lcomplex=arg(1:1).ne.'r' .and. arg(2:2).ne.'r'
 | 
			
		||||
  k=3
 | 
			
		||||
  if(ichar(arg(2:2)).ge.48 .and. ichar(arg(2:2)).le.57) k=2
 | 
			
		||||
  if(ichar(arg(1:1)).ge.48 .and. ichar(arg(1:1)).le.57) k=1
 | 
			
		||||
  read(arg(k:),*) nfft
 | 
			
		||||
 | 
			
		||||
  write(problem,'(i9)') nfft
 | 
			
		||||
  problem='ic'//adjustl(problem)
 | 
			
		||||
  if(.not.linplace) problem(1:1)='o'
 | 
			
		||||
  if(.not.lcomplex) problem(2:2)='r'
 | 
			
		||||
 | 
			
		||||
  nflags=FFTW_ESTIMATE
 | 
			
		||||
  if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
 | 
			
		||||
  if(npatience.eq.2) nflags=FFTW_MEASURE
 | 
			
		||||
  if(npatience.eq.3) nflags=FFTW_PATIENT
 | 
			
		||||
  if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
 | 
			
		||||
 | 
			
		||||
  return
 | 
			
		||||
end subroutine timefft_opts
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user