mirror of
				https://github.com/saitohirga/WSJT-X.git
				synced 2025-10-31 13:10:19 -04:00 
			
		
		
		
	More testing of FFTW performance...
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4908 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
		
							parent
							
								
									cbeed6b053
								
							
						
					
					
						commit
						104f001590
					
				
							
								
								
									
										1246
									
								
								lib/fftw3.f03
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1246
									
								
								lib/fftw3.f03
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										4
									
								
								lib/fftw3mod.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								lib/fftw3mod.f90
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | ||||
| module FFTW3 | ||||
|   use, intrinsic :: iso_c_binding | ||||
|   include 'fftw3.f03' | ||||
| end module FFTW3 | ||||
							
								
								
									
										3
									
								
								lib/g4.bat
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								lib/g4.bat
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | ||||
| gcc -c gran.c | ||||
| gfortran -c fftw3mod.f90 | ||||
| gfortran -o timefft timefft.f90 timefft_opts.f90 gran.o libfftw3f-3.dll | ||||
							
								
								
									
										137
									
								
								lib/timefft.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								lib/timefft.f90
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,137 @@ | ||||
| program timefft | ||||
| 
 | ||||
| ! Tests and times one-dimensional FFTs computed by FFTW3 | ||||
| 
 | ||||
|   use, intrinsic :: iso_c_binding | ||||
|   use FFTW3 | ||||
| 
 | ||||
|   complex(C_FLOAT_COMPLEX),pointer :: a(:),b(:),c(:) | ||||
|   real(C_FLOAT),pointer :: ar(:),br(:) | ||||
|   type(C_PTR) :: plan1,plan2              !Pointers to FFTW plans | ||||
|   type(C_PTR) :: pa,pb,pc | ||||
|   integer(C_INT) iret | ||||
|   integer*8 count0,count1,clkfreq | ||||
|   character infile*12,arg*8,problem*9 | ||||
|   logical linplace,lcomplex | ||||
| 
 | ||||
| ! Get command-line parameters | ||||
|   call timefft_opts(npatience,maxthreads,linplace,lcomplex,nfft,problem,nflags) | ||||
| 
 | ||||
| ! Allocate data arrays | ||||
|   pa=fftwf_alloc_complex(int(nfft,C_SIZE_T)) | ||||
|   call c_f_pointer(pa,a,[nfft]) | ||||
|   call c_f_pointer(pa,ar,[nfft]) | ||||
| 
 | ||||
|   pb=fftwf_alloc_complex(int(nfft,C_SIZE_T)) | ||||
|   call c_f_pointer(pb,b,[nfft]) | ||||
|   call c_f_pointer(pb,br,[nfft]) | ||||
| 
 | ||||
|   pc=fftwf_alloc_complex(int(nfft,C_SIZE_T)) | ||||
|   call c_f_pointer(pc,c,[nfft]) | ||||
| 
 | ||||
| ! Initialize FFTW threading | ||||
|   iret=fftwf_init_threads() | ||||
| 
 | ||||
| ! Import FFTW wisdom, if available | ||||
|   iret=fftwf_import_wisdom_from_filename(C_CHAR_'wis.dat' // C_NULL_CHAR) | ||||
| 
 | ||||
|   do i=1,nfft                           !Generate random data | ||||
|      x=gran() | ||||
|      y=gran() | ||||
|      b(i)=cmplx(x,y) | ||||
|   enddo | ||||
|   iters=100 | ||||
| 
 | ||||
|   print*,nflags | ||||
| 
 | ||||
|   write(*,1000)  | ||||
| 1000 format('Problem  Threads Plan    Time    Gflops     RMS'/    & | ||||
|             '--------------------------------------------------') | ||||
| 
 | ||||
| ! Try nthreads = 1,maxthreads | ||||
|   do nthreads=1,maxthreads | ||||
|      a(1:nfft)=b(1:nfft)                             !Copy test data into a() | ||||
|      call system_clock(count0,clkfreq) | ||||
| ! Make the plans | ||||
|      call fftwf_plan_with_nthreads(nthreads) | ||||
|      if(lcomplex) then | ||||
|         if(linplace) then | ||||
|            plan1=fftwf_plan_dft_1d(nfft,a,a,-1,nflags) | ||||
|            plan2=fftwf_plan_dft_1d(nfft,a,a,+1,nflags) | ||||
|         else | ||||
|            plan1=fftwf_plan_dft_1d(nfft,a,c,-1,nflags) | ||||
|            plan2=fftwf_plan_dft_1d(nfft,c,a,+1,nflags) | ||||
|         endif | ||||
|      else | ||||
|         if(linplace) then | ||||
|            plan1=fftwf_plan_dft_r2c_1d(nfft,ar,a,nflags) | ||||
|            plan2=fftwf_plan_dft_c2r_1d(nfft,a,ar,nflags) | ||||
|         else | ||||
|            plan1=fftwf_plan_dft_r2c_1d(nfft,ar,c,nflags) | ||||
|            plan2=fftwf_plan_dft_c2r_1d(nfft,c,ar,nflags) | ||||
|         endif | ||||
|      endif | ||||
|      call system_clock(count1,clkfreq) | ||||
|      tplan=0.5*(count1-count0)/float(clkfreq)    !Plan time for one transform | ||||
| 
 | ||||
|      total=0. | ||||
|      do iter=1,iters                             !Do many iterations | ||||
|         a=b                                      !Copy test data into a() | ||||
|         call system_clock(count0,clkfreq) | ||||
| ! Compute the transforms | ||||
|         if(lcomplex) then | ||||
|            if(linplace) then | ||||
|               call fftwf_execute_dft(plan1,a,a) | ||||
|               call fftwf_execute_dft(plan2,a,a) | ||||
|            else | ||||
|               call fftwf_execute_dft(plan1,a,c) | ||||
|               call fftwf_execute_dft(plan2,c,a) | ||||
|            endif | ||||
|         else | ||||
|            if(linplace) then | ||||
|               call fftwf_execute_dft_r2c(plan1,ar,a) | ||||
|               call fftwf_execute_dft_c2r(plan2,a,ar) | ||||
|            else | ||||
|               call fftwf_execute_dft_r2c(plan1,ar,c) | ||||
|               call fftwf_execute_dft_c2r(plan2,c,ar) | ||||
|            endif | ||||
|         endif | ||||
|         call system_clock(count1,clkfreq) | ||||
|         total=total + (count1-count0)/float(clkfreq)  | ||||
|         if(total.ge.1.0) go to 40                !Cut iterations short if t>1 s | ||||
|      enddo | ||||
|      iter=iters | ||||
| 
 | ||||
| 40   time=0.5*total/iter                         !Time for one FFT  | ||||
|      gflops=5.0/(1.e9*time/(nfft*log(float(nfft))/log(2.0))) | ||||
|      a(1:nfft)=a(1:nfft)/nfft              !Normalize the back-transformed data | ||||
| 
 | ||||
| ! Compute RMS difference between original data and back-transformed data. | ||||
|      sq=0. | ||||
|      if(lcomplex) then | ||||
|         do i=1,nfft | ||||
|            sq=sq + real(a(i)-b(i))**2 + aimag(a(i)-b(i))**2 | ||||
|         enddo | ||||
|      else | ||||
|         do i=1,nfft | ||||
|            sq=sq + (ar(i)-br(i))**2 | ||||
|         enddo | ||||
|      endif | ||||
|      rms=sqrt(sq/nfft) | ||||
| 
 | ||||
| ! Display results | ||||
|      write(*,1050) problem,nthreads,tplan,time,gflops,rms | ||||
| 1050 format(a9,i4,f8.3,f10.6,f7.2,f11.7) | ||||
|   enddo | ||||
| 
 | ||||
| ! Export accumulated FFTW wisdom | ||||
|   iret=fftwf_export_wisdom_to_filename(C_CHAR_'wis.dat' // C_NULL_CHAR) | ||||
| 
 | ||||
| ! Clean up | ||||
|   call fftwf_destroy_plan(plan1) | ||||
|   call fftwf_destroy_plan(plan2) | ||||
|   call fftwf_free(pc) | ||||
|   call fftwf_cleanup() | ||||
|   call fftwf_cleanup_threads() | ||||
| 
 | ||||
| end program timefft | ||||
							
								
								
									
										46
									
								
								lib/timefft_opts.f90
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								lib/timefft_opts.f90
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | ||||
| subroutine timefft_opts(npatience,nthreads,linplace,lcomplex,nfft,    & | ||||
|      problem,nflags) | ||||
| 
 | ||||
|   use FFTW3 | ||||
| 
 | ||||
|   logical linplace,lcomplex | ||||
|   character problem*9,arg*12 | ||||
| 
 | ||||
|   nargs=iargc() | ||||
|   if(nargs.lt.3) then | ||||
|      print*,'Usage: timefft npatience maxthreads [[o|i][r|c]]nfft' | ||||
|      print*,'       npatience  - 0 to 4' | ||||
|      print*,'       maxthreads - suggest number of CPUs minus 1' | ||||
|      print*,'       o,i        - out-of-place or in-place (default=in-place)' | ||||
|      print*,'       r,c        - real or complex (default=complex)' | ||||
|      print*,'Examples:' | ||||
|      print*,'       timefft 2 1 32768     (1 thread in-place, complex)' | ||||
|      print*,'       timefft 2 3 or32768   (3 threads, out-of-place, real)' | ||||
|      stop | ||||
|   endif | ||||
| 
 | ||||
|   call getarg(1,arg) | ||||
|   read(arg,*) npatience | ||||
|   call getarg(2,arg) | ||||
|   read(arg,*) nthreads | ||||
|   call getarg(3,arg) | ||||
|   linplace=arg(1:1).ne.'o' .and. arg(2:2).ne.'o' | ||||
|   lcomplex=arg(1:1).ne.'r' .and. arg(2:2).ne.'r' | ||||
|   k=3 | ||||
|   if(ichar(arg(2:2)).ge.48 .and. ichar(arg(2:2)).le.57) k=2 | ||||
|   if(ichar(arg(1:1)).ge.48 .and. ichar(arg(1:1)).le.57) k=1 | ||||
|   read(arg(k:),*) nfft | ||||
| 
 | ||||
|   write(problem,'(i9)') nfft | ||||
|   problem='ic'//adjustl(problem) | ||||
|   if(.not.linplace) problem(1:1)='o' | ||||
|   if(.not.lcomplex) problem(2:2)='r' | ||||
| 
 | ||||
|   nflags=FFTW_ESTIMATE | ||||
|   if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT | ||||
|   if(npatience.eq.2) nflags=FFTW_MEASURE | ||||
|   if(npatience.eq.3) nflags=FFTW_PATIENT | ||||
|   if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE | ||||
| 
 | ||||
|   return | ||||
| end subroutine timefft_opts | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user