mirror of
				https://github.com/saitohirga/WSJT-X.git
				synced 2025-10-25 10:00:23 -04:00 
			
		
		
		
	Several tweaks to chkfft.f90. Add descriptive file chkfft.txt.
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4836 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
		
							parent
							
								
									20dd76d5d9
								
							
						
					
					
						commit
						01d0b747bc
					
				| @ -16,16 +16,18 @@ program chkfft | ||||
|   nargs=iargc() | ||||
|   if(nargs.ne.5) then | ||||
|      print*,'Usage: chkfft <nfft | infile> nr nw nc np' | ||||
|      print*,'         nr: 0/1 for read wisdom' | ||||
|      print*,'         nw: 0/1 for write wisdom' | ||||
|      print*,'         nc: 0/1 for real/complex' | ||||
|      print*,'         np: patience, 0-4' | ||||
|      print*,'         negative nfft: powers of 2 up to 2^(-nfft)' | ||||
|      print*,'       nfft:   length of FFT' | ||||
|      print*,'       nfft=0: do lengths 2^n, n=2^4 to 2^23' | ||||
|      print*,'       infile: name of file with nfft values, one per line' | ||||
|      print*,'       nr:     0/1 to not read (or read) wisdom' | ||||
|      print*,'       nw:     0/1 to not write (or write) wisdom' | ||||
|      print*,'       nc:     0/1 for real or complex data' | ||||
|      print*,'       np:     0-4 patience for finding best algorithm' | ||||
|      go to 999 | ||||
|   endif | ||||
| 
 | ||||
|   list=.false. | ||||
|   nfft=0 | ||||
|   nfft=-1 | ||||
|   call getarg(1,infile) | ||||
|   open(10,file=infile,status='old',err=1) | ||||
|   list=.true.                          !A valid file name was provided | ||||
| @ -46,12 +48,15 @@ program chkfft | ||||
| 1002 format(/'nfft: ',i10,'   nr:',i2,'   nw',i2,'   nc:',i2,'   np:',i2/) | ||||
| 
 | ||||
|   open(12,file='chkfft.out',status='unknown') | ||||
|   open(13,file='fftw_wisdom.dat',status='unknown') | ||||
|   open(13,file='fftwf_wisdom.dat',status='unknown') | ||||
| 
 | ||||
|   if(nr.ne.0) then | ||||
|      call import_wisdom_from_file(isuccess,13) | ||||
|      if(isuccess.ne.0) write(*,1010)  | ||||
| 1010 format('Imported FFTW wisdom') | ||||
|      if(isuccess.eq.0) then | ||||
|         write(*,1010)  | ||||
| 1010    format('Failed to import FFTW wisdom.') | ||||
|         go to 999 | ||||
|      endif | ||||
|   endif | ||||
| 
 | ||||
|   idum=-1                               !Set random seed | ||||
| @ -72,7 +77,7 @@ program chkfft | ||||
|           '  tplan'/61('-')) | ||||
|   else | ||||
|      n1=4 | ||||
|      n2=min(-nfft,23) | ||||
|      n2=23 | ||||
|      write(*,1030)  | ||||
| 1030 format(' n   N=2^n     Time        rms      MHz   MFlops  iters',  & | ||||
|           '  tplan'/63('-')) | ||||
| @ -118,7 +123,7 @@ program chkfft | ||||
|      if(tplan.lt.0) tplan=0. | ||||
|      a(1:nfft)=a(1:nfft)/nfft | ||||
| 
 | ||||
| ! Compute RMS error | ||||
| ! Compute RMS difference between original array and back-transformed array. | ||||
|      sq=0. | ||||
|      if(ncomplex.eq.1) then | ||||
|         do i=1,nfft | ||||
| @ -133,7 +138,7 @@ program chkfft | ||||
| 
 | ||||
|      freq=1.e-6*nfft/time | ||||
|      mflops=5.0/(1.e6*time/(nfft*log(float(nfft))/log(2.0))) | ||||
|      if(n2.eq.999999) then | ||||
|      if(n2.eq.1 .or. n2.eq.999999) then | ||||
|         write(*,1050) nfft,time,rms,freq,mflops,iter,tplan | ||||
|         write(12,1050) nfft,time,rms,freq,mflops,iter,tplan | ||||
| 1050    format(i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1) | ||||
| @ -149,8 +154,8 @@ program chkfft | ||||
|   if(nw.eq.1) then | ||||
|      rewind 13 | ||||
|      call export_wisdom_to_file(13) | ||||
|      write(*,1070)  | ||||
| 1070 format('Exported FFTW wisdom') | ||||
| !     write(*,1070)  | ||||
| !1070 format(/'Exported FFTW wisdom') | ||||
|   endif | ||||
| 
 | ||||
| 999 call four2a(0,-1,0,0,0) | ||||
|  | ||||
							
								
								
									
										115
									
								
								lib/chkfft.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								lib/chkfft.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | ||||
| 		Brief Description of chkfft, by K1JT | ||||
| 		------------------------------------ | ||||
| 
 | ||||
| Discrete Fourier transforms (DFTs) are found at the root of most | ||||
| digital signal processing tasks. In WSJT and its sister programs the | ||||
| transforms are done using the FFTW library, and subroutine four2a | ||||
| provides a convenient interface to the library.  Program chkfft is a | ||||
| command-line utility offering a convenient way to test FFT execution | ||||
| times under a variety of circumstances. | ||||
| 
 | ||||
| To compile chkfft in Linux: | ||||
| 
 | ||||
| $ gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c -lfftw3f | ||||
| 
 | ||||
| To compile chkfft in Windows (you may need to customize the hard-coded | ||||
| path shown here for libfftw3f-3.dll): | ||||
| 
 | ||||
| > gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c \ | ||||
|   /JTSDK-QT/appsupport/runtime/libfftw3f-3.dll | ||||
| 
 | ||||
| To see a brief usage message, type chkfft at the command prompt: | ||||
|   | ||||
| $ chkfft | ||||
|  Usage: chkfft <nfft | infile> nr nw nc np | ||||
|         nfft:   length of FFT | ||||
|         nfft=0: do lengths 2^n, n=2^4 to 2^23 | ||||
|         infile: name of file with nfft values, one per line | ||||
|         nr:     0/1 to not read (or read) wisdom | ||||
|         nw:     0/1 to not write (or write) wisdom | ||||
|         nc:     0/1 for real or complex data | ||||
|         np:     0-4 patience for finding best algorithm | ||||
| 
 | ||||
| As an example, to measure the speed of a complex DFT of length 131072: | ||||
| 
 | ||||
| ####################################################################### | ||||
| $ chkfft 131072 0 1 1 2 | ||||
| 
 | ||||
| nfft:     131072   nr: 0   nw 1   nc: 1   np: 2 | ||||
| 
 | ||||
|     NFFT     Time        rms      MHz   MFlops  iters  tplan | ||||
| ------------------------------------------------------------- | ||||
|   131072  0.0021948  0.00000032  59.72  5076.1     231   2.9 | ||||
| ####################################################################### | ||||
| 
 | ||||
| Program output shows that on the test machine the average time for one | ||||
| forward (or inverse) transform of length N=131072 is about 2.2 ms, | ||||
| corresponding to slightly over 5 GFlops computing speed.  The planning | ||||
| time in FFTW was 2.9 s. | ||||
| 
 | ||||
| Running the command again with parameter nr=1 will use the | ||||
| "wisdom" already accumulated for complex N=131072 FFTs (saved by the | ||||
| previous run, which used nw=1).  The execution speed will be | ||||
| essentially the same, but no planning time is required: | ||||
| 
 | ||||
| ####################################################################### | ||||
| $ chkfft 131072 1 1 1 2 | ||||
| 
 | ||||
| nfft:     131072   nr: 1   nw 1   nc: 1   np: 2 | ||||
| 
 | ||||
|     NFFT     Time        rms      MHz   MFlops  iters  tplan | ||||
| ------------------------------------------------------------- | ||||
|   131072  0.0021575  0.00000032  60.75  5164.0     235   0.0 | ||||
| ####################################################################### | ||||
| 
 | ||||
| Optimized algorithms can compute DFTs much faster for lengths that are | ||||
| the product of small integers.  Length N=131072 = 2^17 is a good | ||||
| example, and FFTs should be very efficient.  For comparison, look at | ||||
| the speed for N=131071, a prime number.  The average time is now about | ||||
| 7 times larger: | ||||
| 
 | ||||
| ####################################################################### | ||||
| C:\JTSDK-QT\src\wsjtx\lib>chkfft 131071 1 1 1 2 | ||||
| 
 | ||||
| nfft:     131071   nr: 1   nw 1   nc: 1   np: 2 | ||||
| 
 | ||||
|     NFFT     Time        rms      MHz   MFlops  iters  tplan | ||||
| ------------------------------------------------------------- | ||||
|   131071  0.0153637  0.00000065   8.53   725.2      33   5.6 | ||||
| ####################################################################### | ||||
| 
 | ||||
| Here's an example that measures execution times for all integral | ||||
| power-of-2 lengths from 2^4 to 2^23: | ||||
| 
 | ||||
| ####################################################################### | ||||
| $ chkfft 0 1 1 1 2 | ||||
| 
 | ||||
| nfft:          0   nr: 1   nw 1   nc: 1   np: 2 | ||||
| 
 | ||||
|   n   N=2^n    Time        rms      MHz   MFlops  iters  tplan | ||||
| --------------------------------------------------------------- | ||||
|  4      16  0.0000003  0.00000014  58.61  1172.2 1000000   0.0 | ||||
|  5      32  0.0000004  0.00000016  89.19  2229.6 1000000   0.0 | ||||
|  6      64  0.0000006  0.00000016 109.44  3283.2  866975   0.0 | ||||
|  7     128  0.0000009  0.00000021 135.92  4757.1  538369   0.0 | ||||
|  8     256  0.0000016  0.00000020 158.40  6335.8  313701   0.0 | ||||
|  9     512  0.0000032  0.00000021 162.53  7313.8  160943   0.1 | ||||
| 10    1024  0.0000067  0.00000023 152.53  7626.5   75521   0.1 | ||||
| 11    2048  0.0000136  0.00000025 150.42  8273.3   37239   0.2 | ||||
| 12    4096  0.0000316  0.00000027 129.75  7784.8   16060   0.3 | ||||
| 13    8192  0.0000720  0.00000026 113.75  7393.8    7040   0.5 | ||||
| 14   16384  0.0001620  0.00000028 101.11  7078.0    3129   0.9 | ||||
| 15   32768  0.0003227  0.00000030 101.53  7615.1    1571   1.7 | ||||
| 16   65536  0.0010020  0.00000030  65.41  5232.5     506   4.1 | ||||
| 17  131072  0.0021575  0.00000032  60.75  5164.0     235   0.0 | ||||
| 18  262144  0.0053937  0.00000032  48.60  4374.2      94   3.6 | ||||
| 19  524288  0.0190668  0.00000034  27.50  2612.2      27   6.8 | ||||
| 20 1048576  0.0468001  0.00000035  22.41  2240.5      11   2.4 | ||||
| 21 2097152  0.0936012  0.00000036  22.41  2352.5       6  31.6 | ||||
| 22 4194304  0.1949997  0.00000037  21.51  2366.0       3   9.8 | ||||
| 23 8388608  0.4212036  0.00000038  19.92  2290.3       2 112.9 | ||||
| ####################################################################### | ||||
| 
 | ||||
| Test data for all transforms is Gaussian random noise of zero mean and | ||||
| standard deviation 1.  Tabulated values of "rms" are the | ||||
| root-mean-square differences between the original data and the | ||||
| back-transformed data. | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user