mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-22 12:23:37 -05:00
Several tweaks to chkfft.f90. Add descriptive file chkfft.txt.
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4836 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
22e15084e1
commit
1889b24c78
@ -16,16 +16,18 @@ program chkfft
|
|||||||
nargs=iargc()
|
nargs=iargc()
|
||||||
if(nargs.ne.5) then
|
if(nargs.ne.5) then
|
||||||
print*,'Usage: chkfft <nfft | infile> nr nw nc np'
|
print*,'Usage: chkfft <nfft | infile> nr nw nc np'
|
||||||
print*,' nr: 0/1 for read wisdom'
|
print*,' nfft: length of FFT'
|
||||||
print*,' nw: 0/1 for write wisdom'
|
print*,' nfft=0: do lengths 2^n, n=2^4 to 2^23'
|
||||||
print*,' nc: 0/1 for real/complex'
|
print*,' infile: name of file with nfft values, one per line'
|
||||||
print*,' np: patience, 0-4'
|
print*,' nr: 0/1 to not read (or read) wisdom'
|
||||||
print*,' negative nfft: powers of 2 up to 2^(-nfft)'
|
print*,' nw: 0/1 to not write (or write) wisdom'
|
||||||
|
print*,' nc: 0/1 for real or complex data'
|
||||||
|
print*,' np: 0-4 patience for finding best algorithm'
|
||||||
go to 999
|
go to 999
|
||||||
endif
|
endif
|
||||||
|
|
||||||
list=.false.
|
list=.false.
|
||||||
nfft=0
|
nfft=-1
|
||||||
call getarg(1,infile)
|
call getarg(1,infile)
|
||||||
open(10,file=infile,status='old',err=1)
|
open(10,file=infile,status='old',err=1)
|
||||||
list=.true. !A valid file name was provided
|
list=.true. !A valid file name was provided
|
||||||
@ -46,12 +48,15 @@ program chkfft
|
|||||||
1002 format(/'nfft: ',i10,' nr:',i2,' nw',i2,' nc:',i2,' np:',i2/)
|
1002 format(/'nfft: ',i10,' nr:',i2,' nw',i2,' nc:',i2,' np:',i2/)
|
||||||
|
|
||||||
open(12,file='chkfft.out',status='unknown')
|
open(12,file='chkfft.out',status='unknown')
|
||||||
open(13,file='fftw_wisdom.dat',status='unknown')
|
open(13,file='fftwf_wisdom.dat',status='unknown')
|
||||||
|
|
||||||
if(nr.ne.0) then
|
if(nr.ne.0) then
|
||||||
call import_wisdom_from_file(isuccess,13)
|
call import_wisdom_from_file(isuccess,13)
|
||||||
if(isuccess.ne.0) write(*,1010)
|
if(isuccess.eq.0) then
|
||||||
1010 format('Imported FFTW wisdom')
|
write(*,1010)
|
||||||
|
1010 format('Failed to import FFTW wisdom.')
|
||||||
|
go to 999
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
idum=-1 !Set random seed
|
idum=-1 !Set random seed
|
||||||
@ -72,9 +77,9 @@ program chkfft
|
|||||||
' tplan'/61('-'))
|
' tplan'/61('-'))
|
||||||
else
|
else
|
||||||
n1=4
|
n1=4
|
||||||
n2=min(-nfft,23)
|
n2=23
|
||||||
write(*,1030)
|
write(*,1030)
|
||||||
1030 format(' n N=2^n Time rms MHz MFlops iters', &
|
1030 format(' n N=2^n Time rms MHz MFlops iters', &
|
||||||
' tplan'/63('-'))
|
' tplan'/63('-'))
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@ -118,7 +123,7 @@ program chkfft
|
|||||||
if(tplan.lt.0) tplan=0.
|
if(tplan.lt.0) tplan=0.
|
||||||
a(1:nfft)=a(1:nfft)/nfft
|
a(1:nfft)=a(1:nfft)/nfft
|
||||||
|
|
||||||
! Compute RMS error
|
! Compute RMS difference between original array and back-transformed array.
|
||||||
sq=0.
|
sq=0.
|
||||||
if(ncomplex.eq.1) then
|
if(ncomplex.eq.1) then
|
||||||
do i=1,nfft
|
do i=1,nfft
|
||||||
@ -133,7 +138,7 @@ program chkfft
|
|||||||
|
|
||||||
freq=1.e-6*nfft/time
|
freq=1.e-6*nfft/time
|
||||||
mflops=5.0/(1.e6*time/(nfft*log(float(nfft))/log(2.0)))
|
mflops=5.0/(1.e6*time/(nfft*log(float(nfft))/log(2.0)))
|
||||||
if(n2.eq.999999) then
|
if(n2.eq.1 .or. n2.eq.999999) then
|
||||||
write(*,1050) nfft,time,rms,freq,mflops,iter,tplan
|
write(*,1050) nfft,time,rms,freq,mflops,iter,tplan
|
||||||
write(12,1050) nfft,time,rms,freq,mflops,iter,tplan
|
write(12,1050) nfft,time,rms,freq,mflops,iter,tplan
|
||||||
1050 format(i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1)
|
1050 format(i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1)
|
||||||
@ -149,8 +154,8 @@ program chkfft
|
|||||||
if(nw.eq.1) then
|
if(nw.eq.1) then
|
||||||
rewind 13
|
rewind 13
|
||||||
call export_wisdom_to_file(13)
|
call export_wisdom_to_file(13)
|
||||||
write(*,1070)
|
! write(*,1070)
|
||||||
1070 format('Exported FFTW wisdom')
|
!1070 format(/'Exported FFTW wisdom')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
999 call four2a(0,-1,0,0,0)
|
999 call four2a(0,-1,0,0,0)
|
||||||
|
115
lib/chkfft.txt
Normal file
115
lib/chkfft.txt
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
Brief Description of chkfft, by K1JT
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
Discrete Fourier transforms (DFTs) are found at the root of most
|
||||||
|
digital signal processing tasks. In WSJT and its sister programs the
|
||||||
|
transforms are done using the FFTW library, and subroutine four2
|
||||||
|
provides a convenient interface to the library. Program chkfft is a
|
||||||
|
command-line utility offering a convenient way to test FFT execution
|
||||||
|
times under a variety of circumstances.
|
||||||
|
|
||||||
|
To compile chkfft in Linux:
|
||||||
|
|
||||||
|
$ gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c -lfftw3f
|
||||||
|
|
||||||
|
To compile chkfft in Windows (you may need to customize the hard-coded
|
||||||
|
path shown here for libfftw3f-3.dll):
|
||||||
|
|
||||||
|
> gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c \
|
||||||
|
/JTSDK-QT/appsupport/runtime/libfftw3f-3.dll
|
||||||
|
|
||||||
|
To see a brief usage message, type chkfft at the command prompt:
|
||||||
|
|
||||||
|
$ chkfft
|
||||||
|
Usage: chkfft <nfft | infile> nr nw nc np
|
||||||
|
nfft: length of FFT
|
||||||
|
nfft=0: do lengths 2^n, n=2^4 to 2^23
|
||||||
|
infile: name of file with nfft values, one per line
|
||||||
|
nr: 0/1 to not read (or read) wisdom
|
||||||
|
nw: 0/1 to not write (or write) wisdom
|
||||||
|
nc: 0/1 for real or complex data
|
||||||
|
np: 0-4 patience for finding best algorithm
|
||||||
|
|
||||||
|
As an example, to measure the speed of a complex DFT of length 131072:
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
$ chkfft 131072 0 1 1 2
|
||||||
|
|
||||||
|
nfft: 131072 nr: 0 nw 1 nc: 1 np: 2
|
||||||
|
|
||||||
|
NFFT Time rms MHz MFlops iters tplan
|
||||||
|
-------------------------------------------------------------
|
||||||
|
131072 0.0021948 0.00000032 59.72 5076.1 231 2.9
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
Program output shows that on the test machine the average time for one
|
||||||
|
forward (or inverse) transform of length N=131072 is about 2.2 ms,
|
||||||
|
corresponding to slightly over 5 GFlops computing speed. The planning
|
||||||
|
time in FFTW was 2.9 s.
|
||||||
|
|
||||||
|
Running the command again with parameter nr=1 will use the
|
||||||
|
"wisdom" already accumulated for complex N=131072 FFTs. The execution
|
||||||
|
speed will be essentially the same, but no planning time is required:
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
$ chkfft 131072 1 1 1 2
|
||||||
|
|
||||||
|
nfft: 131072 nr: 1 nw 1 nc: 1 np: 2
|
||||||
|
|
||||||
|
NFFT Time rms MHz MFlops iters tplan
|
||||||
|
-------------------------------------------------------------
|
||||||
|
131072 0.0021575 0.00000032 60.75 5164.0 235 0.0
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
Optimized algorithms can compute DFTs much faster for lengths that are
|
||||||
|
the product of small integers. Length N=131072 = 2^17 is a good
|
||||||
|
example, and FFTs should be very efficient. For comparison, look at
|
||||||
|
the speed for N=131071, a prime number. The average time is now about
|
||||||
|
7 times larger:
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
C:\JTSDK-QT\src\wsjtx\lib>chkfft 131071 1 1 1 2
|
||||||
|
|
||||||
|
nfft: 131071 nr: 1 nw 1 nc: 1 np: 2
|
||||||
|
|
||||||
|
NFFT Time rms MHz MFlops iters tplan
|
||||||
|
-------------------------------------------------------------
|
||||||
|
131071 0.0153637 0.00000065 8.53 725.2 33 5.6
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
Here's an example that measures execution times for all integral
|
||||||
|
power-of-2 lengths from 2^4 to 2^23:
|
||||||
|
|
||||||
|
#######################################################################
|
||||||
|
$ chkfft 0 1 1 1 2
|
||||||
|
|
||||||
|
nfft: 0 nr: 1 nw 1 nc: 1 np: 2
|
||||||
|
|
||||||
|
n N=2^n Time rms MHz MFlops iters tplan
|
||||||
|
---------------------------------------------------------------
|
||||||
|
4 16 0.0000003 0.00000014 58.61 1172.2 1000000 0.0
|
||||||
|
5 32 0.0000004 0.00000016 89.19 2229.6 1000000 0.0
|
||||||
|
6 64 0.0000006 0.00000016 109.44 3283.2 866975 0.0
|
||||||
|
7 128 0.0000009 0.00000021 135.92 4757.1 538369 0.0
|
||||||
|
8 256 0.0000016 0.00000020 158.40 6335.8 313701 0.0
|
||||||
|
9 512 0.0000032 0.00000021 162.53 7313.8 160943 0.1
|
||||||
|
10 1024 0.0000067 0.00000023 152.53 7626.5 75521 0.1
|
||||||
|
11 2048 0.0000136 0.00000025 150.42 8273.3 37239 0.2
|
||||||
|
12 4096 0.0000316 0.00000027 129.75 7784.8 16060 0.3
|
||||||
|
13 8192 0.0000720 0.00000026 113.75 7393.8 7040 0.5
|
||||||
|
14 16384 0.0001620 0.00000028 101.11 7078.0 3129 0.9
|
||||||
|
15 32768 0.0003227 0.00000030 101.53 7615.1 1571 1.7
|
||||||
|
16 65536 0.0010020 0.00000030 65.41 5232.5 506 4.1
|
||||||
|
17 131072 0.0021575 0.00000032 60.75 5164.0 235 0.0
|
||||||
|
18 262144 0.0053937 0.00000032 48.60 4374.2 94 3.6
|
||||||
|
19 524288 0.0190668 0.00000034 27.50 2612.2 27 6.8
|
||||||
|
20 1048576 0.0468001 0.00000035 22.41 2240.5 11 2.4
|
||||||
|
21 2097152 0.0936012 0.00000036 22.41 2352.5 6 31.6
|
||||||
|
22 4194304 0.1949997 0.00000037 21.51 2366.0 3 9.8
|
||||||
|
23 8388608 0.4212036 0.00000038 19.92 2290.3 2 112.9
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
Test data for all transforms is gaussian random noise of zero mean and
|
||||||
|
standard deviation 1. Tabulated values of "rms" are the
|
||||||
|
root-mean-square differences between the original data and the
|
||||||
|
back-transfmred data.
|
Loading…
Reference in New Issue
Block a user