mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-10-31 23:57:10 -04:00
Several tweaks to chkfft.f90. Add descriptive file chkfft.txt.
git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4836 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
20dd76d5d9
commit
01d0b747bc
@ -16,16 +16,18 @@ program chkfft
|
||||
nargs=iargc()
|
||||
if(nargs.ne.5) then
|
||||
print*,'Usage: chkfft <nfft | infile> nr nw nc np'
|
||||
print*,' nr: 0/1 for read wisdom'
|
||||
print*,' nw: 0/1 for write wisdom'
|
||||
print*,' nc: 0/1 for real/complex'
|
||||
print*,' np: patience, 0-4'
|
||||
print*,' negative nfft: powers of 2 up to 2^(-nfft)'
|
||||
print*,' nfft: length of FFT'
|
||||
print*,' nfft=0: do lengths 2^n, n=2^4 to 2^23'
|
||||
print*,' infile: name of file with nfft values, one per line'
|
||||
print*,' nr: 0/1 to not read (or read) wisdom'
|
||||
print*,' nw: 0/1 to not write (or write) wisdom'
|
||||
print*,' nc: 0/1 for real or complex data'
|
||||
print*,' np: 0-4 patience for finding best algorithm'
|
||||
go to 999
|
||||
endif
|
||||
|
||||
list=.false.
|
||||
nfft=0
|
||||
nfft=-1
|
||||
call getarg(1,infile)
|
||||
open(10,file=infile,status='old',err=1)
|
||||
list=.true. !A valid file name was provided
|
||||
@ -46,12 +48,15 @@ program chkfft
|
||||
1002 format(/'nfft: ',i10,' nr:',i2,' nw',i2,' nc:',i2,' np:',i2/)
|
||||
|
||||
open(12,file='chkfft.out',status='unknown')
|
||||
open(13,file='fftw_wisdom.dat',status='unknown')
|
||||
open(13,file='fftwf_wisdom.dat',status='unknown')
|
||||
|
||||
if(nr.ne.0) then
|
||||
call import_wisdom_from_file(isuccess,13)
|
||||
if(isuccess.ne.0) write(*,1010)
|
||||
1010 format('Imported FFTW wisdom')
|
||||
if(isuccess.eq.0) then
|
||||
write(*,1010)
|
||||
1010 format('Failed to import FFTW wisdom.')
|
||||
go to 999
|
||||
endif
|
||||
endif
|
||||
|
||||
idum=-1 !Set random seed
|
||||
@ -72,9 +77,9 @@ program chkfft
|
||||
' tplan'/61('-'))
|
||||
else
|
||||
n1=4
|
||||
n2=min(-nfft,23)
|
||||
n2=23
|
||||
write(*,1030)
|
||||
1030 format(' n N=2^n Time rms MHz MFlops iters', &
|
||||
1030 format(' n N=2^n Time rms MHz MFlops iters', &
|
||||
' tplan'/63('-'))
|
||||
endif
|
||||
|
||||
@ -118,7 +123,7 @@ program chkfft
|
||||
if(tplan.lt.0) tplan=0.
|
||||
a(1:nfft)=a(1:nfft)/nfft
|
||||
|
||||
! Compute RMS error
|
||||
! Compute RMS difference between original array and back-transformed array.
|
||||
sq=0.
|
||||
if(ncomplex.eq.1) then
|
||||
do i=1,nfft
|
||||
@ -133,7 +138,7 @@ program chkfft
|
||||
|
||||
freq=1.e-6*nfft/time
|
||||
mflops=5.0/(1.e6*time/(nfft*log(float(nfft))/log(2.0)))
|
||||
if(n2.eq.999999) then
|
||||
if(n2.eq.1 .or. n2.eq.999999) then
|
||||
write(*,1050) nfft,time,rms,freq,mflops,iter,tplan
|
||||
write(12,1050) nfft,time,rms,freq,mflops,iter,tplan
|
||||
1050 format(i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1)
|
||||
@ -149,8 +154,8 @@ program chkfft
|
||||
if(nw.eq.1) then
|
||||
rewind 13
|
||||
call export_wisdom_to_file(13)
|
||||
write(*,1070)
|
||||
1070 format('Exported FFTW wisdom')
|
||||
! write(*,1070)
|
||||
!1070 format(/'Exported FFTW wisdom')
|
||||
endif
|
||||
|
||||
999 call four2a(0,-1,0,0,0)
|
||||
|
115
lib/chkfft.txt
Normal file
115
lib/chkfft.txt
Normal file
@ -0,0 +1,115 @@
|
||||
Brief Description of chkfft, by K1JT
|
||||
------------------------------------
|
||||
|
||||
Discrete Fourier transforms (DFTs) are found at the root of most
|
||||
digital signal processing tasks. In WSJT and its sister programs the
|
||||
transforms are done using the FFTW library, and subroutine four2
|
||||
provides a convenient interface to the library. Program chkfft is a
|
||||
command-line utility offering a convenient way to test FFT execution
|
||||
times under a variety of circumstances.
|
||||
|
||||
To compile chkfft in Linux:
|
||||
|
||||
$ gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c -lfftw3f
|
||||
|
||||
To compile chkfft in Windows (you may need to customize the hard-coded
|
||||
path shown here for libfftw3f-3.dll):
|
||||
|
||||
> gfortran -o chkfft chkfft.f90 four2a.f90 f77_wisdom.f90 gran.c \
|
||||
/JTSDK-QT/appsupport/runtime/libfftw3f-3.dll
|
||||
|
||||
To see a brief usage message, type chkfft at the command prompt:
|
||||
|
||||
$ chkfft
|
||||
Usage: chkfft <nfft | infile> nr nw nc np
|
||||
nfft: length of FFT
|
||||
nfft=0: do lengths 2^n, n=2^4 to 2^23
|
||||
infile: name of file with nfft values, one per line
|
||||
nr: 0/1 to not read (or read) wisdom
|
||||
nw: 0/1 to not write (or write) wisdom
|
||||
nc: 0/1 for real or complex data
|
||||
np: 0-4 patience for finding best algorithm
|
||||
|
||||
As an example, to measure the speed of a complex DFT of length 131072:
|
||||
|
||||
#######################################################################
|
||||
$ chkfft 131072 0 1 1 2
|
||||
|
||||
nfft: 131072 nr: 0 nw 1 nc: 1 np: 2
|
||||
|
||||
NFFT Time rms MHz MFlops iters tplan
|
||||
-------------------------------------------------------------
|
||||
131072 0.0021948 0.00000032 59.72 5076.1 231 2.9
|
||||
#######################################################################
|
||||
|
||||
Program output shows that on the test machine the average time for one
|
||||
forward (or inverse) transform of length N=131072 is about 2.2 ms,
|
||||
corresponding to slightly over 5 GFlops computing speed. The planning
|
||||
time in FFTW was 2.9 s.
|
||||
|
||||
Running the command again with parameter nr=1 will use the
|
||||
"wisdom" already accumulated for complex N=131072 FFTs. The execution
|
||||
speed will be essentially the same, but no planning time is required:
|
||||
|
||||
#######################################################################
|
||||
$ chkfft 131072 1 1 1 2
|
||||
|
||||
nfft: 131072 nr: 1 nw 1 nc: 1 np: 2
|
||||
|
||||
NFFT Time rms MHz MFlops iters tplan
|
||||
-------------------------------------------------------------
|
||||
131072 0.0021575 0.00000032 60.75 5164.0 235 0.0
|
||||
#######################################################################
|
||||
|
||||
Optimized algorithms can compute DFTs much faster for lengths that are
|
||||
the product of small integers. Length N=131072 = 2^17 is a good
|
||||
example, and FFTs should be very efficient. For comparison, look at
|
||||
the speed for N=131071, a prime number. The average time is now about
|
||||
7 times larger:
|
||||
|
||||
#######################################################################
|
||||
C:\JTSDK-QT\src\wsjtx\lib>chkfft 131071 1 1 1 2
|
||||
|
||||
nfft: 131071 nr: 1 nw 1 nc: 1 np: 2
|
||||
|
||||
NFFT Time rms MHz MFlops iters tplan
|
||||
-------------------------------------------------------------
|
||||
131071 0.0153637 0.00000065 8.53 725.2 33 5.6
|
||||
#######################################################################
|
||||
|
||||
Here's an example that measures execution times for all integral
|
||||
power-of-2 lengths from 2^4 to 2^23:
|
||||
|
||||
#######################################################################
|
||||
$ chkfft 0 1 1 1 2
|
||||
|
||||
nfft: 0 nr: 1 nw 1 nc: 1 np: 2
|
||||
|
||||
n N=2^n Time rms MHz MFlops iters tplan
|
||||
---------------------------------------------------------------
|
||||
4 16 0.0000003 0.00000014 58.61 1172.2 1000000 0.0
|
||||
5 32 0.0000004 0.00000016 89.19 2229.6 1000000 0.0
|
||||
6 64 0.0000006 0.00000016 109.44 3283.2 866975 0.0
|
||||
7 128 0.0000009 0.00000021 135.92 4757.1 538369 0.0
|
||||
8 256 0.0000016 0.00000020 158.40 6335.8 313701 0.0
|
||||
9 512 0.0000032 0.00000021 162.53 7313.8 160943 0.1
|
||||
10 1024 0.0000067 0.00000023 152.53 7626.5 75521 0.1
|
||||
11 2048 0.0000136 0.00000025 150.42 8273.3 37239 0.2
|
||||
12 4096 0.0000316 0.00000027 129.75 7784.8 16060 0.3
|
||||
13 8192 0.0000720 0.00000026 113.75 7393.8 7040 0.5
|
||||
14 16384 0.0001620 0.00000028 101.11 7078.0 3129 0.9
|
||||
15 32768 0.0003227 0.00000030 101.53 7615.1 1571 1.7
|
||||
16 65536 0.0010020 0.00000030 65.41 5232.5 506 4.1
|
||||
17 131072 0.0021575 0.00000032 60.75 5164.0 235 0.0
|
||||
18 262144 0.0053937 0.00000032 48.60 4374.2 94 3.6
|
||||
19 524288 0.0190668 0.00000034 27.50 2612.2 27 6.8
|
||||
20 1048576 0.0468001 0.00000035 22.41 2240.5 11 2.4
|
||||
21 2097152 0.0936012 0.00000036 22.41 2352.5 6 31.6
|
||||
22 4194304 0.1949997 0.00000037 21.51 2366.0 3 9.8
|
||||
23 8388608 0.4212036 0.00000038 19.92 2290.3 2 112.9
|
||||
#######################################################################
|
||||
|
||||
Test data for all transforms is gaussian random noise of zero mean and
|
||||
standard deviation 1. Tabulated values of "rms" are the
|
||||
root-mean-square differences between the original data and the
|
||||
back-transfmred data.
|
Loading…
Reference in New Issue
Block a user