From 22e15084e1e13ef7bc588ff086d288f7fc2d0b98 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Thu, 18 Dec 2014 16:41:18 +0000 Subject: [PATCH] Add chkfft.f90; more thorough comments in four2a.f90. git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4835 ab8295b8-cf94-4d9e-aec4-7959e3be5d79 --- lib/chkfft.f90 | 157 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/four2a.f90 | 59 ++++++++++--------- lib/g1 | 4 ++ lib/g1.bat | 4 ++ 4 files changed, 196 insertions(+), 28 deletions(-) create mode 100644 lib/chkfft.f90 create mode 100644 lib/g1 create mode 100644 lib/g1.bat diff --git a/lib/chkfft.f90 b/lib/chkfft.f90 new file mode 100644 index 000000000..0d9590e0a --- /dev/null +++ b/lib/chkfft.f90 @@ -0,0 +1,157 @@ +program chkfft + +! Tests and times one-dimensional FFTs computed by four2a(). +! An all-Fortran version of four2a() is available, but the preferred +! version uses calls to the FFTW library. + + parameter (NMAX=8*1024*1024) !Maximum FFT length + complex a(NMAX),b(NMAX) + real ar(NMAX),br(NMAX) + real mflops + character infile*12,arg*8 + logical list + common/patience/npatience + equivalence (a,ar),(b,br) + + nargs=iargc() + if(nargs.ne.5) then + print*,'Usage: chkfft nr nw nc np' + print*,' nr: 0/1 for read wisdom' + print*,' nw: 0/1 for write wisdom' + print*,' nc: 0/1 for real/complex' + print*,' np: patience, 0-4' + print*,' negative nfft: powers of 2 up to 2^(-nfft)' + go to 999 + endif + + list=.false. + nfft=0 + call getarg(1,infile) + open(10,file=infile,status='old',err=1) + list=.true. !A valid file name was provided + go to 2 +1 read(infile,*) nfft !Takje first argument to be nfft +2 call getarg(2,arg) + read(arg,*) nr + call getarg(3,arg) + read(arg,*) nw + call getarg(4,arg) + read(arg,*) ncomplex + call getarg(5,arg) + read(arg,*) npatience + + if(list) write(*,1000) infile,nr,nw,ncomplex,npatience +1000 format(/'infile: ',a12,' nr:',i2,' nw',i2,' nc:',i2,' np:',i2/) + if(.not.list) write(*,1002) nfft,nr,nw,ncomplex,npatience +1002 format(/'nfft: ',i10,' nr:',i2,' nw',i2,' nc:',i2,' np:',i2/) + + open(12,file='chkfft.out',status='unknown') + open(13,file='fftw_wisdom.dat',status='unknown') + + if(nr.ne.0) then + call import_wisdom_from_file(isuccess,13) + if(isuccess.ne.0) write(*,1010) +1010 format('Imported FFTW wisdom') + endif + + idum=-1 !Set random seed + ndim=1 !One-dimensional transforms + do i=1,NMAX !Set random data + x=gran() + y=gran() + b(i)=cmplx(x,y) !Generate random data + enddo + + iters=1000000 + if(list .or. (nfft.gt.0)) then + n1=1 + n2=1 + if(nfft.eq.0) n2=999999 + write(*,1020) +1020 format(' NFFT Time rms MHz MFlops iters', & + ' tplan'/61('-')) + else + n1=4 + n2=min(-nfft,23) + write(*,1030) +1030 format(' n N=2^n Time rms MHz MFlops iters', & + ' tplan'/63('-')) + endif + + do ii=n1,n2 !Test one or more FFT lengths + if(list) then + read(10,*,end=900) nfft !Read nfft from file + else if(n2.gt.n1) then + nfft=2**ii !Do powers of 2 + endif + + iformf=1 + iformb=1 + if(ncomplex.eq.0) then + iformf=0 !Real-to-complex transform + iformb=-1 !Complex-to-real (inverse) transform + endif + + if(nfft.gt.NMAX) go to 900 + a(1:nfft)=b(1:nfft) !Copy test data into a() + t0=second() + call four2a(a,nfft,ndim,-1,iformf) !Get planning time for forward FFT + call four2a(a,nfft,ndim,+1,iformb) !Get planning time for backward FFT + t2=second() + tplan=t2-t0 !Total planning time for this length + + total=0. + do iter=1,iters !Now do many iterations + a(1:nfft)=b(1:nfft) !Copy test data into a() + + t0=second() + call four2a(a,nfft,ndim,-1,iformf) !Forward FFT + call four2a(a,nfft,ndim,+1,iformb) !Backward FFT on same data + t1=second() + total=total+t1-t0 + if(total.ge.1.0) go to 40 !Cut iterations short if t>1 s + enddo + iter=iters + +40 time=0.5*total/iter !Time for one FFT of current length + tplan=0.5*tplan-time !Planning time for one FFT + if(tplan.lt.0) tplan=0. + a(1:nfft)=a(1:nfft)/nfft + +! Compute RMS error + sq=0. + if(ncomplex.eq.1) then + do i=1,nfft + sq=sq + real(a(i)-b(i))**2 + imag(a(i)-b(i))**2 + enddo + else + do i=1,nfft + sq=sq + (ar(i)-br(i))**2 + enddo + endif + rms=sqrt(sq/nfft) + + freq=1.e-6*nfft/time + mflops=5.0/(1.e6*time/(nfft*log(float(nfft))/log(2.0))) + if(n2.eq.999999) then + write(*,1050) nfft,time,rms,freq,mflops,iter,tplan + write(12,1050) nfft,time,rms,freq,mflops,iter,tplan +1050 format(i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1) + else + write(*,1060) ii,nfft,time,rms,freq,mflops,iter,tplan + write(12,1060) ii,nfft,time,rms,freq,mflops,iter,tplan +1060 format(i2,i8,f11.7,f12.8,f7.2,f8.1,i8,f6.1) + endif + if(mod(ii,50).eq.0) call four2a(0,-1,0,0,0) + enddo + +900 continue + if(nw.eq.1) then + rewind 13 + call export_wisdom_to_file(13) + write(*,1070) +1070 format('Exported FFTW wisdom') + endif + +999 call four2a(0,-1,0,0,0) +end program chkfft diff --git a/lib/four2a.f90 b/lib/four2a.f90 index f36c605ee..d45c1617d 100644 --- a/lib/four2a.f90 +++ b/lib/four2a.f90 @@ -1,30 +1,34 @@ subroutine four2a(a,nfft,ndim,isign,iform) -! IFORM = 1, 0 or -1, as data is -! complex, real, or the first half of a complex array. Transform -! values are returned in array DATA. They are complex, real, or -! the first half of a complex array, as IFORM = 1, -1 or 0. +! IFORM = 1, 0 or -1, as data is +! complex, real, or the first half of a complex array. Transform +! values are returned in array DATA. They are complex, real, or +! the first half of a complex array, as IFORM = 1, -1 or 0. -! The transform of a real array (IFORM = 0) dimensioned N(1) by N(2) -! by ... will be returned in the same array, now considered to -! be complex of dimensions N(1)/2+1 by N(2) by .... Note that if -! IFORM = 0 or -1, N(1) must be even, and enough room must be -! reserved. The missing values may be obtained by complex conjugation. +! The transform of a real array (IFORM = 0) dimensioned N(1) by N(2) +! by ... will be returned in the same array, now considered to +! be complex of dimensions N(1)/2+1 by N(2) by .... Note that if +! IFORM = 0 or -1, N(1) must be even, and enough room must be +! reserved. The missing values may be obtained by complex conjugation. -! The reverse transformation of a half complex array dimensioned -! N(1)/2+1 by N(2) by ..., is accomplished by setting IFORM -! to -1. In the N array, N(1) must be the true N(1), not N(1)/2+1. -! The transform will be real and returned to the input array. +! The reverse transformation of a half complex array dimensioned +! N(1)/2+1 by N(2) by ..., is accomplished by setting IFORM +! to -1. In the N array, N(1) must be the true N(1), not N(1)/2+1. +! The transform will be real and returned to the input array. - parameter (NPMAX=100) - parameter (NSMALL=16384) - complex a(nfft) - complex aa(NSMALL) - integer nn(NPMAX),ns(NPMAX),nf(NPMAX) - integer*8 plan(NPMAX),nl(NPMAX),nloc - data nplan/0/ - common/patience/npatience - include 'fftw3.f90' +! This version of four2a makes calls to the FFTW library to do the +! actual computations. + + parameter (NPMAX=100) !Max numberf of stored plans + parameter (NSMALL=16384) !Max size of "small" FFTs + complex a(nfft) !Array to be transformed + complex aa(NSMALL) !Local copy of "small" a() + integer nn(NPMAX),ns(NPMAX),nf(NPMAX) !Params of stored plans + integer*8 nl(NPMAX),nloc !More params of plans + integer*8 plan(NPMAX) !Pointers to stored plans + data nplan/0/ !Number of stored plans + common/patience/npatience !Patience for forming FFTW plans + include 'fftw3.f90' !FFTW definitions save plan,nplan,nn,ns,nf,nl if(nfft.lt.0) go to 999 @@ -53,10 +57,9 @@ subroutine four2a(a,nfft,ndim,isign,iform) if(nfft.le.NSMALL) then jz=nfft if(iform.eq.0) jz=nfft/2 - do j=1,jz - aa(j)=a(j) - enddo + aa(1:jz)=a(1:jz) endif + if(isign.eq.-1 .and. iform.eq.1) then call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags) else if(isign.eq.1 .and. iform.eq.1) then @@ -68,13 +71,12 @@ subroutine four2a(a,nfft,ndim,isign,iform) else stop 'Unsupported request in four2a' endif + i=nplan if(nfft.le.NSMALL) then jz=nfft if(iform.eq.0) jz=nfft/2 - do j=1,jz - a(j)=aa(j) - enddo + a(1:jz)=aa(1:jz) endif 10 continue @@ -85,6 +87,7 @@ subroutine four2a(a,nfft,ndim,isign,iform) ! The test is only to silence a compiler warning: if(ndim.ne.-999) call sfftw_destroy_plan(plan(i)) enddo + nplan=0 return end subroutine four2a diff --git a/lib/g1 b/lib/g1 new file mode 100644 index 000000000..52f76160a --- /dev/null +++ b/lib/g1 @@ -0,0 +1,4 @@ +gcc -c gran.c +gfortran -c four2a.f90 +gfortran -c f77_wisdom.f90 +gfortran -o chkfft chkfft.f90 four2a.o f77_wisdom.o gran.o -lfftw3f diff --git a/lib/g1.bat b/lib/g1.bat new file mode 100644 index 000000000..6242f25f9 --- /dev/null +++ b/lib/g1.bat @@ -0,0 +1,4 @@ +gcc -c gran.c +gfortran -c four2a.f90 +gfortran -c f77_wisdom.f90 +gfortran -o chkfft chkfft.f90 four2a.o f77_wisdom.o gran.o /JTSDK-QT/appsupport/runtime/libfftw3f-3.dll