From 3a8232b113d7b04575379bec73abc290f0ba0360 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Fri, 30 Jan 2015 21:28:10 +0000 Subject: [PATCH] Made a start at implementing an option to use multi-threaded FFTs. New command-line option for jt9: [-m nthreads]. Default is nthreads=1. Also refactored a loop in filbig.f90 that was taking far too much time. git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4916 ab8295b8-cf94-4d9e-aec4-7959e3be5d79 --- CMakeLists.txt | 1 + lib/Makefile.jtsdk | 22 ++++++++++---------- lib/filbig.f90 | 40 +++++++++++++++++++---------------- lib/four2a.f90 | 2 +- lib/jt9.f90 | 52 ++++++++++++++++++---------------------------- lib/jt9c.f90 | 2 +- lib/usleep.c | 1 + mainwindow.cpp | 3 ++- 8 files changed, 59 insertions(+), 64 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b640c9f32..710260e12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,6 +255,7 @@ set (wsjt_FSRCS lib/fano232.f90 lib/fchisq.f90 lib/fchisq65.f90 + lib/fftw3mod.f90 lib/fil3.f90 lib/fil4.f90 lib/fil6521.f90 diff --git a/lib/Makefile.jtsdk b/lib/Makefile.jtsdk index 135804caf..83ea2762e 100644 --- a/lib/Makefile.jtsdk +++ b/lib/Makefile.jtsdk @@ -5,16 +5,16 @@ # Set paths EXE_DIR = ../../wsjtx_install -INCPATH = -I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/QtCore' \ - -I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/' +INCPATH = -I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/QtCore' \ + -I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/' # Compilers -CC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gcc -FC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gfortran -CXX = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/g++ +CC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gcc +FC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gfortran +CXX = c:/JTSDK/Qt5/Tools/mingw48_32/bin/g++ -FFLAGS = -O2 -fbounds-check -Wall -Wno-precision-loss -fno-second-underscore -CFLAGS = -I. -fbounds-check -mno-stack-arg-probe +FFLAGS = -O2 -fbounds-check -Wall -fno-second-underscore -DWIN32 +CFLAGS = -I. -fbounds-check -mno-stack-arg-probe -DWIN32 # Default rules %.o: %.c @@ -34,7 +34,7 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \ unpackmsg.o igray.o unpackcall.o unpackgrid.o \ grid2k.o unpacktext.o getpfx2.o packmsg.o deg2grid.o \ packtext.o getpfx1.o packcall.o k2grid.o packgrid.o \ - nchar.o four2a.o grid2deg.o pfxdump.o f77_wisdom.o \ + nchar.o four2a.o grid2deg.o pfxdump.o wisdom.o \ symspec.o analytic.o db.o genjt9.o flat1.o smo.o \ packbits.o unpackbits.o encode232.o interleave9.o \ entail.o fano232.o gran.o sync9.o decode9.o \ @@ -48,17 +48,17 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \ move.o indexx.o graycode65.o twkfreq65.o smo121.o \ wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \ flat3.o polfit.o determ.o baddata.o prog_args.f90 \ - options.o prog_args.o + options.o prog_args.o libjt9.a: $(OBJS1) ar cr libjt9.a $(OBJS1) ranlib libjt9.a OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o -LIBS2 = -L'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/lib' -lQt5Core +LIBS2 = -L'C:/JTSDK/Qt5/5.2.1/mingw48_32/lib' -lQt5Core jt9.exe: $(OBJS2) libjt9.a $(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \ - ../libfftw3f_win.a -lgfortran + C:\JTSDK\fftw3f\libfftw3f-3.dll -lgfortran # mkdir -p $(EXE_DIR) cp jt9.exe $(EXE_DIR) diff --git a/lib/filbig.f90 b/lib/filbig.f90 index 530f80642..8cd8f81ea 100644 --- a/lib/filbig.f90 +++ b/lib/filbig.f90 @@ -3,6 +3,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) ! Filter and downsample the real data in array dd(npts), sampled at 12000 Hz. ! Output is complex, sampled at 1378.125 Hz. + use, intrinsic :: iso_c_binding + use FFTW3 + parameter (NSZ=3413) parameter (NFFT1=672000,NFFT2=77175) parameter (NZ2=1000) @@ -15,15 +18,17 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) real halfpulse(8) !Impulse response of filter (one sided) complex cfilt(NFFT2) !Filter (complex; imag = 0) real rfilt(NFFT2) !Filter (real) - integer*8 plan1,plan2,plan3 +! integer*8 plan1,plan2,plan3 + type(C_PTR) :: plan1,plan2,plan3 !Pointers to FFTW plans + logical first - include 'fftw3.f90' +! include 'fftw3.f90' equivalence (rfilt,cfilt),(rca,ca) data first/.true./ data halfpulse/114.97547150,36.57879257,-20.93789101, & 5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/ common/refspec/dfref,ref(NSZ) - common/patience/npatience + common/patience/npatience,nthreads save if(npts.lt.0) go to 900 !Clean up at end of program @@ -35,11 +40,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) if(npatience.eq.3) nflags=FFTW_PATIENT if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE ! Plan the FFTs just once - call timer('FFTplans ',0) - call sfftw_plan_dft_r2c_1d(plan1,nfft1,rca,rca,nflags) - call sfftw_plan_dft_1d(plan2,nfft2,c4a,c4a,FFTW_FORWARD,nflags) - call sfftw_plan_dft_1d(plan3,nfft2,cfilt,cfilt,FFTW_BACKWARD,nflags) - call timer('FFTplans ',1) + plan1=fftwf_plan_dft_r2c_1d(nfft1,rca,ca,nflags) + plan2=fftwf_plan_dft_1d(nfft2,c4a,c4a,-1,nflags) + plan3=fftwf_plan_dft_1d(nfft2,cfilt,cfilt,+1,nflags) ! Convert impulse response to filter function do i=1,nfft2 @@ -51,11 +54,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) cfilt(i)=fac*halfpulse(i) cfilt(nfft2+2-i)=fac*halfpulse(i) enddo - call timer('FFTfilt ',0) call sfftw_execute(plan3) - call timer('FFTfilt ',1) - base=cfilt(nfft2/2+1) + base=real(cfilt(nfft2/2+1)) do i=1,nfft2 rfilt(i)=real(cfilt(i))-base enddo @@ -68,25 +69,27 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) ! If we just have a new f0, continue with the existing data in ca. if(newdat.ne.0) then + call timer('FFTbig ',0) nz=min(npts,nfft1) rca(1:nz)=dd(1:nz) rca(nz+1:)=0. - call timer('FFTbig ',0) call sfftw_execute(plan1) call timer('FFTbig ',1) - do i=1,NFFT1/2 !Flatten the spectrum - j=nint(i*df/dfref) - if(j.lt.1) j=1 - if(j.gt.NSZ) j=NSZ + call timer('flatten ',0) + ib=0 + do j=1,NSZ + ia=ib+1 + ib=nint(j*dfref/df) fac=sqrt(min(30.0,1.0/ref(j))) - ca(i)=conjg(fac * ca(i)) + ca(ia:ib)=fac*conjg(ca(ia:ib)) enddo + call timer('flatten ',1) endif ! NB: f0 is the frequency at which we want our filter centered. ! i0 is the bin number in ca closest to f0. - + call timer('loops ',0) i0=nint(f0/df) + 1 nh=nfft2/2 do i=1,nh !Copy data into c4a and apply @@ -117,6 +120,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) enddo enddo call pctile(s,NZ2,30,sq0) + call timer('loops ',1) ! Do the short reverse transform, to go back to time domain. call timer('FFTsmall',0) diff --git a/lib/four2a.f90 b/lib/four2a.f90 index e03adcd9c..c3b377557 100644 --- a/lib/four2a.f90 +++ b/lib/four2a.f90 @@ -27,7 +27,7 @@ subroutine four2a(a,nfft,ndim,isign,iform) integer*8 nl(NPMAX),nloc !More params of plans integer*8 plan(NPMAX) !Pointers to stored plans data nplan/0/ !Number of stored plans - common/patience/npatience !Patience for forming FFTW plans + common/patience/npatience,nthreads !Patience and threads for FFTW plans include 'fftw3.f90' !FFTW definitions save plan,nplan,nn,ns,nf,nl diff --git a/lib/jt9.f90 b/lib/jt9.f90 index c270d6a83..f1b0bb2a0 100644 --- a/lib/jt9.f90 +++ b/lib/jt9.f90 @@ -5,25 +5,29 @@ program jt9 use options use prog_args + use, intrinsic :: iso_c_binding + use FFTW3 include 'constants.f90' + integer(C_INT) iret integer*4 ihdr(11) real*4 s(NSMAX) integer*2 id2 character c character(len=500) optarg, infile - character wisfile*80,firstline*40 + character wisfile*80 integer*4 arglen,stat,offset,remain logical :: shmem = .false., read_files = .false., have_args = .false. type (option) :: long_options (0) common/jt9com/ss(184,NSMAX),savg(NSMAX),id2(NMAX),nutc,ndiskdat,ntr, & mousefqso,newdat,nfa,nfsplit,nfb,ntol,kin,nzhsym,nsynced,ndecoded common/tracer/limtrace,lu - common/patience/npatience - data npatience/1/ + common/patience/npatience,nthreads + data npatience/1/,nthreads/1/ do - call getopt('s:e:a:r:p:d:f:w:t:',long_options,c,optarg,arglen,stat,offset,remain) + call getopt('s:e:a:r:m:p:d:f:w:t:',long_options,c,optarg,arglen,stat, & + offset,remain) if (stat .ne. 0) then exit end if @@ -42,6 +46,9 @@ program jt9 case ('t') temp_dir = optarg(:arglen) + case ('m') + read (optarg(:arglen), *) nthreads + case ('p') read_files = .true. read (optarg(:arglen), *) ntrperiod @@ -56,39 +63,26 @@ program jt9 case ('w') read (optarg(:arglen), *) npatience + end select end do if (.not. have_args .or. (stat .lt. 0 .or. (shmem .and. remain .gt. 0) & .or. (read_files .and. remain .eq. 0) .or. & (shmem .and. read_files))) then - print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] -e exe_dir file1 [file2 ...]' + print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] [-e exe_dir] [-m nthreads] file1 [file2 ...]' print*,' Reads data from *.wav files.' print*,'' - print*,' jt9 -s [-w patience] -e exe_dir -a data_dir -t temp_dir' + print*,' jt9 -s [-w patience] [-m nthreads] -e exe_dir -a data_dir -t temp_dir' print*,' Gets data from shared memory region with key==' go to 999 endif -! Import FFTW wisdom, if available: - open(14,file=trim(data_dir)//'/jt9_wisdom_status.txt',status='unknown',err=30) - open(28,file=trim(data_dir)//'/jt9_wisdom.dat',status='old',err=30) - read(28,1000,err=30,end=30) firstline -1000 format(a40) - rewind 28 - isuccess=0 - wisfile=trim(data_dir)//'/jt9_wisdom.dat' - n=len_trim(wisfile) - call import_wisdom(wisfile(1:n)//char(0),isuccess) - close(28) -30 if(isuccess.ne.0) then - write(14,1010) firstline -1010 format('Imported FFTW wisdom (jt9): ',a40) - else - write(14,1011) -1011 format('No imported FFTW wisdom (jt9):') - endif - call flush(14) + iret=fftwf_init_threads() !Initialize FFTW threading + call fftwf_plan_with_nthreads(nthreads) +! Import FFTW wisdom, if available + wisfile=trim(data_dir)//'/jt9_wisdom.dat'// C_NULL_CHAR + iret=fftwf_import_wisdom_from_filename(wisfile) if (shmem) then call jt9a() @@ -169,13 +163,7 @@ program jt9 print*,infile 999 continue -! Export FFTW wisdom - wisfile=trim(data_dir)//'/jt9_wisdom.dat' - n=len_trim(wisfile) - call export_wisdom(wisfile(1:n)//char(0)) - write(14,1999) -1999 format('Exported FFTW wisdom (jt9): ') - call flush(14) + iret=fftwf_export_wisdom_to_filename(wisfile) call four2a(a,-1,1,1,1) !Save wisdom and free memory call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans diff --git a/lib/jt9c.f90 b/lib/jt9c.f90 index e77045e1e..9f06274a3 100644 --- a/lib/jt9c.f90 +++ b/lib/jt9c.f90 @@ -8,7 +8,7 @@ subroutine jt9c(ss,savg,id2,nparams0) character*20 datetime common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, & ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime - common/patience/npatience + common/patience/npatience,nthreads equivalence (nparams,nutc) nutc=id2(1)+int(savg(1)) !Silence compiler warning diff --git a/lib/usleep.c b/lib/usleep.c index 21d242a68..6fbc98c67 100644 --- a/lib/usleep.c +++ b/lib/usleep.c @@ -1,4 +1,5 @@ #include +#include "sleep.h" /* usleep(3) */ void usleep_(unsigned long *microsec) diff --git a/mainwindow.cpp b/mainwindow.cpp index 593de02cf..26658bd1f 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -375,7 +375,8 @@ MainWindow::MainWindow(bool multiple, QSettings * settings, QSharedMemory *shdme QStringList jt9_args { "-s", QApplication::applicationName () - , "-w", "1" + , "-w", "1" //FFTW patience + , "-m", "1" //FFTW threads , "-e", QDir::toNativeSeparators (m_appDir) , "-a", QDir::toNativeSeparators (m_dataDir.absolutePath ()) , "-t", QDir::toNativeSeparators (m_config.temp_dir ().absolutePath ())