Made a start at implementing an option to use multi-threaded FFTs.

New command-line option for jt9: [-m nthreads].  Default is nthreads=1.
Also refactored a loop in filbig.f90 that was taking far too much
time.


git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4916 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Joe Taylor 2015-01-30 21:28:10 +00:00
parent 72305cf10f
commit 3a8232b113
8 changed files with 59 additions and 64 deletions

View File

@ -255,6 +255,7 @@ set (wsjt_FSRCS
lib/fano232.f90 lib/fano232.f90
lib/fchisq.f90 lib/fchisq.f90
lib/fchisq65.f90 lib/fchisq65.f90
lib/fftw3mod.f90
lib/fil3.f90 lib/fil3.f90
lib/fil4.f90 lib/fil4.f90
lib/fil6521.f90 lib/fil6521.f90

View File

@ -5,16 +5,16 @@
# Set paths # Set paths
EXE_DIR = ../../wsjtx_install EXE_DIR = ../../wsjtx_install
INCPATH = -I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/QtCore' \ INCPATH = -I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/QtCore' \
-I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/' -I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/'
# Compilers # Compilers
CC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gcc CC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gcc
FC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gfortran FC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gfortran
CXX = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/g++ CXX = c:/JTSDK/Qt5/Tools/mingw48_32/bin/g++
FFLAGS = -O2 -fbounds-check -Wall -Wno-precision-loss -fno-second-underscore FFLAGS = -O2 -fbounds-check -Wall -fno-second-underscore -DWIN32
CFLAGS = -I. -fbounds-check -mno-stack-arg-probe CFLAGS = -I. -fbounds-check -mno-stack-arg-probe -DWIN32
# Default rules # Default rules
%.o: %.c %.o: %.c
@ -34,7 +34,7 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
unpackmsg.o igray.o unpackcall.o unpackgrid.o \ unpackmsg.o igray.o unpackcall.o unpackgrid.o \
grid2k.o unpacktext.o getpfx2.o packmsg.o deg2grid.o \ grid2k.o unpacktext.o getpfx2.o packmsg.o deg2grid.o \
packtext.o getpfx1.o packcall.o k2grid.o packgrid.o \ packtext.o getpfx1.o packcall.o k2grid.o packgrid.o \
nchar.o four2a.o grid2deg.o pfxdump.o f77_wisdom.o \ nchar.o four2a.o grid2deg.o pfxdump.o wisdom.o \
symspec.o analytic.o db.o genjt9.o flat1.o smo.o \ symspec.o analytic.o db.o genjt9.o flat1.o smo.o \
packbits.o unpackbits.o encode232.o interleave9.o \ packbits.o unpackbits.o encode232.o interleave9.o \
entail.o fano232.o gran.o sync9.o decode9.o \ entail.o fano232.o gran.o sync9.o decode9.o \
@ -48,17 +48,17 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
move.o indexx.o graycode65.o twkfreq65.o smo121.o \ move.o indexx.o graycode65.o twkfreq65.o smo121.o \
wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \ wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \
flat3.o polfit.o determ.o baddata.o prog_args.f90 \ flat3.o polfit.o determ.o baddata.o prog_args.f90 \
options.o prog_args.o options.o prog_args.o
libjt9.a: $(OBJS1) libjt9.a: $(OBJS1)
ar cr libjt9.a $(OBJS1) ar cr libjt9.a $(OBJS1)
ranlib libjt9.a ranlib libjt9.a
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
LIBS2 = -L'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/lib' -lQt5Core LIBS2 = -L'C:/JTSDK/Qt5/5.2.1/mingw48_32/lib' -lQt5Core
jt9.exe: $(OBJS2) libjt9.a jt9.exe: $(OBJS2) libjt9.a
$(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \ $(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \
../libfftw3f_win.a -lgfortran C:\JTSDK\fftw3f\libfftw3f-3.dll -lgfortran
# mkdir -p $(EXE_DIR) # mkdir -p $(EXE_DIR)
cp jt9.exe $(EXE_DIR) cp jt9.exe $(EXE_DIR)

View File

@ -3,6 +3,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
! Filter and downsample the real data in array dd(npts), sampled at 12000 Hz. ! Filter and downsample the real data in array dd(npts), sampled at 12000 Hz.
! Output is complex, sampled at 1378.125 Hz. ! Output is complex, sampled at 1378.125 Hz.
use, intrinsic :: iso_c_binding
use FFTW3
parameter (NSZ=3413) parameter (NSZ=3413)
parameter (NFFT1=672000,NFFT2=77175) parameter (NFFT1=672000,NFFT2=77175)
parameter (NZ2=1000) parameter (NZ2=1000)
@ -15,15 +18,17 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
real halfpulse(8) !Impulse response of filter (one sided) real halfpulse(8) !Impulse response of filter (one sided)
complex cfilt(NFFT2) !Filter (complex; imag = 0) complex cfilt(NFFT2) !Filter (complex; imag = 0)
real rfilt(NFFT2) !Filter (real) real rfilt(NFFT2) !Filter (real)
integer*8 plan1,plan2,plan3 ! integer*8 plan1,plan2,plan3
type(C_PTR) :: plan1,plan2,plan3 !Pointers to FFTW plans
logical first logical first
include 'fftw3.f90' ! include 'fftw3.f90'
equivalence (rfilt,cfilt),(rca,ca) equivalence (rfilt,cfilt),(rca,ca)
data first/.true./ data first/.true./
data halfpulse/114.97547150,36.57879257,-20.93789101, & data halfpulse/114.97547150,36.57879257,-20.93789101, &
5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/ 5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/
common/refspec/dfref,ref(NSZ) common/refspec/dfref,ref(NSZ)
common/patience/npatience common/patience/npatience,nthreads
save save
if(npts.lt.0) go to 900 !Clean up at end of program if(npts.lt.0) go to 900 !Clean up at end of program
@ -35,11 +40,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
if(npatience.eq.3) nflags=FFTW_PATIENT if(npatience.eq.3) nflags=FFTW_PATIENT
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
! Plan the FFTs just once ! Plan the FFTs just once
call timer('FFTplans ',0) plan1=fftwf_plan_dft_r2c_1d(nfft1,rca,ca,nflags)
call sfftw_plan_dft_r2c_1d(plan1,nfft1,rca,rca,nflags) plan2=fftwf_plan_dft_1d(nfft2,c4a,c4a,-1,nflags)
call sfftw_plan_dft_1d(plan2,nfft2,c4a,c4a,FFTW_FORWARD,nflags) plan3=fftwf_plan_dft_1d(nfft2,cfilt,cfilt,+1,nflags)
call sfftw_plan_dft_1d(plan3,nfft2,cfilt,cfilt,FFTW_BACKWARD,nflags)
call timer('FFTplans ',1)
! Convert impulse response to filter function ! Convert impulse response to filter function
do i=1,nfft2 do i=1,nfft2
@ -51,11 +54,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
cfilt(i)=fac*halfpulse(i) cfilt(i)=fac*halfpulse(i)
cfilt(nfft2+2-i)=fac*halfpulse(i) cfilt(nfft2+2-i)=fac*halfpulse(i)
enddo enddo
call timer('FFTfilt ',0)
call sfftw_execute(plan3) call sfftw_execute(plan3)
call timer('FFTfilt ',1)
base=cfilt(nfft2/2+1) base=real(cfilt(nfft2/2+1))
do i=1,nfft2 do i=1,nfft2
rfilt(i)=real(cfilt(i))-base rfilt(i)=real(cfilt(i))-base
enddo enddo
@ -68,25 +69,27 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
! If we just have a new f0, continue with the existing data in ca. ! If we just have a new f0, continue with the existing data in ca.
if(newdat.ne.0) then if(newdat.ne.0) then
call timer('FFTbig ',0)
nz=min(npts,nfft1) nz=min(npts,nfft1)
rca(1:nz)=dd(1:nz) rca(1:nz)=dd(1:nz)
rca(nz+1:)=0. rca(nz+1:)=0.
call timer('FFTbig ',0)
call sfftw_execute(plan1) call sfftw_execute(plan1)
call timer('FFTbig ',1) call timer('FFTbig ',1)
do i=1,NFFT1/2 !Flatten the spectrum call timer('flatten ',0)
j=nint(i*df/dfref) ib=0
if(j.lt.1) j=1 do j=1,NSZ
if(j.gt.NSZ) j=NSZ ia=ib+1
ib=nint(j*dfref/df)
fac=sqrt(min(30.0,1.0/ref(j))) fac=sqrt(min(30.0,1.0/ref(j)))
ca(i)=conjg(fac * ca(i)) ca(ia:ib)=fac*conjg(ca(ia:ib))
enddo enddo
call timer('flatten ',1)
endif endif
! NB: f0 is the frequency at which we want our filter centered. ! NB: f0 is the frequency at which we want our filter centered.
! i0 is the bin number in ca closest to f0. ! i0 is the bin number in ca closest to f0.
call timer('loops ',0)
i0=nint(f0/df) + 1 i0=nint(f0/df) + 1
nh=nfft2/2 nh=nfft2/2
do i=1,nh !Copy data into c4a and apply do i=1,nh !Copy data into c4a and apply
@ -117,6 +120,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
enddo enddo
enddo enddo
call pctile(s,NZ2,30,sq0) call pctile(s,NZ2,30,sq0)
call timer('loops ',1)
! Do the short reverse transform, to go back to time domain. ! Do the short reverse transform, to go back to time domain.
call timer('FFTsmall',0) call timer('FFTsmall',0)

View File

@ -27,7 +27,7 @@ subroutine four2a(a,nfft,ndim,isign,iform)
integer*8 nl(NPMAX),nloc !More params of plans integer*8 nl(NPMAX),nloc !More params of plans
integer*8 plan(NPMAX) !Pointers to stored plans integer*8 plan(NPMAX) !Pointers to stored plans
data nplan/0/ !Number of stored plans data nplan/0/ !Number of stored plans
common/patience/npatience !Patience for forming FFTW plans common/patience/npatience,nthreads !Patience and threads for FFTW plans
include 'fftw3.f90' !FFTW definitions include 'fftw3.f90' !FFTW definitions
save plan,nplan,nn,ns,nf,nl save plan,nplan,nn,ns,nf,nl

View File

@ -5,25 +5,29 @@ program jt9
use options use options
use prog_args use prog_args
use, intrinsic :: iso_c_binding
use FFTW3
include 'constants.f90' include 'constants.f90'
integer(C_INT) iret
integer*4 ihdr(11) integer*4 ihdr(11)
real*4 s(NSMAX) real*4 s(NSMAX)
integer*2 id2 integer*2 id2
character c character c
character(len=500) optarg, infile character(len=500) optarg, infile
character wisfile*80,firstline*40 character wisfile*80
integer*4 arglen,stat,offset,remain integer*4 arglen,stat,offset,remain
logical :: shmem = .false., read_files = .false., have_args = .false. logical :: shmem = .false., read_files = .false., have_args = .false.
type (option) :: long_options (0) type (option) :: long_options (0)
common/jt9com/ss(184,NSMAX),savg(NSMAX),id2(NMAX),nutc,ndiskdat,ntr, & common/jt9com/ss(184,NSMAX),savg(NSMAX),id2(NMAX),nutc,ndiskdat,ntr, &
mousefqso,newdat,nfa,nfsplit,nfb,ntol,kin,nzhsym,nsynced,ndecoded mousefqso,newdat,nfa,nfsplit,nfb,ntol,kin,nzhsym,nsynced,ndecoded
common/tracer/limtrace,lu common/tracer/limtrace,lu
common/patience/npatience common/patience/npatience,nthreads
data npatience/1/ data npatience/1/,nthreads/1/
do do
call getopt('s:e:a:r:p:d:f:w:t:',long_options,c,optarg,arglen,stat,offset,remain) call getopt('s:e:a:r:m:p:d:f:w:t:',long_options,c,optarg,arglen,stat, &
offset,remain)
if (stat .ne. 0) then if (stat .ne. 0) then
exit exit
end if end if
@ -42,6 +46,9 @@ program jt9
case ('t') case ('t')
temp_dir = optarg(:arglen) temp_dir = optarg(:arglen)
case ('m')
read (optarg(:arglen), *) nthreads
case ('p') case ('p')
read_files = .true. read_files = .true.
read (optarg(:arglen), *) ntrperiod read (optarg(:arglen), *) ntrperiod
@ -56,39 +63,26 @@ program jt9
case ('w') case ('w')
read (optarg(:arglen), *) npatience read (optarg(:arglen), *) npatience
end select end select
end do end do
if (.not. have_args .or. (stat .lt. 0 .or. (shmem .and. remain .gt. 0) & if (.not. have_args .or. (stat .lt. 0 .or. (shmem .and. remain .gt. 0) &
.or. (read_files .and. remain .eq. 0) .or. & .or. (read_files .and. remain .eq. 0) .or. &
(shmem .and. read_files))) then (shmem .and. read_files))) then
print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] -e exe_dir file1 [file2 ...]' print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] [-e exe_dir] [-m nthreads] file1 [file2 ...]'
print*,' Reads data from *.wav files.' print*,' Reads data from *.wav files.'
print*,'' print*,''
print*,' jt9 -s <key> [-w patience] -e exe_dir -a data_dir -t temp_dir' print*,' jt9 -s <key> [-w patience] [-m nthreads] -e exe_dir -a data_dir -t temp_dir'
print*,' Gets data from shared memory region with key==<key>' print*,' Gets data from shared memory region with key==<key>'
go to 999 go to 999
endif endif
! Import FFTW wisdom, if available: iret=fftwf_init_threads() !Initialize FFTW threading
open(14,file=trim(data_dir)//'/jt9_wisdom_status.txt',status='unknown',err=30) call fftwf_plan_with_nthreads(nthreads)
open(28,file=trim(data_dir)//'/jt9_wisdom.dat',status='old',err=30) ! Import FFTW wisdom, if available
read(28,1000,err=30,end=30) firstline wisfile=trim(data_dir)//'/jt9_wisdom.dat'// C_NULL_CHAR
1000 format(a40) iret=fftwf_import_wisdom_from_filename(wisfile)
rewind 28
isuccess=0
wisfile=trim(data_dir)//'/jt9_wisdom.dat'
n=len_trim(wisfile)
call import_wisdom(wisfile(1:n)//char(0),isuccess)
close(28)
30 if(isuccess.ne.0) then
write(14,1010) firstline
1010 format('Imported FFTW wisdom (jt9): ',a40)
else
write(14,1011)
1011 format('No imported FFTW wisdom (jt9):')
endif
call flush(14)
if (shmem) then if (shmem) then
call jt9a() call jt9a()
@ -169,13 +163,7 @@ program jt9
print*,infile print*,infile
999 continue 999 continue
! Export FFTW wisdom iret=fftwf_export_wisdom_to_filename(wisfile)
wisfile=trim(data_dir)//'/jt9_wisdom.dat'
n=len_trim(wisfile)
call export_wisdom(wisfile(1:n)//char(0))
write(14,1999)
1999 format('Exported FFTW wisdom (jt9): ')
call flush(14)
call four2a(a,-1,1,1,1) !Save wisdom and free memory call four2a(a,-1,1,1,1) !Save wisdom and free memory
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans

View File

@ -8,7 +8,7 @@ subroutine jt9c(ss,savg,id2,nparams0)
character*20 datetime character*20 datetime
common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, & common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, &
ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime
common/patience/npatience common/patience/npatience,nthreads
equivalence (nparams,nutc) equivalence (nparams,nutc)
nutc=id2(1)+int(savg(1)) !Silence compiler warning nutc=id2(1)+int(savg(1)) !Silence compiler warning

View File

@ -1,4 +1,5 @@
#include <unistd.h> #include <unistd.h>
#include "sleep.h"
/* usleep(3) */ /* usleep(3) */
void usleep_(unsigned long *microsec) void usleep_(unsigned long *microsec)

View File

@ -375,7 +375,8 @@ MainWindow::MainWindow(bool multiple, QSettings * settings, QSharedMemory *shdme
QStringList jt9_args { QStringList jt9_args {
"-s", QApplication::applicationName () "-s", QApplication::applicationName ()
, "-w", "1" , "-w", "1" //FFTW patience
, "-m", "1" //FFTW threads
, "-e", QDir::toNativeSeparators (m_appDir) , "-e", QDir::toNativeSeparators (m_appDir)
, "-a", QDir::toNativeSeparators (m_dataDir.absolutePath ()) , "-a", QDir::toNativeSeparators (m_dataDir.absolutePath ())
, "-t", QDir::toNativeSeparators (m_config.temp_dir ().absolutePath ()) , "-t", QDir::toNativeSeparators (m_config.temp_dir ().absolutePath ())