Made a start at implementing an option to use multi-threaded FFTs.

New command-line option for jt9: [-m nthreads].  Default is nthreads=1.
Also refactored a loop in filbig.f90 that was taking far too much
time.


git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4916 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Joe Taylor 2015-01-30 21:28:10 +00:00
parent 72305cf10f
commit 3a8232b113
8 changed files with 59 additions and 64 deletions

View File

@ -255,6 +255,7 @@ set (wsjt_FSRCS
lib/fano232.f90
lib/fchisq.f90
lib/fchisq65.f90
lib/fftw3mod.f90
lib/fil3.f90
lib/fil4.f90
lib/fil6521.f90

View File

@ -5,16 +5,16 @@
# Set paths
EXE_DIR = ../../wsjtx_install
INCPATH = -I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/QtCore' \
-I'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/include/'
INCPATH = -I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/QtCore' \
-I'C:/JTSDK/Qt5/5.2.1/mingw48_32/include/'
# Compilers
CC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gcc
FC = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/gfortran
CXX = c:/JTSDK-QT/Qt5/Tools/mingw48_32/bin/g++
CC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gcc
FC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gfortran
CXX = c:/JTSDK/Qt5/Tools/mingw48_32/bin/g++
FFLAGS = -O2 -fbounds-check -Wall -Wno-precision-loss -fno-second-underscore
CFLAGS = -I. -fbounds-check -mno-stack-arg-probe
FFLAGS = -O2 -fbounds-check -Wall -fno-second-underscore -DWIN32
CFLAGS = -I. -fbounds-check -mno-stack-arg-probe -DWIN32
# Default rules
%.o: %.c
@ -34,7 +34,7 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
unpackmsg.o igray.o unpackcall.o unpackgrid.o \
grid2k.o unpacktext.o getpfx2.o packmsg.o deg2grid.o \
packtext.o getpfx1.o packcall.o k2grid.o packgrid.o \
nchar.o four2a.o grid2deg.o pfxdump.o f77_wisdom.o \
nchar.o four2a.o grid2deg.o pfxdump.o wisdom.o \
symspec.o analytic.o db.o genjt9.o flat1.o smo.o \
packbits.o unpackbits.o encode232.o interleave9.o \
entail.o fano232.o gran.o sync9.o decode9.o \
@ -48,17 +48,17 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
move.o indexx.o graycode65.o twkfreq65.o smo121.o \
wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \
flat3.o polfit.o determ.o baddata.o prog_args.f90 \
options.o prog_args.o
options.o prog_args.o
libjt9.a: $(OBJS1)
ar cr libjt9.a $(OBJS1)
ranlib libjt9.a
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
LIBS2 = -L'C:/JTSDK-QT/Qt5/5.2.1/mingw48_32/lib' -lQt5Core
LIBS2 = -L'C:/JTSDK/Qt5/5.2.1/mingw48_32/lib' -lQt5Core
jt9.exe: $(OBJS2) libjt9.a
$(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \
../libfftw3f_win.a -lgfortran
C:\JTSDK\fftw3f\libfftw3f-3.dll -lgfortran
# mkdir -p $(EXE_DIR)
cp jt9.exe $(EXE_DIR)

View File

@ -3,6 +3,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
! Filter and downsample the real data in array dd(npts), sampled at 12000 Hz.
! Output is complex, sampled at 1378.125 Hz.
use, intrinsic :: iso_c_binding
use FFTW3
parameter (NSZ=3413)
parameter (NFFT1=672000,NFFT2=77175)
parameter (NZ2=1000)
@ -15,15 +18,17 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
real halfpulse(8) !Impulse response of filter (one sided)
complex cfilt(NFFT2) !Filter (complex; imag = 0)
real rfilt(NFFT2) !Filter (real)
integer*8 plan1,plan2,plan3
! integer*8 plan1,plan2,plan3
type(C_PTR) :: plan1,plan2,plan3 !Pointers to FFTW plans
logical first
include 'fftw3.f90'
! include 'fftw3.f90'
equivalence (rfilt,cfilt),(rca,ca)
data first/.true./
data halfpulse/114.97547150,36.57879257,-20.93789101, &
5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/
common/refspec/dfref,ref(NSZ)
common/patience/npatience
common/patience/npatience,nthreads
save
if(npts.lt.0) go to 900 !Clean up at end of program
@ -35,11 +40,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
if(npatience.eq.3) nflags=FFTW_PATIENT
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
! Plan the FFTs just once
call timer('FFTplans ',0)
call sfftw_plan_dft_r2c_1d(plan1,nfft1,rca,rca,nflags)
call sfftw_plan_dft_1d(plan2,nfft2,c4a,c4a,FFTW_FORWARD,nflags)
call sfftw_plan_dft_1d(plan3,nfft2,cfilt,cfilt,FFTW_BACKWARD,nflags)
call timer('FFTplans ',1)
plan1=fftwf_plan_dft_r2c_1d(nfft1,rca,ca,nflags)
plan2=fftwf_plan_dft_1d(nfft2,c4a,c4a,-1,nflags)
plan3=fftwf_plan_dft_1d(nfft2,cfilt,cfilt,+1,nflags)
! Convert impulse response to filter function
do i=1,nfft2
@ -51,11 +54,9 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
cfilt(i)=fac*halfpulse(i)
cfilt(nfft2+2-i)=fac*halfpulse(i)
enddo
call timer('FFTfilt ',0)
call sfftw_execute(plan3)
call timer('FFTfilt ',1)
base=cfilt(nfft2/2+1)
base=real(cfilt(nfft2/2+1))
do i=1,nfft2
rfilt(i)=real(cfilt(i))-base
enddo
@ -68,25 +69,27 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
! If we just have a new f0, continue with the existing data in ca.
if(newdat.ne.0) then
call timer('FFTbig ',0)
nz=min(npts,nfft1)
rca(1:nz)=dd(1:nz)
rca(nz+1:)=0.
call timer('FFTbig ',0)
call sfftw_execute(plan1)
call timer('FFTbig ',1)
do i=1,NFFT1/2 !Flatten the spectrum
j=nint(i*df/dfref)
if(j.lt.1) j=1
if(j.gt.NSZ) j=NSZ
call timer('flatten ',0)
ib=0
do j=1,NSZ
ia=ib+1
ib=nint(j*dfref/df)
fac=sqrt(min(30.0,1.0/ref(j)))
ca(i)=conjg(fac * ca(i))
ca(ia:ib)=fac*conjg(ca(ia:ib))
enddo
call timer('flatten ',1)
endif
! NB: f0 is the frequency at which we want our filter centered.
! i0 is the bin number in ca closest to f0.
call timer('loops ',0)
i0=nint(f0/df) + 1
nh=nfft2/2
do i=1,nh !Copy data into c4a and apply
@ -117,6 +120,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
enddo
enddo
call pctile(s,NZ2,30,sq0)
call timer('loops ',1)
! Do the short reverse transform, to go back to time domain.
call timer('FFTsmall',0)

View File

@ -27,7 +27,7 @@ subroutine four2a(a,nfft,ndim,isign,iform)
integer*8 nl(NPMAX),nloc !More params of plans
integer*8 plan(NPMAX) !Pointers to stored plans
data nplan/0/ !Number of stored plans
common/patience/npatience !Patience for forming FFTW plans
common/patience/npatience,nthreads !Patience and threads for FFTW plans
include 'fftw3.f90' !FFTW definitions
save plan,nplan,nn,ns,nf,nl

View File

@ -5,25 +5,29 @@ program jt9
use options
use prog_args
use, intrinsic :: iso_c_binding
use FFTW3
include 'constants.f90'
integer(C_INT) iret
integer*4 ihdr(11)
real*4 s(NSMAX)
integer*2 id2
character c
character(len=500) optarg, infile
character wisfile*80,firstline*40
character wisfile*80
integer*4 arglen,stat,offset,remain
logical :: shmem = .false., read_files = .false., have_args = .false.
type (option) :: long_options (0)
common/jt9com/ss(184,NSMAX),savg(NSMAX),id2(NMAX),nutc,ndiskdat,ntr, &
mousefqso,newdat,nfa,nfsplit,nfb,ntol,kin,nzhsym,nsynced,ndecoded
common/tracer/limtrace,lu
common/patience/npatience
data npatience/1/
common/patience/npatience,nthreads
data npatience/1/,nthreads/1/
do
call getopt('s:e:a:r:p:d:f:w:t:',long_options,c,optarg,arglen,stat,offset,remain)
call getopt('s:e:a:r:m:p:d:f:w:t:',long_options,c,optarg,arglen,stat, &
offset,remain)
if (stat .ne. 0) then
exit
end if
@ -42,6 +46,9 @@ program jt9
case ('t')
temp_dir = optarg(:arglen)
case ('m')
read (optarg(:arglen), *) nthreads
case ('p')
read_files = .true.
read (optarg(:arglen), *) ntrperiod
@ -56,39 +63,26 @@ program jt9
case ('w')
read (optarg(:arglen), *) npatience
end select
end do
if (.not. have_args .or. (stat .lt. 0 .or. (shmem .and. remain .gt. 0) &
.or. (read_files .and. remain .eq. 0) .or. &
(shmem .and. read_files))) then
print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] -e exe_dir file1 [file2 ...]'
print*,'Usage: jt9 -p TRperiod [-d ndepth] [-f rxfreq] {-w patience] [-e exe_dir] [-m nthreads] file1 [file2 ...]'
print*,' Reads data from *.wav files.'
print*,''
print*,' jt9 -s <key> [-w patience] -e exe_dir -a data_dir -t temp_dir'
print*,' jt9 -s <key> [-w patience] [-m nthreads] -e exe_dir -a data_dir -t temp_dir'
print*,' Gets data from shared memory region with key==<key>'
go to 999
endif
! Import FFTW wisdom, if available:
open(14,file=trim(data_dir)//'/jt9_wisdom_status.txt',status='unknown',err=30)
open(28,file=trim(data_dir)//'/jt9_wisdom.dat',status='old',err=30)
read(28,1000,err=30,end=30) firstline
1000 format(a40)
rewind 28
isuccess=0
wisfile=trim(data_dir)//'/jt9_wisdom.dat'
n=len_trim(wisfile)
call import_wisdom(wisfile(1:n)//char(0),isuccess)
close(28)
30 if(isuccess.ne.0) then
write(14,1010) firstline
1010 format('Imported FFTW wisdom (jt9): ',a40)
else
write(14,1011)
1011 format('No imported FFTW wisdom (jt9):')
endif
call flush(14)
iret=fftwf_init_threads() !Initialize FFTW threading
call fftwf_plan_with_nthreads(nthreads)
! Import FFTW wisdom, if available
wisfile=trim(data_dir)//'/jt9_wisdom.dat'// C_NULL_CHAR
iret=fftwf_import_wisdom_from_filename(wisfile)
if (shmem) then
call jt9a()
@ -169,13 +163,7 @@ program jt9
print*,infile
999 continue
! Export FFTW wisdom
wisfile=trim(data_dir)//'/jt9_wisdom.dat'
n=len_trim(wisfile)
call export_wisdom(wisfile(1:n)//char(0))
write(14,1999)
1999 format('Exported FFTW wisdom (jt9): ')
call flush(14)
iret=fftwf_export_wisdom_to_filename(wisfile)
call four2a(a,-1,1,1,1) !Save wisdom and free memory
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans

View File

@ -8,7 +8,7 @@ subroutine jt9c(ss,savg,id2,nparams0)
character*20 datetime
common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, &
ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime
common/patience/npatience
common/patience/npatience,nthreads
equivalence (nparams,nutc)
nutc=id2(1)+int(savg(1)) !Silence compiler warning

View File

@ -1,4 +1,5 @@
#include <unistd.h>
#include "sleep.h"
/* usleep(3) */
void usleep_(unsigned long *microsec)

View File

@ -375,7 +375,8 @@ MainWindow::MainWindow(bool multiple, QSettings * settings, QSharedMemory *shdme
QStringList jt9_args {
"-s", QApplication::applicationName ()
, "-w", "1"
, "-w", "1" //FFTW patience
, "-m", "1" //FFTW threads
, "-e", QDir::toNativeSeparators (m_appDir)
, "-a", QDir::toNativeSeparators (m_dataDir.absolutePath ())
, "-t", QDir::toNativeSeparators (m_config.temp_dir ().absolutePath ())