Many changes in program jt9[.exe] aimed at speeding up the decoders.

The long FFTs can now use the multi-threaded FFTW routines.
Subroutine decode9.f90 was renamed jt9fano.f90.
The JT9 decoder's top-level functions were removed from decoder.f90
and put into a separate subroutine decjt90.f90.
Subroutine decoder.f90 is now configured for possible use of OpenMP 
SECTIONS, with the JT9 and JT65 decoders running concurrently on
a multi-core machine.  Note, however, that this concurrent processing 
is not yet fully implemented.  Probably calls to timer need to be removed; 
some variables used in calls to jt65a and decjt9 may need to be 
declared PRIVATE in decoder; some sections probably need to be declared 
CRITICAL; probably some SAVE statements in downstream routines have
made them not thread-safe; etc., etc.  

I'm a neophyte at using OpenMP.  Comments, suggestions, and/or tests by
others will be welcome!



git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4919 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Joe Taylor 2015-02-01 16:23:36 +00:00
parent 8c377b9c87
commit 94472ac31c
9 changed files with 75 additions and 166 deletions

View File

@ -241,8 +241,9 @@ set (wsjt_FSRCS
lib/dcoord.f90
lib/decode65a.f90
lib/decode65b.f90
lib/decode9.f90
lib/jt9fano.f90
lib/decoder.f90
lib/decjt9.f90
lib/deg2grid.f90
lib/demod64a.f90
lib/determ.f90

View File

@ -13,7 +13,7 @@ CC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gcc
FC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gfortran
CXX = c:/JTSDK/Qt5/Tools/mingw48_32/bin/g++
FFLAGS = -O2 -fbounds-check -Wall -fno-second-underscore -DWIN32
FFLAGS = -O2 -Wall -fno-second-underscore -DWIN32
CFLAGS = -I. -fbounds-check -mno-stack-arg-probe -DWIN32
# Default rules
@ -37,7 +37,7 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
nchar.o four2a.o grid2deg.o pfxdump.o wisdom.o \
symspec.o analytic.o db.o genjt9.o flat1.o smo.o \
packbits.o unpackbits.o encode232.o interleave9.o \
entail.o fano232.o gran.o sync9.o decode9.o \
entail.o fano232.o gran.o sync9.o jt9fano.o \
fil3.o decoder.o grid2n.o n2grid.o timer.o \
softsym.o getlags.o afc9.o fchisq.o twkfreq.o downsam9.o \
peakdt9.o symspec2.o stdmsg.o morse.o azdist.o geodist.o \
@ -47,14 +47,14 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
extract.o fchisq65.o demod64a.o chkhist.o interleave63.o ccf2.o \
move.o indexx.o graycode65.o twkfreq65.o smo121.o \
wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \
flat3.o polfit.o determ.o baddata.o prog_args.f90 \
options.o prog_args.o
flat3.o polfit.o determ.o baddata.o prog_args.o \
options.o fmtmsg.o decjt9.o
libjt9.a: $(OBJS1)
ar cr libjt9.a $(OBJS1)
ranlib libjt9.a
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
LIBS2 = -L'C:/JTSDK/Qt5/5.2.1/mingw48_32/lib' -lQt5Core
jt9.exe: $(OBJS2) libjt9.a
$(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \

View File

@ -1,21 +1,13 @@
subroutine decoder(ss,id2)
! Decoder for JT9.
use prog_args
include 'constants.f90'
real ss(184,NSMAX)
character*22 msg
character*20 datetime
real*4 ccfred(NSMAX)
real*4 red2(NSMAX)
logical ccfok(NSMAX)
logical done(NSMAX)
logical done65,baddata
integer*2 id2(NTMAX*12000)
real*4 dd(NTMAX*12000)
integer*1 i1SoftSymbols(207)
common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, &
ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime
common/tracer/limtrace,lu
@ -29,10 +21,11 @@ subroutine decoder(ss,id2)
if (nagain .eq. 0) then
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown')
else
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown',position='append')
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown', &
position='append')
end if
open(22,file=trim(temp_dir)//'/kvasd.dat',access='direct',recl=1024,status='unknown')
open(22,file=trim(temp_dir)//'/kvasd.dat',access='direct',recl=1024, &
status='unknown')
npts65=52*12000
if(baddata(id2,npts65)) then
@ -48,138 +41,34 @@ subroutine decoder(ss,id2)
if(newdat.ne.0) dd(1:npts65)=id2(1:npts65)
nf1=nfa
nf2=nfb
! if(nmode.eq.65+9) nf2=nfsplit
call jt65a(dd,npts65,newdat,nutc,nf1,nf2,nfqso,ntol65,nagain,ndecoded)
done65=.true.
endif
if(nmode.eq.65) go to 800
nsynced=0
ndecoded=0
nsps=0
! print*,'A'
!!$OMP PARALLEL PRIVATE(id)
!!$OMP SECTIONS
nsps=6912 !Params for JT9-1
df3=1500.0/2048.0
tstep=0.5*nsps/12000.0 !Half-symbol step (seconds)
done=.false.
nf0=0
nf1=nfa
if(nmode.eq.65+9) nf1=nfsplit
ia=max(1,nint((nf1-nf0)/df3))
ib=min(NSMAX,nint((nfb-nf0)/df3))
lag1=-(2.5/tstep + 0.9999)
lag2=5.0/tstep + 0.9999
if(newdat.ne.0) then
call timer('sync9 ',0)
call sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipk)
call timer('sync9 ',1)
endif
nsps8=nsps/8
df8=1500.0/nsps8
dblim=db(864.0/nsps8) - 26.2
do nqd=1,0,-1
limit=5000
ccflim=3.0
red2lim=1.6
schklim=2.2
if(ndepth.eq.2) then
limit=10000
ccflim=2.7
endif
if(ndepth.ge.3 .or. nqd.eq.1) then
limit=100000
ccflim=2.5
endif
ccfok=.false.
if(nqd.eq.1) then
nfa1=nfqso-ntol
nfb1=nfqso+ntol
ia=max(1,nint((nfa1-nf0)/df3))
ib=min(NSMAX,nint((nfb1-nf0)/df3))
ccfok(ia:ib)=(ccfred(ia:ib).gt.(ccflim-2.0)) .and. &
(red2(ia:ib).gt.(red2lim-1.0))
ia1=ia
ib1=ib
else
nfa1=nf1
nfb1=nfb
ia=max(1,nint((nfa1-nf0)/df3))
ib=min(NSMAX,nint((nfb1-nf0)/df3))
do i=ia,ib
ccfok(i)=ccfred(i).gt.ccflim .and. red2(i).gt.red2lim
enddo
ccfok(ia1:ib1)=.false.
endif
fgood=0.
do i=ia,ib
if(done(i) .or. (.not.ccfok(i))) cycle
f=(i-1)*df3
if(nqd.eq.1 .or. &
(ccfred(i).ge.ccflim .and. abs(f-fgood).gt.10.0*df8)) then
if(nqd.eq.0) nfreqs0=nfreqs0+1
if(nqd.eq.1) nfreqs1=nfreqs1+1
call timer('softsym ',0)
fpk=nf0 + df3*(i-1)
call softsym(id2,npts8,nsps8,newdat,fpk,syncpk,snrdb,xdt, &
freq,drift,schk,i1SoftSymbols)
call timer('softsym ',1)
sync=(syncpk+1)/4.0
if(maxval(i1SoftSymbols).eq.0) cycle
if(nqd.eq.1 .and. ((sync.lt.0.5) .or. (schk.lt.2.0))) cycle
if(nqd.ne.1 .and. ((sync.lt.1.0) .or. (schk.lt.schklim))) cycle
call timer('decode9 ',0)
call decode9(i1SoftSymbols,limit,nlim,msg)
call timer('decode9 ',1)
if(sync.lt.0.0 .or. snrdb.lt.dblim-2.0) sync=0.0
nsync=sync
if(nsync.gt.10) nsync=10
nsnr=nint(snrdb)
ndrift=nint(drift/df3)
if(msg.ne.' ') then
if(nqd.eq.0) ndecodes0=ndecodes0+1
if(nqd.eq.1) ndecodes1=ndecodes1+1
write(*,1000) nutc,nsnr,xdt,nint(freq),msg
1000 format(i4.4,i4,f5.1,i5,1x,'@',1x,a22)
write(13,1002) nutc,nsync,nsnr,xdt,freq,ndrift,msg
1002 format(i4.4,i4,i5,f6.1,f8.0,i4,3x,a22,' JT9')
iaa=max(1,i-1)
ibb=min(NSMAX,i+22)
fgood=f
nsynced=1
ndecoded=1
ccfok(iaa:ibb)=.false.
done(iaa:ibb)=.true.
call flush(6)
endif
endif
enddo
call flush(6)
if(nagain.ne.0) exit
enddo
!!$OMP SECTION
! print*,'B'
call decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol,nzhsym, &
nagain,ndepth,nmode)
!!$OMP SECTION
if(nmode.ge.65 .and. (.not.done65)) then
if(newdat.ne.0) dd(1:npts65)=id2(1:npts65)
nf1=nfa
nf2=nfb
! print*,'C'
call jt65a(dd,npts65,newdat,nutc,nf1,nf2,nfqso,ntol65,nagain,ndecoded)
endif
!!$OMP END SECTIONS NOWAIT
!!$OMP END PARALLEL
! print*,'D'
! JT65 is not yet producing info for nsynced, ndecoded.
800 write(*,1010) nsynced,ndecoded
1010 format('<DecodeFinished>',2i4)

View File

@ -3,15 +3,22 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
!Downsample from id2() into C2() so as to yield nspsd samples per symbol,
!mixing from fpk down to zero frequency.
use, intrinsic :: iso_c_binding
use FFTW3
include 'constants.f90'
parameter (NMAX1=1024*1920)
! parameter (NMAX1=1024*1920)
parameter (NMAX1=884736)
type(C_PTR) :: plan !Pointers plan for big FFT
integer*2 id2(0:8*npts8-1)
real*4 x1(0:NMAX1-1)
complex c1(0:NMAX1/2)
complex c2(0:4096-1)
real s(5000)
equivalence (c1,x1)
save
logical first
common/patience/npatience,nthreads
data first/.true./
save plan,first
nfft1=1024*nsps8 !Forward FFT length
df1=12000.0/nfft1
@ -23,14 +30,33 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
x1(i)=fac*id2(i)
enddo
x1(npts:nfft1-1)=0. !Zero the rest of x1
call timer('fft_forw',0)
call four2a(c1,nfft1,1,-1,0) !Forward FFT, r2c
call timer('fft_forw',1)
endif
nadd=1.0/df1
if(first) then
nflags=FFTW_ESTIMATE
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
if(npatience.eq.2) nflags=FFTW_MEASURE
if(npatience.eq.3) nflags=FFTW_PATIENT
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
! Plan the FFTs just once
plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags)
first=.false.
endif
if(newdat.eq.1) then
fac=6.963e-6 !Why this weird constant?
do i=0,npts-1
x1(i)=fac*id2(i)
enddo
x1(npts:nfft1-1)=0. !Zero the rest of x1
call timer('FFTbig9 ',0)
call fftwf_execute_dft_r2c(plan,x1,c1)
call timer('FFTbig9 ',1)
nadd=int(1.0/df1)
s=0.
do i=1,5000
j=(i-1)/df1
j=int((i-1)/df1)
do n=1,nadd
j=j+1
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2
@ -42,7 +68,7 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
nfft2=nfft1/ndown !Backward FFT length
nh2=nfft2/2
nf=nint(fpk)
i0=fpk/df1
i0=int(fpk/df1)
nw=100
ia=max(1,nf-nw)
@ -57,9 +83,9 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
if(i.gt.nh2) j=j-nfft2
c2(i)=fac*c1(j)
enddo
call timer('fft_back',0)
call timer('FFTsmal9',0)
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
call timer('fft_back',1)
call timer('FFTsmal9',1)
nz2=8*npts8/ndown
return

View File

@ -20,7 +20,6 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
real rfilt(NFFT2) !Filter (real)
! integer*8 plan1,plan2,plan3
type(C_PTR) :: plan1,plan2,plan3 !Pointers to FFTW plans
logical first
! include 'fftw3.f90'
equivalence (rfilt,cfilt),(rca,ca)
@ -29,7 +28,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/
common/refspec/dfref,ref(NSZ)
common/patience/npatience,nthreads
save
save first,plan1,plan2,plan3
if(npts.lt.0) go to 900 !Clean up at end of program
@ -54,7 +53,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
cfilt(i)=fac*halfpulse(i)
cfilt(nfft2+2-i)=fac*halfpulse(i)
enddo
call sfftw_execute(plan3)
call fftwf_execute_dft(plan3,cfilt,cfilt)
base=real(cfilt(nfft2/2+1))
do i=1,nfft2
@ -73,7 +72,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
nz=min(npts,nfft1)
rca(1:nz)=dd(1:nz)
rca(nz+1:)=0.
call sfftw_execute(plan1)
call fftwf_execute_dft_r2c(plan1,rca,ca)
call timer('FFTbig ',1)
call timer('flatten ',0)
@ -124,14 +123,14 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
! Do the short reverse transform, to go back to time domain.
call timer('FFTsmall',0)
call sfftw_execute(plan2)
call fftwf_execute_dft(plan2,c4a,c4a)
call timer('FFTsmall',1)
n4=min(npts/8,nfft2)
return
900 call sfftw_destroy_plan(plan1)
call sfftw_destroy_plan(plan2)
call sfftw_destroy_plan(plan3)
900 call fftwf_destroy_plan(plan1)
call fftwf_destroy_plan(plan2)
call fftwf_destroy_plan(plan3)
return
end subroutine filbig

View File

@ -164,8 +164,9 @@ program jt9
999 continue
iret=fftwf_export_wisdom_to_filename(wisfile)
call four2a(a,-1,1,1,1) !Save wisdom and free memory
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans
call four2a(a,-1,1,1,1) !Save wisdom and free memory
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans
call fftwf_cleanup_threads()
call fftwf_cleanup()
end program jt9

View File

@ -1,4 +1,4 @@
subroutine decode9(i1SoftSymbols,limit,nlim,msg)
subroutine jt9fano(i1SoftSymbols,limit,nlim,msg)
! Decoder for JT9
! Input: i1SoftSymbols(207) - Single-bit soft symbols
@ -82,4 +82,4 @@ subroutine decode9(i1SoftSymbols,limit,nlim,msg)
endif
return
end subroutine decode9
end subroutine jt9fano

View File

@ -160,7 +160,7 @@ program jt9sim
if(i4.ge.128) i1SoftSymbols(i)=i4-256
enddo
limit=1000
call decode9(i1SoftSymbols,limit,nlim,msg)
call jt9fano(i1SoftSymbols,limit,nlim,msg)
if(msg.ne.msg0) print*,'Decode error: ',msg0,' ',msg
endif
enddo

View File

@ -1,7 +1,6 @@
subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
include 'constants.f90'
! parameter (NSMAX=1365) !Max length of saved spectra
real ss(184,NSMAX)
real ss1(184)
real ccfred(NSMAX)
@ -20,7 +19,6 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
do i=ia,ib !Loop over freq range
ss1=ss(1:184,i)
call pctile(ss1,nzhsym,40,xmed)
! xmed=sum(ss1(1:nzhsym))/nzhsym
ss1=ss1/xmed - 1.0
do j=1,nzhsym
@ -66,8 +64,6 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
do j=1,nzhsym
savg(ia:ib)=savg(ia:ib) + ss(j,ia:ib)
enddo
! df=1500.0/2048.0 ! 0.732422
! df9=12000.0/6912.0 ! 1.736111
savg(ia:ib)=savg(ia:ib)/nzhsym
smo(0:20)=1.0/21.0
smo(-5:-1)=-(1.0/21.0)*(21.0/10.0)
@ -92,10 +88,7 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
red2(i)=savg2(i)-ref
if(red2(i).lt.-99.0) red2(i)=-99.0
if(red2(i).gt.99.0) red2(i)=99.0
! write(30,3001) i,i*df+1000.0,savg2(i),red2(i),ccfred(i)
!3001 format(i8,4f10.3)
enddo
! call flush(30)
return
end subroutine sync9