mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-23 12:48:40 -05:00
Many changes in program jt9[.exe] aimed at speeding up the decoders.
The long FFTs can now use the multi-threaded FFTW routines. Subroutine decode9.f90 was renamed jt9fano.f90. The JT9 decoder's top-level functions were removed from decoder.f90 and put into a separate subroutine decjt90.f90. Subroutine decoder.f90 is now configured for possible use of OpenMP SECTIONS, with the JT9 and JT65 decoders running concurrently on a multi-core machine. Note, however, that this concurrent processing is not yet fully implemented. Probably calls to timer need to be removed; some variables used in calls to jt65a and decjt9 may need to be declared PRIVATE in decoder; some sections probably need to be declared CRITICAL; probably some SAVE statements in downstream routines have made them not thread-safe; etc., etc. I'm a neophyte at using OpenMP. Comments, suggestions, and/or tests by others will be welcome! git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4919 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
30416c287f
commit
960da46d28
@ -241,8 +241,9 @@ set (wsjt_FSRCS
|
||||
lib/dcoord.f90
|
||||
lib/decode65a.f90
|
||||
lib/decode65b.f90
|
||||
lib/decode9.f90
|
||||
lib/jt9fano.f90
|
||||
lib/decoder.f90
|
||||
lib/decjt9.f90
|
||||
lib/deg2grid.f90
|
||||
lib/demod64a.f90
|
||||
lib/determ.f90
|
||||
|
@ -13,7 +13,7 @@ CC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gcc
|
||||
FC = c:/JTSDK/Qt5/Tools/mingw48_32/bin/gfortran
|
||||
CXX = c:/JTSDK/Qt5/Tools/mingw48_32/bin/g++
|
||||
|
||||
FFLAGS = -O2 -fbounds-check -Wall -fno-second-underscore -DWIN32
|
||||
FFLAGS = -O2 -Wall -fno-second-underscore -DWIN32
|
||||
CFLAGS = -I. -fbounds-check -mno-stack-arg-probe -DWIN32
|
||||
|
||||
# Default rules
|
||||
@ -37,7 +37,7 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
|
||||
nchar.o four2a.o grid2deg.o pfxdump.o wisdom.o \
|
||||
symspec.o analytic.o db.o genjt9.o flat1.o smo.o \
|
||||
packbits.o unpackbits.o encode232.o interleave9.o \
|
||||
entail.o fano232.o gran.o sync9.o decode9.o \
|
||||
entail.o fano232.o gran.o sync9.o jt9fano.o \
|
||||
fil3.o decoder.o grid2n.o n2grid.o timer.o \
|
||||
softsym.o getlags.o afc9.o fchisq.o twkfreq.o downsam9.o \
|
||||
peakdt9.o symspec2.o stdmsg.o morse.o azdist.o geodist.o \
|
||||
@ -47,14 +47,14 @@ OBJS1 = pctile.o graycode.o sort.o ssort.o chkmsg.o \
|
||||
extract.o fchisq65.o demod64a.o chkhist.o interleave63.o ccf2.o \
|
||||
move.o indexx.o graycode65.o twkfreq65.o smo121.o \
|
||||
wrapkarn.o init_rs.o encode_rs.o decode_rs.o gen65.o fil4.o \
|
||||
flat3.o polfit.o determ.o baddata.o prog_args.f90 \
|
||||
options.o prog_args.o
|
||||
flat3.o polfit.o determ.o baddata.o prog_args.o \
|
||||
options.o fmtmsg.o decjt9.o
|
||||
|
||||
libjt9.a: $(OBJS1)
|
||||
ar cr libjt9.a $(OBJS1)
|
||||
ranlib libjt9.a
|
||||
|
||||
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
|
||||
OBJS2 = jt9.o jt9a.o jt9b.o jt9c.o ipcomm.o sec_midn.o usleep.o
|
||||
LIBS2 = -L'C:/JTSDK/Qt5/5.2.1/mingw48_32/lib' -lQt5Core
|
||||
jt9.exe: $(OBJS2) libjt9.a
|
||||
$(CXX) -o jt9.exe -static $(OBJS2) $(LIBS2) libjt9.a \
|
||||
|
145
lib/decoder.f90
145
lib/decoder.f90
@ -1,21 +1,13 @@
|
||||
subroutine decoder(ss,id2)
|
||||
|
||||
! Decoder for JT9.
|
||||
|
||||
use prog_args
|
||||
|
||||
include 'constants.f90'
|
||||
real ss(184,NSMAX)
|
||||
character*22 msg
|
||||
character*20 datetime
|
||||
real*4 ccfred(NSMAX)
|
||||
real*4 red2(NSMAX)
|
||||
logical ccfok(NSMAX)
|
||||
logical done(NSMAX)
|
||||
logical done65,baddata
|
||||
integer*2 id2(NTMAX*12000)
|
||||
real*4 dd(NTMAX*12000)
|
||||
integer*1 i1SoftSymbols(207)
|
||||
common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, &
|
||||
ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime
|
||||
common/tracer/limtrace,lu
|
||||
@ -29,10 +21,11 @@ subroutine decoder(ss,id2)
|
||||
if (nagain .eq. 0) then
|
||||
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown')
|
||||
else
|
||||
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown',position='append')
|
||||
open(13,file=trim(temp_dir)//'/decoded.txt',status='unknown', &
|
||||
position='append')
|
||||
end if
|
||||
|
||||
open(22,file=trim(temp_dir)//'/kvasd.dat',access='direct',recl=1024,status='unknown')
|
||||
open(22,file=trim(temp_dir)//'/kvasd.dat',access='direct',recl=1024, &
|
||||
status='unknown')
|
||||
|
||||
npts65=52*12000
|
||||
if(baddata(id2,npts65)) then
|
||||
@ -48,138 +41,34 @@ subroutine decoder(ss,id2)
|
||||
if(newdat.ne.0) dd(1:npts65)=id2(1:npts65)
|
||||
nf1=nfa
|
||||
nf2=nfb
|
||||
! if(nmode.eq.65+9) nf2=nfsplit
|
||||
call jt65a(dd,npts65,newdat,nutc,nf1,nf2,nfqso,ntol65,nagain,ndecoded)
|
||||
done65=.true.
|
||||
endif
|
||||
|
||||
if(nmode.eq.65) go to 800
|
||||
|
||||
nsynced=0
|
||||
ndecoded=0
|
||||
nsps=0
|
||||
! print*,'A'
|
||||
!!$OMP PARALLEL PRIVATE(id)
|
||||
!!$OMP SECTIONS
|
||||
|
||||
nsps=6912 !Params for JT9-1
|
||||
df3=1500.0/2048.0
|
||||
|
||||
tstep=0.5*nsps/12000.0 !Half-symbol step (seconds)
|
||||
done=.false.
|
||||
|
||||
nf0=0
|
||||
nf1=nfa
|
||||
if(nmode.eq.65+9) nf1=nfsplit
|
||||
ia=max(1,nint((nf1-nf0)/df3))
|
||||
ib=min(NSMAX,nint((nfb-nf0)/df3))
|
||||
lag1=-(2.5/tstep + 0.9999)
|
||||
lag2=5.0/tstep + 0.9999
|
||||
if(newdat.ne.0) then
|
||||
call timer('sync9 ',0)
|
||||
call sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipk)
|
||||
call timer('sync9 ',1)
|
||||
endif
|
||||
|
||||
nsps8=nsps/8
|
||||
df8=1500.0/nsps8
|
||||
dblim=db(864.0/nsps8) - 26.2
|
||||
|
||||
do nqd=1,0,-1
|
||||
limit=5000
|
||||
ccflim=3.0
|
||||
red2lim=1.6
|
||||
schklim=2.2
|
||||
if(ndepth.eq.2) then
|
||||
limit=10000
|
||||
ccflim=2.7
|
||||
endif
|
||||
if(ndepth.ge.3 .or. nqd.eq.1) then
|
||||
limit=100000
|
||||
ccflim=2.5
|
||||
endif
|
||||
ccfok=.false.
|
||||
|
||||
if(nqd.eq.1) then
|
||||
nfa1=nfqso-ntol
|
||||
nfb1=nfqso+ntol
|
||||
ia=max(1,nint((nfa1-nf0)/df3))
|
||||
ib=min(NSMAX,nint((nfb1-nf0)/df3))
|
||||
ccfok(ia:ib)=(ccfred(ia:ib).gt.(ccflim-2.0)) .and. &
|
||||
(red2(ia:ib).gt.(red2lim-1.0))
|
||||
ia1=ia
|
||||
ib1=ib
|
||||
else
|
||||
nfa1=nf1
|
||||
nfb1=nfb
|
||||
ia=max(1,nint((nfa1-nf0)/df3))
|
||||
ib=min(NSMAX,nint((nfb1-nf0)/df3))
|
||||
do i=ia,ib
|
||||
ccfok(i)=ccfred(i).gt.ccflim .and. red2(i).gt.red2lim
|
||||
enddo
|
||||
ccfok(ia1:ib1)=.false.
|
||||
endif
|
||||
|
||||
fgood=0.
|
||||
do i=ia,ib
|
||||
if(done(i) .or. (.not.ccfok(i))) cycle
|
||||
f=(i-1)*df3
|
||||
if(nqd.eq.1 .or. &
|
||||
(ccfred(i).ge.ccflim .and. abs(f-fgood).gt.10.0*df8)) then
|
||||
|
||||
if(nqd.eq.0) nfreqs0=nfreqs0+1
|
||||
if(nqd.eq.1) nfreqs1=nfreqs1+1
|
||||
|
||||
call timer('softsym ',0)
|
||||
fpk=nf0 + df3*(i-1)
|
||||
|
||||
call softsym(id2,npts8,nsps8,newdat,fpk,syncpk,snrdb,xdt, &
|
||||
freq,drift,schk,i1SoftSymbols)
|
||||
call timer('softsym ',1)
|
||||
|
||||
sync=(syncpk+1)/4.0
|
||||
if(maxval(i1SoftSymbols).eq.0) cycle
|
||||
if(nqd.eq.1 .and. ((sync.lt.0.5) .or. (schk.lt.2.0))) cycle
|
||||
if(nqd.ne.1 .and. ((sync.lt.1.0) .or. (schk.lt.schklim))) cycle
|
||||
|
||||
call timer('decode9 ',0)
|
||||
call decode9(i1SoftSymbols,limit,nlim,msg)
|
||||
call timer('decode9 ',1)
|
||||
|
||||
if(sync.lt.0.0 .or. snrdb.lt.dblim-2.0) sync=0.0
|
||||
nsync=sync
|
||||
if(nsync.gt.10) nsync=10
|
||||
nsnr=nint(snrdb)
|
||||
ndrift=nint(drift/df3)
|
||||
|
||||
if(msg.ne.' ') then
|
||||
if(nqd.eq.0) ndecodes0=ndecodes0+1
|
||||
if(nqd.eq.1) ndecodes1=ndecodes1+1
|
||||
|
||||
write(*,1000) nutc,nsnr,xdt,nint(freq),msg
|
||||
1000 format(i4.4,i4,f5.1,i5,1x,'@',1x,a22)
|
||||
write(13,1002) nutc,nsync,nsnr,xdt,freq,ndrift,msg
|
||||
1002 format(i4.4,i4,i5,f6.1,f8.0,i4,3x,a22,' JT9')
|
||||
|
||||
iaa=max(1,i-1)
|
||||
ibb=min(NSMAX,i+22)
|
||||
fgood=f
|
||||
nsynced=1
|
||||
ndecoded=1
|
||||
ccfok(iaa:ibb)=.false.
|
||||
done(iaa:ibb)=.true.
|
||||
call flush(6)
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
call flush(6)
|
||||
if(nagain.ne.0) exit
|
||||
enddo
|
||||
!!$OMP SECTION
|
||||
! print*,'B'
|
||||
call decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol,nzhsym, &
|
||||
nagain,ndepth,nmode)
|
||||
|
||||
!!$OMP SECTION
|
||||
if(nmode.ge.65 .and. (.not.done65)) then
|
||||
if(newdat.ne.0) dd(1:npts65)=id2(1:npts65)
|
||||
nf1=nfa
|
||||
nf2=nfb
|
||||
! print*,'C'
|
||||
call jt65a(dd,npts65,newdat,nutc,nf1,nf2,nfqso,ntol65,nagain,ndecoded)
|
||||
endif
|
||||
|
||||
!!$OMP END SECTIONS NOWAIT
|
||||
!!$OMP END PARALLEL
|
||||
! print*,'D'
|
||||
|
||||
! JT65 is not yet producing info for nsynced, ndecoded.
|
||||
800 write(*,1010) nsynced,ndecoded
|
||||
1010 format('<DecodeFinished>',2i4)
|
||||
|
@ -3,15 +3,22 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
||||
!Downsample from id2() into C2() so as to yield nspsd samples per symbol,
|
||||
!mixing from fpk down to zero frequency.
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
use FFTW3
|
||||
|
||||
include 'constants.f90'
|
||||
parameter (NMAX1=1024*1920)
|
||||
! parameter (NMAX1=1024*1920)
|
||||
parameter (NMAX1=884736)
|
||||
type(C_PTR) :: plan !Pointers plan for big FFT
|
||||
integer*2 id2(0:8*npts8-1)
|
||||
real*4 x1(0:NMAX1-1)
|
||||
complex c1(0:NMAX1/2)
|
||||
complex c2(0:4096-1)
|
||||
real s(5000)
|
||||
equivalence (c1,x1)
|
||||
save
|
||||
logical first
|
||||
common/patience/npatience,nthreads
|
||||
data first/.true./
|
||||
save plan,first
|
||||
|
||||
nfft1=1024*nsps8 !Forward FFT length
|
||||
df1=12000.0/nfft1
|
||||
@ -23,14 +30,33 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
||||
x1(i)=fac*id2(i)
|
||||
enddo
|
||||
x1(npts:nfft1-1)=0. !Zero the rest of x1
|
||||
call timer('fft_forw',0)
|
||||
call four2a(c1,nfft1,1,-1,0) !Forward FFT, r2c
|
||||
call timer('fft_forw',1)
|
||||
endif
|
||||
|
||||
nadd=1.0/df1
|
||||
if(first) then
|
||||
nflags=FFTW_ESTIMATE
|
||||
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
|
||||
if(npatience.eq.2) nflags=FFTW_MEASURE
|
||||
if(npatience.eq.3) nflags=FFTW_PATIENT
|
||||
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
|
||||
! Plan the FFTs just once
|
||||
plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags)
|
||||
first=.false.
|
||||
endif
|
||||
|
||||
if(newdat.eq.1) then
|
||||
fac=6.963e-6 !Why this weird constant?
|
||||
do i=0,npts-1
|
||||
x1(i)=fac*id2(i)
|
||||
enddo
|
||||
x1(npts:nfft1-1)=0. !Zero the rest of x1
|
||||
call timer('FFTbig9 ',0)
|
||||
call fftwf_execute_dft_r2c(plan,x1,c1)
|
||||
call timer('FFTbig9 ',1)
|
||||
|
||||
nadd=int(1.0/df1)
|
||||
s=0.
|
||||
do i=1,5000
|
||||
j=(i-1)/df1
|
||||
j=int((i-1)/df1)
|
||||
do n=1,nadd
|
||||
j=j+1
|
||||
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2
|
||||
@ -42,7 +68,7 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
||||
nfft2=nfft1/ndown !Backward FFT length
|
||||
nh2=nfft2/2
|
||||
nf=nint(fpk)
|
||||
i0=fpk/df1
|
||||
i0=int(fpk/df1)
|
||||
|
||||
nw=100
|
||||
ia=max(1,nf-nw)
|
||||
@ -57,9 +83,9 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
||||
if(i.gt.nh2) j=j-nfft2
|
||||
c2(i)=fac*c1(j)
|
||||
enddo
|
||||
call timer('fft_back',0)
|
||||
call timer('FFTsmal9',0)
|
||||
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
|
||||
call timer('fft_back',1)
|
||||
call timer('FFTsmal9',1)
|
||||
nz2=8*npts8/ndown
|
||||
|
||||
return
|
||||
|
@ -20,7 +20,6 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
real rfilt(NFFT2) !Filter (real)
|
||||
! integer*8 plan1,plan2,plan3
|
||||
type(C_PTR) :: plan1,plan2,plan3 !Pointers to FFTW plans
|
||||
|
||||
logical first
|
||||
! include 'fftw3.f90'
|
||||
equivalence (rfilt,cfilt),(rca,ca)
|
||||
@ -29,7 +28,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
5.89886379,1.59355187,-2.49138308,0.60910773,-0.04248129/
|
||||
common/refspec/dfref,ref(NSZ)
|
||||
common/patience/npatience,nthreads
|
||||
save
|
||||
save first,plan1,plan2,plan3
|
||||
|
||||
if(npts.lt.0) go to 900 !Clean up at end of program
|
||||
|
||||
@ -54,7 +53,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
cfilt(i)=fac*halfpulse(i)
|
||||
cfilt(nfft2+2-i)=fac*halfpulse(i)
|
||||
enddo
|
||||
call sfftw_execute(plan3)
|
||||
call fftwf_execute_dft(plan3,cfilt,cfilt)
|
||||
|
||||
base=real(cfilt(nfft2/2+1))
|
||||
do i=1,nfft2
|
||||
@ -73,7 +72,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
nz=min(npts,nfft1)
|
||||
rca(1:nz)=dd(1:nz)
|
||||
rca(nz+1:)=0.
|
||||
call sfftw_execute(plan1)
|
||||
call fftwf_execute_dft_r2c(plan1,rca,ca)
|
||||
call timer('FFTbig ',1)
|
||||
|
||||
call timer('flatten ',0)
|
||||
@ -124,14 +123,14 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
|
||||
! Do the short reverse transform, to go back to time domain.
|
||||
call timer('FFTsmall',0)
|
||||
call sfftw_execute(plan2)
|
||||
call fftwf_execute_dft(plan2,c4a,c4a)
|
||||
call timer('FFTsmall',1)
|
||||
n4=min(npts/8,nfft2)
|
||||
return
|
||||
|
||||
900 call sfftw_destroy_plan(plan1)
|
||||
call sfftw_destroy_plan(plan2)
|
||||
call sfftw_destroy_plan(plan3)
|
||||
900 call fftwf_destroy_plan(plan1)
|
||||
call fftwf_destroy_plan(plan2)
|
||||
call fftwf_destroy_plan(plan3)
|
||||
|
||||
return
|
||||
end subroutine filbig
|
||||
|
@ -164,8 +164,9 @@ program jt9
|
||||
|
||||
999 continue
|
||||
iret=fftwf_export_wisdom_to_filename(wisfile)
|
||||
|
||||
call four2a(a,-1,1,1,1) !Save wisdom and free memory
|
||||
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans
|
||||
call four2a(a,-1,1,1,1) !Save wisdom and free memory
|
||||
call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans
|
||||
call fftwf_cleanup_threads()
|
||||
call fftwf_cleanup()
|
||||
|
||||
end program jt9
|
||||
|
@ -1,4 +1,4 @@
|
||||
subroutine decode9(i1SoftSymbols,limit,nlim,msg)
|
||||
subroutine jt9fano(i1SoftSymbols,limit,nlim,msg)
|
||||
|
||||
! Decoder for JT9
|
||||
! Input: i1SoftSymbols(207) - Single-bit soft symbols
|
||||
@ -82,4 +82,4 @@ subroutine decode9(i1SoftSymbols,limit,nlim,msg)
|
||||
endif
|
||||
|
||||
return
|
||||
end subroutine decode9
|
||||
end subroutine jt9fano
|
@ -160,7 +160,7 @@ program jt9sim
|
||||
if(i4.ge.128) i1SoftSymbols(i)=i4-256
|
||||
enddo
|
||||
limit=1000
|
||||
call decode9(i1SoftSymbols,limit,nlim,msg)
|
||||
call jt9fano(i1SoftSymbols,limit,nlim,msg)
|
||||
if(msg.ne.msg0) print*,'Decode error: ',msg0,' ',msg
|
||||
endif
|
||||
enddo
|
||||
|
@ -1,7 +1,6 @@
|
||||
subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
|
||||
|
||||
include 'constants.f90'
|
||||
! parameter (NSMAX=1365) !Max length of saved spectra
|
||||
real ss(184,NSMAX)
|
||||
real ss1(184)
|
||||
real ccfred(NSMAX)
|
||||
@ -20,7 +19,6 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
|
||||
do i=ia,ib !Loop over freq range
|
||||
ss1=ss(1:184,i)
|
||||
call pctile(ss1,nzhsym,40,xmed)
|
||||
! xmed=sum(ss1(1:nzhsym))/nzhsym
|
||||
|
||||
ss1=ss1/xmed - 1.0
|
||||
do j=1,nzhsym
|
||||
@ -66,8 +64,6 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
|
||||
do j=1,nzhsym
|
||||
savg(ia:ib)=savg(ia:ib) + ss(j,ia:ib)
|
||||
enddo
|
||||
! df=1500.0/2048.0 ! 0.732422
|
||||
! df9=12000.0/6912.0 ! 1.736111
|
||||
savg(ia:ib)=savg(ia:ib)/nzhsym
|
||||
smo(0:20)=1.0/21.0
|
||||
smo(-5:-1)=-(1.0/21.0)*(21.0/10.0)
|
||||
@ -92,10 +88,7 @@ subroutine sync9(ss,nzhsym,lag1,lag2,ia,ib,ccfred,red2,ipkbest)
|
||||
red2(i)=savg2(i)-ref
|
||||
if(red2(i).lt.-99.0) red2(i)=-99.0
|
||||
if(red2(i).gt.99.0) red2(i)=99.0
|
||||
! write(30,3001) i,i*df+1000.0,savg2(i),red2(i),ccfred(i)
|
||||
!3001 format(i8,4f10.3)
|
||||
enddo
|
||||
! call flush(30)
|
||||
|
||||
return
|
||||
end subroutine sync9
|
||||
|
Loading…
Reference in New Issue
Block a user