Remove redundant copy of large array in JT9 decoder

Also moved  the same large array  from stack to heap  which along with
other prior  changes now allows  the Windows jt9 OpenMP  executable to
run with a default stack size again.

This also removes a crash on the Mac version which was probably due to
excessive stack usage.

Net result is an even faster JT9 decoder.

git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4942 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Bill Somerville 2015-02-08 09:53:20 +00:00
parent c86f37cade
commit 127f72f3ad
2 changed files with 86 additions and 96 deletions

View File

@ -743,9 +743,6 @@ qt5_use_modules (jt9 Core)
if (${OPENMP_FOUND}) if (${OPENMP_FOUND})
add_executable (jt9_omp lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXXSRCS} wsjtx.rc) add_executable (jt9_omp lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXXSRCS} wsjtx.rc)
if (WIN32)
set (_extra_omp_link_flags "-Wl,--stack,8388608")
endif (WIN32)
set_target_properties (jt9_omp set_target_properties (jt9_omp
PROPERTIES PROPERTIES
COMPILE_FLAGS "${OpenMP_C_FLAGS}" COMPILE_FLAGS "${OpenMP_C_FLAGS}"

View File

@ -1,93 +1,86 @@
subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2) subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
!Downsample from id2() into c2() so as to yield nspsd samples per symbol, !Downsample from id2() into c2() so as to yield nspsd samples per symbol,
!mixing from fpk down to zero frequency. The downsample factor is 432. !mixing from fpk down to zero frequency. The downsample factor is 432.
use, intrinsic :: iso_c_binding use, intrinsic :: iso_c_binding
use FFTW3 use FFTW3
include 'constants.f90' include 'constants.f90'
parameter (NMAX1=604800) parameter (NMAX1=604800)
type(C_PTR) :: plan !Pointers plan for big FFT type(C_PTR) :: plan !Pointers plan for big FFT
integer*2 id2(0:8*npts8-1) integer*2 id2(0:8*npts8-1)
real*4 x1(0:NMAX1-1) real*4, pointer :: x1(:)
complex c1(0:NMAX1/2) complex c1(0:NMAX1/2)
complex c2(0:1440-1) complex c2(0:1440-1)
real s(5000) real s(5000)
logical first logical first
common/patience/npatience,nthreads common/patience/npatience,nthreads
data first/.true./ data first/.true./
save plan,first,c1,s save plan,first,c1,s,x1
nfft1=NMAX1 !Forward FFT length nfft1=NMAX1 !Forward FFT length
df1=12000.0/nfft1 df1=12000.0/nfft1
npts=8*npts8 npts=8*npts8
if(newdat.eq.1) then if(first) then
fac=6.963e-6 !Why this weird constant? nflags=FFTW_ESTIMATE
do i=0,npts-1 if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
x1(i)=fac*id2(i) if(npatience.eq.2) nflags=FFTW_MEASURE
enddo if(npatience.eq.3) nflags=FFTW_PATIENT
x1(npts:nfft1-1)=0. !Zero the rest of x1 if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
endif ! Plan the FFTs just once
if(first) then !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
nflags=FFTW_ESTIMATE plan=fftwf_alloc_real(NMAX1)
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT call c_f_pointer(plan,x1,[NMAX1])
if(npatience.eq.2) nflags=FFTW_MEASURE x1(0:NMAX1-1) => x1 !remap bounds
if(npatience.eq.3) nflags=FFTW_PATIENT call fftwf_plan_with_nthreads(nthreads)
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags)
! Plan the FFTs just once call fftwf_plan_with_nthreads(1)
!$omp end critical(fftw)
!$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
call fftwf_plan_with_nthreads(nthreads) first=.false.
plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags) endif
call fftwf_plan_with_nthreads(1)
!$omp end critical(fftw) if(newdat.eq.1) then
fac=6.963e-6 !Why this weird constant?
first=.false. x1(0:npts-1)=fac*id2(0:npts-1)
endif x1(npts:nfft1-1)=0. !Zero the rest of x1
call timer('FFTbig9 ',0)
if(newdat.eq.1) then call fftwf_execute_dft_r2c(plan,x1,c1)
fac=6.963e-6 !Why this weird constant? call timer('FFTbig9 ',1)
do i=0,npts-1
x1(i)=fac*id2(i) nadd=int(1.0/df1)
enddo s=0.
x1(npts:nfft1-1)=0. !Zero the rest of x1 do i=1,5000
call timer('FFTbig9 ',0) j=int((i-1)/df1)
call fftwf_execute_dft_r2c(plan,x1,c1) do n=1,nadd
call timer('FFTbig9 ',1) j=j+1
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2
nadd=int(1.0/df1) enddo
s=0. enddo
do i=1,5000 endif
j=int((i-1)/df1)
do n=1,nadd ndown=8*nsps8/nspsd !Downsample factor
j=j+1 nfft2=nfft1/ndown !Backward FFT length
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2 nh2=nfft2/2
enddo nf=nint(fpk)
enddo i0=int(fpk/df1)
endif
nw=100
ndown=8*nsps8/nspsd !Downsample factor ia=max(1,nf-nw)
nfft2=nfft1/ndown !Backward FFT length ib=min(5000,nf+nw)
nh2=nfft2/2 call pctile(s(ia),ib-ia+1,40,avenoise)
nf=nint(fpk)
i0=int(fpk/df1) fac=sqrt(1.0/avenoise)
do i=0,nfft2-1
nw=100 j=i0+i
ia=max(1,nf-nw) if(i.gt.nh2) j=j-nfft2
ib=min(5000,nf+nw) c2(i)=fac*c1(j)
call pctile(s(ia),ib-ia+1,40,avenoise) enddo
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
fac=sqrt(1.0/avenoise) nz2=8*npts8/ndown
do i=0,nfft2-1
j=i0+i return
if(i.gt.nh2) j=j-nfft2 end subroutine downsam9
c2(i)=fac*c1(j)
enddo
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
nz2=8*npts8/ndown
return
end subroutine downsam9