mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-12 07:06:17 -05:00
Remove redundant copy of large array in JT9 decoder
Also moved the same large array from stack to heap which along with other prior changes now allows the Windows jt9 OpenMP executable to run with a default stack size again. This also removes a crash on the Mac version which was probably due to excessive stack usage. Net result is an even faster JT9 decoder. git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4942 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
c86f37cade
commit
127f72f3ad
@ -743,9 +743,6 @@ qt5_use_modules (jt9 Core)
|
|||||||
|
|
||||||
if (${OPENMP_FOUND})
|
if (${OPENMP_FOUND})
|
||||||
add_executable (jt9_omp lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXXSRCS} wsjtx.rc)
|
add_executable (jt9_omp lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXXSRCS} wsjtx.rc)
|
||||||
if (WIN32)
|
|
||||||
set (_extra_omp_link_flags "-Wl,--stack,8388608")
|
|
||||||
endif (WIN32)
|
|
||||||
set_target_properties (jt9_omp
|
set_target_properties (jt9_omp
|
||||||
PROPERTIES
|
PROPERTIES
|
||||||
COMPILE_FLAGS "${OpenMP_C_FLAGS}"
|
COMPILE_FLAGS "${OpenMP_C_FLAGS}"
|
||||||
|
179
lib/downsam9.f90
179
lib/downsam9.f90
@ -1,93 +1,86 @@
|
|||||||
subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2)
|
||||||
|
|
||||||
!Downsample from id2() into c2() so as to yield nspsd samples per symbol,
|
!Downsample from id2() into c2() so as to yield nspsd samples per symbol,
|
||||||
!mixing from fpk down to zero frequency. The downsample factor is 432.
|
!mixing from fpk down to zero frequency. The downsample factor is 432.
|
||||||
|
|
||||||
use, intrinsic :: iso_c_binding
|
use, intrinsic :: iso_c_binding
|
||||||
use FFTW3
|
use FFTW3
|
||||||
|
|
||||||
include 'constants.f90'
|
include 'constants.f90'
|
||||||
parameter (NMAX1=604800)
|
parameter (NMAX1=604800)
|
||||||
type(C_PTR) :: plan !Pointers plan for big FFT
|
type(C_PTR) :: plan !Pointers plan for big FFT
|
||||||
integer*2 id2(0:8*npts8-1)
|
integer*2 id2(0:8*npts8-1)
|
||||||
real*4 x1(0:NMAX1-1)
|
real*4, pointer :: x1(:)
|
||||||
complex c1(0:NMAX1/2)
|
complex c1(0:NMAX1/2)
|
||||||
complex c2(0:1440-1)
|
complex c2(0:1440-1)
|
||||||
real s(5000)
|
real s(5000)
|
||||||
logical first
|
logical first
|
||||||
common/patience/npatience,nthreads
|
common/patience/npatience,nthreads
|
||||||
data first/.true./
|
data first/.true./
|
||||||
save plan,first,c1,s
|
save plan,first,c1,s,x1
|
||||||
|
|
||||||
nfft1=NMAX1 !Forward FFT length
|
nfft1=NMAX1 !Forward FFT length
|
||||||
df1=12000.0/nfft1
|
df1=12000.0/nfft1
|
||||||
npts=8*npts8
|
npts=8*npts8
|
||||||
|
|
||||||
if(newdat.eq.1) then
|
if(first) then
|
||||||
fac=6.963e-6 !Why this weird constant?
|
nflags=FFTW_ESTIMATE
|
||||||
do i=0,npts-1
|
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
|
||||||
x1(i)=fac*id2(i)
|
if(npatience.eq.2) nflags=FFTW_MEASURE
|
||||||
enddo
|
if(npatience.eq.3) nflags=FFTW_PATIENT
|
||||||
x1(npts:nfft1-1)=0. !Zero the rest of x1
|
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
|
||||||
endif
|
! Plan the FFTs just once
|
||||||
|
|
||||||
if(first) then
|
!$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
|
||||||
nflags=FFTW_ESTIMATE
|
plan=fftwf_alloc_real(NMAX1)
|
||||||
if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT
|
call c_f_pointer(plan,x1,[NMAX1])
|
||||||
if(npatience.eq.2) nflags=FFTW_MEASURE
|
x1(0:NMAX1-1) => x1 !remap bounds
|
||||||
if(npatience.eq.3) nflags=FFTW_PATIENT
|
call fftwf_plan_with_nthreads(nthreads)
|
||||||
if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE
|
plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags)
|
||||||
! Plan the FFTs just once
|
call fftwf_plan_with_nthreads(1)
|
||||||
|
!$omp end critical(fftw)
|
||||||
!$omp critical(fftw) ! serialize non thread-safe FFTW3 calls
|
|
||||||
call fftwf_plan_with_nthreads(nthreads)
|
first=.false.
|
||||||
plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags)
|
endif
|
||||||
call fftwf_plan_with_nthreads(1)
|
|
||||||
!$omp end critical(fftw)
|
if(newdat.eq.1) then
|
||||||
|
fac=6.963e-6 !Why this weird constant?
|
||||||
first=.false.
|
x1(0:npts-1)=fac*id2(0:npts-1)
|
||||||
endif
|
x1(npts:nfft1-1)=0. !Zero the rest of x1
|
||||||
|
call timer('FFTbig9 ',0)
|
||||||
if(newdat.eq.1) then
|
call fftwf_execute_dft_r2c(plan,x1,c1)
|
||||||
fac=6.963e-6 !Why this weird constant?
|
call timer('FFTbig9 ',1)
|
||||||
do i=0,npts-1
|
|
||||||
x1(i)=fac*id2(i)
|
nadd=int(1.0/df1)
|
||||||
enddo
|
s=0.
|
||||||
x1(npts:nfft1-1)=0. !Zero the rest of x1
|
do i=1,5000
|
||||||
call timer('FFTbig9 ',0)
|
j=int((i-1)/df1)
|
||||||
call fftwf_execute_dft_r2c(plan,x1,c1)
|
do n=1,nadd
|
||||||
call timer('FFTbig9 ',1)
|
j=j+1
|
||||||
|
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2
|
||||||
nadd=int(1.0/df1)
|
enddo
|
||||||
s=0.
|
enddo
|
||||||
do i=1,5000
|
endif
|
||||||
j=int((i-1)/df1)
|
|
||||||
do n=1,nadd
|
ndown=8*nsps8/nspsd !Downsample factor
|
||||||
j=j+1
|
nfft2=nfft1/ndown !Backward FFT length
|
||||||
s(i)=s(i)+real(c1(j))**2 + aimag(c1(j))**2
|
nh2=nfft2/2
|
||||||
enddo
|
nf=nint(fpk)
|
||||||
enddo
|
i0=int(fpk/df1)
|
||||||
endif
|
|
||||||
|
nw=100
|
||||||
ndown=8*nsps8/nspsd !Downsample factor
|
ia=max(1,nf-nw)
|
||||||
nfft2=nfft1/ndown !Backward FFT length
|
ib=min(5000,nf+nw)
|
||||||
nh2=nfft2/2
|
call pctile(s(ia),ib-ia+1,40,avenoise)
|
||||||
nf=nint(fpk)
|
|
||||||
i0=int(fpk/df1)
|
fac=sqrt(1.0/avenoise)
|
||||||
|
do i=0,nfft2-1
|
||||||
nw=100
|
j=i0+i
|
||||||
ia=max(1,nf-nw)
|
if(i.gt.nh2) j=j-nfft2
|
||||||
ib=min(5000,nf+nw)
|
c2(i)=fac*c1(j)
|
||||||
call pctile(s(ia),ib-ia+1,40,avenoise)
|
enddo
|
||||||
|
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
|
||||||
fac=sqrt(1.0/avenoise)
|
nz2=8*npts8/ndown
|
||||||
do i=0,nfft2-1
|
|
||||||
j=i0+i
|
return
|
||||||
if(i.gt.nh2) j=j-nfft2
|
end subroutine downsam9
|
||||||
c2(i)=fac*c1(j)
|
|
||||||
enddo
|
|
||||||
call four2a(c2,nfft2,1,1,1) !FFT back to time domain
|
|
||||||
nz2=8*npts8/ndown
|
|
||||||
|
|
||||||
return
|
|
||||||
end subroutine downsam9
|
|
||||||
|
Loading…
Reference in New Issue
Block a user