diff --git a/lib/decjt9.f90 b/lib/decjt9.f90 index f916b8c96..799903c0f 100644 --- a/lib/decjt9.f90 +++ b/lib/decjt9.f90 @@ -110,9 +110,13 @@ subroutine decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol, & if(nqd.eq.0) ndecodes0=ndecodes0+1 if(nqd.eq.1) ndecodes1=ndecodes1+1 + !$omp critical(decode_results) ! serialize writes - see also jt65a.f90 write(*,1000) nutc,nsnr,xdt,nint(freq),msg -1000 format(i4.4,i4,f5.1,i5,1x,'@',1x,a22) write(13,1002) nutc,nsync,nsnr,xdt,freq,ndrift,msg + call flush(6) + !$omp end critical(decode_results) + +1000 format(i4.4,i4,f5.1,i5,1x,'@',1x,a22) 1002 format(i4.4,i4,i5,f6.1,f8.0,i4,3x,a22,' JT9') iaa=max(1,i-1) @@ -122,11 +126,9 @@ subroutine decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol, & ndecoded=1 ccfok(iaa:ibb)=.false. done(iaa:ibb)=.true. - call flush(6) endif endif enddo - call flush(6) if(nagain.ne.0) exit enddo diff --git a/lib/decoder.f90 b/lib/decoder.f90 index 4d3013b1d..1d1e49f6c 100644 --- a/lib/decoder.f90 +++ b/lib/decoder.f90 @@ -49,29 +49,25 @@ subroutine decoder(ss,id2) if(nmode.eq.65) go to 800 -! print*,'A' -!$OMP PARALLEL SECTIONS PRIVATE(id) + !$omp parallel sections -!$OMP SECTION -! print*,'B' + !$omp section call timer('decjt9 ',0) call decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol,nzhsym, & nagain,ndepth,nmode) call timer('decjt9 ',1) -!$OMP SECTION + !$omp section if(nmode.ge.65 .and. (.not.done65)) then if(newdat.ne.0) dd(1:npts65)=id2(1:npts65) nf1=nfa nf2=nfb -! print*,'C' call timer('jt65a ',0) call jt65a(dd,npts65,newdat,nutc,nf1,nf2,nfqso,ntol65,nagain,ndecoded) call timer('jt65a ',1) endif -!$OMP END PARALLEL SECTIONS -! print*,'D' + !$omp end parallel sections ! JT65 is not yet producing info for nsynced, ndecoded. 800 write(*,1010) nsynced,ndecoded diff --git a/lib/downsam9.f90 b/lib/downsam9.f90 index 9096f90f2..575ff7d61 100644 --- a/lib/downsam9.f90 +++ b/lib/downsam9.f90 @@ -39,7 +39,11 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2,nz2) if(npatience.eq.3) nflags=FFTW_PATIENT if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE ! Plan the FFTs just once + + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls plan=fftwf_plan_dft_r2c_1d(nfft1,x1,c1,nflags) + !$omp end critical(fftw) + first=.false. endif diff --git a/lib/filbig.f90 b/lib/filbig.f90 index a817411e3..26b23c3a8 100644 --- a/lib/filbig.f90 +++ b/lib/filbig.f90 @@ -38,10 +38,13 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) if(npatience.eq.2) nflags=FFTW_MEASURE if(npatience.eq.3) nflags=FFTW_PATIENT if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE + ! Plan the FFTs just once + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls plan1=fftwf_plan_dft_r2c_1d(nfft1,rca,ca,nflags) plan2=fftwf_plan_dft_1d(nfft2,c4a,c4a,-1,nflags) plan3=fftwf_plan_dft_1d(nfft2,cfilt,cfilt,+1,nflags) + !$omp end critical(fftw) ! Convert impulse response to filter function do i=1,nfft2 @@ -128,9 +131,13 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) n4=min(npts/8,nfft2) return -900 call fftwf_destroy_plan(plan1) +900 continue + + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls + call fftwf_destroy_plan(plan1) call fftwf_destroy_plan(plan2) call fftwf_destroy_plan(plan3) + !$omp end critical(fftw) return end subroutine filbig diff --git a/lib/four2a.f90 b/lib/four2a.f90 index c3b377557..57c7239e1 100644 --- a/lib/four2a.f90 +++ b/lib/four2a.f90 @@ -26,6 +26,7 @@ subroutine four2a(a,nfft,ndim,isign,iform) integer nn(NPMAX),ns(NPMAX),nf(NPMAX) !Params of stored plans integer*8 nl(NPMAX),nloc !More params of plans integer*8 plan(NPMAX) !Pointers to stored plans + logical found_plan data nplan/0/ !Number of stored plans common/patience/npatience,nthreads !Patience and threads for FFTW plans include 'fftw3.f90' !FFTW definitions @@ -34,60 +35,81 @@ subroutine four2a(a,nfft,ndim,isign,iform) if(nfft.lt.0) go to 999 nloc=loc(a) + + found_plan = .false. + !$omp critical(four2a_setup) do i=1,nplan if(nfft.eq.nn(i) .and. isign.eq.ns(i) .and. & - iform.eq.nf(i) .and. nloc.eq.nl(i)) go to 10 + iform.eq.nf(i) .and. nloc.eq.nl(i)) then + found_plan = .true. + exit + end if enddo - if(nplan.ge.NPMAX) stop 'Too many FFTW plans requested.' - nplan=nplan+1 - i=nplan - nn(i)=nfft - ns(i)=isign - nf(i)=iform - nl(i)=nloc + + if(i.ge.NPMAX) stop 'Too many FFTW plans requested.' + + if (.not. found_plan) then + nplan=nplan+1 + i=nplan + + nn(i)=nfft + ns(i)=isign + nf(i)=iform + nl(i)=nloc ! Planning: FFTW_ESTIMATE, FFTW_ESTIMATE_PATIENT, FFTW_MEASURE, ! FFTW_PATIENT, FFTW_EXHAUSTIVE - nflags=FFTW_ESTIMATE - if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT - if(npatience.eq.2) nflags=FFTW_MEASURE - if(npatience.eq.3) nflags=FFTW_PATIENT - if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE + nflags=FFTW_ESTIMATE + if(npatience.eq.1) nflags=FFTW_ESTIMATE_PATIENT + if(npatience.eq.2) nflags=FFTW_MEASURE + if(npatience.eq.3) nflags=FFTW_PATIENT + if(npatience.eq.4) nflags=FFTW_EXHAUSTIVE - if(nfft.le.NSMALL) then - jz=nfft - if(iform.eq.0) jz=nfft/2 - aa(1:jz)=a(1:jz) - endif + if(nfft.le.NSMALL) then + jz=nfft + if(iform.eq.0) jz=nfft/2 + aa(1:jz)=a(1:jz) + endif - if(isign.eq.-1 .and. iform.eq.1) then - call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags) - else if(isign.eq.1 .and. iform.eq.1) then - call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_BACKWARD,nflags) - else if(isign.eq.-1 .and. iform.eq.0) then - call sfftw_plan_dft_r2c_1d(plan(i),nfft,a,a,nflags) - else if(isign.eq.1 .and. iform.eq.-1) then - call sfftw_plan_dft_c2r_1d(plan(i),nfft,a,a,nflags) - else - stop 'Unsupported request in four2a' - endif + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls + if(isign.eq.-1 .and. iform.eq.1) then + call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_FORWARD,nflags) + else if(isign.eq.1 .and. iform.eq.1) then + call sfftw_plan_dft_1d(plan(i),nfft,a,a,FFTW_BACKWARD,nflags) + else if(isign.eq.-1 .and. iform.eq.0) then + call sfftw_plan_dft_r2c_1d(plan(i),nfft,a,a,nflags) + else if(isign.eq.1 .and. iform.eq.-1) then + call sfftw_plan_dft_c2r_1d(plan(i),nfft,a,a,nflags) + else + stop 'Unsupported request in four2a' + endif + !$omp end critical(fftw) - i=nplan - if(nfft.le.NSMALL) then - jz=nfft - if(iform.eq.0) jz=nfft/2 - a(1:jz)=aa(1:jz) - endif + if(nfft.le.NSMALL) then + jz=nfft + if(iform.eq.0) jz=nfft/2 + a(1:jz)=aa(1:jz) + endif + end if + !$omp end critical(four2a_setup) -10 continue call sfftw_execute(plan(i)) return -999 do i=1,nplan +999 continue + + !$omp critical(four2a) + do i=1,nplan ! The test is only to silence a compiler warning: - if(ndim.ne.-999) call sfftw_destroy_plan(plan(i)) + if(ndim.ne.-999) then + !$omp critical(fftw) ! serialize non thread-safe FFTW3 calls + call sfftw_destroy_plan(plan(i)) + !$omp end critical(fftw) + end if enddo + nplan=0 + !$omp end critical(four2a) return end subroutine four2a diff --git a/lib/jt65a.f90 b/lib/jt65a.f90 index 347a03450..2fb4e1fa1 100644 --- a/lib/jt65a.f90 +++ b/lib/jt65a.f90 @@ -76,10 +76,15 @@ subroutine jt65a(dd0,npts,newdat,nutc,nf1,nf2,nfqso,ntol,nagain,ndecoded) if(nsnr.lt.-30) nsnr=-30 if(nsnr.gt.-1) nsnr=-1 dt=dt-tskip + + !$omp critical(decode_results) ! serialize writes - see also decjt9.f90 write(*,1010) nutc,nsnr,dt,nfreq,decoded -1010 format(i4.4,i4,f5.1,i5,1x,'#',1x,a22) write(13,1012) nutc,nint(sync1),nsnr,dt,float(nfreq),ndrift, & decoded,nbmkv + call flush(6) + !$omp end critical(decode_results) + +1010 format(i4.4,i4,f5.1,i5,1x,'#',1x,a22) 1012 format(i4.4,i4,i5,f6.1,f8.0,i4,3x,a22,' JT65',i4) freq0=freq+a(1) i2=min(NSZ,i+15) !### ??? ### diff --git a/lib/jt9.f90 b/lib/jt9.f90 index 0f8d8f475..64774ce44 100644 --- a/lib/jt9.f90 +++ b/lib/jt9.f90 @@ -163,8 +163,9 @@ program jt9 print*,infile 999 continue + !Save wisdom and free memory iret=fftwf_export_wisdom_to_filename(wisfile) - call four2a(a,-1,1,1,1) !Save wisdom and free memory + call four2a(a,-1,1,1,1) call filbig(a,-1,1,0.0,0,0,0,0,0) !used for FFT plans call fftwf_cleanup_threads() call fftwf_cleanup()