diff --git a/CMakeLists.txt b/CMakeLists.txt index cab8fd26c..6aa588756 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,6 +347,7 @@ set (wsjt_FSRCS lib/interleave9.f90 lib/inter_wspr.f90 lib/iscat.f90 + lib/iso_c_utilities.f90 lib/jplsubs.f lib/jt4.f90 lib/jt4a.f90 @@ -393,7 +394,9 @@ set (wsjt_FSRCS lib/sync9f.f90 lib/synciscat.f90 lib/syncmsk.f90 - lib/timer.f90 + lib/timer_C_wrapper.f90 + lib/timer_impl.f90 + lib/timer_module.f90 lib/timf2.f90 lib/tweak1.f90 lib/twkfreq.f90 diff --git a/lib/constants.f90 b/lib/constants.f90 index e9a4c034a..939cc3071 100644 --- a/lib/constants.f90 +++ b/lib/constants.f90 @@ -1,5 +1,5 @@ - parameter (NTMAX=120) - parameter (NMAX=NTMAX*12000) !Total sample intervals (one minute) - parameter (NDMAX=NTMAX*1500) !Sample intervals at 1500 Hz rate - parameter (NSMAX=6827) !Max length of saved spectra - parameter (MAXFFT3=16384) + integer, parameter :: NTMAX=120 + integer, parameter :: NMAX=NTMAX*12000 !Total sample intervals (one minute) + integer, parameter :: NDMAX=NTMAX*1500 !Sample intervals at 1500 Hz rate + integer, parameter :: NSMAX=6827 !Max length of saved spectra + integer, parameter :: MAXFFT3=16384 diff --git a/lib/decjt9.f90 b/lib/decjt9.f90 index d12c188e3..140f6e9d7 100644 --- a/lib/decjt9.f90 +++ b/lib/decjt9.f90 @@ -1,6 +1,8 @@ subroutine decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol, & nzhsym,nagain,ndepth,nmode) + use timer_module, only: timer + include 'constants.f90' real ss(184,NSMAX) character*22 msg diff --git a/lib/decode65a.f90 b/lib/decode65a.f90 index ce4b163e9..cb0116ab7 100644 --- a/lib/decode65a.f90 +++ b/lib/decode65a.f90 @@ -4,6 +4,8 @@ subroutine decode65a(dd,npts,newdat,nqd,f0,nflip,mode65,ntrials, & ! Apply AFC corrections to a candidate JT65 signal, then decode it. + use timer_module, only: timer + parameter (NMAX=60*12000) !Samples per 60 s real*4 dd(NMAX) !92 MB: raw data from Linrad timf2 complex cx(NMAX/8) !Data at 1378.125 samples/s diff --git a/lib/decoder.f90 b/lib/decoder.f90 index bdb40e0f2..b611cc61b 100644 --- a/lib/decoder.f90 +++ b/lib/decoder.f90 @@ -1,18 +1,17 @@ subroutine decoder(ss,id2,params,nfsample) - use prog_args !$ use omp_lib + use prog_args + use timer_module, only: timer include 'jt9com.f90' + include 'timer_common.inc' + real ss(184,NSMAX) logical baddata integer*2 id2(NTMAX*12000) type(params_block) :: params real*4 dd(NTMAX*12000) - common/tracer/limtrace,lu - integer onlevel(0:10) - common/tracer_priv/level,onlevel -!$omp threadprivate(/tracer_priv/) save if(mod(params%nranera,2).eq.0) ntrials=10**(params%nranera/2) @@ -57,7 +56,7 @@ subroutine decoder(ss,id2,params,nfsample) newdat9=params%newdat !$ call omp_set_dynamic(.true.) -!$omp parallel sections num_threads(2) copyin(/tracer_priv/) shared(ndecoded) if(.true.) !iif() needed on Mac +!$omp parallel sections num_threads(2) copyin(/timer_private/) shared(ndecoded) if(.true.) !iif() needed on Mac !$omp section if(params%nmode.eq.65 .or. (params%nmode.eq.(65+9) .and. params%ntxmode.eq.65)) then diff --git a/lib/downsam9.f90 b/lib/downsam9.f90 index 09fab6f77..3eba1d091 100644 --- a/lib/downsam9.f90 +++ b/lib/downsam9.f90 @@ -5,6 +5,7 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2) use, intrinsic :: iso_c_binding use FFTW3 + use timer_module, only: timer include 'constants.f90' integer(C_SIZE_T) NMAX1 diff --git a/lib/extract.f90 b/lib/extract.f90 index 400781255..8cce83284 100644 --- a/lib/extract.f90 +++ b/lib/extract.f90 @@ -15,6 +15,7 @@ subroutine extract(s3,nadd,ntrials,naggressive,ndepth,mycall_12, & use prog_args !shm_key, exe_dir, data_dir use packjt + use timer_module, only: timer real s3(64,63) character decoded*22 diff --git a/lib/fchisq65.f90 b/lib/fchisq65.f90 index fbfc4a265..3c624143c 100644 --- a/lib/fchisq65.f90 +++ b/lib/fchisq65.f90 @@ -1,5 +1,7 @@ real function fchisq65(cx,npts,fsample,nflip,a,ccfmax,dtmax) + use timer_module, only: timer + parameter (NMAX=60*12000) !Samples per 60 s complex cx(npts) real a(5) diff --git a/lib/filbig.f90 b/lib/filbig.f90 index bb1ea52a0..26a5edf52 100644 --- a/lib/filbig.f90 +++ b/lib/filbig.f90 @@ -5,6 +5,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0) use, intrinsic :: iso_c_binding use FFTW3 + use timer_module, only: timer parameter (NSZ=3413) parameter (NFFT1=672000,NFFT2=77175) diff --git a/lib/iso_c_utilities.f90 b/lib/iso_c_utilities.f90 new file mode 100644 index 000000000..cdcc1a749 --- /dev/null +++ b/lib/iso_c_utilities.f90 @@ -0,0 +1,87 @@ +module iso_c_utilities + + use, intrinsic :: iso_c_binding, only: c_ptr, c_char, c_f_pointer, c_associated + implicit none + + public :: c_to_f_string, c_f_dyn_string + + private + + character(c_char), dimension(1), save, target :: dummy_string = "?" + + interface ! strlen is a standard C function from + ! int strlen(char *string) + function strlen (string) result (len) bind (c, name="strlen") + use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t + implicit none + type(c_ptr), value :: string + integer(kind=c_size_t) :: len + end function strlen + + ! void free(void * p) + subroutine c_free (p) bind (c, name="free") + use, intrinsic :: iso_c_binding, only: c_ptr + implicit none + type(c_ptr), value :: p + end subroutine c_free + end interface + +contains + + ! + ! Cast C string pointer to Fortran string pointer + ! + ! Warning! - C data must outlive result scope + ! + function c_to_f_string (c_str) result (f_str) + use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer, c_char + implicit none + type(c_ptr), intent(in) :: c_str + character(kind=c_char, len=:), pointer :: f_str + character(kind=c_char), pointer :: arr(:) + interface ! strlen is a standard C function from + ! int strlen(char *string) + function strlen (string) result (len) bind (c, name="strlen") + use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t + implicit none + type(c_ptr), value :: string + integer(kind=c_size_t) :: len + end function strlen + end interface + call c_f_pointer (c_str, arr, [strlen (c_str)]) + call get_scalar_pointer (size (arr), arr, f_str) + end function c_to_f_string + + subroutine get_scalar_pointer (scalar_len, scalar, fptr) + ! Convert a null-terminated C string into a Fortran character pointer + use, intrinsic :: iso_c_binding, only: c_char + integer, intent(in) :: scalar_len + character(kind=c_char, len=scalar_len), intent(in), target :: scalar(1) + character(kind=c_char, len=:), pointer :: fptr + fptr => scalar(1) + end subroutine get_scalar_pointer + + function c_f_dyn_string (cptr) result (fstr) + ! Convert a null-terminated malloc'ed C string into a Fortran character array + type(c_ptr), intent(in) :: cptr ! The C address + character(kind=c_char), allocatable :: fstr(:) + character(kind=c_char), pointer :: fptr(:) + interface ! strlen is a standard C function from + ! void free(void * p) + subroutine c_free (p) bind (c, name="free") + use, intrinsic :: iso_c_binding, only: c_ptr + implicit none + type(c_ptr), value :: p + end subroutine c_free + end interface + if (c_associated (cptr)) then + call c_f_pointer (fptr=fptr, cptr=cptr, shape=[strlen(cptr)]) + else + ! To avoid segfaults, associate FPTR with a dummy target: + fptr => dummy_string + end if + fstr = fptr + call c_free (cptr) + end function c_f_dyn_string + +end module diff --git a/lib/jt4a.f90 b/lib/jt4a.f90 index 9fd283f29..cec80764f 100644 --- a/lib/jt4a.f90 +++ b/lib/jt4a.f90 @@ -2,6 +2,8 @@ subroutine jt4a(dd,jz,nutc,nfqso,ntol0,emedelay,dttol,nagain,ndepth, & nclearave,minsync,minw,nsubmode,mycall,hiscall,hisgrid,nlist0,listutc0) use jt4 + use timer_module, only: timer + integer listutc0(10) real*4 dd(jz) real*4 dat(30*12000) diff --git a/lib/jt65.f90 b/lib/jt65.f90 index 2b52bd876..52e9f8716 100644 --- a/lib/jt65.f90 +++ b/lib/jt65.f90 @@ -3,6 +3,9 @@ program jt65 ! Test the JT65 decoder for WSJT-X use options + use timer_module, only: timer + use timer_impl, only: init_timer + character c logical :: display_help=.false. parameter (NZMAX=60*12000) @@ -13,7 +16,6 @@ program jt65 character(len=500) optarg character*12 mycall,hiscall character*6 hisgrid - common/tracer/limtrace,lu equivalence (lenfile,ihdr(2)) type (option) :: long_options(9) = [ & option ('freq',.true.,'f','signal frequency, default FREQ=1270','FREQ'), & @@ -27,8 +29,6 @@ program jt65 ,'experience decoding options (1..n), default FLAGS=0','FLAGS'), & option ('single-signal-mode',.false.,'s','decode at signal frequency only','') ] -limtrace=0 -lu=12 ntol=10 nfqso=1270 nagain=0 @@ -84,7 +84,7 @@ naggressive=1 go to 999 endif - open(12,file='timer.out',status='unknown') + call init_timer() call timer('jt65 ',0) ndecoded=0 diff --git a/lib/jt65a.f90 b/lib/jt65a.f90 index 2f4698dc0..2a236a2eb 100644 --- a/lib/jt65a.f90 +++ b/lib/jt65a.f90 @@ -4,6 +4,8 @@ subroutine jt65a(dd0,npts,newdat,nutc,nf1,nf2,nfqso,ntol,nsubmode, & ! Process dd0() data to find and decode JT65 signals. + use timer_module, only: timer + parameter (NSZ=3413,NZMAX=60*12000) parameter (NFFT=1000) real dd0(NZMAX) diff --git a/lib/jt9.f90 b/lib/jt9.f90 index 13f69e5b1..88c53486c 100644 --- a/lib/jt9.f90 +++ b/lib/jt9.f90 @@ -7,6 +7,8 @@ program jt9 use prog_args use, intrinsic :: iso_c_binding use FFTW3 + use timer_module, only: timer + use timer_impl, only: init_timer, fini_timer include 'jt9com.f90' @@ -61,7 +63,6 @@ program jt9 type(dec_data), allocatable :: shared_data character(len=12) :: mycall, hiscall character(len=6) :: mygrid, hisgrid - common/tracer/limtrace,lu common/patience/npatience,nthreads common/decstats/ntry65a,ntry65b,n65a,n65b,num9,numfano data npatience/1/,nthreads/1/ @@ -161,8 +162,6 @@ program jt9 endif allocate(shared_data) - limtrace=0 !We're running jt9 in stand-alone mode - lu=12 nflatten=0 do iarg = offset + 1, offset + remain @@ -205,7 +204,7 @@ program jt9 nhsym0=-999 npts=(60*ntrperiod-6)*12000 if(iarg .eq. offset + 1) then - open(12,file=trim(data_dir)//'/timer.out',status='unknown') + call init_timer (trim(data_dir)//'/timer.out') call timer('jt9 ',0) endif @@ -288,7 +287,9 @@ program jt9 print*,infile 999 continue -! Output decoder statistics + ! Output decoder statistics + call fini_timer () + open (unit=12, file=trim(data_dir)//'/timer.out', status='unknown', position='append') write(12,1100) n65a,ntry65a,n65b,ntry65b,numfano,num9 1100 format(58('-')/' JT65_1 Tries_1 JT65_2 Tries_2 JT9 Tries'/ & 58('-')/6i8) diff --git a/lib/jt9a.f90 b/lib/jt9a.f90 index 4af00c6aa..d3a8e2e98 100644 --- a/lib/jt9a.f90 +++ b/lib/jt9a.f90 @@ -1,6 +1,8 @@ subroutine jt9a() use, intrinsic :: iso_c_binding, only: c_f_pointer use prog_args + use timer_module, only: timer + use timer_impl, only: init_timer !, limtrace include 'jt9com.f90' @@ -15,24 +17,19 @@ subroutine jt9a() integer*1 attach_jt9 ! integer*1 lock_jt9,unlock_jt9 integer size_jt9 - character*80 cwd ! Multiple instances: character*80 mykey type(dec_data), pointer :: shared_data type(params_block) :: local_params logical fileExists - common/tracer/limtrace,lu ! Multiple instances: i0 = len(trim(shm_key)) - call getcwd(cwd) - open(12,file=trim(data_dir)//'/timer.out',status='unknown') + call init_timer (trim(data_dir)//'/timer.out') ! open(23,file=trim(data_dir)//'/CALL3.TXT',status='unknown') - limtrace=0 ! limtrace=-1 !Disable all calls to timer() - lu=12 ! Multiple instances: set the shared memory key before attaching mykey=trim(repeat(shm_key,1)) diff --git a/lib/jtmsk.f90 b/lib/jtmsk.f90 index c2de56307..de9b24036 100644 --- a/lib/jtmsk.f90 +++ b/lib/jtmsk.f90 @@ -2,6 +2,9 @@ subroutine jtmsk(id2,narg,line) ! Decoder for JTMSK + use timer_module, only: timer + use timer_impl, only: limtrace + parameter (NMAX=30*12000) parameter (NFFTMAX=512*1024) parameter (NSPM=1404) !Samples per JTMSK message @@ -18,10 +21,8 @@ subroutine jtmsk(id2,narg,line) integer narg(0:11) !Arguments passed from calling pgm character*22 msg,msg0 !Decoded message character*80 line(100) !Decodes passed back to caller - common/tracer/ limtrace,lu limtrace=-1 - lu=12 ! Parameters from GUI are in narg(): nutc=narg(0) !UTC npts=min(narg(1),NMAX) !Number of samples in id2 (12000 Hz) diff --git a/lib/softsym.f90 b/lib/softsym.f90 index b94a269cc..1e0867448 100644 --- a/lib/softsym.f90 +++ b/lib/softsym.f90 @@ -3,6 +3,8 @@ subroutine softsym(id2,npts8,nsps8,newdat,fpk,syncpk,snrdb,xdt, & ! Compute the soft symbols + use timer_module, only: timer + parameter (NZ2=1512,NZ3=1360) complex c2(0:NZ2-1) complex c3(0:NZ3-1) diff --git a/lib/subtract65.f90 b/lib/subtract65.f90 index 2fd6e39e0..88b35506d 100644 --- a/lib/subtract65.f90 +++ b/lib/subtract65.f90 @@ -8,6 +8,8 @@ subroutine subtract65(dd,npts,f0,dt) ! Subtract : dd(t) = dd(t) - 2*REAL{cref*cfilt} use packjt + use timer_module, only: timer + integer correct(63) parameter (NMAX=60*12000) !Samples per 60 s parameter (NFILT=1600) diff --git a/lib/symspec.f90 b/lib/symspec.f90 index 9e081007c..75d23e2ae 100644 --- a/lib/symspec.f90 +++ b/lib/symspec.f90 @@ -18,6 +18,7 @@ subroutine symspec(shared_data,k,ntrperiod,nsps,ingain,nminw,pxdb,s, & ! ss() JT9 symbol spectra at half-symbol steps ! savg() average spectra for waterfall display + use, intrinsic :: iso_c_binding, only: c_int, c_short, c_float, c_char include 'jt9com.f90' type(dec_data) :: shared_data diff --git a/lib/sync4.f90 b/lib/sync4.f90 index 68de1a345..dd92ea123 100644 --- a/lib/sync4.f90 +++ b/lib/sync4.f90 @@ -3,6 +3,8 @@ subroutine sync4(dat,jz,mode4,minw) ! Synchronizes JT4 data, finding the best-fit DT and DF. use jt4 + use timer_module, only: timer + parameter (NFFTMAX=2520) !Max length of FFTs parameter (NHMAX=NFFTMAX/2) !Max length of power spectra parameter (NSMAX=525) !Max number of half-symbol steps diff --git a/lib/syncmsk.f90 b/lib/syncmsk.f90 index cc796ccf0..48843e2f9 100644 --- a/lib/syncmsk.f90 +++ b/lib/syncmsk.f90 @@ -5,6 +5,8 @@ subroutine syncmsk(cdat,npts,jpk,ipk,idf,rmax,snr,metric,decoded) use iso_c_binding, only: c_loc,c_size_t use packjt use hashing + use timer_module, only: timer + parameter (NSPM=1404,NSAVE=2000) complex cdat(npts) !Analytic signal complex cb(66) !Complex waveform for Barker-11 code diff --git a/lib/testmsk.f90 b/lib/testmsk.f90 index 8c5c0fc27..ce013c736 100644 --- a/lib/testmsk.f90 +++ b/lib/testmsk.f90 @@ -1,5 +1,7 @@ program testmsk + use timer_module, only: timer + parameter (NMAX=359424) integer*2 id2(NMAX) integer narg(0:11) @@ -22,6 +24,7 @@ program testmsk ttotal=0. ndecodes=0 + call init_timer() call timer('testmsk ',0) do ifile=1,nfiles call getarg(ifile,infile) diff --git a/lib/timer.f90 b/lib/timer.f90 deleted file mode 100644 index 5293a573e..000000000 --- a/lib/timer.f90 +++ /dev/null @@ -1,200 +0,0 @@ -subroutine timer(dname,k) - -! Times procedure number n between a call with k=0 (tstart) and with -! k=1 (tstop). Accumulates sums of these times in array ut (user time). -! Also traces all calls (for debugging purposes) if limtrace.gt.0 -! -! If this is used with OpenMP than the /tracer_priv/ common block must -! be copyed into each thread of a thread team by using the copyin() -! clause on the !$omp parallel directive that creates the team. - - !$ use omp_lib - - character*8 dname - !$ integer tid - integer onlevel(0:10) - common/tracer/ limtrace,lu - common/tracer_priv/level,onlevel - - parameter (MAXCALL=100) - character*8 name(MAXCALL),space - logical on(MAXCALL) - real ut(MAXCALL),ut0(MAXCALL) - !$ integer ntid(MAXCALL) - integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) - common/data/nmax,name,on,ut,ut0,dut, & - !$ ntid, & - ncall,nlevel,nparent,total,sum,sumf,space - - data eps/0.000001/,ntrace/0/ - data level/0/,nmax/0/,space/' '/ - data limtrace/0/,lu/-1/ - - !$omp threadprivate(/tracer_priv/) - - !$omp critical(timer) - if(limtrace.lt.0) go to 999 - if(lu.lt.1) lu=6 - if(k.gt.1) go to 40 !Check for "all done" (k>1) - onlevel(0)=0 - - !$ tid=omp_get_thread_num() - do n=1,nmax !Check for existing name/parent[/thread] - if(name(n).eq.dname & - !$ .and.ntid(n).eq.tid & - ) then - if (on(n)) then - if (nparent(n).eq.onlevel(level-1)) goto 20 - else - if (nparent(n).eq.onlevel(level)) goto 20 - end if - end if - enddo - - nmax=nmax+1 !This is a new one - n=nmax - !$ ntid(n)=tid - ncall(n)=0 - on(n)=.false. - ut(n)=eps - name(n)=dname - -20 if(k.eq.0) then !Get start times (k=0) - if(on(n)) then - print*,'Error in timer: ',dname,' already on.' - end if - level=level+1 !Increment the level - on(n)=.true. -! call system_clock(icount,irate) -! ut0(n)=float(icount)/irate -! call cpu_time(ut0(n)) - ut0(n)=secnds(0.0) - - ncall(n)=ncall(n)+1 - if(ncall(n).gt.1.and.nlevel(n).ne.level) then - !recursion is happening - ! - !TODO: somehow need to account for this deeper call at the - !shallowest instance in the call chain and this needs to be - !done without incrementing anything here other than counters - !and timers - ! - nlevel(n)=-1 - else - nlevel(n)=level - endif - nparent(n)=onlevel(level-1) - onlevel(level)=n - - else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) - if(on(n)) then - on(n)=.false. -! call system_clock(icount,irate) -! ut1=float(icount)/irate -! call cpu_time(ut1) - ut1=secnds(0.0) - - ut(n)=ut(n)+ut1-ut0(n) - endif - level=level-1 - endif - - ntrace=ntrace+1 - if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) -1020 format(i8,': ',a8,3i5) - go to 998 - -! Write out the timer statistics - -40 write(lu,1040) -1040 format(/' Name Time Frac dTime', & - ' dFrac Calls'/58('-')) - - !$ !walk backwards through the database rolling up thread data by call chain - !$ do i=nmax,1,-1 - !$ do j=1,i-1 - !$ l=j - !$ m=i - !$ do while (name(l).eq.name(m)) - !$ l=nparent(l) - !$ m=nparent(m) - !$ if (l.eq.0.or.m.eq.0) exit - !$ end do - !$ if (l.eq.0.and.m.eq.0) then - !$ !same call chain so roll up data - !$ ncall(j)=ncall(j)+ncall(i) - !$ ut(j)=ut(j)+ut(i) - !$ do n=1,nmax - !$ if (nparent(n).eq.i) nparent(n)=j - !$ end do - !$ name(i)=space - !$ exit - !$ end if - !$ end do - !$ end do - - if(k.gt.100) then - ndiv=k-100 - do i=1,nmax - ncall(i)=ncall(i)/ndiv - ut(i)=ut(i)/ndiv - enddo - endif - - total=ut(1) - sum=0. - sumf=0. - call print_root(1) - write(lu,1070) sum,sumf -1070 format(58('-')/32x,f10.3,f6.2) - nmax=0 - eps=0.000001 - ntrace=0 - level=0 - onlevel(0)=0 - -998 flush(lu) - -999 continue - - !$omp end critical(timer) - return -end subroutine timer - -recursive subroutine print_root(i) - character*16 sname - - common/tracer/ limtrace,lu - - parameter (MAXCALL=100) - character*8 name(MAXCALL),space - logical on(MAXCALL) - real ut(MAXCALL),ut0(MAXCALL) - !$ integer ntid(MAXCALL) - integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) - common/data/nmax,name,on,ut,ut0,dut, & - !$ ntid, & - ncall,nlevel,nparent,total,sum,sumf,space - - if (i.le.nmax) then - if (name(i).ne.space) then - dut=ut(i) - do j=i,nmax - if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j) - enddo - if(dut.lt.0.0) dut=0.0 - utf=ut(i)/total - dutf=dut/total - sum=sum+dut - sumf=sumf+dutf - kk=nlevel(i) - sname=space(1:kk)//name(i)//space(1:8-kk) - write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i) -2000 format(a16,2(f10.3,f6.2),i9) - do j=i,nmax - if(nparent(j).eq.i) call print_root(j) - enddo - end if - end if - return -end subroutine print_root diff --git a/lib/timer_C_wrapper.f90 b/lib/timer_C_wrapper.f90 new file mode 100644 index 000000000..866a8508b --- /dev/null +++ b/lib/timer_C_wrapper.f90 @@ -0,0 +1,57 @@ +module timer_c_wrapper + use :: iso_c_binding, only: c_ptr + use timer_module, only: timer, null_timer + implicit none + + ! + ! C interoperable callback setup + ! + abstract interface + subroutine c_timer_callback (context, dname, k) + use, intrinsic :: iso_c_binding, only: c_ptr, c_char + implicit none + type(c_ptr), value, intent(in) :: context + character(c_char), intent(in) :: dname(*) + integer, intent(in), value :: k + end subroutine c_timer_callback + end interface + + public :: init, fini + + private + + ! + ! the following are singleton items which assumes that any timer + ! implementation should only assume one global instance, probably a + ! struct or class object whose address is stored the context below + ! + type(c_ptr), private :: the_context + procedure(C_timer_callback), pointer, private :: the_callback + +contains + subroutine timer_callback_wrapper (dname, k) + use, intrinsic :: iso_c_binding, only: c_null_char + implicit none + character(len=8), intent(in) :: dname + integer, intent(in) :: k + call the_callback (the_context, trim (dname) // c_null_char, k) + end subroutine timer_callback_wrapper + + subroutine init (context, callback) + use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer + use iso_c_utilities, only: c_to_f_string + use timer_module, only: timer + implicit none + type(c_ptr), value, intent(in) :: context + type(c_funptr), value, intent(in) :: callback + the_context=context + call c_f_procpointer (callback, the_callback) + timer => timer_callback_wrapper + end subroutine init + + subroutine fini () + implicit none + timer => null_timer + end subroutine fini + +end module timer_c_wrapper diff --git a/lib/timer_common.inc b/lib/timer_common.inc new file mode 100644 index 000000000..1e7f32a94 --- /dev/null +++ b/lib/timer_common.inc @@ -0,0 +1,3 @@ + integer :: level, onlevel(0:10) + common/timer_private/ level, onlevel + !$omp threadprivate(/timer_private/) diff --git a/lib/timer_impl.f90 b/lib/timer_impl.f90 new file mode 100644 index 000000000..4e35162cb --- /dev/null +++ b/lib/timer_impl.f90 @@ -0,0 +1,262 @@ +module timer_impl + !$ use omp_lib + use :: iso_c_binding, only: c_ptr + use timer_module, only: timer_callback + implicit none + + public :: init_timer, fini_timer + integer, public :: limtrace=0 + + private + + integer, parameter :: MAXCALL=100 + integer :: lu=6 + real :: dut + integer :: i,nmax=0,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) + character(len=8) :: name(MAXCALL),space=' ' + logical :: on(MAXCALL) + real :: total,sum,sumf,ut(MAXCALL),ut0(MAXCALL) + !$ integer :: j,l,m,ntid(MAXCALL) + + ! + ! C interoperable callback setup + ! + public :: C_init_timer + abstract interface + subroutine C_timer_callback (context, dname, k) + use, intrinsic :: iso_c_binding, only: c_ptr + implicit none + type(c_ptr), intent(in) :: context + character(len=8), intent(in) :: dname + integer, intent(in) :: k + end subroutine C_timer_callback + end interface + type(c_ptr), private :: the_context + procedure(C_timer_callback), pointer, private :: the_C_callback + +contains + subroutine timer_callback_wrapper (dname, k) + implicit none + character(len=8), intent(in) :: dname + integer, intent(in) :: k + call the_C_callback (the_context, dname, k) + end subroutine timer_callback_wrapper + + subroutine C_init_timer (context, callback) bind(C) + use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer + use iso_c_utilities, only: c_to_f_string + use timer_module, only: timer + implicit none + type(c_ptr), intent(in) :: context + type(c_funptr), intent(in) :: callback + the_context=context + call c_f_procpointer (callback, the_C_callback) + timer => timer_callback_wrapper + end subroutine C_init_timer + + ! + ! default Fortran implementation which is thread safe using OpenMP + ! + subroutine default_timer (dname, k) + + ! Times procedure number n between a call with k=0 (tstart) and with + ! k=1 (tstop). Accumulates sums of these times in array ut (user time). + ! Also traces all calls (for debugging purposes) if limtrace.gt.0 + ! + ! If this is used with OpenMP than the /timer_private/ common + ! block must be copyed into each thread of a thread team by using + ! the copyin() clause on the !$omp parallel directive that creates + ! the team. + + implicit none + + character(len=8), intent(in) :: dname + integer, intent(in) :: k + + real :: ut1,eps=0.000001 + integer :: n,ndiv,ntrace=0 + !$ integer :: tid + character(len=8) :: tname + include 'timer_common.inc' + + !$omp critical(timer) + if(limtrace.lt.0) go to 999 + if(k.gt.1) go to 40 !Check for "all done" (k>1) + onlevel(0)=0 + + !$ tid=omp_get_thread_num() + do n=1,nmax !Check for existing name/parent[/thread] + if(name(n).eq.dname & + !$ .and.ntid(n).eq.tid & + ) then + if (on(n)) then + if (nparent(n).eq.onlevel(level-1)) goto 20 + else + if (nparent(n).eq.onlevel(level)) goto 20 + end if + end if + enddo + + nmax=nmax+1 !This is a new one + n=nmax + !$ ntid(n)=tid + ncall(n)=0 + on(n)=.false. + ut(n)=eps + name(n)=dname + +20 if(k.eq.0) then !Get start times (k=0) + if(on(n)) then + print*,'Error in timer: ',dname,' already on.' + end if + level=level+1 !Increment the level + on(n)=.true. + ! call system_clock(icount,irate) + ! ut0(n)=float(icount)/irate + ! call cpu_time(ut0(n)) + ut0(n)=secnds(0.0) + + ncall(n)=ncall(n)+1 + if(ncall(n).gt.1.and.nlevel(n).ne.level) then + !recursion is happening + ! + !TODO: somehow need to account for this deeper call at the + !shallowest instance in the call chain and this needs to be + !done without incrementing anything here other than counters + !and timers + ! + nlevel(n)=-1 + else + nlevel(n)=level + endif + nparent(n)=onlevel(level-1) + onlevel(level)=n + + else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) + if(on(n)) then + on(n)=.false. + ! call system_clock(icount,irate) + ! ut1=float(icount)/irate + ! call cpu_time(ut1) + ut1=secnds(0.0) + + ut(n)=ut(n)+ut1-ut0(n) + endif + level=level-1 + endif + + ntrace=ntrace+1 + if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n) +1020 format(i8,': ',a8,3i5) + go to 998 + + ! Write out the timer statistics + +40 write(lu,1040) +1040 format(/' Name Time Frac dTime', & + ' dFrac Calls'/58('-')) + + !$ !walk backwards through the database rolling up thread data by call chain + !$ do i=nmax,1,-1 + !$ do j=1,i-1 + !$ l=j + !$ m=i + !$ do while (name(l).eq.name(m)) + !$ l=nparent(l) + !$ m=nparent(m) + !$ if (l.eq.0.or.m.eq.0) exit + !$ end do + !$ if (l.eq.0.and.m.eq.0) then + !$ !same call chain so roll up data + !$ ncall(j)=ncall(j)+ncall(i) + !$ ut(j)=ut(j)+ut(i) + !$ do n=1,nmax + !$ if (nparent(n).eq.i) nparent(n)=j + !$ end do + !$ name(i)=space + !$ exit + !$ end if + !$ end do + !$ end do + + if(k.gt.100) then + ndiv=k-100 + do i=1,nmax + ncall(i)=ncall(i)/ndiv + ut(i)=ut(i)/ndiv + enddo + endif + + total=ut(1) + sum=0. + sumf=0. + call print_root(1) + write(lu,1070) sum,sumf +1070 format(58('-')/32x,f10.3,f6.2) + nmax=0 + eps=0.000001 + ntrace=0 + level=0 + onlevel(0)=0 + +998 flush(lu) + +999 continue + + !$omp end critical(timer) + return + end subroutine default_timer + + recursive subroutine print_root(i) + implicit none + integer, intent(in) :: i + character(len=16) :: sname + real :: dutf, utf + integer :: j, kk + + if (i.le.nmax) then + if (name(i).ne.space) then + dut=ut(i) + do j=i,nmax + if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j) + enddo + if(dut.lt.0.0) dut=0.0 + utf=ut(i)/total + dutf=dut/total + sum=sum+dut + sumf=sumf+dutf + kk=nlevel(i) + sname=space(1:kk)//name(i)//space(1:8-kk) + write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i) +2000 format(a16,2(f10.3,f6.2),i9) + do j=i,nmax + if(nparent(j).eq.i) call print_root(j) + enddo + end if + end if + return + end subroutine print_root + + subroutine init_timer (filename) + use, intrinsic :: iso_c_binding, only: c_char + use timer_module, only: timer + implicit none + character(len=*), optional, intent(in) :: filename + include 'timer_common.inc' + data level/0/, onlevel/11 * 0/ + if (present (filename)) then + open (newunit=lu, file=filename, status='unknown') + else + open (newunit=lu, file='timer.out', status='unknown') + end if + timer => default_timer + end subroutine init_timer + + subroutine fini_timer () + use timer_module, only: timer, null_timer + implicit none + timer => null_timer + close (lu) + end subroutine fini_timer + +end module timer_impl diff --git a/lib/timer_module.f90 b/lib/timer_module.f90 new file mode 100644 index 000000000..167e9c9f7 --- /dev/null +++ b/lib/timer_module.f90 @@ -0,0 +1,23 @@ +module timer_module + implicit none + + abstract interface + subroutine timer_callback (dname, k) + character(len=8), intent(in) :: dname + integer, intent(in) :: k + end subroutine timer_callback + end interface + + public :: null_timer + procedure(timer_callback), pointer :: timer => null_timer + +contains + ! + ! default Fortran implementation which does nothing + ! + subroutine null_timer (dname, k) + implicit none + character(len=8), intent(in) :: dname + integer, intent(in) :: k + end subroutine null_timer +end module timer_module diff --git a/lib/wsjt4.f90 b/lib/wsjt4.f90 index 96afa5160..67571ea12 100644 --- a/lib/wsjt4.f90 +++ b/lib/wsjt4.f90 @@ -8,6 +8,8 @@ subroutine wsjt4(dat,npts,nutc,NClearAve,minsync,ntol,emedelay,dttol, & ! range -- analogous to the nqd=1 step in JT9 and JT65. use jt4 + use timer_module, only: timer + real dat(npts) !Raw data real z(458,65) logical first,prtavg