Make Fortran profiling timer function a callback with a default null implementation

Groundwork for calling the decoders directly from C/C++ threads.

To  access   the  timer   module  timer_module   must  now   be  used.
Instrumented code need  only use the module function  'timer' which is
now a  procedure pointer that  is guaranteed to be  associated (unless
null()  is assigned  to it,  which should  not be  done). The  default
behaviour of 'timer' is to do nothing.

If a  Fortran program  wishes to  profile code it  should now  use the
timer_impl module  which contains a default  timer implementation. The
main program should call 'init_timer([filename])' before using 'timer'
or     calling     routines     that     are     instrumented.      If
'init_timer([filename])'.  If it is called  then an optional file name
may  be  provided  with  'timer.out'  being used  as  a  default.  The
procedure 'fini_timer()' may be called to close the file.

The default  timer implementation is  thread safe if used  with OpenMP
multi-threaded code  so long as  the OpenMP  thread team is  given the
copyin(/timer_private/) attribute  for correct operation.   The common
block /timer_private/ should  be included for OpenMP  use by including
the file 'timer_common.inc'.

The module 'lib/timer_C_wrapper.f90' provides  a Fortran wrapper along
with 'init' and 'fini' subroutines  which allow a C/C++ application to
call timer instrumented  Fortran code and for it  to receive callbacks
of 'timer()' subroutine invocations.  No C/C++ timer implementation is
provided at this stage.

git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@6320 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Bill Somerville 2015-12-27 15:40:57 +00:00
parent 29f309bbc0
commit f416a52def
28 changed files with 489 additions and 229 deletions

View File

@ -347,6 +347,7 @@ set (wsjt_FSRCS
lib/interleave9.f90
lib/inter_wspr.f90
lib/iscat.f90
lib/iso_c_utilities.f90
lib/jplsubs.f
lib/jt4.f90
lib/jt4a.f90
@ -393,7 +394,9 @@ set (wsjt_FSRCS
lib/sync9f.f90
lib/synciscat.f90
lib/syncmsk.f90
lib/timer.f90
lib/timer_C_wrapper.f90
lib/timer_impl.f90
lib/timer_module.f90
lib/timf2.f90
lib/tweak1.f90
lib/twkfreq.f90

View File

@ -1,5 +1,5 @@
parameter (NTMAX=120)
parameter (NMAX=NTMAX*12000) !Total sample intervals (one minute)
parameter (NDMAX=NTMAX*1500) !Sample intervals at 1500 Hz rate
parameter (NSMAX=6827) !Max length of saved spectra
parameter (MAXFFT3=16384)
integer, parameter :: NTMAX=120
integer, parameter :: NMAX=NTMAX*12000 !Total sample intervals (one minute)
integer, parameter :: NDMAX=NTMAX*1500 !Sample intervals at 1500 Hz rate
integer, parameter :: NSMAX=6827 !Max length of saved spectra
integer, parameter :: MAXFFT3=16384

View File

@ -1,6 +1,8 @@
subroutine decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol, &
nzhsym,nagain,ndepth,nmode)
use timer_module, only: timer
include 'constants.f90'
real ss(184,NSMAX)
character*22 msg

View File

@ -4,6 +4,8 @@ subroutine decode65a(dd,npts,newdat,nqd,f0,nflip,mode65,ntrials, &
! Apply AFC corrections to a candidate JT65 signal, then decode it.
use timer_module, only: timer
parameter (NMAX=60*12000) !Samples per 60 s
real*4 dd(NMAX) !92 MB: raw data from Linrad timf2
complex cx(NMAX/8) !Data at 1378.125 samples/s

View File

@ -1,18 +1,17 @@
subroutine decoder(ss,id2,params,nfsample)
use prog_args
!$ use omp_lib
use prog_args
use timer_module, only: timer
include 'jt9com.f90'
include 'timer_common.inc'
real ss(184,NSMAX)
logical baddata
integer*2 id2(NTMAX*12000)
type(params_block) :: params
real*4 dd(NTMAX*12000)
common/tracer/limtrace,lu
integer onlevel(0:10)
common/tracer_priv/level,onlevel
!$omp threadprivate(/tracer_priv/)
save
if(mod(params%nranera,2).eq.0) ntrials=10**(params%nranera/2)
@ -57,7 +56,7 @@ subroutine decoder(ss,id2,params,nfsample)
newdat9=params%newdat
!$ call omp_set_dynamic(.true.)
!$omp parallel sections num_threads(2) copyin(/tracer_priv/) shared(ndecoded) if(.true.) !iif() needed on Mac
!$omp parallel sections num_threads(2) copyin(/timer_private/) shared(ndecoded) if(.true.) !iif() needed on Mac
!$omp section
if(params%nmode.eq.65 .or. (params%nmode.eq.(65+9) .and. params%ntxmode.eq.65)) then

View File

@ -5,6 +5,7 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2)
use, intrinsic :: iso_c_binding
use FFTW3
use timer_module, only: timer
include 'constants.f90'
integer(C_SIZE_T) NMAX1

View File

@ -15,6 +15,7 @@ subroutine extract(s3,nadd,ntrials,naggressive,ndepth,mycall_12, &
use prog_args !shm_key, exe_dir, data_dir
use packjt
use timer_module, only: timer
real s3(64,63)
character decoded*22

View File

@ -1,5 +1,7 @@
real function fchisq65(cx,npts,fsample,nflip,a,ccfmax,dtmax)
use timer_module, only: timer
parameter (NMAX=60*12000) !Samples per 60 s
complex cx(npts)
real a(5)

View File

@ -5,6 +5,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
use, intrinsic :: iso_c_binding
use FFTW3
use timer_module, only: timer
parameter (NSZ=3413)
parameter (NFFT1=672000,NFFT2=77175)

87
lib/iso_c_utilities.f90 Normal file
View File

@ -0,0 +1,87 @@
module iso_c_utilities
use, intrinsic :: iso_c_binding, only: c_ptr, c_char, c_f_pointer, c_associated
implicit none
public :: c_to_f_string, c_f_dyn_string
private
character(c_char), dimension(1), save, target :: dummy_string = "?"
interface ! strlen is a standard C function from <string.h>
! int strlen(char *string)
function strlen (string) result (len) bind (c, name="strlen")
use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t
implicit none
type(c_ptr), value :: string
integer(kind=c_size_t) :: len
end function strlen
! void free(void * p)
subroutine c_free (p) bind (c, name="free")
use, intrinsic :: iso_c_binding, only: c_ptr
implicit none
type(c_ptr), value :: p
end subroutine c_free
end interface
contains
!
! Cast C string pointer to Fortran string pointer
!
! Warning! - C data must outlive result scope
!
function c_to_f_string (c_str) result (f_str)
use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer, c_char
implicit none
type(c_ptr), intent(in) :: c_str
character(kind=c_char, len=:), pointer :: f_str
character(kind=c_char), pointer :: arr(:)
interface ! strlen is a standard C function from <string.h>
! int strlen(char *string)
function strlen (string) result (len) bind (c, name="strlen")
use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t
implicit none
type(c_ptr), value :: string
integer(kind=c_size_t) :: len
end function strlen
end interface
call c_f_pointer (c_str, arr, [strlen (c_str)])
call get_scalar_pointer (size (arr), arr, f_str)
end function c_to_f_string
subroutine get_scalar_pointer (scalar_len, scalar, fptr)
! Convert a null-terminated C string into a Fortran character pointer
use, intrinsic :: iso_c_binding, only: c_char
integer, intent(in) :: scalar_len
character(kind=c_char, len=scalar_len), intent(in), target :: scalar(1)
character(kind=c_char, len=:), pointer :: fptr
fptr => scalar(1)
end subroutine get_scalar_pointer
function c_f_dyn_string (cptr) result (fstr)
! Convert a null-terminated malloc'ed C string into a Fortran character array
type(c_ptr), intent(in) :: cptr ! The C address
character(kind=c_char), allocatable :: fstr(:)
character(kind=c_char), pointer :: fptr(:)
interface ! strlen is a standard C function from <string.h>
! void free(void * p)
subroutine c_free (p) bind (c, name="free")
use, intrinsic :: iso_c_binding, only: c_ptr
implicit none
type(c_ptr), value :: p
end subroutine c_free
end interface
if (c_associated (cptr)) then
call c_f_pointer (fptr=fptr, cptr=cptr, shape=[strlen(cptr)])
else
! To avoid segfaults, associate FPTR with a dummy target:
fptr => dummy_string
end if
fstr = fptr
call c_free (cptr)
end function c_f_dyn_string
end module

View File

@ -2,6 +2,8 @@ subroutine jt4a(dd,jz,nutc,nfqso,ntol0,emedelay,dttol,nagain,ndepth, &
nclearave,minsync,minw,nsubmode,mycall,hiscall,hisgrid,nlist0,listutc0)
use jt4
use timer_module, only: timer
integer listutc0(10)
real*4 dd(jz)
real*4 dat(30*12000)

View File

@ -3,6 +3,9 @@ program jt65
! Test the JT65 decoder for WSJT-X
use options
use timer_module, only: timer
use timer_impl, only: init_timer
character c
logical :: display_help=.false.
parameter (NZMAX=60*12000)
@ -13,7 +16,6 @@ program jt65
character(len=500) optarg
character*12 mycall,hiscall
character*6 hisgrid
common/tracer/limtrace,lu
equivalence (lenfile,ihdr(2))
type (option) :: long_options(9) = [ &
option ('freq',.true.,'f','signal frequency, default FREQ=1270','FREQ'), &
@ -27,8 +29,6 @@ program jt65
,'experience decoding options (1..n), default FLAGS=0','FLAGS'), &
option ('single-signal-mode',.false.,'s','decode at signal frequency only','') ]
limtrace=0
lu=12
ntol=10
nfqso=1270
nagain=0
@ -84,7 +84,7 @@ naggressive=1
go to 999
endif
open(12,file='timer.out',status='unknown')
call init_timer()
call timer('jt65 ',0)
ndecoded=0

View File

@ -4,6 +4,8 @@ subroutine jt65a(dd0,npts,newdat,nutc,nf1,nf2,nfqso,ntol,nsubmode, &
! Process dd0() data to find and decode JT65 signals.
use timer_module, only: timer
parameter (NSZ=3413,NZMAX=60*12000)
parameter (NFFT=1000)
real dd0(NZMAX)

View File

@ -7,6 +7,8 @@ program jt9
use prog_args
use, intrinsic :: iso_c_binding
use FFTW3
use timer_module, only: timer
use timer_impl, only: init_timer, fini_timer
include 'jt9com.f90'
@ -61,7 +63,6 @@ program jt9
type(dec_data), allocatable :: shared_data
character(len=12) :: mycall, hiscall
character(len=6) :: mygrid, hisgrid
common/tracer/limtrace,lu
common/patience/npatience,nthreads
common/decstats/ntry65a,ntry65b,n65a,n65b,num9,numfano
data npatience/1/,nthreads/1/
@ -161,8 +162,6 @@ program jt9
endif
allocate(shared_data)
limtrace=0 !We're running jt9 in stand-alone mode
lu=12
nflatten=0
do iarg = offset + 1, offset + remain
@ -205,7 +204,7 @@ program jt9
nhsym0=-999
npts=(60*ntrperiod-6)*12000
if(iarg .eq. offset + 1) then
open(12,file=trim(data_dir)//'/timer.out',status='unknown')
call init_timer (trim(data_dir)//'/timer.out')
call timer('jt9 ',0)
endif
@ -288,7 +287,9 @@ program jt9
print*,infile
999 continue
! Output decoder statistics
! Output decoder statistics
call fini_timer ()
open (unit=12, file=trim(data_dir)//'/timer.out', status='unknown', position='append')
write(12,1100) n65a,ntry65a,n65b,ntry65b,numfano,num9
1100 format(58('-')/' JT65_1 Tries_1 JT65_2 Tries_2 JT9 Tries'/ &
58('-')/6i8)

View File

@ -1,6 +1,8 @@
subroutine jt9a()
use, intrinsic :: iso_c_binding, only: c_f_pointer
use prog_args
use timer_module, only: timer
use timer_impl, only: init_timer !, limtrace
include 'jt9com.f90'
@ -15,24 +17,19 @@ subroutine jt9a()
integer*1 attach_jt9
! integer*1 lock_jt9,unlock_jt9
integer size_jt9
character*80 cwd
! Multiple instances:
character*80 mykey
type(dec_data), pointer :: shared_data
type(params_block) :: local_params
logical fileExists
common/tracer/limtrace,lu
! Multiple instances:
i0 = len(trim(shm_key))
call getcwd(cwd)
open(12,file=trim(data_dir)//'/timer.out',status='unknown')
call init_timer (trim(data_dir)//'/timer.out')
! open(23,file=trim(data_dir)//'/CALL3.TXT',status='unknown')
limtrace=0
! limtrace=-1 !Disable all calls to timer()
lu=12
! Multiple instances: set the shared memory key before attaching
mykey=trim(repeat(shm_key,1))

View File

@ -2,6 +2,9 @@ subroutine jtmsk(id2,narg,line)
! Decoder for JTMSK
use timer_module, only: timer
use timer_impl, only: limtrace
parameter (NMAX=30*12000)
parameter (NFFTMAX=512*1024)
parameter (NSPM=1404) !Samples per JTMSK message
@ -18,10 +21,8 @@ subroutine jtmsk(id2,narg,line)
integer narg(0:11) !Arguments passed from calling pgm
character*22 msg,msg0 !Decoded message
character*80 line(100) !Decodes passed back to caller
common/tracer/ limtrace,lu
limtrace=-1
lu=12
! Parameters from GUI are in narg():
nutc=narg(0) !UTC
npts=min(narg(1),NMAX) !Number of samples in id2 (12000 Hz)

View File

@ -3,6 +3,8 @@ subroutine softsym(id2,npts8,nsps8,newdat,fpk,syncpk,snrdb,xdt, &
! Compute the soft symbols
use timer_module, only: timer
parameter (NZ2=1512,NZ3=1360)
complex c2(0:NZ2-1)
complex c3(0:NZ3-1)

View File

@ -8,6 +8,8 @@ subroutine subtract65(dd,npts,f0,dt)
! Subtract : dd(t) = dd(t) - 2*REAL{cref*cfilt}
use packjt
use timer_module, only: timer
integer correct(63)
parameter (NMAX=60*12000) !Samples per 60 s
parameter (NFILT=1600)

View File

@ -18,6 +18,7 @@ subroutine symspec(shared_data,k,ntrperiod,nsps,ingain,nminw,pxdb,s, &
! ss() JT9 symbol spectra at half-symbol steps
! savg() average spectra for waterfall display
use, intrinsic :: iso_c_binding, only: c_int, c_short, c_float, c_char
include 'jt9com.f90'
type(dec_data) :: shared_data

View File

@ -3,6 +3,8 @@ subroutine sync4(dat,jz,mode4,minw)
! Synchronizes JT4 data, finding the best-fit DT and DF.
use jt4
use timer_module, only: timer
parameter (NFFTMAX=2520) !Max length of FFTs
parameter (NHMAX=NFFTMAX/2) !Max length of power spectra
parameter (NSMAX=525) !Max number of half-symbol steps

View File

@ -5,6 +5,8 @@ subroutine syncmsk(cdat,npts,jpk,ipk,idf,rmax,snr,metric,decoded)
use iso_c_binding, only: c_loc,c_size_t
use packjt
use hashing
use timer_module, only: timer
parameter (NSPM=1404,NSAVE=2000)
complex cdat(npts) !Analytic signal
complex cb(66) !Complex waveform for Barker-11 code

View File

@ -1,5 +1,7 @@
program testmsk
use timer_module, only: timer
parameter (NMAX=359424)
integer*2 id2(NMAX)
integer narg(0:11)
@ -22,6 +24,7 @@ program testmsk
ttotal=0.
ndecodes=0
call init_timer()
call timer('testmsk ',0)
do ifile=1,nfiles
call getarg(ifile,infile)

View File

@ -1,200 +0,0 @@
subroutine timer(dname,k)
! Times procedure number n between a call with k=0 (tstart) and with
! k=1 (tstop). Accumulates sums of these times in array ut (user time).
! Also traces all calls (for debugging purposes) if limtrace.gt.0
!
! If this is used with OpenMP than the /tracer_priv/ common block must
! be copyed into each thread of a thread team by using the copyin()
! clause on the !$omp parallel directive that creates the team.
!$ use omp_lib
character*8 dname
!$ integer tid
integer onlevel(0:10)
common/tracer/ limtrace,lu
common/tracer_priv/level,onlevel
parameter (MAXCALL=100)
character*8 name(MAXCALL),space
logical on(MAXCALL)
real ut(MAXCALL),ut0(MAXCALL)
!$ integer ntid(MAXCALL)
integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
common/data/nmax,name,on,ut,ut0,dut, &
!$ ntid, &
ncall,nlevel,nparent,total,sum,sumf,space
data eps/0.000001/,ntrace/0/
data level/0/,nmax/0/,space/' '/
data limtrace/0/,lu/-1/
!$omp threadprivate(/tracer_priv/)
!$omp critical(timer)
if(limtrace.lt.0) go to 999
if(lu.lt.1) lu=6
if(k.gt.1) go to 40 !Check for "all done" (k>1)
onlevel(0)=0
!$ tid=omp_get_thread_num()
do n=1,nmax !Check for existing name/parent[/thread]
if(name(n).eq.dname &
!$ .and.ntid(n).eq.tid &
) then
if (on(n)) then
if (nparent(n).eq.onlevel(level-1)) goto 20
else
if (nparent(n).eq.onlevel(level)) goto 20
end if
end if
enddo
nmax=nmax+1 !This is a new one
n=nmax
!$ ntid(n)=tid
ncall(n)=0
on(n)=.false.
ut(n)=eps
name(n)=dname
20 if(k.eq.0) then !Get start times (k=0)
if(on(n)) then
print*,'Error in timer: ',dname,' already on.'
end if
level=level+1 !Increment the level
on(n)=.true.
! call system_clock(icount,irate)
! ut0(n)=float(icount)/irate
! call cpu_time(ut0(n))
ut0(n)=secnds(0.0)
ncall(n)=ncall(n)+1
if(ncall(n).gt.1.and.nlevel(n).ne.level) then
!recursion is happening
!
!TODO: somehow need to account for this deeper call at the
!shallowest instance in the call chain and this needs to be
!done without incrementing anything here other than counters
!and timers
!
nlevel(n)=-1
else
nlevel(n)=level
endif
nparent(n)=onlevel(level-1)
onlevel(level)=n
else if(k.eq.1) then !Get stop times and accumulate sums. (k=1)
if(on(n)) then
on(n)=.false.
! call system_clock(icount,irate)
! ut1=float(icount)/irate
! call cpu_time(ut1)
ut1=secnds(0.0)
ut(n)=ut(n)+ut1-ut0(n)
endif
level=level-1
endif
ntrace=ntrace+1
if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
1020 format(i8,': ',a8,3i5)
go to 998
! Write out the timer statistics
40 write(lu,1040)
1040 format(/' Name Time Frac dTime', &
' dFrac Calls'/58('-'))
!$ !walk backwards through the database rolling up thread data by call chain
!$ do i=nmax,1,-1
!$ do j=1,i-1
!$ l=j
!$ m=i
!$ do while (name(l).eq.name(m))
!$ l=nparent(l)
!$ m=nparent(m)
!$ if (l.eq.0.or.m.eq.0) exit
!$ end do
!$ if (l.eq.0.and.m.eq.0) then
!$ !same call chain so roll up data
!$ ncall(j)=ncall(j)+ncall(i)
!$ ut(j)=ut(j)+ut(i)
!$ do n=1,nmax
!$ if (nparent(n).eq.i) nparent(n)=j
!$ end do
!$ name(i)=space
!$ exit
!$ end if
!$ end do
!$ end do
if(k.gt.100) then
ndiv=k-100
do i=1,nmax
ncall(i)=ncall(i)/ndiv
ut(i)=ut(i)/ndiv
enddo
endif
total=ut(1)
sum=0.
sumf=0.
call print_root(1)
write(lu,1070) sum,sumf
1070 format(58('-')/32x,f10.3,f6.2)
nmax=0
eps=0.000001
ntrace=0
level=0
onlevel(0)=0
998 flush(lu)
999 continue
!$omp end critical(timer)
return
end subroutine timer
recursive subroutine print_root(i)
character*16 sname
common/tracer/ limtrace,lu
parameter (MAXCALL=100)
character*8 name(MAXCALL),space
logical on(MAXCALL)
real ut(MAXCALL),ut0(MAXCALL)
!$ integer ntid(MAXCALL)
integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
common/data/nmax,name,on,ut,ut0,dut, &
!$ ntid, &
ncall,nlevel,nparent,total,sum,sumf,space
if (i.le.nmax) then
if (name(i).ne.space) then
dut=ut(i)
do j=i,nmax
if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j)
enddo
if(dut.lt.0.0) dut=0.0
utf=ut(i)/total
dutf=dut/total
sum=sum+dut
sumf=sumf+dutf
kk=nlevel(i)
sname=space(1:kk)//name(i)//space(1:8-kk)
write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i)
2000 format(a16,2(f10.3,f6.2),i9)
do j=i,nmax
if(nparent(j).eq.i) call print_root(j)
enddo
end if
end if
return
end subroutine print_root

57
lib/timer_C_wrapper.f90 Normal file
View File

@ -0,0 +1,57 @@
module timer_c_wrapper
use :: iso_c_binding, only: c_ptr
use timer_module, only: timer, null_timer
implicit none
!
! C interoperable callback setup
!
abstract interface
subroutine c_timer_callback (context, dname, k)
use, intrinsic :: iso_c_binding, only: c_ptr, c_char
implicit none
type(c_ptr), value, intent(in) :: context
character(c_char), intent(in) :: dname(*)
integer, intent(in), value :: k
end subroutine c_timer_callback
end interface
public :: init, fini
private
!
! the following are singleton items which assumes that any timer
! implementation should only assume one global instance, probably a
! struct or class object whose address is stored the context below
!
type(c_ptr), private :: the_context
procedure(C_timer_callback), pointer, private :: the_callback
contains
subroutine timer_callback_wrapper (dname, k)
use, intrinsic :: iso_c_binding, only: c_null_char
implicit none
character(len=8), intent(in) :: dname
integer, intent(in) :: k
call the_callback (the_context, trim (dname) // c_null_char, k)
end subroutine timer_callback_wrapper
subroutine init (context, callback)
use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer
use iso_c_utilities, only: c_to_f_string
use timer_module, only: timer
implicit none
type(c_ptr), value, intent(in) :: context
type(c_funptr), value, intent(in) :: callback
the_context=context
call c_f_procpointer (callback, the_callback)
timer => timer_callback_wrapper
end subroutine init
subroutine fini ()
implicit none
timer => null_timer
end subroutine fini
end module timer_c_wrapper

3
lib/timer_common.inc Normal file
View File

@ -0,0 +1,3 @@
integer :: level, onlevel(0:10)
common/timer_private/ level, onlevel
!$omp threadprivate(/timer_private/)

262
lib/timer_impl.f90 Normal file
View File

@ -0,0 +1,262 @@
module timer_impl
!$ use omp_lib
use :: iso_c_binding, only: c_ptr
use timer_module, only: timer_callback
implicit none
public :: init_timer, fini_timer
integer, public :: limtrace=0
private
integer, parameter :: MAXCALL=100
integer :: lu=6
real :: dut
integer :: i,nmax=0,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
character(len=8) :: name(MAXCALL),space=' '
logical :: on(MAXCALL)
real :: total,sum,sumf,ut(MAXCALL),ut0(MAXCALL)
!$ integer :: j,l,m,ntid(MAXCALL)
!
! C interoperable callback setup
!
public :: C_init_timer
abstract interface
subroutine C_timer_callback (context, dname, k)
use, intrinsic :: iso_c_binding, only: c_ptr
implicit none
type(c_ptr), intent(in) :: context
character(len=8), intent(in) :: dname
integer, intent(in) :: k
end subroutine C_timer_callback
end interface
type(c_ptr), private :: the_context
procedure(C_timer_callback), pointer, private :: the_C_callback
contains
subroutine timer_callback_wrapper (dname, k)
implicit none
character(len=8), intent(in) :: dname
integer, intent(in) :: k
call the_C_callback (the_context, dname, k)
end subroutine timer_callback_wrapper
subroutine C_init_timer (context, callback) bind(C)
use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer
use iso_c_utilities, only: c_to_f_string
use timer_module, only: timer
implicit none
type(c_ptr), intent(in) :: context
type(c_funptr), intent(in) :: callback
the_context=context
call c_f_procpointer (callback, the_C_callback)
timer => timer_callback_wrapper
end subroutine C_init_timer
!
! default Fortran implementation which is thread safe using OpenMP
!
subroutine default_timer (dname, k)
! Times procedure number n between a call with k=0 (tstart) and with
! k=1 (tstop). Accumulates sums of these times in array ut (user time).
! Also traces all calls (for debugging purposes) if limtrace.gt.0
!
! If this is used with OpenMP than the /timer_private/ common
! block must be copyed into each thread of a thread team by using
! the copyin() clause on the !$omp parallel directive that creates
! the team.
implicit none
character(len=8), intent(in) :: dname
integer, intent(in) :: k
real :: ut1,eps=0.000001
integer :: n,ndiv,ntrace=0
!$ integer :: tid
character(len=8) :: tname
include 'timer_common.inc'
!$omp critical(timer)
if(limtrace.lt.0) go to 999
if(k.gt.1) go to 40 !Check for "all done" (k>1)
onlevel(0)=0
!$ tid=omp_get_thread_num()
do n=1,nmax !Check for existing name/parent[/thread]
if(name(n).eq.dname &
!$ .and.ntid(n).eq.tid &
) then
if (on(n)) then
if (nparent(n).eq.onlevel(level-1)) goto 20
else
if (nparent(n).eq.onlevel(level)) goto 20
end if
end if
enddo
nmax=nmax+1 !This is a new one
n=nmax
!$ ntid(n)=tid
ncall(n)=0
on(n)=.false.
ut(n)=eps
name(n)=dname
20 if(k.eq.0) then !Get start times (k=0)
if(on(n)) then
print*,'Error in timer: ',dname,' already on.'
end if
level=level+1 !Increment the level
on(n)=.true.
! call system_clock(icount,irate)
! ut0(n)=float(icount)/irate
! call cpu_time(ut0(n))
ut0(n)=secnds(0.0)
ncall(n)=ncall(n)+1
if(ncall(n).gt.1.and.nlevel(n).ne.level) then
!recursion is happening
!
!TODO: somehow need to account for this deeper call at the
!shallowest instance in the call chain and this needs to be
!done without incrementing anything here other than counters
!and timers
!
nlevel(n)=-1
else
nlevel(n)=level
endif
nparent(n)=onlevel(level-1)
onlevel(level)=n
else if(k.eq.1) then !Get stop times and accumulate sums. (k=1)
if(on(n)) then
on(n)=.false.
! call system_clock(icount,irate)
! ut1=float(icount)/irate
! call cpu_time(ut1)
ut1=secnds(0.0)
ut(n)=ut(n)+ut1-ut0(n)
endif
level=level-1
endif
ntrace=ntrace+1
if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
1020 format(i8,': ',a8,3i5)
go to 998
! Write out the timer statistics
40 write(lu,1040)
1040 format(/' Name Time Frac dTime', &
' dFrac Calls'/58('-'))
!$ !walk backwards through the database rolling up thread data by call chain
!$ do i=nmax,1,-1
!$ do j=1,i-1
!$ l=j
!$ m=i
!$ do while (name(l).eq.name(m))
!$ l=nparent(l)
!$ m=nparent(m)
!$ if (l.eq.0.or.m.eq.0) exit
!$ end do
!$ if (l.eq.0.and.m.eq.0) then
!$ !same call chain so roll up data
!$ ncall(j)=ncall(j)+ncall(i)
!$ ut(j)=ut(j)+ut(i)
!$ do n=1,nmax
!$ if (nparent(n).eq.i) nparent(n)=j
!$ end do
!$ name(i)=space
!$ exit
!$ end if
!$ end do
!$ end do
if(k.gt.100) then
ndiv=k-100
do i=1,nmax
ncall(i)=ncall(i)/ndiv
ut(i)=ut(i)/ndiv
enddo
endif
total=ut(1)
sum=0.
sumf=0.
call print_root(1)
write(lu,1070) sum,sumf
1070 format(58('-')/32x,f10.3,f6.2)
nmax=0
eps=0.000001
ntrace=0
level=0
onlevel(0)=0
998 flush(lu)
999 continue
!$omp end critical(timer)
return
end subroutine default_timer
recursive subroutine print_root(i)
implicit none
integer, intent(in) :: i
character(len=16) :: sname
real :: dutf, utf
integer :: j, kk
if (i.le.nmax) then
if (name(i).ne.space) then
dut=ut(i)
do j=i,nmax
if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j)
enddo
if(dut.lt.0.0) dut=0.0
utf=ut(i)/total
dutf=dut/total
sum=sum+dut
sumf=sumf+dutf
kk=nlevel(i)
sname=space(1:kk)//name(i)//space(1:8-kk)
write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i)
2000 format(a16,2(f10.3,f6.2),i9)
do j=i,nmax
if(nparent(j).eq.i) call print_root(j)
enddo
end if
end if
return
end subroutine print_root
subroutine init_timer (filename)
use, intrinsic :: iso_c_binding, only: c_char
use timer_module, only: timer
implicit none
character(len=*), optional, intent(in) :: filename
include 'timer_common.inc'
data level/0/, onlevel/11 * 0/
if (present (filename)) then
open (newunit=lu, file=filename, status='unknown')
else
open (newunit=lu, file='timer.out', status='unknown')
end if
timer => default_timer
end subroutine init_timer
subroutine fini_timer ()
use timer_module, only: timer, null_timer
implicit none
timer => null_timer
close (lu)
end subroutine fini_timer
end module timer_impl

23
lib/timer_module.f90 Normal file
View File

@ -0,0 +1,23 @@
module timer_module
implicit none
abstract interface
subroutine timer_callback (dname, k)
character(len=8), intent(in) :: dname
integer, intent(in) :: k
end subroutine timer_callback
end interface
public :: null_timer
procedure(timer_callback), pointer :: timer => null_timer
contains
!
! default Fortran implementation which does nothing
!
subroutine null_timer (dname, k)
implicit none
character(len=8), intent(in) :: dname
integer, intent(in) :: k
end subroutine null_timer
end module timer_module

View File

@ -8,6 +8,8 @@ subroutine wsjt4(dat,npts,nutc,NClearAve,minsync,ntol,emedelay,dttol, &
! range -- analogous to the nqd=1 step in JT9 and JT65.
use jt4
use timer_module, only: timer
real dat(npts) !Raw data
real z(458,65)
logical first,prtavg