mirror of
https://github.com/saitohirga/WSJT-X.git
synced 2024-11-22 04:11:16 -05:00
Make Fortran profiling timer function a callback with a default null implementation
Groundwork for calling the decoders directly from C/C++ threads. To access the timer module timer_module must now be used. Instrumented code need only use the module function 'timer' which is now a procedure pointer that is guaranteed to be associated (unless null() is assigned to it, which should not be done). The default behaviour of 'timer' is to do nothing. If a Fortran program wishes to profile code it should now use the timer_impl module which contains a default timer implementation. The main program should call 'init_timer([filename])' before using 'timer' or calling routines that are instrumented. If 'init_timer([filename])'. If it is called then an optional file name may be provided with 'timer.out' being used as a default. The procedure 'fini_timer()' may be called to close the file. The default timer implementation is thread safe if used with OpenMP multi-threaded code so long as the OpenMP thread team is given the copyin(/timer_private/) attribute for correct operation. The common block /timer_private/ should be included for OpenMP use by including the file 'timer_common.inc'. The module 'lib/timer_C_wrapper.f90' provides a Fortran wrapper along with 'init' and 'fini' subroutines which allow a C/C++ application to call timer instrumented Fortran code and for it to receive callbacks of 'timer()' subroutine invocations. No C/C++ timer implementation is provided at this stage. git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@6320 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
parent
29f309bbc0
commit
f416a52def
@ -347,6 +347,7 @@ set (wsjt_FSRCS
|
||||
lib/interleave9.f90
|
||||
lib/inter_wspr.f90
|
||||
lib/iscat.f90
|
||||
lib/iso_c_utilities.f90
|
||||
lib/jplsubs.f
|
||||
lib/jt4.f90
|
||||
lib/jt4a.f90
|
||||
@ -393,7 +394,9 @@ set (wsjt_FSRCS
|
||||
lib/sync9f.f90
|
||||
lib/synciscat.f90
|
||||
lib/syncmsk.f90
|
||||
lib/timer.f90
|
||||
lib/timer_C_wrapper.f90
|
||||
lib/timer_impl.f90
|
||||
lib/timer_module.f90
|
||||
lib/timf2.f90
|
||||
lib/tweak1.f90
|
||||
lib/twkfreq.f90
|
||||
|
@ -1,5 +1,5 @@
|
||||
parameter (NTMAX=120)
|
||||
parameter (NMAX=NTMAX*12000) !Total sample intervals (one minute)
|
||||
parameter (NDMAX=NTMAX*1500) !Sample intervals at 1500 Hz rate
|
||||
parameter (NSMAX=6827) !Max length of saved spectra
|
||||
parameter (MAXFFT3=16384)
|
||||
integer, parameter :: NTMAX=120
|
||||
integer, parameter :: NMAX=NTMAX*12000 !Total sample intervals (one minute)
|
||||
integer, parameter :: NDMAX=NTMAX*1500 !Sample intervals at 1500 Hz rate
|
||||
integer, parameter :: NSMAX=6827 !Max length of saved spectra
|
||||
integer, parameter :: MAXFFT3=16384
|
||||
|
@ -1,6 +1,8 @@
|
||||
subroutine decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol, &
|
||||
nzhsym,nagain,ndepth,nmode)
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
include 'constants.f90'
|
||||
real ss(184,NSMAX)
|
||||
character*22 msg
|
||||
|
@ -4,6 +4,8 @@ subroutine decode65a(dd,npts,newdat,nqd,f0,nflip,mode65,ntrials, &
|
||||
|
||||
! Apply AFC corrections to a candidate JT65 signal, then decode it.
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NMAX=60*12000) !Samples per 60 s
|
||||
real*4 dd(NMAX) !92 MB: raw data from Linrad timf2
|
||||
complex cx(NMAX/8) !Data at 1378.125 samples/s
|
||||
|
@ -1,18 +1,17 @@
|
||||
subroutine decoder(ss,id2,params,nfsample)
|
||||
|
||||
use prog_args
|
||||
!$ use omp_lib
|
||||
use prog_args
|
||||
use timer_module, only: timer
|
||||
|
||||
include 'jt9com.f90'
|
||||
include 'timer_common.inc'
|
||||
|
||||
real ss(184,NSMAX)
|
||||
logical baddata
|
||||
integer*2 id2(NTMAX*12000)
|
||||
type(params_block) :: params
|
||||
real*4 dd(NTMAX*12000)
|
||||
common/tracer/limtrace,lu
|
||||
integer onlevel(0:10)
|
||||
common/tracer_priv/level,onlevel
|
||||
!$omp threadprivate(/tracer_priv/)
|
||||
save
|
||||
|
||||
if(mod(params%nranera,2).eq.0) ntrials=10**(params%nranera/2)
|
||||
@ -57,7 +56,7 @@ subroutine decoder(ss,id2,params,nfsample)
|
||||
newdat9=params%newdat
|
||||
|
||||
!$ call omp_set_dynamic(.true.)
|
||||
!$omp parallel sections num_threads(2) copyin(/tracer_priv/) shared(ndecoded) if(.true.) !iif() needed on Mac
|
||||
!$omp parallel sections num_threads(2) copyin(/timer_private/) shared(ndecoded) if(.true.) !iif() needed on Mac
|
||||
|
||||
!$omp section
|
||||
if(params%nmode.eq.65 .or. (params%nmode.eq.(65+9) .and. params%ntxmode.eq.65)) then
|
||||
|
@ -5,6 +5,7 @@ subroutine downsam9(id2,npts8,nsps8,newdat,nspsd,fpk,c2)
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
use FFTW3
|
||||
use timer_module, only: timer
|
||||
|
||||
include 'constants.f90'
|
||||
integer(C_SIZE_T) NMAX1
|
||||
|
@ -15,6 +15,7 @@ subroutine extract(s3,nadd,ntrials,naggressive,ndepth,mycall_12, &
|
||||
|
||||
use prog_args !shm_key, exe_dir, data_dir
|
||||
use packjt
|
||||
use timer_module, only: timer
|
||||
|
||||
real s3(64,63)
|
||||
character decoded*22
|
||||
|
@ -1,5 +1,7 @@
|
||||
real function fchisq65(cx,npts,fsample,nflip,a,ccfmax,dtmax)
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NMAX=60*12000) !Samples per 60 s
|
||||
complex cx(npts)
|
||||
real a(5)
|
||||
|
@ -5,6 +5,7 @@ subroutine filbig(dd,npts,f0,newdat,c4a,n4,sq0)
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
use FFTW3
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NSZ=3413)
|
||||
parameter (NFFT1=672000,NFFT2=77175)
|
||||
|
87
lib/iso_c_utilities.f90
Normal file
87
lib/iso_c_utilities.f90
Normal file
@ -0,0 +1,87 @@
|
||||
module iso_c_utilities
|
||||
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_char, c_f_pointer, c_associated
|
||||
implicit none
|
||||
|
||||
public :: c_to_f_string, c_f_dyn_string
|
||||
|
||||
private
|
||||
|
||||
character(c_char), dimension(1), save, target :: dummy_string = "?"
|
||||
|
||||
interface ! strlen is a standard C function from <string.h>
|
||||
! int strlen(char *string)
|
||||
function strlen (string) result (len) bind (c, name="strlen")
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t
|
||||
implicit none
|
||||
type(c_ptr), value :: string
|
||||
integer(kind=c_size_t) :: len
|
||||
end function strlen
|
||||
|
||||
! void free(void * p)
|
||||
subroutine c_free (p) bind (c, name="free")
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr
|
||||
implicit none
|
||||
type(c_ptr), value :: p
|
||||
end subroutine c_free
|
||||
end interface
|
||||
|
||||
contains
|
||||
|
||||
!
|
||||
! Cast C string pointer to Fortran string pointer
|
||||
!
|
||||
! Warning! - C data must outlive result scope
|
||||
!
|
||||
function c_to_f_string (c_str) result (f_str)
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_f_pointer, c_char
|
||||
implicit none
|
||||
type(c_ptr), intent(in) :: c_str
|
||||
character(kind=c_char, len=:), pointer :: f_str
|
||||
character(kind=c_char), pointer :: arr(:)
|
||||
interface ! strlen is a standard C function from <string.h>
|
||||
! int strlen(char *string)
|
||||
function strlen (string) result (len) bind (c, name="strlen")
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t
|
||||
implicit none
|
||||
type(c_ptr), value :: string
|
||||
integer(kind=c_size_t) :: len
|
||||
end function strlen
|
||||
end interface
|
||||
call c_f_pointer (c_str, arr, [strlen (c_str)])
|
||||
call get_scalar_pointer (size (arr), arr, f_str)
|
||||
end function c_to_f_string
|
||||
|
||||
subroutine get_scalar_pointer (scalar_len, scalar, fptr)
|
||||
! Convert a null-terminated C string into a Fortran character pointer
|
||||
use, intrinsic :: iso_c_binding, only: c_char
|
||||
integer, intent(in) :: scalar_len
|
||||
character(kind=c_char, len=scalar_len), intent(in), target :: scalar(1)
|
||||
character(kind=c_char, len=:), pointer :: fptr
|
||||
fptr => scalar(1)
|
||||
end subroutine get_scalar_pointer
|
||||
|
||||
function c_f_dyn_string (cptr) result (fstr)
|
||||
! Convert a null-terminated malloc'ed C string into a Fortran character array
|
||||
type(c_ptr), intent(in) :: cptr ! The C address
|
||||
character(kind=c_char), allocatable :: fstr(:)
|
||||
character(kind=c_char), pointer :: fptr(:)
|
||||
interface ! strlen is a standard C function from <string.h>
|
||||
! void free(void * p)
|
||||
subroutine c_free (p) bind (c, name="free")
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr
|
||||
implicit none
|
||||
type(c_ptr), value :: p
|
||||
end subroutine c_free
|
||||
end interface
|
||||
if (c_associated (cptr)) then
|
||||
call c_f_pointer (fptr=fptr, cptr=cptr, shape=[strlen(cptr)])
|
||||
else
|
||||
! To avoid segfaults, associate FPTR with a dummy target:
|
||||
fptr => dummy_string
|
||||
end if
|
||||
fstr = fptr
|
||||
call c_free (cptr)
|
||||
end function c_f_dyn_string
|
||||
|
||||
end module
|
@ -2,6 +2,8 @@ subroutine jt4a(dd,jz,nutc,nfqso,ntol0,emedelay,dttol,nagain,ndepth, &
|
||||
nclearave,minsync,minw,nsubmode,mycall,hiscall,hisgrid,nlist0,listutc0)
|
||||
|
||||
use jt4
|
||||
use timer_module, only: timer
|
||||
|
||||
integer listutc0(10)
|
||||
real*4 dd(jz)
|
||||
real*4 dat(30*12000)
|
||||
|
@ -3,6 +3,9 @@ program jt65
|
||||
! Test the JT65 decoder for WSJT-X
|
||||
|
||||
use options
|
||||
use timer_module, only: timer
|
||||
use timer_impl, only: init_timer
|
||||
|
||||
character c
|
||||
logical :: display_help=.false.
|
||||
parameter (NZMAX=60*12000)
|
||||
@ -13,7 +16,6 @@ program jt65
|
||||
character(len=500) optarg
|
||||
character*12 mycall,hiscall
|
||||
character*6 hisgrid
|
||||
common/tracer/limtrace,lu
|
||||
equivalence (lenfile,ihdr(2))
|
||||
type (option) :: long_options(9) = [ &
|
||||
option ('freq',.true.,'f','signal frequency, default FREQ=1270','FREQ'), &
|
||||
@ -27,8 +29,6 @@ program jt65
|
||||
,'experience decoding options (1..n), default FLAGS=0','FLAGS'), &
|
||||
option ('single-signal-mode',.false.,'s','decode at signal frequency only','') ]
|
||||
|
||||
limtrace=0
|
||||
lu=12
|
||||
ntol=10
|
||||
nfqso=1270
|
||||
nagain=0
|
||||
@ -84,7 +84,7 @@ naggressive=1
|
||||
go to 999
|
||||
endif
|
||||
|
||||
open(12,file='timer.out',status='unknown')
|
||||
call init_timer()
|
||||
call timer('jt65 ',0)
|
||||
|
||||
ndecoded=0
|
||||
|
@ -4,6 +4,8 @@ subroutine jt65a(dd0,npts,newdat,nutc,nf1,nf2,nfqso,ntol,nsubmode, &
|
||||
|
||||
! Process dd0() data to find and decode JT65 signals.
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NSZ=3413,NZMAX=60*12000)
|
||||
parameter (NFFT=1000)
|
||||
real dd0(NZMAX)
|
||||
|
11
lib/jt9.f90
11
lib/jt9.f90
@ -7,6 +7,8 @@ program jt9
|
||||
use prog_args
|
||||
use, intrinsic :: iso_c_binding
|
||||
use FFTW3
|
||||
use timer_module, only: timer
|
||||
use timer_impl, only: init_timer, fini_timer
|
||||
|
||||
include 'jt9com.f90'
|
||||
|
||||
@ -61,7 +63,6 @@ program jt9
|
||||
type(dec_data), allocatable :: shared_data
|
||||
character(len=12) :: mycall, hiscall
|
||||
character(len=6) :: mygrid, hisgrid
|
||||
common/tracer/limtrace,lu
|
||||
common/patience/npatience,nthreads
|
||||
common/decstats/ntry65a,ntry65b,n65a,n65b,num9,numfano
|
||||
data npatience/1/,nthreads/1/
|
||||
@ -161,8 +162,6 @@ program jt9
|
||||
endif
|
||||
|
||||
allocate(shared_data)
|
||||
limtrace=0 !We're running jt9 in stand-alone mode
|
||||
lu=12
|
||||
nflatten=0
|
||||
|
||||
do iarg = offset + 1, offset + remain
|
||||
@ -205,7 +204,7 @@ program jt9
|
||||
nhsym0=-999
|
||||
npts=(60*ntrperiod-6)*12000
|
||||
if(iarg .eq. offset + 1) then
|
||||
open(12,file=trim(data_dir)//'/timer.out',status='unknown')
|
||||
call init_timer (trim(data_dir)//'/timer.out')
|
||||
call timer('jt9 ',0)
|
||||
endif
|
||||
|
||||
@ -288,7 +287,9 @@ program jt9
|
||||
print*,infile
|
||||
|
||||
999 continue
|
||||
! Output decoder statistics
|
||||
! Output decoder statistics
|
||||
call fini_timer ()
|
||||
open (unit=12, file=trim(data_dir)//'/timer.out', status='unknown', position='append')
|
||||
write(12,1100) n65a,ntry65a,n65b,ntry65b,numfano,num9
|
||||
1100 format(58('-')/' JT65_1 Tries_1 JT65_2 Tries_2 JT9 Tries'/ &
|
||||
58('-')/6i8)
|
||||
|
@ -1,6 +1,8 @@
|
||||
subroutine jt9a()
|
||||
use, intrinsic :: iso_c_binding, only: c_f_pointer
|
||||
use prog_args
|
||||
use timer_module, only: timer
|
||||
use timer_impl, only: init_timer !, limtrace
|
||||
|
||||
include 'jt9com.f90'
|
||||
|
||||
@ -15,24 +17,19 @@ subroutine jt9a()
|
||||
integer*1 attach_jt9
|
||||
! integer*1 lock_jt9,unlock_jt9
|
||||
integer size_jt9
|
||||
character*80 cwd
|
||||
! Multiple instances:
|
||||
character*80 mykey
|
||||
type(dec_data), pointer :: shared_data
|
||||
type(params_block) :: local_params
|
||||
logical fileExists
|
||||
common/tracer/limtrace,lu
|
||||
|
||||
! Multiple instances:
|
||||
i0 = len(trim(shm_key))
|
||||
|
||||
call getcwd(cwd)
|
||||
open(12,file=trim(data_dir)//'/timer.out',status='unknown')
|
||||
call init_timer (trim(data_dir)//'/timer.out')
|
||||
! open(23,file=trim(data_dir)//'/CALL3.TXT',status='unknown')
|
||||
|
||||
limtrace=0
|
||||
! limtrace=-1 !Disable all calls to timer()
|
||||
lu=12
|
||||
|
||||
! Multiple instances: set the shared memory key before attaching
|
||||
mykey=trim(repeat(shm_key,1))
|
||||
|
@ -2,6 +2,9 @@ subroutine jtmsk(id2,narg,line)
|
||||
|
||||
! Decoder for JTMSK
|
||||
|
||||
use timer_module, only: timer
|
||||
use timer_impl, only: limtrace
|
||||
|
||||
parameter (NMAX=30*12000)
|
||||
parameter (NFFTMAX=512*1024)
|
||||
parameter (NSPM=1404) !Samples per JTMSK message
|
||||
@ -18,10 +21,8 @@ subroutine jtmsk(id2,narg,line)
|
||||
integer narg(0:11) !Arguments passed from calling pgm
|
||||
character*22 msg,msg0 !Decoded message
|
||||
character*80 line(100) !Decodes passed back to caller
|
||||
common/tracer/ limtrace,lu
|
||||
|
||||
limtrace=-1
|
||||
lu=12
|
||||
! Parameters from GUI are in narg():
|
||||
nutc=narg(0) !UTC
|
||||
npts=min(narg(1),NMAX) !Number of samples in id2 (12000 Hz)
|
||||
|
@ -3,6 +3,8 @@ subroutine softsym(id2,npts8,nsps8,newdat,fpk,syncpk,snrdb,xdt, &
|
||||
|
||||
! Compute the soft symbols
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NZ2=1512,NZ3=1360)
|
||||
complex c2(0:NZ2-1)
|
||||
complex c3(0:NZ3-1)
|
||||
|
@ -8,6 +8,8 @@ subroutine subtract65(dd,npts,f0,dt)
|
||||
! Subtract : dd(t) = dd(t) - 2*REAL{cref*cfilt}
|
||||
|
||||
use packjt
|
||||
use timer_module, only: timer
|
||||
|
||||
integer correct(63)
|
||||
parameter (NMAX=60*12000) !Samples per 60 s
|
||||
parameter (NFILT=1600)
|
||||
|
@ -18,6 +18,7 @@ subroutine symspec(shared_data,k,ntrperiod,nsps,ingain,nminw,pxdb,s, &
|
||||
! ss() JT9 symbol spectra at half-symbol steps
|
||||
! savg() average spectra for waterfall display
|
||||
|
||||
use, intrinsic :: iso_c_binding, only: c_int, c_short, c_float, c_char
|
||||
include 'jt9com.f90'
|
||||
|
||||
type(dec_data) :: shared_data
|
||||
|
@ -3,6 +3,8 @@ subroutine sync4(dat,jz,mode4,minw)
|
||||
! Synchronizes JT4 data, finding the best-fit DT and DF.
|
||||
|
||||
use jt4
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NFFTMAX=2520) !Max length of FFTs
|
||||
parameter (NHMAX=NFFTMAX/2) !Max length of power spectra
|
||||
parameter (NSMAX=525) !Max number of half-symbol steps
|
||||
|
@ -5,6 +5,8 @@ subroutine syncmsk(cdat,npts,jpk,ipk,idf,rmax,snr,metric,decoded)
|
||||
use iso_c_binding, only: c_loc,c_size_t
|
||||
use packjt
|
||||
use hashing
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NSPM=1404,NSAVE=2000)
|
||||
complex cdat(npts) !Analytic signal
|
||||
complex cb(66) !Complex waveform for Barker-11 code
|
||||
|
@ -1,5 +1,7 @@
|
||||
program testmsk
|
||||
|
||||
use timer_module, only: timer
|
||||
|
||||
parameter (NMAX=359424)
|
||||
integer*2 id2(NMAX)
|
||||
integer narg(0:11)
|
||||
@ -22,6 +24,7 @@ program testmsk
|
||||
ttotal=0.
|
||||
ndecodes=0
|
||||
|
||||
call init_timer()
|
||||
call timer('testmsk ',0)
|
||||
do ifile=1,nfiles
|
||||
call getarg(ifile,infile)
|
||||
|
200
lib/timer.f90
200
lib/timer.f90
@ -1,200 +0,0 @@
|
||||
subroutine timer(dname,k)
|
||||
|
||||
! Times procedure number n between a call with k=0 (tstart) and with
|
||||
! k=1 (tstop). Accumulates sums of these times in array ut (user time).
|
||||
! Also traces all calls (for debugging purposes) if limtrace.gt.0
|
||||
!
|
||||
! If this is used with OpenMP than the /tracer_priv/ common block must
|
||||
! be copyed into each thread of a thread team by using the copyin()
|
||||
! clause on the !$omp parallel directive that creates the team.
|
||||
|
||||
!$ use omp_lib
|
||||
|
||||
character*8 dname
|
||||
!$ integer tid
|
||||
integer onlevel(0:10)
|
||||
common/tracer/ limtrace,lu
|
||||
common/tracer_priv/level,onlevel
|
||||
|
||||
parameter (MAXCALL=100)
|
||||
character*8 name(MAXCALL),space
|
||||
logical on(MAXCALL)
|
||||
real ut(MAXCALL),ut0(MAXCALL)
|
||||
!$ integer ntid(MAXCALL)
|
||||
integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
|
||||
common/data/nmax,name,on,ut,ut0,dut, &
|
||||
!$ ntid, &
|
||||
ncall,nlevel,nparent,total,sum,sumf,space
|
||||
|
||||
data eps/0.000001/,ntrace/0/
|
||||
data level/0/,nmax/0/,space/' '/
|
||||
data limtrace/0/,lu/-1/
|
||||
|
||||
!$omp threadprivate(/tracer_priv/)
|
||||
|
||||
!$omp critical(timer)
|
||||
if(limtrace.lt.0) go to 999
|
||||
if(lu.lt.1) lu=6
|
||||
if(k.gt.1) go to 40 !Check for "all done" (k>1)
|
||||
onlevel(0)=0
|
||||
|
||||
!$ tid=omp_get_thread_num()
|
||||
do n=1,nmax !Check for existing name/parent[/thread]
|
||||
if(name(n).eq.dname &
|
||||
!$ .and.ntid(n).eq.tid &
|
||||
) then
|
||||
if (on(n)) then
|
||||
if (nparent(n).eq.onlevel(level-1)) goto 20
|
||||
else
|
||||
if (nparent(n).eq.onlevel(level)) goto 20
|
||||
end if
|
||||
end if
|
||||
enddo
|
||||
|
||||
nmax=nmax+1 !This is a new one
|
||||
n=nmax
|
||||
!$ ntid(n)=tid
|
||||
ncall(n)=0
|
||||
on(n)=.false.
|
||||
ut(n)=eps
|
||||
name(n)=dname
|
||||
|
||||
20 if(k.eq.0) then !Get start times (k=0)
|
||||
if(on(n)) then
|
||||
print*,'Error in timer: ',dname,' already on.'
|
||||
end if
|
||||
level=level+1 !Increment the level
|
||||
on(n)=.true.
|
||||
! call system_clock(icount,irate)
|
||||
! ut0(n)=float(icount)/irate
|
||||
! call cpu_time(ut0(n))
|
||||
ut0(n)=secnds(0.0)
|
||||
|
||||
ncall(n)=ncall(n)+1
|
||||
if(ncall(n).gt.1.and.nlevel(n).ne.level) then
|
||||
!recursion is happening
|
||||
!
|
||||
!TODO: somehow need to account for this deeper call at the
|
||||
!shallowest instance in the call chain and this needs to be
|
||||
!done without incrementing anything here other than counters
|
||||
!and timers
|
||||
!
|
||||
nlevel(n)=-1
|
||||
else
|
||||
nlevel(n)=level
|
||||
endif
|
||||
nparent(n)=onlevel(level-1)
|
||||
onlevel(level)=n
|
||||
|
||||
else if(k.eq.1) then !Get stop times and accumulate sums. (k=1)
|
||||
if(on(n)) then
|
||||
on(n)=.false.
|
||||
! call system_clock(icount,irate)
|
||||
! ut1=float(icount)/irate
|
||||
! call cpu_time(ut1)
|
||||
ut1=secnds(0.0)
|
||||
|
||||
ut(n)=ut(n)+ut1-ut0(n)
|
||||
endif
|
||||
level=level-1
|
||||
endif
|
||||
|
||||
ntrace=ntrace+1
|
||||
if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
|
||||
1020 format(i8,': ',a8,3i5)
|
||||
go to 998
|
||||
|
||||
! Write out the timer statistics
|
||||
|
||||
40 write(lu,1040)
|
||||
1040 format(/' Name Time Frac dTime', &
|
||||
' dFrac Calls'/58('-'))
|
||||
|
||||
!$ !walk backwards through the database rolling up thread data by call chain
|
||||
!$ do i=nmax,1,-1
|
||||
!$ do j=1,i-1
|
||||
!$ l=j
|
||||
!$ m=i
|
||||
!$ do while (name(l).eq.name(m))
|
||||
!$ l=nparent(l)
|
||||
!$ m=nparent(m)
|
||||
!$ if (l.eq.0.or.m.eq.0) exit
|
||||
!$ end do
|
||||
!$ if (l.eq.0.and.m.eq.0) then
|
||||
!$ !same call chain so roll up data
|
||||
!$ ncall(j)=ncall(j)+ncall(i)
|
||||
!$ ut(j)=ut(j)+ut(i)
|
||||
!$ do n=1,nmax
|
||||
!$ if (nparent(n).eq.i) nparent(n)=j
|
||||
!$ end do
|
||||
!$ name(i)=space
|
||||
!$ exit
|
||||
!$ end if
|
||||
!$ end do
|
||||
!$ end do
|
||||
|
||||
if(k.gt.100) then
|
||||
ndiv=k-100
|
||||
do i=1,nmax
|
||||
ncall(i)=ncall(i)/ndiv
|
||||
ut(i)=ut(i)/ndiv
|
||||
enddo
|
||||
endif
|
||||
|
||||
total=ut(1)
|
||||
sum=0.
|
||||
sumf=0.
|
||||
call print_root(1)
|
||||
write(lu,1070) sum,sumf
|
||||
1070 format(58('-')/32x,f10.3,f6.2)
|
||||
nmax=0
|
||||
eps=0.000001
|
||||
ntrace=0
|
||||
level=0
|
||||
onlevel(0)=0
|
||||
|
||||
998 flush(lu)
|
||||
|
||||
999 continue
|
||||
|
||||
!$omp end critical(timer)
|
||||
return
|
||||
end subroutine timer
|
||||
|
||||
recursive subroutine print_root(i)
|
||||
character*16 sname
|
||||
|
||||
common/tracer/ limtrace,lu
|
||||
|
||||
parameter (MAXCALL=100)
|
||||
character*8 name(MAXCALL),space
|
||||
logical on(MAXCALL)
|
||||
real ut(MAXCALL),ut0(MAXCALL)
|
||||
!$ integer ntid(MAXCALL)
|
||||
integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
|
||||
common/data/nmax,name,on,ut,ut0,dut, &
|
||||
!$ ntid, &
|
||||
ncall,nlevel,nparent,total,sum,sumf,space
|
||||
|
||||
if (i.le.nmax) then
|
||||
if (name(i).ne.space) then
|
||||
dut=ut(i)
|
||||
do j=i,nmax
|
||||
if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j)
|
||||
enddo
|
||||
if(dut.lt.0.0) dut=0.0
|
||||
utf=ut(i)/total
|
||||
dutf=dut/total
|
||||
sum=sum+dut
|
||||
sumf=sumf+dutf
|
||||
kk=nlevel(i)
|
||||
sname=space(1:kk)//name(i)//space(1:8-kk)
|
||||
write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i)
|
||||
2000 format(a16,2(f10.3,f6.2),i9)
|
||||
do j=i,nmax
|
||||
if(nparent(j).eq.i) call print_root(j)
|
||||
enddo
|
||||
end if
|
||||
end if
|
||||
return
|
||||
end subroutine print_root
|
57
lib/timer_C_wrapper.f90
Normal file
57
lib/timer_C_wrapper.f90
Normal file
@ -0,0 +1,57 @@
|
||||
module timer_c_wrapper
|
||||
use :: iso_c_binding, only: c_ptr
|
||||
use timer_module, only: timer, null_timer
|
||||
implicit none
|
||||
|
||||
!
|
||||
! C interoperable callback setup
|
||||
!
|
||||
abstract interface
|
||||
subroutine c_timer_callback (context, dname, k)
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_char
|
||||
implicit none
|
||||
type(c_ptr), value, intent(in) :: context
|
||||
character(c_char), intent(in) :: dname(*)
|
||||
integer, intent(in), value :: k
|
||||
end subroutine c_timer_callback
|
||||
end interface
|
||||
|
||||
public :: init, fini
|
||||
|
||||
private
|
||||
|
||||
!
|
||||
! the following are singleton items which assumes that any timer
|
||||
! implementation should only assume one global instance, probably a
|
||||
! struct or class object whose address is stored the context below
|
||||
!
|
||||
type(c_ptr), private :: the_context
|
||||
procedure(C_timer_callback), pointer, private :: the_callback
|
||||
|
||||
contains
|
||||
subroutine timer_callback_wrapper (dname, k)
|
||||
use, intrinsic :: iso_c_binding, only: c_null_char
|
||||
implicit none
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
call the_callback (the_context, trim (dname) // c_null_char, k)
|
||||
end subroutine timer_callback_wrapper
|
||||
|
||||
subroutine init (context, callback)
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer
|
||||
use iso_c_utilities, only: c_to_f_string
|
||||
use timer_module, only: timer
|
||||
implicit none
|
||||
type(c_ptr), value, intent(in) :: context
|
||||
type(c_funptr), value, intent(in) :: callback
|
||||
the_context=context
|
||||
call c_f_procpointer (callback, the_callback)
|
||||
timer => timer_callback_wrapper
|
||||
end subroutine init
|
||||
|
||||
subroutine fini ()
|
||||
implicit none
|
||||
timer => null_timer
|
||||
end subroutine fini
|
||||
|
||||
end module timer_c_wrapper
|
3
lib/timer_common.inc
Normal file
3
lib/timer_common.inc
Normal file
@ -0,0 +1,3 @@
|
||||
integer :: level, onlevel(0:10)
|
||||
common/timer_private/ level, onlevel
|
||||
!$omp threadprivate(/timer_private/)
|
262
lib/timer_impl.f90
Normal file
262
lib/timer_impl.f90
Normal file
@ -0,0 +1,262 @@
|
||||
module timer_impl
|
||||
!$ use omp_lib
|
||||
use :: iso_c_binding, only: c_ptr
|
||||
use timer_module, only: timer_callback
|
||||
implicit none
|
||||
|
||||
public :: init_timer, fini_timer
|
||||
integer, public :: limtrace=0
|
||||
|
||||
private
|
||||
|
||||
integer, parameter :: MAXCALL=100
|
||||
integer :: lu=6
|
||||
real :: dut
|
||||
integer :: i,nmax=0,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
|
||||
character(len=8) :: name(MAXCALL),space=' '
|
||||
logical :: on(MAXCALL)
|
||||
real :: total,sum,sumf,ut(MAXCALL),ut0(MAXCALL)
|
||||
!$ integer :: j,l,m,ntid(MAXCALL)
|
||||
|
||||
!
|
||||
! C interoperable callback setup
|
||||
!
|
||||
public :: C_init_timer
|
||||
abstract interface
|
||||
subroutine C_timer_callback (context, dname, k)
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr
|
||||
implicit none
|
||||
type(c_ptr), intent(in) :: context
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
end subroutine C_timer_callback
|
||||
end interface
|
||||
type(c_ptr), private :: the_context
|
||||
procedure(C_timer_callback), pointer, private :: the_C_callback
|
||||
|
||||
contains
|
||||
subroutine timer_callback_wrapper (dname, k)
|
||||
implicit none
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
call the_C_callback (the_context, dname, k)
|
||||
end subroutine timer_callback_wrapper
|
||||
|
||||
subroutine C_init_timer (context, callback) bind(C)
|
||||
use, intrinsic :: iso_c_binding, only: c_ptr, c_funptr, c_f_procpointer
|
||||
use iso_c_utilities, only: c_to_f_string
|
||||
use timer_module, only: timer
|
||||
implicit none
|
||||
type(c_ptr), intent(in) :: context
|
||||
type(c_funptr), intent(in) :: callback
|
||||
the_context=context
|
||||
call c_f_procpointer (callback, the_C_callback)
|
||||
timer => timer_callback_wrapper
|
||||
end subroutine C_init_timer
|
||||
|
||||
!
|
||||
! default Fortran implementation which is thread safe using OpenMP
|
||||
!
|
||||
subroutine default_timer (dname, k)
|
||||
|
||||
! Times procedure number n between a call with k=0 (tstart) and with
|
||||
! k=1 (tstop). Accumulates sums of these times in array ut (user time).
|
||||
! Also traces all calls (for debugging purposes) if limtrace.gt.0
|
||||
!
|
||||
! If this is used with OpenMP than the /timer_private/ common
|
||||
! block must be copyed into each thread of a thread team by using
|
||||
! the copyin() clause on the !$omp parallel directive that creates
|
||||
! the team.
|
||||
|
||||
implicit none
|
||||
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
|
||||
real :: ut1,eps=0.000001
|
||||
integer :: n,ndiv,ntrace=0
|
||||
!$ integer :: tid
|
||||
character(len=8) :: tname
|
||||
include 'timer_common.inc'
|
||||
|
||||
!$omp critical(timer)
|
||||
if(limtrace.lt.0) go to 999
|
||||
if(k.gt.1) go to 40 !Check for "all done" (k>1)
|
||||
onlevel(0)=0
|
||||
|
||||
!$ tid=omp_get_thread_num()
|
||||
do n=1,nmax !Check for existing name/parent[/thread]
|
||||
if(name(n).eq.dname &
|
||||
!$ .and.ntid(n).eq.tid &
|
||||
) then
|
||||
if (on(n)) then
|
||||
if (nparent(n).eq.onlevel(level-1)) goto 20
|
||||
else
|
||||
if (nparent(n).eq.onlevel(level)) goto 20
|
||||
end if
|
||||
end if
|
||||
enddo
|
||||
|
||||
nmax=nmax+1 !This is a new one
|
||||
n=nmax
|
||||
!$ ntid(n)=tid
|
||||
ncall(n)=0
|
||||
on(n)=.false.
|
||||
ut(n)=eps
|
||||
name(n)=dname
|
||||
|
||||
20 if(k.eq.0) then !Get start times (k=0)
|
||||
if(on(n)) then
|
||||
print*,'Error in timer: ',dname,' already on.'
|
||||
end if
|
||||
level=level+1 !Increment the level
|
||||
on(n)=.true.
|
||||
! call system_clock(icount,irate)
|
||||
! ut0(n)=float(icount)/irate
|
||||
! call cpu_time(ut0(n))
|
||||
ut0(n)=secnds(0.0)
|
||||
|
||||
ncall(n)=ncall(n)+1
|
||||
if(ncall(n).gt.1.and.nlevel(n).ne.level) then
|
||||
!recursion is happening
|
||||
!
|
||||
!TODO: somehow need to account for this deeper call at the
|
||||
!shallowest instance in the call chain and this needs to be
|
||||
!done without incrementing anything here other than counters
|
||||
!and timers
|
||||
!
|
||||
nlevel(n)=-1
|
||||
else
|
||||
nlevel(n)=level
|
||||
endif
|
||||
nparent(n)=onlevel(level-1)
|
||||
onlevel(level)=n
|
||||
|
||||
else if(k.eq.1) then !Get stop times and accumulate sums. (k=1)
|
||||
if(on(n)) then
|
||||
on(n)=.false.
|
||||
! call system_clock(icount,irate)
|
||||
! ut1=float(icount)/irate
|
||||
! call cpu_time(ut1)
|
||||
ut1=secnds(0.0)
|
||||
|
||||
ut(n)=ut(n)+ut1-ut0(n)
|
||||
endif
|
||||
level=level-1
|
||||
endif
|
||||
|
||||
ntrace=ntrace+1
|
||||
if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
|
||||
1020 format(i8,': ',a8,3i5)
|
||||
go to 998
|
||||
|
||||
! Write out the timer statistics
|
||||
|
||||
40 write(lu,1040)
|
||||
1040 format(/' Name Time Frac dTime', &
|
||||
' dFrac Calls'/58('-'))
|
||||
|
||||
!$ !walk backwards through the database rolling up thread data by call chain
|
||||
!$ do i=nmax,1,-1
|
||||
!$ do j=1,i-1
|
||||
!$ l=j
|
||||
!$ m=i
|
||||
!$ do while (name(l).eq.name(m))
|
||||
!$ l=nparent(l)
|
||||
!$ m=nparent(m)
|
||||
!$ if (l.eq.0.or.m.eq.0) exit
|
||||
!$ end do
|
||||
!$ if (l.eq.0.and.m.eq.0) then
|
||||
!$ !same call chain so roll up data
|
||||
!$ ncall(j)=ncall(j)+ncall(i)
|
||||
!$ ut(j)=ut(j)+ut(i)
|
||||
!$ do n=1,nmax
|
||||
!$ if (nparent(n).eq.i) nparent(n)=j
|
||||
!$ end do
|
||||
!$ name(i)=space
|
||||
!$ exit
|
||||
!$ end if
|
||||
!$ end do
|
||||
!$ end do
|
||||
|
||||
if(k.gt.100) then
|
||||
ndiv=k-100
|
||||
do i=1,nmax
|
||||
ncall(i)=ncall(i)/ndiv
|
||||
ut(i)=ut(i)/ndiv
|
||||
enddo
|
||||
endif
|
||||
|
||||
total=ut(1)
|
||||
sum=0.
|
||||
sumf=0.
|
||||
call print_root(1)
|
||||
write(lu,1070) sum,sumf
|
||||
1070 format(58('-')/32x,f10.3,f6.2)
|
||||
nmax=0
|
||||
eps=0.000001
|
||||
ntrace=0
|
||||
level=0
|
||||
onlevel(0)=0
|
||||
|
||||
998 flush(lu)
|
||||
|
||||
999 continue
|
||||
|
||||
!$omp end critical(timer)
|
||||
return
|
||||
end subroutine default_timer
|
||||
|
||||
recursive subroutine print_root(i)
|
||||
implicit none
|
||||
integer, intent(in) :: i
|
||||
character(len=16) :: sname
|
||||
real :: dutf, utf
|
||||
integer :: j, kk
|
||||
|
||||
if (i.le.nmax) then
|
||||
if (name(i).ne.space) then
|
||||
dut=ut(i)
|
||||
do j=i,nmax
|
||||
if (name(j).ne.space.and.nparent(j).eq.i) dut=dut-ut(j)
|
||||
enddo
|
||||
if(dut.lt.0.0) dut=0.0
|
||||
utf=ut(i)/total
|
||||
dutf=dut/total
|
||||
sum=sum+dut
|
||||
sumf=sumf+dutf
|
||||
kk=nlevel(i)
|
||||
sname=space(1:kk)//name(i)//space(1:8-kk)
|
||||
write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i)
|
||||
2000 format(a16,2(f10.3,f6.2),i9)
|
||||
do j=i,nmax
|
||||
if(nparent(j).eq.i) call print_root(j)
|
||||
enddo
|
||||
end if
|
||||
end if
|
||||
return
|
||||
end subroutine print_root
|
||||
|
||||
subroutine init_timer (filename)
|
||||
use, intrinsic :: iso_c_binding, only: c_char
|
||||
use timer_module, only: timer
|
||||
implicit none
|
||||
character(len=*), optional, intent(in) :: filename
|
||||
include 'timer_common.inc'
|
||||
data level/0/, onlevel/11 * 0/
|
||||
if (present (filename)) then
|
||||
open (newunit=lu, file=filename, status='unknown')
|
||||
else
|
||||
open (newunit=lu, file='timer.out', status='unknown')
|
||||
end if
|
||||
timer => default_timer
|
||||
end subroutine init_timer
|
||||
|
||||
subroutine fini_timer ()
|
||||
use timer_module, only: timer, null_timer
|
||||
implicit none
|
||||
timer => null_timer
|
||||
close (lu)
|
||||
end subroutine fini_timer
|
||||
|
||||
end module timer_impl
|
23
lib/timer_module.f90
Normal file
23
lib/timer_module.f90
Normal file
@ -0,0 +1,23 @@
|
||||
module timer_module
|
||||
implicit none
|
||||
|
||||
abstract interface
|
||||
subroutine timer_callback (dname, k)
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
end subroutine timer_callback
|
||||
end interface
|
||||
|
||||
public :: null_timer
|
||||
procedure(timer_callback), pointer :: timer => null_timer
|
||||
|
||||
contains
|
||||
!
|
||||
! default Fortran implementation which does nothing
|
||||
!
|
||||
subroutine null_timer (dname, k)
|
||||
implicit none
|
||||
character(len=8), intent(in) :: dname
|
||||
integer, intent(in) :: k
|
||||
end subroutine null_timer
|
||||
end module timer_module
|
@ -8,6 +8,8 @@ subroutine wsjt4(dat,npts,nutc,NClearAve,minsync,ntol,emedelay,dttol, &
|
||||
! range -- analogous to the nqd=1 step in JT9 and JT65.
|
||||
|
||||
use jt4
|
||||
use timer_module, only: timer
|
||||
|
||||
real dat(npts) !Raw data
|
||||
real z(458,65)
|
||||
logical first,prtavg
|
||||
|
Loading…
Reference in New Issue
Block a user