Thread safe lib/timer.f90

Accounts for each traced call per thread and accumulates by rolling up
calls with an identical call chain before printing the statistics. The
print now accounts for function calls  in their call chain so the same
function will be reported more than  once if it is called in different
places.

git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4937 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Bill Somerville 2015-02-05 22:07:19 +00:00
parent 8073acae1f
commit ad94752967
2 changed files with 199 additions and 148 deletions

View File

@ -1,12 +1,7 @@
subroutine decoder(ss,id2) subroutine decoder(ss,id2)
use prog_args use prog_args
!$ use omp_lib
!$ interface
!$ subroutine omp_set_dynamic (flag)
!$ logical flag
!$ end subroutine omp_set_dynamic
!$ end interface
include 'constants.f90' include 'constants.f90'
real ss(184,NSMAX) real ss(184,NSMAX)
@ -17,6 +12,9 @@ subroutine decoder(ss,id2)
common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, & common/npar/nutc,ndiskdat,ntrperiod,nfqso,newdat,npts8,nfa,nfsplit,nfb, &
ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime ntol,kin,nzhsym,nsave,nagain,ndepth,ntxmode,nmode,datetime
common/tracer/limtrace,lu common/tracer/limtrace,lu
integer onlevel(0:10)
common/tracer_priv/level,onlevel
!$omp threadprivate(/tracer_priv/)
save save
nfreqs0=0 nfreqs0=0
@ -43,7 +41,7 @@ subroutine decoder(ss,id2)
ntol65=20 ntol65=20
!$ call omp_set_dynamic(.true.) !$ call omp_set_dynamic(.true.)
!$omp parallel sections num_threads(2) !$omp parallel sections num_threads(2) copyin(/tracer_priv/)
!$omp section !$omp section
if(nmode.eq.65 .or. (nmode.gt.65 .and. ntxmode.eq.65)) then if(nmode.eq.65 .or. (nmode.gt.65 .and. ntxmode.eq.65)) then

View File

@ -1,141 +1,194 @@
subroutine timer(dname,k) subroutine timer(dname,k)
! Times procedure number n between a call with k=0 (tstart) and with ! Times procedure number n between a call with k=0 (tstart) and with
! k=1 (tstop). Accumulates sums of these times in array ut (user time). ! k=1 (tstop). Accumulates sums of these times in array ut (user time).
! Also traces all calls (for debugging purposes) if limtrace.gt.0 ! Also traces all calls (for debugging purposes) if limtrace.gt.0
!
!$ interface ! If this is used with OpenMP than the /tracer_priv/ common block must
!$ integer function omp_get_thread_num() ! be copyed into each thread of a thread team by using the copyin()
!$ end function ! clause on the !$omp parallel directive that creates the team.
!$ end interface
!$ use omp_lib
parameter (MAXCALL=100)
character*8 dname character*8 dname
!$ character thread !$ integer tid
character*11 tname,ename integer onlevel(0:10)
character*16 sname common/tracer/ limtrace,lu
character*11, save :: space common/tracer_priv/level,onlevel
integer, save :: level, nmax
character*11, save :: name(MAXCALL) parameter (MAXCALL=100)
logical, save :: on(MAXCALL) character*8 name(MAXCALL),space
real, save :: ut(MAXCALL),ut0(MAXCALL),dut(MAXCALL) logical on(MAXCALL)
integer, save :: ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL) real ut(MAXCALL),ut0(MAXCALL)
integer, save :: onlevel(0:10) !$ integer ntid(MAXCALL)
common/tracer/ limtrace,lu integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
data eps/0.000001/,ntrace/0/ common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space
data level/0/,nmax/0/,space/' '/
data limtrace/0/,lu/-1/ data eps/0.000001/,ntrace/0/
!$omp threadprivate(level,space,onlevel) data level/0/,nmax/0/,space/' '/
data limtrace/0/,lu/-1/
! ! currently this module is broken if called from multiple threads
! !$ return ! diable if usinh OpenMP !$omp threadprivate(/tracer_priv/)
tname=dname !$omp critical(timer)
!$ write(thread,'(i1)') omp_get_thread_num() if(limtrace.lt.0) go to 999
!$ tname=trim(dname)//'('//thread//')' ! decorate name with thread number if(lu.lt.1) lu=6
if(k.gt.1) go to 40 !Check for "all done" (k>1)
!$omp critical(timer) onlevel(0)=0
if(limtrace.lt.0) go to 999
if(lu.lt.1) lu=6 !$ tid=omp_get_thread_num()
if(k.gt.1) go to 40 !Check for "all done" (k>1) do n=1,nmax !Check for existing name/parent[/thread]
onlevel(0)=0 if(name(n).eq.dname &
!$ .and.ntid(n).eq.tid &
do n=1,nmax !Check for existing name ) then
if(name(n).eq.tname) go to 20 if (on(n)) then
enddo if (nparent(n).eq.onlevel(level-1)) goto 20
else
nmax=nmax+1 !This is a new one if (nparent(n).eq.onlevel(level)) goto 20
n=nmax end if
ncall(n)=0 end if
on(n)=.false. enddo
ut(n)=eps
name(n)=tname nmax=nmax+1 !This is a new one
n=nmax
20 if(k.eq.0) then !Get start times (k=0) !$ ntid(n)=tid
if(on(n)) print*,'Error in timer: ',tname,' already on.' ncall(n)=0
level=level+1 !Increment the level on(n)=.false.
on(n)=.true. ut(n)=eps
! call system_clock(icount,irate) name(n)=dname
! ut0(n)=float(icount)/irate
! call cpu_time(ut0(n)) 20 if(k.eq.0) then !Get start times (k=0)
ut0(n)=secnds(0.0) if(on(n)) then
print*,'Error in timer: ',dname,' already on.'
ncall(n)=ncall(n)+1 end if
if(ncall(n).gt.1.and.nlevel(n).ne.level) then level=level+1 !Increment the level
nlevel(n)=-1 on(n)=.true.
else ! call system_clock(icount,irate)
nlevel(n)=level ! ut0(n)=float(icount)/irate
endif ! call cpu_time(ut0(n))
nparent(n)=onlevel(level-1) ut0(n)=secnds(0.0)
onlevel(level)=n
ncall(n)=ncall(n)+1
else if(k.eq.1) then !Get stop times and accumulate sums. (k=1) if(ncall(n).gt.1.and.nlevel(n).ne.level) then
if(on(n)) then !recursion is happening
on(n)=.false. !
! call system_clock(icount,irate) !TODO: somehow need to account for this deeper call at the
! ut1=float(icount)/irate !shallowest instance in the call chain and this needs to be
! call cpu_time(ut1) !done without incrementing anything here other than counters
ut1=secnds(0.0) !and timers
!
ut(n)=ut(n)+ut1-ut0(n) nlevel(n)=-1
endif else
level=level-1 nlevel(n)=level
endif endif
nparent(n)=onlevel(level-1)
ntrace=ntrace+1 onlevel(level)=n
if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
1020 format(i8,': ',a11,3i5) else if(k.eq.1) then !Get stop times and accumulate sums. (k=1)
go to 998 if(on(n)) then
on(n)=.false.
! Write out the timer statistics ! call system_clock(icount,irate)
! ut1=float(icount)/irate
40 write(lu,1040) ! call cpu_time(ut1)
1040 format(/' name time frac dtime', & ut1=secnds(0.0)
' dfrac calls level parent'/73('-'))
ut(n)=ut(n)+ut1-ut0(n)
if(k.gt.100) then endif
ndiv=k-100 level=level-1
do i=1,nmax endif
ncall(i)=ncall(i)/ndiv
ut(i)=ut(i)/ndiv ntrace=ntrace+1
enddo if(ntrace.lt.limtrace) write(lu,1020) ntrace,tname,k,level,nparent(n)
endif 1020 format(i8,': ',a8,3i5)
go to 998
total=ut(1)
sum=0. ! Write out the timer statistics
sumf=0.
do i=1,nmax 40 write(lu,1040)
dut(i)=ut(i) 1040 format(/' name time frac dtime', &
do j=i,nmax ' dfrac calls'/56('-'))
if(nparent(j).eq.i) dut(i)=dut(i)-ut(j)
enddo !$ !walk backwards through the database rolling up thread data by call chain
if(dut(i).lt.0.0) dut(i)=0.0 !$ do i=nmax,1,-1
utf=ut(i)/total !$ do j=1,i-1
dutf=dut(i)/total !$ l=j
sum=sum+dut(i) !$ m=i
sumf=sumf+dutf !$ do while (name(l).eq.name(m))
kk=nlevel(i) !$ l=nparent(l)
sname=space(1:kk)//name(i)//space(1:11-kk) !$ m=nparent(m)
ename=space !$ if (l.eq.0.or.m.eq.0) exit
if(nparent(i).ge.1) ename=name(nparent(i)) !$ end do
write(lu,1060) float(i),sname,ut(i),utf,dut(i),dutf, & !$ if (l.eq.0.and.m.eq.0) then
ncall(i),nlevel(i),ename !$ !same call chain so roll up data
1060 format(f4.0,a16,2(f10.3,f6.2),i7,i5,2x,a11) !$ ncall(j)=ncall(j)+ncall(i)
enddo !$ ut(j)=ut(j)+ut(i)
!$ name(i)=space
write(lu,1070) sum,sumf !$ exit
1070 format(/36x,f10.2,f6.2) !$ end if
nmax=0 !$ end do
eps=0.000001 !$ end do
ntrace=0
level=0 if(k.gt.100) then
space=' ' ndiv=k-100
onlevel(0)=0 do i=1,nmax
ncall(i)=ncall(i)/ndiv
998 flush(lu) ut(i)=ut(i)/ndiv
enddo
999 continue endif
!$omp end critical(timer) total=ut(1)
return sum=0.
end subroutine timer sumf=0.
i=1
call print_root(i)
write(lu,1070) sum,sumf
1070 format(/32x,f10.3,f6.2)
nmax=0
eps=0.000001
ntrace=0
level=0
onlevel(0)=0
998 flush(lu)
999 continue
!$omp end critical(timer)
return
end subroutine timer
recursive subroutine print_root(i)
character*16 sname
common/tracer/ limtrace,lu
parameter (MAXCALL=100)
character*8 name(MAXCALL),space
logical on(MAXCALL)
real ut(MAXCALL),ut0(MAXCALL)
!$ integer ntid(MAXCALL)
integer nmax,ncall(MAXCALL),nlevel(MAXCALL),nparent(MAXCALL)
common/data/nmax,name,on,ut,ut0,dut,ntid,ncall,nlevel,nparent,total,sum,sumf,space
if (i.le.nmax) then
if (name(i).ne.space) then
dut=ut(i)
do j=i,nmax
if(nparent(j).eq.i) dut=dut-ut(j)
enddo
if(dut.lt.0.0) dut=0.0
utf=ut(i)/total
dutf=dut/total
sum=sum+dut
sumf=sumf+dutf
kk=nlevel(i)
sname=space(1:kk)//name(i)//space(1:8-kk)
write(lu,2000) sname,ut(i),utf,dut,dutf,ncall(i)
do j=i,nmax
if(nparent(j).eq.i) call print_root(j)
enddo
end if
end if
2000 format(a16,2(f10.3,f6.2),i7,i5)
return
end subroutine print_root