Build an OpenMP version of jt9 where possible

This change introduces the program jt9_omp, which is a testbed for a
multi-threaded version of the jt9 decoder program. The program jt9_omp
should be directly substitutable for jt9, except that JT65 and JT9
decodes are computed in parallel.

Also enable  the OpenMP directives in  decoder.f90 - note this  is not
yet a working multi-threaded decoder and the existing jt9 is still the
correct decoder to be used in WSJT-X.

Increased the available  stack size for jt9_omp.exe as this  is a hard
limit on  Windows and  the default  is not big  enough for  the OpenMP
version of jt9.

Also  Fortran  array  bounds  checking is  now  disabled  for  Release
configuration builds so as to improve performance a little.

git-svn-id: svn+ssh://svn.code.sf.net/p/wsjt/wsjt/branches/wsjtx@4922 ab8295b8-cf94-4d9e-aec4-7959e3be5d79
This commit is contained in:
Bill Somerville 2015-02-02 11:24:20 +00:00
parent 7f70863caa
commit 2903788e4c
2 changed files with 38 additions and 13 deletions

View File

@ -241,6 +241,7 @@ set (wsjt_FSRCS
lib/dcoord.f90 lib/dcoord.f90
lib/decode65a.f90 lib/decode65a.f90
lib/decode65b.f90 lib/decode65b.f90
lib/fftw3mod.f90
lib/jt9fano.f90 lib/jt9fano.f90
lib/decoder.f90 lib/decoder.f90
lib/decjt9.f90 lib/decjt9.f90
@ -256,7 +257,6 @@ set (wsjt_FSRCS
lib/fano232.f90 lib/fano232.f90
lib/fchisq.f90 lib/fchisq.f90
lib/fchisq65.f90 lib/fchisq65.f90
lib/fftw3mod.f90
lib/fil3.f90 lib/fil3.f90
lib/fil4.f90 lib/fil4.f90
lib/fil6521.f90 lib/fil6521.f90
@ -492,7 +492,7 @@ endif (APPLE)
# #
# Fortran setup # Fortran setup
# #
set (General_FFLAGS "-fbounds-check -Wall -Wno-conversion -fno-second-underscore") set (General_FFLAGS "-Wall -Wno-conversion -fno-second-underscore")
# FFLAGS depend on the compiler # FFLAGS depend on the compiler
get_filename_component (Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME) get_filename_component (Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME)
@ -509,7 +509,7 @@ if (Fortran_COMPILER_NAME MATCHES "gfortran.*")
endif (CMAKE_OSX_SYSROOT) endif (CMAKE_OSX_SYSROOT)
set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -funroll-all-loops -fno-f2c ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -funroll-all-loops -fno-f2c ${General_FFLAGS}")
set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fno-f2c ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fbounds-check -fno-f2c ${General_FFLAGS}")
elseif (Fortran_COMPILER_NAME MATCHES "ifort.*") elseif (Fortran_COMPILER_NAME MATCHES "ifort.*")
# ifort (untested) # ifort (untested)
set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -f77rtl ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -f77rtl ${General_FFLAGS}")
@ -517,13 +517,13 @@ elseif (Fortran_COMPILER_NAME MATCHES "ifort.*")
elseif (Fortran_COMPILER_NAME MATCHES "g77") elseif (Fortran_COMPILER_NAME MATCHES "g77")
# g77 # g77
set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -funroll-all-loops -fno-f2c -m32 ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -funroll-all-loops -fno-f2c -m32 ${General_FFLAGS}")
set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fno-f2c -m32 ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fbounds-check -fno-f2c -m32 ${General_FFLAGS}")
else (Fortran_COMPILER_NAME MATCHES "gfortran.*") else (Fortran_COMPILER_NAME MATCHES "gfortran.*")
message ("CMAKE_Fortran_COMPILER full path: " ${CMAKE_Fortran_COMPILER}) message ("CMAKE_Fortran_COMPILER full path: " ${CMAKE_Fortran_COMPILER})
message ("Fortran compiler: " ${Fortran_COMPILER_NAME}) message ("Fortran compiler: " ${Fortran_COMPILER_NAME})
message ("No optimized Fortran compiler flags are known, we just try -O2...") message ("No optimized Fortran compiler flags are known, we just try -O2...")
set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O2 ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -O2 ${General_FFLAGS}")
set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} ${General_FFLAGS}") set (CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fbounds-check ${General_FFLAGS}")
endif (Fortran_COMPILER_NAME MATCHES "gfortran.*") endif (Fortran_COMPILER_NAME MATCHES "gfortran.*")
@ -593,6 +593,12 @@ if (NOT "${QT_LIBRARY_DIR}" STREQUAL "/lib" AND NOT "${QT_LIBRARY_DIR}" STREQUAL
endif () endif ()
#
# OpenMP
#
find_package (OpenMP)
# #
# fftw3 single precsion library # fftw3 single precsion library
# #
@ -698,8 +704,17 @@ endif (WIN32)
# targets # targets
# #
# build a library of package functionality # build a library of package functionality (without and optionally with OpenMP support)
add_library (wsjt STATIC ${wsjt_CSRCS} ${wsjt_CXXSRCS} ${wsjt_FSRCS}) add_library (wsjt STATIC ${wsjt_CSRCS} ${wsjt_CXXSRCS} ${wsjt_FSRCS})
# Optional OpenMP-enabled twin of the wsjt static library. It is built from
# the same source lists as wsjt but compiled with the OpenMP flags reported
# by find_package (OpenMP).
if (OPENMP_FOUND)
  add_library (wsjt_omp STATIC ${wsjt_CSRCS} ${wsjt_CXXSRCS} ${wsjt_FSRCS})
  # The OpenMP build gets its own Fortran module directory - presumably so
  # that its .mod files do not collide with those of the plain wsjt build.
  file (MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/fortran_modules_omp")
  # Property values are quoted so that an empty or multi-word flag string
  # still counts as exactly one value and cannot break the property/value
  # pairing expected by set_target_properties.
  # NOTE(review): the C flags are applied to the Fortran sources as well;
  # confirm that they are valid for the Fortran compiler in use.
  set_target_properties (wsjt_omp
    PROPERTIES
    COMPILE_FLAGS "${OpenMP_C_FLAGS}"
    Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/fortran_modules_omp"
    )
endif (OPENMP_FOUND)
# build a library of package Qt functionality # build a library of package Qt functionality
add_library (wsjt_qt STATIC ${wsjt_qt_CXXSRCS} ${wsjt_qt_GENUISRCS} ${GENAXSRCS}) add_library (wsjt_qt STATIC ${wsjt_qt_CXXSRCS} ${wsjt_qt_GENUISRCS} ${GENAXSRCS})
@ -721,6 +736,18 @@ add_executable (jt9 lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXX
target_link_libraries (jt9 wsjt ${FFTW3_LIBRARIES}) target_link_libraries (jt9 wsjt ${FFTW3_LIBRARIES})
qt5_use_modules (jt9 Core) qt5_use_modules (jt9 Core)
# Optional OpenMP test-bed build of the jt9 decoder. It uses the same sources
# as jt9 but is compiled with the OpenMP flags and linked against wsjt_omp
# instead of wsjt.
if (OPENMP_FOUND)
  add_executable (jt9_omp lib/jt9.f90 lib/jt9a.f90 lib/jt9b.f90 lib/jt9c.f90 ${jt9_CXXSRCS} wsjtx.rc)

  # The OpenMP flags are needed at link time as well as at compile time.
  set (jt9_omp_LINK_FLAGS "${OpenMP_C_FLAGS}")
  if (WIN32)
    # The stack size is a hard limit fixed at link time on Windows and the
    # default is too small for the OpenMP decoder, so request 8 MiB. The
    # --stack linker option is specific to PE targets, hence it is only
    # added for Windows builds.
    # NOTE(review): assumes a GNU-style (MinGW) linker driver - confirm.
    set (jt9_omp_LINK_FLAGS "${jt9_omp_LINK_FLAGS} -Wl,--stack,8388608")
  endif (WIN32)

  # Property values are quoted so that each one is passed as exactly one
  # argument even if a flag variable is empty or contains several words.
  set_target_properties (jt9_omp
    PROPERTIES
    COMPILE_FLAGS "${OpenMP_C_FLAGS}"
    Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/fortran_modules_omp"
    LINK_FLAGS "${jt9_omp_LINK_FLAGS}"
    )
  target_link_libraries (jt9_omp wsjt_omp ${FFTW3_LIBRARIES})
  qt5_use_modules (jt9_omp Core)
endif (OPENMP_FOUND)
# build configuration dialog and transceiver interface test application # build configuration dialog and transceiver interface test application
#add_executable (ConfigTest ${ConfigTest_CXXSRCS} ${ConfigTest_GENUISRCS} wsjtx.rc) #add_executable (ConfigTest ${ConfigTest_CXXSRCS} ${ConfigTest_GENUISRCS} wsjtx.rc)
#target_link_libraries (ConfigTest wsjt wsjt_qt ${hamlib_LIBRARIES}) #target_link_libraries (ConfigTest wsjt wsjt_qt ${hamlib_LIBRARIES})
@ -790,7 +817,7 @@ install (TARGETS wsjtx
BUNDLE DESTINATION . COMPONENT runtime BUNDLE DESTINATION . COMPONENT runtime
) )
install (TARGETS jt9 jt65code jt9code install (TARGETS jt9 jt65code jt9code jt9_omp
RUNTIME DESTINATION ${WSJT_BIN_DESTINATION} COMPONENT runtime RUNTIME DESTINATION ${WSJT_BIN_DESTINATION} COMPONENT runtime
BUNDLE DESTINATION ${WSJT_BIN_DESTINATION} COMPONENT runtime BUNDLE DESTINATION ${WSJT_BIN_DESTINATION} COMPONENT runtime
) )

View File

@ -50,17 +50,16 @@ subroutine decoder(ss,id2)
if(nmode.eq.65) go to 800 if(nmode.eq.65) go to 800
! print*,'A' ! print*,'A'
!!$OMP PARALLEL PRIVATE(id) !$OMP PARALLEL SECTIONS PRIVATE(id)
!!$OMP SECTIONS
!!$OMP SECTION !$OMP SECTION
! print*,'B' ! print*,'B'
call timer('decjt9 ',0) call timer('decjt9 ',0)
call decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol,nzhsym, & call decjt9(ss,id2,nutc,nfqso,newdat,npts8,nfa,nfsplit,nfb,ntol,nzhsym, &
nagain,ndepth,nmode) nagain,ndepth,nmode)
call timer('decjt9 ',1) call timer('decjt9 ',1)
!!$OMP SECTION !$OMP SECTION
if(nmode.ge.65 .and. (.not.done65)) then if(nmode.ge.65 .and. (.not.done65)) then
if(newdat.ne.0) dd(1:npts65)=id2(1:npts65) if(newdat.ne.0) dd(1:npts65)=id2(1:npts65)
nf1=nfa nf1=nfa
@ -71,8 +70,7 @@ subroutine decoder(ss,id2)
call timer('jt65a ',1) call timer('jt65a ',1)
endif endif
!!$OMP END SECTIONS NOWAIT !$OMP END PARALLEL SECTIONS
!!$OMP END PARALLEL
! print*,'D' ! print*,'D'
! JT65 is not yet producing info for nsynced, ndecoded. ! JT65 is not yet producing info for nsynced, ndecoded.