mirror of
https://github.com/f4exb/sdrangel.git
synced 2025-08-16 12:42:26 -04:00
Use more precise SIMD flags and detect actual x86_64 SIMD features
This commit is contained in:
parent
dbbbfa12ee
commit
63d6eea066
@ -61,10 +61,6 @@ if (NOT BUILD_DEBIAN)
|
|||||||
find_package(SerialDV)
|
find_package(SerialDV)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|x86")
|
|
||||||
SET(USE_SSE "SSE4_1" CACHE STRING "Use SSE 4.1 SIMD instructions")
|
|
||||||
ENDIF()
|
|
||||||
|
|
||||||
# MacOS Compatibility
|
# MacOS Compatibility
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
find_package(ICONV)
|
find_package(ICONV)
|
||||||
@ -411,17 +407,87 @@ include_directories(
|
|||||||
${OPENGL_INCLUDE_DIR}
|
${OPENGL_INCLUDE_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(USE_SSE MATCHES SSE4_1)
|
##############################################################################
|
||||||
|
|
||||||
|
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
|
||||||
|
message( STATUS "Architecture: ${ARCHITECTURE}" )
|
||||||
|
|
||||||
|
if (${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
|
||||||
|
EXECUTE_PROCESS( COMMAND grep flags /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS )
|
||||||
|
if (${CPU_FLAGS} MATCHES "avx2")
|
||||||
|
set(HAS_AVX2 ON CACHE BOOL "Architecture has AVX2 SIMD enabled")
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mavx2" )
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mavx2" )
|
||||||
|
message(STATUS "Use AVX2 SIMD instructions")
|
||||||
|
add_definitions(-DUSE_AVX2)
|
||||||
|
else()
|
||||||
|
set(HAS_AVX2 OFF CACHE BOOL "Architecture does not have AVX2 SIMD enabled")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (${CPU_FLAGS} MATCHES "sse4_1")
|
||||||
|
set(HAS_SSE4_1 ON CACHE BOOL "Architecture has SSE 4.1 SIMD enabled")
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse4.1" )
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse4.1" )
|
||||||
add_definitions(-DUSE_SSE)
|
message(STATUS "Use SSE 4.1 SIMD instructions")
|
||||||
|
add_definitions(-DUSE_SSE4_1)
|
||||||
elseif(MSVC)
|
elseif(MSVC)
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE4_1" )
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE4_1" )
|
||||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||||
add_definitions(-DUSE_SSE)
|
add_definitions(-DUSE_SSE4_1)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
set(HAS_SSE4_1 OFF CACHE BOOL "Architecture does not have SSE 4.1 SIMD enabled")
|
||||||
|
endif()
|
||||||
|
if (${CPU_FLAGS} MATCHES "ssse3")
|
||||||
|
set(HAS_SSSE3 ON CACHE BOOL "Architecture has SSSE3 SIMD enabled")
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
|
||||||
|
message(STATUS "Use SSSE3 SIMD instructions")
|
||||||
|
add_definitions(-DUSE_SSSE3)
|
||||||
|
elseif(MSVC)
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
|
||||||
|
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||||
|
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||||
|
add_definitions(-DUSE_SSSE3)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
set(HAS_SSSE3 OFF CACHE BOOL "Architecture does not have SSSE3 SIMD enabled")
|
||||||
|
endif()
|
||||||
|
if (${CPU_FLAGS} MATCHES "sse2")
|
||||||
|
set(HAS_SSE2 ON CACHE BOOL "Architecture has SSE2 SIMD enabled")
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse2" )
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse2" )
|
||||||
|
message(STATUS "Use SSE2 SIMD instructions")
|
||||||
|
add_definitions(-DUSE_SSE2)
|
||||||
|
elseif(MSVC)
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSE2" )
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSE2" )
|
||||||
|
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
||||||
|
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
||||||
|
add_definitions(-DUSE_SSE2)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
set(HAS_SSE2 OFF CACHE BOOL "Architecture does not have SSE2 SIMD enabled")
|
||||||
|
endif()
|
||||||
|
elseif (${ARCHITECTURE} MATCHES "armv7l")
|
||||||
|
EXECUTE_PROCESS( COMMAND grep Features /proc/cpuinfo OUTPUT_VARIABLE CPU_FLAGS )
|
||||||
|
if (${CPU_FLAGS} MATCHES "neon")
|
||||||
|
set(HAS_NEON ON CACHE BOOL "Architecture has NEON SIMD enabled")
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
||||||
|
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
|
||||||
|
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
|
||||||
|
message(STATUS "Use NEON SIMD instructions")
|
||||||
|
add_definitions(-DUSE_NEON)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
set(HAS_NEON OFF CACHE BOOL "Architecture does not have NEON SIMD enabled")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -1,39 +1,11 @@
|
|||||||
project(cm256cc)
|
project(cm256cc)
|
||||||
|
|
||||||
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
|
if (HAS_SSSE3)
|
||||||
message( STATUS "CM256cc: Architecture: ${ARCHITECTURE}" )
|
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
|
||||||
|
elseif (HAS_NEON)
|
||||||
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
|
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
|
||||||
SET(USE_SIMD "SSSE3")
|
|
||||||
elseif(${ARCHITECTURE} MATCHES "armv7l")
|
|
||||||
SET(USE_SIMD "NEON")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
message( STATUS "CM256cc: use SIMD: ${USE_SIMD}" )
|
|
||||||
|
|
||||||
if(USE_SIMD MATCHES SSSE3)
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
|
|
||||||
message(STATUS "g++ SSSE3")
|
|
||||||
add_definitions(-DUSE_SIMD)
|
|
||||||
elseif(MSVC)
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
|
|
||||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
|
||||||
message(STATUS "MSVC SSSE3")
|
|
||||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
|
||||||
add_definitions(-DUSE_SIMD)
|
|
||||||
endif()
|
|
||||||
elseif(USE_SIMD MATCHES NEON)
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
|
|
||||||
message(STATUS "g++ NEON")
|
|
||||||
add_definitions(-DUSE_NEON)
|
|
||||||
endif()
|
|
||||||
else()
|
else()
|
||||||
message(STATUS "CM256cc: Unsupported architecture")
|
message(STATUS "SDRdaemonFEC: Unsupported architecture")
|
||||||
return()
|
return()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ CONFIG(MINGW64):LIBCM256CCSRC = "D:\softs\cm256cc"
|
|||||||
INCLUDEPATH += $$LIBCM256CCSRC
|
INCLUDEPATH += $$LIBCM256CCSRC
|
||||||
|
|
||||||
DEFINES += __WINDOWS__=1
|
DEFINES += __WINDOWS__=1
|
||||||
DEFINES += USE_SIMD=1
|
DEFINES += USE_SSSE3=1
|
||||||
QMAKE_CXXFLAGS += -msse4.1
|
QMAKE_CXXFLAGS += -msse4.1
|
||||||
|
|
||||||
CONFIG(Release):build_subdir = release
|
CONFIG(Release):build_subdir = release
|
||||||
|
@ -13,7 +13,7 @@ TARGET = modam
|
|||||||
INCLUDEPATH += $$PWD
|
INCLUDEPATH += $$PWD
|
||||||
INCLUDEPATH += ../../../sdrbase
|
INCLUDEPATH += ../../../sdrbase
|
||||||
|
|
||||||
DEFINES += USE_SSE=1
|
DEFINES += USE_SSE4_1=1
|
||||||
QMAKE_CXXFLAGS += -msse4.1
|
QMAKE_CXXFLAGS += -msse4.1
|
||||||
|
|
||||||
CONFIG(Release):build_subdir = release
|
CONFIG(Release):build_subdir = release
|
||||||
|
@ -2,40 +2,10 @@ project(sdrdaemonfec)
|
|||||||
|
|
||||||
find_package(LibNANOMSG)
|
find_package(LibNANOMSG)
|
||||||
|
|
||||||
EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCHITECTURE )
|
if (HAS_SSSE3)
|
||||||
message( STATUS "SDRdaemonFEC: Architecture: ${ARCHITECTURE}" )
|
message(STATUS "SDRdaemonFEC: use SSSE3 SIMD" )
|
||||||
|
elseif (HAS_NEON)
|
||||||
if(${ARCHITECTURE} MATCHES "x86_64|AMD64|x86")
|
message(STATUS "SDRdaemonFEC: use Neon SIMD" )
|
||||||
SET(USE_SIMD "SSSE3")
|
|
||||||
elseif(${ARCHITECTURE} MATCHES "armv7l")
|
|
||||||
SET(USE_SIMD "NEON")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
message( STATUS "SDRdaemonFEC: use SIMD: ${USE_SIMD}" )
|
|
||||||
|
|
||||||
if(USE_SIMD MATCHES SSSE3)
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mssse3" )
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mssse3" )
|
|
||||||
message(STATUS "SDRdaemonFEC: g++ SSSE3")
|
|
||||||
add_definitions(-DUSE_SIMD)
|
|
||||||
add_definitions(-DUSE_SSE)
|
|
||||||
elseif(MSVC)
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /arch:SSSE3" )
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oi /GL /Ot /Ox /arch:SSSE3" )
|
|
||||||
set( CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG" )
|
|
||||||
message(STATUS "SDRdaemonFEC: MSVC SSSE3")
|
|
||||||
add_definitions (/D "_CRT_SECURE_NO_WARNINGS")
|
|
||||||
add_definitions(-DUSE_SIMD)
|
|
||||||
add_definitions(-DUSE_SSE)
|
|
||||||
endif()
|
|
||||||
elseif(USE_SIMD MATCHES NEON)
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX)
|
|
||||||
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon" )
|
|
||||||
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon" )
|
|
||||||
message(STATUS "SDRdaemonFEC: g++ NEON")
|
|
||||||
add_definitions(-DUSE_NEON)
|
|
||||||
endif()
|
|
||||||
else()
|
else()
|
||||||
message(STATUS "SDRdaemonFEC: Unsupported architecture")
|
message(STATUS "SDRdaemonFEC: Unsupported architecture")
|
||||||
return()
|
return()
|
||||||
|
@ -23,9 +23,8 @@ INCLUDEPATH += ../../../lz4
|
|||||||
INCLUDEPATH += $$LIBNANOMSGSRC/src
|
INCLUDEPATH += $$LIBNANOMSGSRC/src
|
||||||
INCLUDEPATH += $$LIBCM256CCSRC
|
INCLUDEPATH += $$LIBCM256CCSRC
|
||||||
|
|
||||||
DEFINES += USE_SIMD=1
|
DEFINES += USE_SSSE3=1
|
||||||
DEFINES += USE_SSE=1
|
QMAKE_CXXFLAGS += -mssse3
|
||||||
QMAKE_CXXFLAGS += -msse4.1
|
|
||||||
|
|
||||||
CONFIG(Release):build_subdir = release
|
CONFIG(Release):build_subdir = release
|
||||||
CONFIG(Debug):build_subdir = debug
|
CONFIG(Debug):build_subdir = debug
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
#define INCLUDE_GPL_DSP_DECIMATORS_H_
|
#define INCLUDE_GPL_DSP_DECIMATORS_H_
|
||||||
|
|
||||||
#include "dsp/dsptypes.h"
|
#include "dsp/dsptypes.h"
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
#include "dsp/inthalfbandfiltereo1.h"
|
#include "dsp/inthalfbandfiltereo1.h"
|
||||||
#else
|
#else
|
||||||
#include "dsp/inthalfbandfilterdb.h"
|
#include "dsp/inthalfbandfilterdb.h"
|
||||||
@ -124,7 +124,7 @@ public:
|
|||||||
void decimate64_cen(SampleVector::iterator* it, const T* buf, qint32 len);
|
void decimate64_cen(SampleVector::iterator* it, const T* buf, qint32 len);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator2; // 1st stages
|
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator2; // 1st stages
|
||||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator4; // 2nd stages
|
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator4; // 2nd stages
|
||||||
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator8; // 3rd stages
|
IntHalfbandFilterEO1<DECIMATORS_HB_FILTER_ORDER> m_decimator8; // 3rd stages
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
#ifndef INCLUDE_INTERPOLATOR_H
|
#ifndef INCLUDE_INTERPOLATOR_H
|
||||||
#define INCLUDE_INTERPOLATOR_H
|
#define INCLUDE_INTERPOLATOR_H
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE2
|
||||||
#include <immintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#include "dsp/dsptypes.h"
|
#include "dsp/dsptypes.h"
|
||||||
#include "util/export.h"
|
#include "util/export.h"
|
||||||
@ -125,7 +125,7 @@ private:
|
|||||||
{
|
{
|
||||||
if (phase < 0)
|
if (phase < 0)
|
||||||
phase = 0;
|
phase = 0;
|
||||||
#if USE_SSE
|
#if USE_SSE2
|
||||||
// beware of the ringbuffer
|
// beware of the ringbuffer
|
||||||
if(m_ptr == 0) {
|
if(m_ptr == 0) {
|
||||||
// only one straight block
|
// only one straight block
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
||||||
#define SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
#define SDRBASE_DSP_INTHALFBANDFILTEREO_H_
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -464,7 +464,7 @@ protected:
|
|||||||
qint32 iAcc = 0;
|
qint32 iAcc = 0;
|
||||||
qint32 qAcc = 0;
|
qint32 qAcc = 0;
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
//#warning "IntHalfbandFiler SIMD"
|
//#warning "IntHalfbandFiler SIMD"
|
||||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||||
__m128i sumI = _mm_setzero_si128();
|
__m128i sumI = _mm_setzero_si128();
|
||||||
@ -551,7 +551,7 @@ protected:
|
|||||||
qint32 iAcc = 0;
|
qint32 iAcc = 0;
|
||||||
qint32 qAcc = 0;
|
qint32 qAcc = 0;
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||||
__m128i sumI = _mm_setzero_si128();
|
__m128i sumI = _mm_setzero_si128();
|
||||||
__m128i sumQ = _mm_setzero_si128();
|
__m128i sumQ = _mm_setzero_si128();
|
||||||
|
@ -22,10 +22,14 @@
|
|||||||
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
#ifndef SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
||||||
#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
#define SDRBASE_DSP_INTHALFBANDFILTEREO2_H_
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "dsp/dsptypes.h"
|
#include "dsp/dsptypes.h"
|
||||||
#include "dsp/hbfiltertraits.h"
|
#include "dsp/hbfiltertraits.h"
|
||||||
@ -484,8 +488,7 @@ protected:
|
|||||||
qint32 iAcc = 0;
|
qint32 iAcc = 0;
|
||||||
qint32 qAcc = 0;
|
qint32 qAcc = 0;
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#if defined(USE_SSE4_1)
|
||||||
//#warning "IntHalfbandFiler SIMD"
|
|
||||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||||
__m128i sumI = _mm_setzero_si128();
|
__m128i sumI = _mm_setzero_si128();
|
||||||
__m128i sumQ = _mm_setzero_si128();
|
__m128i sumQ = _mm_setzero_si128();
|
||||||
@ -528,6 +531,47 @@ protected:
|
|||||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
||||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
||||||
qAcc = _mm_cvtsi128_si32(sumQ);
|
qAcc = _mm_cvtsi128_si32(sumQ);
|
||||||
|
#elif defined(USE_NEON)
|
||||||
|
int32x4_t sumI = vdupq_n_s32(0);
|
||||||
|
int32x4_t sumQ = vdupq_n_s32(0);
|
||||||
|
int32x4_t sa, sb, sh;
|
||||||
|
|
||||||
|
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
|
||||||
|
{
|
||||||
|
sh = vld1_s32(&h[4*i]);
|
||||||
|
|
||||||
|
if ((m_ptrB % 2) == 0)
|
||||||
|
{
|
||||||
|
sa = vld1q_s32(&(m_evenA[0][a]));
|
||||||
|
sb = vld1q_s32(&(m_evenB[0][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
|
||||||
|
sa = vld1q_s32(&(m_evenA[1][a]));
|
||||||
|
sb = vld1q_s32(&(m_evenB[1][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sa = vld1q_s32(&(m_oddA[0][a]));
|
||||||
|
sb = vld1q_s32(&(m_oddB[0][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
|
||||||
|
sa = vld1q_s32(&(m_oddA[1][a]));
|
||||||
|
sb = vld1q_s32(&(m_oddB[1][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
}
|
||||||
|
|
||||||
|
a += 4;
|
||||||
|
b += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
|
||||||
|
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
|
||||||
|
iAcc = vget_lane_s32(sumI2, 0);
|
||||||
|
|
||||||
|
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
|
||||||
|
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
|
||||||
|
qAcc = vget_lane_s32(sumQ2, 0);
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
||||||
{
|
{
|
||||||
@ -570,7 +614,7 @@ protected:
|
|||||||
qint32 iAcc = 0;
|
qint32 iAcc = 0;
|
||||||
qint32 qAcc = 0;
|
qint32 qAcc = 0;
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#if defined(USE_SSE4_1)
|
||||||
//#warning "IntHalfbandFiler SIMD"
|
//#warning "IntHalfbandFiler SIMD"
|
||||||
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
const __m128i* h = (const __m128i*) HBFIRFilterTraits<HBFilterOrder>::hbCoeffs;
|
||||||
__m128i sumI = _mm_setzero_si128();
|
__m128i sumI = _mm_setzero_si128();
|
||||||
@ -614,6 +658,47 @@ protected:
|
|||||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 8));
|
||||||
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
sumQ = _mm_add_epi32(sumQ, _mm_srli_si128(sumQ, 4));
|
||||||
qAcc = _mm_cvtsi128_si32(sumQ);
|
qAcc = _mm_cvtsi128_si32(sumQ);
|
||||||
|
#elif defined(USE_NEON)
|
||||||
|
int32x4_t sumI = vdupq_n_s32(0);
|
||||||
|
int32x4_t sumQ = vdupq_n_s32(0);
|
||||||
|
int32x4_t sa, sb, sh;
|
||||||
|
|
||||||
|
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 16; i++)
|
||||||
|
{
|
||||||
|
sh = vld1_s32(&h[4*i]);
|
||||||
|
|
||||||
|
if ((m_ptrB % 2) == 0)
|
||||||
|
{
|
||||||
|
sa = vld1q_s32(&(m_evenA[0][a]));
|
||||||
|
sb = vld1q_s32(&(m_evenB[0][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
|
||||||
|
sa = vld1q_s32(&(m_evenA[1][a]));
|
||||||
|
sb = vld1q_s32(&(m_evenB[1][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sa = vld1q_s32(&(m_oddA[0][a]));
|
||||||
|
sb = vld1q_s32(&(m_oddB[0][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
|
||||||
|
sa = vld1q_s32(&(m_oddA[1][a]));
|
||||||
|
sb = vld1q_s32(&(m_oddB[1][b]));
|
||||||
|
sumI = vmlaq_s32(sumI, vaddq_s32(sa, sb), sh);
|
||||||
|
}
|
||||||
|
|
||||||
|
a += 4;
|
||||||
|
b += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32x2_t sumI1 = vpadd_s32(vget_high_s32(sumI), vget_low_s32(sumI));
|
||||||
|
int32x2_t sumI2 = vpadd_s32(sumI1, sumI1);
|
||||||
|
iAcc = vget_lane_s32(sumI2, 0);
|
||||||
|
|
||||||
|
int32x2_t sumQ1 = vpadd_s32(vget_high_s32(sumQ), vget_low_s32(sumQ));
|
||||||
|
int32x2_t sumQ2 = vpadd_s32(sumQ1, sumQ1);
|
||||||
|
qAcc = vget_lane_s32(sumQ2, 0);
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
for (int i = 0; i < HBFIRFilterTraits<HBFilterOrder>::hbOrder / 4; i++)
|
||||||
{
|
{
|
||||||
|
@ -201,7 +201,7 @@ void UpChannelizer::applyConfiguration()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
UpChannelizer::FilterStage::FilterStage(Mode mode) :
|
UpChannelizer::FilterStage::FilterStage(Mode mode) :
|
||||||
m_filter(new IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>),
|
m_filter(new IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>),
|
||||||
m_workFunction(0)
|
m_workFunction(0)
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#include <QMutex>
|
#include <QMutex>
|
||||||
#include "util/export.h"
|
#include "util/export.h"
|
||||||
#include "util/message.h"
|
#include "util/message.h"
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
#include "dsp/inthalfbandfiltereo2.h"
|
#include "dsp/inthalfbandfiltereo2.h"
|
||||||
#else
|
#else
|
||||||
#include "dsp/inthalfbandfilterdb.h"
|
#include "dsp/inthalfbandfilterdb.h"
|
||||||
@ -73,7 +73,7 @@ protected:
|
|||||||
ModeUpperHalf
|
ModeUpperHalf
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE4_1
|
||||||
typedef bool (IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>::*WorkFunction)(Sample* sIn, Sample *sOut);
|
typedef bool (IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>::*WorkFunction)(Sample* sIn, Sample *sOut);
|
||||||
IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>* m_filter;
|
IntHalfbandFilterEO2<UPCHANNELIZER_HB_FILTER_ORDER>* m_filter;
|
||||||
#else
|
#else
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
|
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
|
||||||
///////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#ifdef USE_SSE
|
#ifdef USE_SSE2
|
||||||
#include <immintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <QMouseEvent>
|
#include <QMouseEvent>
|
||||||
@ -381,19 +381,7 @@ void GLSpectrum::updateHistogram(const std::vector<Real>& spectrum)
|
|||||||
|
|
||||||
m_currentSpectrum = &spectrum; // Store spectrum for current spectrum line display
|
m_currentSpectrum = &spectrum; // Store spectrum for current spectrum line display
|
||||||
|
|
||||||
#ifndef USE_SSE
|
#ifdef USE_SSE2
|
||||||
for(int i = 0; i < m_fftSize; i++) {
|
|
||||||
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
|
|
||||||
|
|
||||||
if ((v >= 0) && (v <= 99)) {
|
|
||||||
b = m_histogram + i * 100 + v;
|
|
||||||
if(*b < 220)
|
|
||||||
*b += m_histogramStroke; // was 4
|
|
||||||
else if(*b < 239)
|
|
||||||
*b += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if(m_decay >= 0) { // normal
|
if(m_decay >= 0) { // normal
|
||||||
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
const __m128 refl = {m_referenceLevel, m_referenceLevel, m_referenceLevel, m_referenceLevel};
|
||||||
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
const __m128 power = {m_powerRange, m_powerRange, m_powerRange, m_powerRange};
|
||||||
@ -458,6 +446,18 @@ void GLSpectrum::updateHistogram(const std::vector<Real>& spectrum)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
for(int i = 0; i < m_fftSize; i++) {
|
||||||
|
int v = (int)((spectrum[i] - m_referenceLevel) * 100.0 / m_powerRange + 100.0);
|
||||||
|
|
||||||
|
if ((v >= 0) && (v <= 99)) {
|
||||||
|
b = m_histogram + i * 100 + v;
|
||||||
|
if(*b < 220)
|
||||||
|
*b += m_histogramStroke; // was 4
|
||||||
|
else if(*b < 239)
|
||||||
|
*b += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,8 +14,8 @@ INCLUDEPATH += $$PWD
|
|||||||
DEFINES += USE_KISSFFT=1
|
DEFINES += USE_KISSFFT=1
|
||||||
DEFINES += __WINDOWS__=1
|
DEFINES += __WINDOWS__=1
|
||||||
DEFINES += DSD_USE_SERIALDV=1
|
DEFINES += DSD_USE_SERIALDV=1
|
||||||
DEFINES += USE_SSE=1
|
DEFINES += USE_SSE2=1
|
||||||
QMAKE_CXXFLAGS += -msse4.1
|
QMAKE_CXXFLAGS += -msse2
|
||||||
|
|
||||||
CONFIG(Release):build_subdir = release
|
CONFIG(Release):build_subdir = release
|
||||||
CONFIG(Debug):build_subdir = debug
|
CONFIG(Debug):build_subdir = debug
|
||||||
|
Loading…
x
Reference in New Issue
Block a user